In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found under the Kaggle input directory
for folder, _subfolders, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(folder, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
from sklearn.preprocessing import StandardScaler, MaxAbsScaler
from catboost import CatBoostRegressor
from sklearn.metrics import mean_squared_error
from sklearn import linear_model
import matplotlib.pyplot as plt # plotting
import seaborn as sns
%matplotlib inline

In [None]:
# Location of the auction dataset inside the Kaggle input directory
dataset_path = '/kaggle/input/real-time-advertisers-auction/Dataset.csv'
df = pd.read_csv(dataset_path)

In [None]:
# Parse the raw `date` column once and derive both columns with the vectorized
# `.dt` accessor instead of calling a Python lambda for every row.
parsed_dates = pd.to_datetime(df['date'])
df['date'] = parsed_dates.dt.date        # plain datetime.date objects (time part dropped)
df['weekday'] = parsed_dates.dt.weekday  # Monday == 0 ... Sunday == 6

In [None]:
# Number of distinct values in each column (last expression, so the notebook displays it)
df.nunique()

In [None]:
def weird_division(n, d):
    """Return ``n / d``, or 0 when the divisor ``d`` is falsy (e.g. 0 or None)."""
    if not d:
        return 0
    return n / d

# CPM = total_revenue * 100 / measurable_impressions * 1000, and 0 for rows
# without measurable impressions. Computed column-wise instead of row by row
# through `df.apply(..., axis=1)`.
measurable_impressions = df['measurable_impressions']
df['CPM'] = (
    (df['total_revenue'] * 100)
    .div(measurable_impressions)
    .where(measurable_impressions != 0, 0)  # division by zero -> 0
    * 1000
)

In [None]:
# Remove the columns that are no longer needed; `total_revenue` goes as well
# now that CPM has been derived from it.
# `order_id` and `line_item_type_id` are kept: they are used as features further down.
df.drop(columns=['integration_type_id', 'revenue_share_percent', 'total_revenue'],
        inplace=True)

In [None]:
# Share of measurable impressions that were viewable, and 0 for rows without
# measurable impressions. Computed column-wise instead of row by row through
# `df.apply(..., axis=1)`.
measurable_impressions = df['measurable_impressions']
df['View/measurable'] = (
    df['viewable_impressions']
    .div(measurable_impressions)
    .where(measurable_impressions != 0, 0)  # division by zero -> 0
)

# Split into train, validation and test sets

In [None]:
# Chronological split: train up to 2019-06-17 (inclusive), validation from
# 2019-06-18 to 2019-06-21 (inclusive), test after 2019-06-21.
#
# `df['date']` holds plain `datetime.date` objects at this point. Comparing those
# directly with a `pd.Timestamp` raises TypeError on pandas >= 2.0, so the
# comparison is done on a datetime64 view of the column instead.
train_end = pd.to_datetime('2019-06-17')
validation_end = pd.to_datetime('2019-06-21')
row_dates = pd.to_datetime(df['date'])

train_df = df[row_dates <= train_end].copy()
validation_df = df[(row_dates > train_end) & (row_dates <= validation_end)].copy()
test_df = df[row_dates > validation_end].copy()

# Store the date as text in every split
for split_df in (train_df, validation_df, test_df):
    split_df['date'] = split_df['date'].astype('str')

In [None]:
def _drop_cpm_outliers(frame, upper_quantile=0.95):
    """Keep rows whose CPM is non-negative and below the split's own upper quantile.

    Negative CPM rows are removed first; the quantile cut-off is then computed on
    the remaining rows. Returns a new frame with a fresh 0..n-1 index.
    """
    non_negative = frame[frame['CPM'] >= 0]
    cutoff = non_negative['CPM'].quantile(upper_quantile)
    return non_negative[non_negative['CPM'] < cutoff].reset_index(drop=True)


# Each split is filtered independently, using its own 95th percentile
train_df = _drop_cpm_outliers(train_df)
validation_df = _drop_cpm_outliers(validation_df)
test_df = _drop_cpm_outliers(test_df)

In [None]:
# Fit the scaler on the training split only; `transform_data` later applies it
# to every split.
numeric_columns = ['viewable_impressions', 'measurable_impressions',
                   'View/measurable', 'total_impressions']
df_numericals = train_df[numeric_columns]

scaler = MaxAbsScaler().fit(df_numericals)

In [None]:
def transform_data(df, scaler):
    """Scale the numeric impression columns of `df` and drop `ad_type_id`.

    The frame is modified in place and the same object is returned.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain 'viewable_impressions', 'measurable_impressions',
        'View/measurable', 'total_impressions' and 'ad_type_id'.
    scaler : object
        Already-fitted scaler exposing `transform`; it receives the four
        numeric columns in the order listed above.

    Returns
    -------
    pandas.DataFrame
        `df` itself, with the four numeric columns replaced by their scaled
        values and without the 'ad_type_id' column.

    Raises
    ------
    KeyError
        If any of the required columns is missing.
    """
    numeric_columns = ['viewable_impressions', 'measurable_impressions',
                       'View/measurable', 'total_impressions']

    # Reuse df's own index for the scaled values. With a default RangeIndex the
    # column assignment below would align label-by-label and silently produce
    # NaN whenever `df` is not indexed 0..n-1.
    scaled_data = pd.DataFrame(scaler.transform(df[numeric_columns]),
                               columns=numeric_columns, index=df.index)
    for column in numeric_columns:
        df[column] = scaled_data[column]

    df.drop('ad_type_id', axis=1, inplace=True)
    return df

In [None]:
# Apply the scaler fitted on the training data to all three splits, in order
train_df, validation_df, test_df = (
    transform_data(split, scaler)
    for split in (train_df, validation_df, test_df)
)

In [None]:
# Separate the CPM target from the remaining columns for every split
train_data, train_value = train_df.drop(columns='CPM'), train_df['CPM']
validation_data, validation_value = validation_df.drop(columns='CPM'), validation_df['CPM']
test_data, test_value = test_df.drop(columns='CPM'), test_df['CPM']

In [None]:
# Columns passed to the model, in this order.
# 'View/measurable' is left out of the feature set.
cols = [
    'weekday', 'site_id', 'geo_id', 'device_category_id', 'advertiser_id',
    'os_id', 'monetization_channel_id', 'ad_unit_id',
    'total_impressions', 'viewable_impressions', 'measurable_impressions',
    'order_id', 'line_item_type_id',
]
train_data, validation_data, test_data = (
    split[cols] for split in (train_data, validation_data, test_data)
)

In [None]:
id_like_columns = [
    'weekday', 'site_id', 'geo_id', 'device_category_id', 'advertiser_id',
    'os_id', 'monetization_channel_id', 'ad_unit_id', 'order_id',
    'line_item_type_id',
]
# Total number of distinct values across the columns above, plus 3
# (presumably one for each of the three numeric impression features — TODO confirm).
train_data[id_like_columns].nunique().sum() + 3

In [None]:
# Columns CatBoost must treat as categorical rather than numeric
cat_features = [
    'site_id', 'geo_id', 'device_category_id', 'advertiser_id', 'os_id',
    'monetization_channel_id', 'ad_unit_id', 'weekday', 'order_id',
    'line_item_type_id',
]

# Author's note: with these explicit parameters CatBoost trains faster than with
# its defaults, which is handy for testing, and the result is good enough.
# The default configuration would be: CatBoostRegressor(cat_features=cat_features)
model = CatBoostRegressor(
    iterations=300,
    learning_rate=0.3,
    max_depth=9,
    cat_features=cat_features,
)


In [None]:
# Train the regressor on the training split (features -> CPM)
model.fit(train_data, train_value)

In [None]:
# Mean squared error on the training split
prediction = model.predict(train_data)
mean_squared_error(y_true=train_value, y_pred=prediction)

In [None]:
# Mean squared error on the validation split
prediction = model.predict(validation_data)
mean_squared_error(y_true=validation_value, y_pred=prediction)

In [None]:
# Mean squared error on the test split
prediction = model.predict(test_data)
mean_squared_error(y_true=test_value, y_pred=prediction)

🔥🔥🔥

In [None]:
# Key phrase whose letters give the per-position shifts
pass_phrase = "цезарь"

# Cipher alphabet: the code points from 'а' to 'я' inclusive, followed by four
# extra symbols
first_key, last_key = ord('а'), ord('я')
alphabet = list(map(chr, range(first_key, last_key + 1))) + ['!', ' ', ':', ')']

In [None]:
def encode_phrase(alphabet, pass_phrase, my_phrase):
    """Encode `my_phrase` by shifting each symbol by the matching key symbol.

    Every symbol of `my_phrase` is lower-cased, looked up in `alphabet` and moved
    forward by the alphabet position of the current `pass_phrase` symbol,
    wrapping around the end of the alphabet. The pass phrase is repeated
    cyclically. The result is always lower-case.
    """
    pieces = []
    cursor = 0  # position inside pass_phrase
    for symbol in my_phrase:
        shift = alphabet.index(pass_phrase[cursor])
        position = alphabet.index(symbol.lower())
        pieces.append(alphabet[(shift + position) % len(alphabet)])
        cursor = (cursor + 1) % len(pass_phrase)
    return ''.join(pieces)

def decode_phrase(alphabet, pass_phrase, my_phrase):
    """Decode `my_phrase` that was encoded with `pass_phrase` over `alphabet`.

    Every symbol of `my_phrase` is lower-cased, looked up in `alphabet` and moved
    backward by the alphabet position of the current `pass_phrase` symbol,
    wrapping around the start of the alphabet. The pass phrase is repeated
    cyclically.

    Returns the decoded text with its first character upper-cased; an empty
    `my_phrase` yields an empty string.

    Raises ValueError if a symbol is not present in `alphabet`.
    """
    pieces = []
    cursor = 0  # position inside pass_phrase
    for symbol in my_phrase:
        shift = alphabet.index(pass_phrase[cursor])
        position = alphabet.index(symbol.lower())
        pieces.append(alphabet[(position - shift) % len(alphabet)])
        cursor = (cursor + 1) % len(pass_phrase)
    decoded_phrase = ''.join(pieces)
    # Slicing with [:1] (instead of indexing [0]) keeps an empty phrase from
    # raising IndexError.
    return decoded_phrase[:1].upper() + decoded_phrase[1:]

In [None]:
# Decode the stored message with the pass phrase and show it
encoded_phrase = "зуюун)цьмтнъх"
decoded_message = decode_phrase(alphabet, pass_phrase, encoded_phrase)
print(decoded_message)