In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import catboost
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn import metrics
import pickle

# Let pandas render up to 100 columns before it starts eliding the middle
# of wide DataFrames in cell output.
pd.set_option('display.max_columns', 100)

In [2]:
def _load_sheet_with_time_columns(path, sheet_name, period_col='Unnamed: 0'):
    """Read one sheet of the training workbook and derive calendar columns.

    The same recipe is applied to every sheet, so it lives in one place
    instead of being copy-pasted per sheet.

    Parameters
    ----------
    path : str
        Location of the Excel workbook.
    sheet_name : str
        Name of the sheet to read.
    period_col : str
        Column holding the period label of each row. The first four
        characters of a label are parsed as the year and the last two as
        the month (presumably labels look like ``YYYY?MM`` -- TODO confirm
        against the workbook).

    Returns
    -------
    pandas.DataFrame
        The sheet without ``period_col`` and with three extra columns:
        ``month``, ``year`` (rebased so the smallest year in the sheet is 0)
        and ``month_num`` (``month + year * 12``).
    """
    # The row labelled 0 is discarded right after reading
    # (NOTE(review): looks like a secondary header row -- confirm).
    sheet = pd.read_excel(path, sheet_name=sheet_name).drop(0)
    period_labels = sheet[period_col].tolist()

    # `drop` returns a new frame, so the columns below are added to a fresh
    # object rather than mutating the freshly read sheet in place.
    prepared = sheet.drop(columns=period_col)
    prepared['month'] = [int(label[-2:]) for label in period_labels]
    prepared['year'] = [int(label[:4]) for label in period_labels]

    # Rebase years so that counting starts at the first year of this sheet.
    prepared['year'] -= prepared['year'].min()

    # Running month counter across years.
    prepared['month_num'] = prepared['month'] + prepared['year'] * 12
    return prepared


train_month_df = _load_sheet_with_time_columns('data/train.xlsx', 'Monthly')
train_quart_df = _load_sheet_with_time_columns('data/train.xlsx', 'Quarterly')

In [3]:
from utils import classes

In [4]:
# Standardise every column named in `classes`: subtract the column mean and
# divide by the column standard deviation (both computed with numpy's
# default arguments). The columns of `train_month_df` are overwritten.
# NOTE(review): a constant column has zero standard deviation and would turn
# into NaN/inf here -- confirm no such column exists.
for column in (name for group in classes for name in group):
    series = train_month_df[column]
    centre = np.mean(series)
    spread = np.std(series)
    train_month_df[column] = (series.values - centre) / spread

In [5]:
from utils import make_features_month_clas

def get_features(values):
    """Turn one series into (history features, next value) training pairs.

    For every position ``i`` from ``len(values) // 2`` up to the last index,
    the prefix ``values[:i]`` is handed to ``make_features_month_clas`` and
    the element ``values[i]`` is recorded as the matching target.

    Parameters
    ----------
    values : sequence
        Observations of a single series; converted with ``np.array``.

    Returns
    -------
    tuple of (list, list)
        ``features`` holds one ``make_features_month_clas`` result per
        position; ``target`` holds the series value at each position.
        Both lists are empty when ``values`` is empty.
    """
    series = np.array(values)
    first_cut = len(series) // 2

    # Build each pair in one pass so the feature call for a position still
    # happens right before that position's target is read.
    pairs = [
        (make_features_month_clas(series[:cut]), series[cut])
        for cut in range(first_cut, len(series))
    ]

    features = [history_features for history_features, _next in pairs]
    target = [_next for _history_features, _next in pairs]
    return features, target

In [6]:
# Assemble the classification training set. Every column listed in
# `classes[k]` contributes one sample per window produced by `get_features`,
# and each of those samples is labelled with the class index `k`.
feature_blocks, label_blocks = [], []

for class_idx, class_columns in enumerate(classes):
    for col in class_columns:
        observed = train_month_df[col].dropna().tolist()

        # Only the window features are used; the per-window target returned
        # by get_features is not needed because the label is the class index.
        features = get_features(observed)[0]

        # A column with no observations yields no windows. Appending an
        # empty list would make np.concatenate fail on mismatched
        # dimensions, so such columns are skipped.
        if not features:
            continue

        feature_blocks.append(features)
        label_blocks.append([class_idx] * len(features))

# np.concatenate still raises ValueError if no column produced any window.
all_features = np.concatenate(feature_blocks)
all_target = np.concatenate(label_blocks)

In [7]:
from sklearn.model_selection import StratifiedKFold

# Train an ensemble of CatBoost classifiers on the same data; the members
# differ only in their random seed (0..4). Progress is logged every 100
# iterations. The whole list is then written to a single pickle file.
ensemble_seeds = range(5)
models = []

for seed in ensemble_seeds:
    model = catboost.CatBoostClassifier(learning_rate=0.03, random_seed=seed)
    model.fit(all_features, all_target, verbose=100)
    models.append(model)

# The list is pickled as one object, so loading it returns all members.
with open('models_month_clas.pkl', 'wb') as f:
    pickle.dump(models, f)

0:	learn: 1.5652010	total: 85.7ms	remaining: 1m 25s
100:	learn: 0.6772187	total: 1.08s	remaining: 9.63s
200:	learn: 0.5368824	total: 2.09s	remaining: 8.32s
300:	learn: 0.4527275	total: 3.18s	remaining: 7.39s
400:	learn: 0.3840858	total: 4.16s	remaining: 6.21s
500:	learn: 0.3314286	total: 5.2s	remaining: 5.18s
600:	learn: 0.2911886	total: 6.25s	remaining: 4.15s
700:	learn: 0.2572878	total: 7.24s	remaining: 3.09s
800:	learn: 0.2287026	total: 8.27s	remaining: 2.06s
900:	learn: 0.2052484	total: 9.22s	remaining: 1.01s
999:	learn: 0.1851403	total: 10.2s	remaining: 0us
0:	learn: 1.5694807	total: 23.7ms	remaining: 23.6s
100:	learn: 0.6722392	total: 1.04s	remaining: 9.28s
200:	learn: 0.5287515	total: 2.33s	remaining: 9.26s
300:	learn: 0.4485851	total: 3.79s	remaining: 8.81s
400:	learn: 0.3842105	total: 5.22s	remaining: 7.79s
500:	learn: 0.3330465	total: 6.64s	remaining: 6.62s
600:	learn: 0.2929594	total: 8.11s	remaining: 5.38s
700:	learn: 0.2578606	total: 9.52s	remaining: 4.06s
800:	learn: 0.22