In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import catboost
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn import metrics
import pickle

# Show up to 100 columns when a DataFrame is rendered in the notebook.
pd.set_option('display.max_columns', 100)

In [2]:
# Read the 'Monthly' and 'Quarterly' sheets of the training workbook.
# `.drop(0)` removes the row labelled 0 from each sheet
# (presumably a non-data description row -- TODO confirm against the workbook).
train_month_df, train_quart_df = (
    pd.read_excel('data/train.xlsx', sheet_name=sheet).drop(0)
    for sheet in ('Monthly', 'Quarterly')
)

In [3]:
def _add_calendar_columns(frame):
    """Derive `month`, `year` and `month_num` from the 'Unnamed: 0' column, in place.

    The last two characters of each 'Unnamed: 0' value are parsed as the month
    and the first four as the year. The year is then shifted so that the
    earliest year in the frame becomes 0, and `month_num` is computed as
    `month + year * 12`. Finally the 'Unnamed: 0' column is dropped.

    NOTE: because the source column is dropped, calling this twice on the same
    frame raises a KeyError.
    """
    stamps = frame['Unnamed: 0'].tolist()
    frame['month'] = [int(stamp[-2:]) for stamp in stamps]
    frame['year'] = [int(stamp[:4]) for stamp in stamps]

    # Make the year relative to the first year present in this frame.
    frame['year'] -= frame['year'].min()

    frame['month_num'] = frame['month'] + frame['year'] * 12

    frame.drop('Unnamed: 0', axis=1, inplace=True)


# Apply the same transformation to both the monthly and the quarterly data.
for frame in (train_month_df, train_quart_df):
    _add_calendar_columns(frame)

In [4]:
from utils import classes_quart

In [5]:
# Standardise every column listed in `classes_quart`:
# subtract the column mean and divide by the column standard deviation,
# overwriting the column in `train_quart_df`.
for col in (name for group in classes_quart for name in group):
    series = train_quart_df[col]
    centered = series.values - np.mean(series)
    train_quart_df[col] = centered / np.std(series)

In [6]:
from utils import make_features_quart_clas

def get_features(values):
    """Build (features, target) samples from the second half of a series.

    For every index `i` from `len(values) // 2` up to `len(values) - 1`,
    `make_features_quart_clas` is called on the prefix `values[:i]` and the
    result is paired with `values[i]`.

    Args:
        values: a sequence convertible with `np.array`.

    Returns:
        A tuple `(features, target)` of two equally long lists: the outputs of
        `make_features_quart_clas` and the corresponding next values.
    """
    series = np.array(values)
    first_split = len(series) // 2

    # Build each (feature, target) pair together so the helper is invoked in
    # the same order as the split points.
    samples = [
        (make_features_quart_clas(series[:split]), series[split])
        for split in range(first_split, len(series))
    ]

    features = [feats for feats, _ in samples]
    target = [nxt for _, nxt in samples]
    return features, target

In [7]:
# Assemble the classification dataset: every column of `train_quart_df` listed
# in `classes_quart[k]` contributes its prefix features, all labelled with the
# class index `k`.
feature_blocks, label_blocks = [], []

for class_idx, columns in enumerate(classes_quart):
    for col in columns:
        series = train_quart_df[col].dropna().tolist()

        # Only the features are needed here; the per-sample next values
        # returned by get_features are not used for classification.
        col_features = get_features(series)[0]

        feature_blocks.append(col_features)
        label_blocks.append([class_idx] * len(col_features))

# Stack the per-column blocks into one sample array and one label array.
all_features = np.concatenate(feature_blocks, axis=0)
all_target = np.concatenate(label_blocks, axis=0)

In [8]:
from sklearn.model_selection import StratifiedKFold

# Fit five CatBoost classifiers on the full dataset, differing only in their
# random seed (0..4), and keep them together as an ensemble.
models = []
for seed in range(5):
    clf = catboost.CatBoostClassifier(learning_rate=0.03, random_seed=seed)
    # verbose=100 prints the training loss every 100 iterations.
    clf.fit(all_features, all_target, verbose=100)
    models.append(clf)

# Persist the whole list of fitted models in a single pickle file.
with open('models_quart_clas.pkl', 'wb') as out_file:
    pickle.dump(models, out_file)

0:	learn: 1.3464617	total: 74.3ms	remaining: 1m 14s
100:	learn: 0.3421589	total: 900ms	remaining: 8.01s
200:	learn: 0.2004246	total: 1.76s	remaining: 6.98s
300:	learn: 0.1344945	total: 2.54s	remaining: 5.91s
400:	learn: 0.0923593	total: 3.35s	remaining: 5s
500:	learn: 0.0688321	total: 4.15s	remaining: 4.14s
600:	learn: 0.0527362	total: 4.91s	remaining: 3.26s
700:	learn: 0.0424668	total: 5.74s	remaining: 2.45s
800:	learn: 0.0352862	total: 6.54s	remaining: 1.63s
900:	learn: 0.0296963	total: 7.37s	remaining: 809ms
999:	learn: 0.0255612	total: 8.18s	remaining: 0us
0:	learn: 1.3408046	total: 16.4ms	remaining: 16.4s
100:	learn: 0.3331336	total: 850ms	remaining: 7.57s
200:	learn: 0.1979069	total: 1.7s	remaining: 6.75s
300:	learn: 0.1285711	total: 2.53s	remaining: 5.87s
400:	learn: 0.0899365	total: 3.47s	remaining: 5.19s
500:	learn: 0.0662049	total: 4.4s	remaining: 4.38s
600:	learn: 0.0514297	total: 5.18s	remaining: 3.44s
700:	learn: 0.0412792	total: 5.99s	remaining: 2.55s
800:	learn: 0.034054