In [6]:
import pandas as pd
import matplotlib.pyplot as plt
import catboost
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn import metrics
from sklearn.model_selection import KFold
import pickle

# Let pandas render up to 100 columns before truncating wide frames.
pd.set_option('display.max_columns', 100)

In [7]:
# Load the 'Monthly' and 'Quarterly' sheets of the training workbook.
# The workbook is opened once and shared by both reads instead of being
# opened and parsed from disk separately for every sheet.
# .drop(0) removes the row labelled 0 from each sheet
# (presumably a secondary header/description row -- TODO confirm).
with pd.ExcelFile('data/train.xlsx') as train_workbook:
    train_month_df = pd.read_excel(train_workbook, sheet_name='Monthly').drop(0)
    train_quart_df = pd.read_excel(train_workbook, sheet_name='Quarterly').drop(0)

In [8]:
def _add_calendar_columns(frame, period_col='Unnamed: 0'):
    """Replace the textual period column of ``frame`` with numeric time columns.

    The period labels are read as strings whose first four characters are the
    year and whose last two characters are the month. The frame is modified
    in place:

    * ``month``     -- month number parsed from the label,
    * ``year``      -- year parsed from the label, shifted so that the
                       earliest year in the frame becomes 0,
    * ``month_num`` -- ``month + 12 * year`` using the shifted year,

    and ``period_col`` itself is dropped afterwards.
    """
    labels = frame[period_col].tolist()

    frame['month'] = [int(label[-2:]) for label in labels]
    frame['year'] = [int(label[:4]) for label in labels]

    # Express years relative to the first year present in this frame.
    first_year = frame['year'].min()
    frame['year'] = frame['year'] - first_year

    frame['month_num'] = frame['year'] * 12 + frame['month']

    frame.drop(columns=period_col, inplace=True)


# Both frames get exactly the same treatment, each relative to its own first year.
for _frame in (train_month_df, train_quart_df):
    _add_calendar_columns(_frame)

In [9]:
from utils import classes_quart

# Z-score every quarterly column listed in classes_quart: subtract the column
# mean and divide by the column standard deviation, both taken with
# np.mean / np.std over the whole training column.
for column_group in classes_quart:
    for column in column_group:
        series = train_quart_df[column]
        centred = series.values - np.mean(series)
        train_quart_df[column] = centred / np.std(series)

In [10]:
from utils import make_features_quart_reg

def get_features(values):
    """Build one-step-ahead training samples from a single series.

    For every position ``i`` in the second half of the series one sample is
    produced: the feature vector is ``make_features_quart_reg(values[:i])``
    (computed only from the observations strictly before ``i``) and the
    target is the first difference ``values[i] - values[i - 1]``.

    Parameters
    ----------
    values : sequence of numbers
        The observed series, in chronological order.

    Returns
    -------
    features : list
        One entry per sample, as returned by ``make_features_quart_reg``.
    target : list
        The matching first differences. Both lists are empty when the series
        has fewer than two observations.
    """
    values = np.array(values)

    # Never start at i == 0: there is no previous observation there, and
    # values[i - 1] would silently wrap around to the *last* element (this
    # happened for a one-element series, where len(values) // 2 == 0).
    start = max(1, len(values) // 2)

    features, target = [], []
    for i in range(start, len(values)):
        features.append(make_features_quart_reg(values[:i]))
        target.append(values[i] - values[i - 1])

    return features, target

In [11]:
# Train, for every class of quarterly series, a small ensemble of CatBoost
# regressors on the pooled samples of all series in that class, then pickle
# the nested list ``models[class_index][seed]``.
N_MODELS_PER_CLASS = 5   # ensemble members; they differ only in random seed
LEARNING_RATE = 0.03
MODELS_PATH = 'models_quart_reg.pkl'

models = []

for class_idx, class_columns in enumerate(classes_quart):

    # Pool the samples of every series in the class into flat lists.
    # Extending flat lists (instead of concatenating one array per column)
    # means a column that yields no samples -- e.g. one that is empty after
    # dropna() -- is simply skipped rather than breaking np.concatenate with
    # a dimension mismatch.
    all_features, all_target = [], []
    for col in class_columns:
        series = train_quart_df[col].dropna().tolist()
        features, target = get_features(series)
        all_features.extend(features)
        all_target.extend(target)

    if not all_features:
        raise ValueError(
            'no training samples for class {}: {}'.format(class_idx, list(class_columns))
        )

    all_features = np.asarray(all_features)
    all_target = np.asarray(all_target)

    # Same data and hyper-parameters for every member; only the seed varies.
    cur_models = []
    for seed in range(N_MODELS_PER_CLASS):
        model = catboost.CatBoostRegressor(
            random_seed=seed,
            learning_rate=LEARNING_RATE,
        )
        model.fit(all_features, all_target, verbose=False)
        cur_models.append(model)

    models.append(cur_models)

with open(MODELS_PATH, 'wb') as f:
    pickle.dump(models, f)