In [1]:
import os

import joblib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import TimeSeriesSplit
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split
from sklearn.svm import SVR

In [2]:
# Read the training data and discard the 'udsStock' and 'udsVenta_lag_365'
# columns in a single chained expression.
train = pd.read_csv('../Data/train.csv').drop(columns=['udsStock', 'udsVenta_lag_365'])

In [3]:
# Hyper-parameter grid for the SVR search (run through GridSearchCV with a
# TimeSeriesSplit cross-validator): 1 kernel x 3 C values x 3 epsilon values.
param_grid = dict(
    kernel=['linear'],
    C=[1, 5, 10],
    epsilon=[0.1, 0.25, 0.5],
)

In [4]:
# Empty table defining the columns reported for each SKU: the selected
# hyper-parameters and the associated cross-validated RMSE.
result_columns = ['idSku', 'kernel', 'C', 'epsilon', 'rmse']
best_results = pd.DataFrame(columns=result_columns)

In [None]:
# Fit one SVR per SKU: tune the hyper-parameters with a time-series-aware grid
# search, record the winning configuration, and persist the refitted model.

# joblib.dump does not create missing directories, so make sure the target
# folder exists before the (long) loop starts.
MODEL_DIR = '../SVR_models'
os.makedirs(MODEL_DIR, exist_ok=True)

# TimeSeriesSplit holds no fitted state, so one instance serves every SKU.
tscv = TimeSeriesSplit()

# Collect one record per SKU and build the DataFrame once after the loop.
# DataFrame.append was removed in pandas 2.0 and copied the whole frame on
# every call; building from a list also makes this cell idempotent on re-run
# (the original kept appending to whatever best_results already contained).
sku_records = []

skus = train['idSku'].unique()
for s in skus:
    train_s = train[train['idSku'] == s]
    # NOTE(review): TimeSeriesSplit folds by row position, so this assumes the
    # rows of each SKU are already in chronological order -- confirm upstream.
    y_train = train_s['udsVenta']
    X_train = train_s.drop(['udsVenta', 'idSku', 'idSecuencia'], axis=1)

    model = SVR()
    grid_search = GridSearchCV(model, param_grid, cv=tscv, scoring='neg_root_mean_squared_error', n_jobs=-1, verbose=1)
    grid_search.fit(X_train, y_train)
    best_params = grid_search.best_params_
    sku_records.append({
        'idSku': s,
        'kernel': best_params['kernel'],
        'C': best_params['C'],
        'epsilon': best_params['epsilon'],
        # The scorer returns the negated RMSE; flip the sign to report RMSE.
        'rmse': -grid_search.best_score_,
    })

    # GridSearchCV refits the best configuration on all of X_train / y_train
    # (refit=True is the default), so reuse that estimator instead of paying
    # for a second, identical fit.
    best_model = grid_search.best_estimator_

    # save model
    path = MODEL_DIR + '/svr_' + str(s) + '.pkl'
    joblib.dump(best_model, path)

# Reuse the column layout of the (empty) results table defined earlier.
best_results = pd.DataFrame(sku_records, columns=best_results.columns)


In [6]:
# Preview the per-SKU results (at most the first 50 rows).
best_results.head(n=50)

Unnamed: 0,idSku,kernel,C,epsilon,rmse
0,1,linear,1,0.1,9.652293
1,2,linear,5,0.1,13.751313
2,3,linear,1,0.1,8.45577
3,4,linear,1,0.25,6.649149
4,5,linear,1,0.5,11.970408
5,6,linear,1,0.5,7.968507
6,7,linear,1,0.25,7.558066
7,8,linear,1,0.5,6.36943
8,9,linear,1,0.25,6.228167
9,10,linear,1,0.5,5.299088


In [7]:
# Write the per-SKU summary to CSV, leaving the row index out of the file.
best_results.to_csv(path_or_buf='../Data/SVR_best_models.csv', index=False)