In [None]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import datetime 
from sklearn import preprocessing
from sklearn.preprocessing import MinMaxScaler
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_percentage_error

from sklearn.model_selection import GridSearchCV
import warnings

import random
random.seed(10)
warnings.filterwarnings("ignore")

In [None]:
# Source: https://github.com/microsoft/ML-For-Beginners/blob/main/7-TimeSeries/3-SVR/README.md


In [None]:
# Load every benchmark series in one pass. Each CSV has a 'Date' column that
# is parsed into datetimes. The target names on the left are matched by
# position with the file paths in the tuple below.
(
    passengers_df,
    alcohol_df,
    beer_df,
    electric_df,
    minTemp_df,
    gdp_df,
    sunspots_df,
    SP_df,
    yahoo_df,
    tesla_df,
) = (
    pd.read_csv(csv_path, parse_dates=['Date'])
    for csv_path in (
        'data/AIRLINE_PASSENGERS.csv',
        'data/ALCOHOL_SALES.csv',
        'data/AUS_BEER_PRODUCTION.csv',
        'data/ELECTRIC_PRODUCTION.csv',
        'data/MIN_TEMP.csv',
        'data/NOR_GDP.csv',
        'data/SUNSPOTS.csv',
        'data/SP.csv',
        'data/YAHOO.csv',
        'data/TESLA.csv',
    )
)




def set_name(dfs, names):
    """Attach a ``name`` attribute to each object in ``dfs``.

    The i-th object receives ``names[i]``. The objects are modified in place
    and the same ``dfs`` container is returned.

    Parameters
    ----------
    dfs : iterable
        Objects that accept attribute assignment (DataFrames in this notebook).
    names : sequence
        Labels indexed by position; must be at least as long as ``dfs``,
        otherwise the lookup raises ``IndexError``.

    Returns
    -------
    The ``dfs`` argument itself.
    """
    position = 0
    for frame in dfs:
        frame.name = names[position]
        position += 1
    return dfs
# Labels for the loaded series, in the same order as the frames below.
name_list = [
    'passengers_df',
    'alcohol_df',
    'beer_df',
    'electric_df',
    'minTemp_df',
    'gdp_df',
    'sunspots_df',
    'SP_df',
    'yahoo_df',
    'tesla_df',
]

# Collect the frames and tag each one with its label via set_name, which
# returns the very list it was given.
dfs_list = set_name(
    [
        passengers_df,
        alcohol_df,
        beer_df,
        electric_df,
        minTemp_df,
        gdp_df,
        sunspots_df,
        SP_df,
        yahoo_df,
        tesla_df,
    ],
    name_list,
)


In [None]:
# Source: https://github.com/microsoft/ML-For-Beginners/blob/main/7-TimeSeries/3-SVR/README.md

In [None]:
def get_mape(actual, pred):
    """Return the mean absolute percentage error of ``pred`` against ``actual``, in percent.

    Both inputs are converted to flat float arrays before they are compared.
    Without that step a column vector of shape (n, 1) and a flat vector of
    shape (n,) would be broadcast into an (n, n) matrix and the mean would be
    taken over all n*n pairs, silently producing a wrong score. Flattening also
    lets plain Python lists be passed.

    Parameters
    ----------
    actual : array-like
        Observed values. A zero entry makes the corresponding ratio infinite
        (or NaN when the prediction is also zero), and that value propagates
        into the returned mean.
    pred : array-like
        Predicted values, compared with ``actual`` by position.

    Returns
    -------
    float
        ``mean(|actual - pred| / |actual|) * 100``.

    Raises
    ------
    ValueError
        Raised by NumPy when the flattened inputs cannot be broadcast together
        (for example, different lengths greater than one).
    """
    actual = np.asarray(actual, dtype=float).ravel()
    pred = np.asarray(pred, dtype=float).ravel()
    return np.mean(np.abs((actual - pred) / actual)) * 100

In [None]:
def split_train_test(df, ratio, nr_of_forecasts = None):
    """Split ``df`` chronologically into train, test and forecast hold-out frames.

    The last ``nr_of_forecasts`` rows form the forecast hold-out. The first
    ``int(len(df) * ratio) - nr_of_forecasts`` rows form the training set and
    the rows in between form the test set. In each returned frame the first
    column is overwritten with a running integer position (0, 1, 2, ...
    continuing across train, test and forecasts).

    Parameters
    ----------
    df : pandas.DataFrame
        Source frame; rows are taken in their existing order. It is not modified.
    ratio : float
        Fraction of ``len(df)`` used to place the train/test boundary (before
        the forecast rows are subtracted).
    nr_of_forecasts : int, optional
        Number of trailing rows reserved as the forecast hold-out. ``None``
        (the default) is treated as 0, i.e. an empty hold-out.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(train, test, forecasts)``, each an independent copy.
    """
    # The declared default used to crash on ``int(...) - None``.
    if nr_of_forecasts is None:
        nr_of_forecasts = 0

    n_rows = len(df)
    forecast_start = n_rows - nr_of_forecasts
    split_range = int(n_rows * ratio) - nr_of_forecasts

    # Explicit copies: the column assignments below must neither touch the
    # caller's frame nor rely on pandas' ambiguous write-to-slice behaviour.
    train = df.iloc[0:split_range].copy()
    test = df.iloc[split_range:forecast_start].copy()
    forecasts = df.iloc[forecast_start:n_rows].copy()

    # Replace the first column with a running integer position.
    train[train.columns[0]] = np.arange(0, len(train))
    test[test.columns[0]] = np.arange(len(train), len(test) + len(train))
    forecasts[forecasts.columns[0]] = np.arange(len(test) + len(train), len(df))

    return train, test, forecasts

In [None]:
def save_cv(filepath, train_rmse, train_mape, test_rmse, test_mape, fc_rmse, fc_mape):
    """Assemble the per-dataset metrics into one table, write it as CSV and return it.

    The 'Data' column is taken from the module-level ``name_list``. Each entry
    of ``fc_rmse`` / ``fc_mape`` is indexed at positions 0..3, which become
    the 1-, 3-, 5- and 10-step columns respectively.

    Parameters
    ----------
    filepath : path-like
        Destination handed to ``DataFrame.to_csv`` (written without the index).
    train_rmse, train_mape, test_rmse, test_mape : sequence
        One value per dataset.
    fc_rmse, fc_mape : sequence of sequences
        One sequence per dataset holding at least four values.

    Returns
    -------
    pandas.DataFrame
        The table that was written.
    """
    def horizon(per_dataset, position):
        # Pull the metric at one forecast-horizon position out of every dataset entry.
        return [entry[position] for entry in per_dataset]

    # Insertion order fixes the column order of the CSV.
    table = {}
    table['Data'] = name_list
    table['Train RMSE'] = train_rmse
    table['Train MAPE'] = train_mape
    table['Test RMSE'] = test_rmse
    table['Test MAPE'] = test_mape
    table['1-step RMSE'] = horizon(fc_rmse, 0)
    table['3-step RMSE'] = horizon(fc_rmse, 1)
    table['5-step RMSE'] = horizon(fc_rmse, 2)
    table['10-step RMSE'] = horizon(fc_rmse, 3)
    table['1-step MAPE'] = horizon(fc_mape, 0)
    table['3-step MAPE'] = horizon(fc_mape, 1)
    table['5-step MAPE'] = horizon(fc_mape, 2)
    table['10-step MAPE'] = horizon(fc_mape, 3)

    summary = pd.DataFrame(table)
    summary.to_csv(filepath, index = False)
    return summary

In [None]:
def get_accuracy(df, ratio, fcs_nr_list):
    """Fit an RBF SVR on one series and report train, walk-forward test and forecast errors.

    The frame is split with ``split_train_test`` using a fixed hold-out of 10
    rows. The first column of each split is used as the single feature and the
    second column as the target. Targets are min-max scaled to [0, 1] with a
    scaler fitted on the training targets only; every prediction is mapped back
    to the original scale with that same scaler before errors are computed.

    Evaluation stages:

    1. Train: the model is fitted on the training split and scored on it.
    2. Test (walk-forward): each test point is predicted by the model fitted on
       all earlier points; only afterwards is the point added to the training
       data and the model refitted. (Previously the point was added before it
       was predicted, so the test score was computed in-sample.)
    3. Forecasts: the final model (fitted on train + test) predicts the first
       ``k`` hold-out rows for every ``k`` in ``fcs_nr_list``. (Previously the
       scaler was re-fitted on the hold-out targets here, so predictions were
       un-scaled with the wrong min/max.)

    Parameters
    ----------
    df : pandas.DataFrame
        Series to evaluate; column 0 is replaced by a running position inside
        ``split_train_test``, column 1 is the target.
    ratio : float
        Train/test boundary fraction forwarded to ``split_train_test``.
    fcs_nr_list : iterable of int
        Forecast horizons. The hold-out has 10 rows, so a horizon larger than
        10 is evaluated on those 10 rows only.

    Returns
    -------
    tuple
        ``(train_rmse, train_mape, test_rmse, test_mape, forecasts_rmse_list,
        forecasts_mape_list)``; scalars and list entries are rounded to 4 decimals.
    """
    min_max_scaler = preprocessing.MinMaxScaler(feature_range=(0, 1))
    train, test, forecasts = split_train_test(df, ratio, nr_of_forecasts=10)

    def to_original_scale(scaled_values):
        # Undo the train-fitted scaling and hand back a flat vector.
        column = np.asarray(scaled_values, dtype=float).reshape(-1, 1)
        return min_max_scaler.inverse_transform(column).ravel()

    # ---- train ---------------------------------------------------------
    X_train = train.iloc[:, 0:1].values
    y_train_raw = train.iloc[:, 1].values.astype(float)
    # The scaler is fitted here and nowhere else; SVR expects a 1-D target.
    y_train = min_max_scaler.fit_transform(y_train_raw.reshape(-1, 1)).ravel()

    model = SVR(C=10, epsilon=0.05, gamma=0.5, kernel='rbf', verbose=False)
    model.fit(X_train, y_train)

    train_preds = to_original_scale(model.predict(X_train))
    train_rmse = mean_squared_error(y_train_raw, train_preds) ** 0.5
    train_mape = get_mape(y_train_raw, train_preds)
    print(f'train rmse: {train_rmse}')
    print(f'train mape: {train_mape}')

    # ---- walk-forward test ----------------------------------------------
    X_test = test.iloc[:, 0:1].values
    y_test_raw = test.iloc[:, 1].values.astype(float)
    y_test = min_max_scaler.transform(y_test_raw.reshape(-1, 1)).ravel()

    scaled_test_preds = []
    for X_i, y_i in zip(X_test, y_test):
        # Predict first: the model must not have seen this point yet.
        scaled_test_preds.append(model.predict([X_i])[0])
        # Then reveal the observation and refit for the next step.
        X_train = np.append(X_train, [X_i], axis=0)
        y_train = np.append(y_train, y_i)
        model.fit(X_train, y_train)

    test_forecasts = to_original_scale(scaled_test_preds)

    test_rmse = mean_squared_error(y_test_raw, test_forecasts) ** 0.5
    test_mape = get_mape(y_test_raw, test_forecasts)
    print(f'test rmse: {test_rmse}')
    print(f'test mape: {test_mape}')

    # ---- multi-step forecasts -------------------------------------------
    X_future = forecasts.iloc[:, 0:1].values
    y_future = forecasts.iloc[:, 1].values.astype(float)

    forecasts_rmse_list = []
    forecasts_mape_list = []
    for fcs in fcs_nr_list:
        y_forecasts = y_future[0:fcs]
        # Same train-fitted scaler as everywhere else; the actual values are
        # compared in their original units and need no scaling round-trip.
        forecasts_predicted = to_original_scale(model.predict(X_future[0:fcs]))

        forecast_rmse = np.round(mean_squared_error(y_forecasts, forecasts_predicted) ** 0.5, 4)
        forecast_mape = np.round(get_mape(y_forecasts, forecasts_predicted), 4)
        forecasts_rmse_list.append(forecast_rmse)
        forecasts_mape_list.append(forecast_mape)

    print(f'forecasts rmse: {forecasts_rmse_list}')
    # The value is a MAPE (see get_mape); the label used to say "smape".
    print(f'forecasts mape: {forecasts_mape_list}')

    return (np.round(train_rmse, 4), np.round(train_mape, 4),
            np.round(test_rmse, 4), np.round(test_mape, 4),
            forecasts_rmse_list, forecasts_mape_list)
    

In [None]:
# Forecast horizons (in steps) scored for every dataset.
nr_of_forecasts = [1, 3, 5, 10]

# One accumulator per metric; entry i belongs to dfs_list[i].
train_rmse_list = []
train_mape_list = []
test_rmse_list = []
test_mape_list = []
fc_rmse_list_list = []
fc_mape_list_list = []

for df in dfs_list:
    print(df.name)
    (train_rmse, train_mape,
     test_rmse, test_mape,
     fc_rmse_list, fc_mape_list) = get_accuracy(df, 0.75, fcs_nr_list=nr_of_forecasts)

    train_rmse_list += [train_rmse]
    train_mape_list += [train_mape]
    test_rmse_list += [test_rmse]
    test_mape_list += [test_mape]
    fc_rmse_list_list += [fc_rmse_list]
    fc_mape_list_list += [fc_mape_list]

# Persist the combined table and keep it around for inspection.
results = save_cv(
    "results/SVR_results.csv",
    train_rmse_list,
    train_mape_list,
    test_rmse_list,
    test_mape_list,
    fc_rmse_list_list,
    fc_mape_list_list,
)

