In [81]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import datetime 

# tensorflow 2.6, numpy 1.95 needed 
import keras
from keras.layers import Dense
from keras.models import Sequential
from keras.callbacks import EarlyStopping
from keras.utils import np_utils
import itertools
from keras.layers import LSTM
from keras.layers import Dropout
from sklearn.metrics import mean_squared_error

from sklearn.preprocessing import MinMaxScaler
from sklearn import preprocessing
import math
import random
import warnings
warnings.filterwarnings("ignore")
random.seed(10)


In [82]:
passengers_df = pd.read_csv('data/AIRLINE_PASSENGERS.csv', parse_dates=['Date'])
alcohol_df = pd.read_csv('data/ALCOHOL_SALES.csv', parse_dates=['Date'])
beer_df = pd.read_csv('data/AUS_BEER_PRODUCTION.csv', parse_dates=['Date'])
electric_df = pd.read_csv('data/ELECTRIC_PRODUCTION.csv', parse_dates=['Date'])
minTemp_df = pd.read_csv('data/MIN_TEMP.csv', parse_dates=['Date'])
gdp_df = pd.read_csv('data/NOR_GDP.csv', parse_dates=['Date'])
#population_df = pd.read_csv('data/POPULATION.csv', parse_dates=['Date'])
sunspots_df = pd.read_csv('data/SUNSPOTS.csv', parse_dates=['Date'])
SP_df = pd.read_csv('data/SP.csv', parse_dates=['Date'])
yahoo_df = pd.read_csv('data/YAHOO.csv', parse_dates=['Date'])
tesla_df = pd.read_csv('data/TESLA.csv', parse_dates=['Date'])




def set_name(dfs,names):
    for ind, df in enumerate(dfs):
        df.name = names[ind]
    return dfs
name_list = ['passengers_df', 'alcohol_df', 'beer_df', 
             'electric_df', 'minTemp_df', 'gdp_df', 'sunspots_df','SP_df', 'yahoo_df', 'tesla_df']


dfs_list = []
dfs_list.append(passengers_df)
dfs_list.append(alcohol_df)
dfs_list.append(beer_df)
dfs_list.append(electric_df)
dfs_list.append(minTemp_df)
dfs_list.append(gdp_df)
dfs_list.append(sunspots_df)
dfs_list.append(SP_df)
dfs_list.append(yahoo_df)
dfs_list.append(tesla_df)




dfs_list = set_name(dfs_list, name_list)


In [83]:
def split_train_test(df, ratio, nr_of_forecasts):
    split_range = int(len(df)* ratio)
    train, test, forecasts = df[0:split_range],df[split_range:len(df)-nr_of_forecasts], df[len(df)-nr_of_forecasts:len(df)]  
    return train, test, forecasts

In [84]:
def timeseries_to_supervised(data, lag=1):
    df = pd.DataFrame(data)
    columns = [df.shift(i) for i in range(1, lag+1)]
    columns.append(df)
    df = pd.concat(columns, axis=1)
    df.fillna(0, inplace=True)
    return df

In [85]:
def flatten(t):
    return [item for sublist in t for item in sublist]

In [86]:
def get_mape(actual, pred):
    return np.mean(np.abs((actual - pred)/actual))*100

In [87]:
def get_accuracy(df, ratio, fcs_nr_list):

    data = df[df.columns[1]].values.reshape(-1, 1)
    min_max_scaler = preprocessing.MinMaxScaler(feature_range=(-1, 1))
    data_scaled = min_max_scaler.fit_transform(data)
    data_scaled = timeseries_to_supervised(data_scaled).values

    train, test, forecasts = split_train_test(data_scaled, ratio, nr_of_forecasts=10)

    X_train, y_train = train[:, 0:-1], train[:, -1]
    X_train = X_train.reshape(X_train.shape[0], 1, X_train.shape[1])

    model = Sequential()
    model.add(LSTM(units = 4, activation = 'relu', batch_input_shape=(1, X_train.shape[1], X_train.shape[2]), stateful=True))
    model.add(Dense(1))
    model.compile(loss='mean_squared_error', optimizer='adam')

    for i in range(100):
        model.fit(X_train, y_train, epochs= 1, batch_size=1, verbose=0, shuffle=False)
        model.reset_states()
    
    #model.fit(X_train, y_train, epochs = 50, batch_size = 1, verbose =0, shuffle = False )
    
    train_forecasts = model.predict(X_train, batch_size=1)
    
    train_forecasts = np.ravel(min_max_scaler.inverse_transform(train_forecasts))
    y_train = np.ravel(min_max_scaler.inverse_transform(y_train.reshape(-1, 1)))
   
    train_rmse = np.round((mean_squared_error(train_forecasts, y_train))**0.5, 4)
    train_mape = np.round(get_mape(train_forecasts, y_train), 4)
    
    #plt.plot(y_train)
    #plt.plot(train_forecasts)
    #plt.show()
    print(f'train rmse: {train_rmse}')
    print(f'train mape: {train_mape}')
    
    
    
    X_train, y_train = train[:, 0:-1], train[:, -1]
    X_train = X_train.reshape(X_train.shape[0], 1, X_train.shape[1])
    
    test_fcs = np.array([])
    for i in range(len(test)):
        model.fit(X_train, y_train, epochs= 1, batch_size=1, verbose=0, shuffle=False)
        
        X_test, y_test = test[i, 0:-1], test[i, -1]
        X_test = X_test.reshape(X_test.shape[0], 1, 1)
        #print(X_test)
        
        single_fc = model.predict([X_test], batch_size = 1)
        #print(single_fc)
        test_fcs = np.append(test_fcs, single_fc[0][0])
        
        X_train = np.vstack((X_train, X_test))
        y_train = np.append(y_train, y_test)
        
        
        
        #print(f'X_train: {X_train}')
        #print(f'y_train: {y_train}')
    
    test_fcs = np.ravel(min_max_scaler.inverse_transform(test_fcs.reshape(-1 ,1)))
    y_test = test[:, -1]
    y_test = np.ravel(min_max_scaler.inverse_transform(y_test.reshape(-1, 1)))
    
    test_rmse = np.round((mean_squared_error(test_fcs, y_test))**0.5, 4)
    test_mape = np.round(get_mape(test_fcs, y_test), 4)
    print(f'test rmse: {test_rmse}')
    print(f'test mape: {test_mape}')
    
    #plt.plot(y_test)
    #plt.plot(test_fcs)
    #plt.show()
    forecasts_rmse_list = []
    forecasts_mape_list = []
    for fcs in fcs_nr_list:
        X_fc, y_fc = forecasts[0:fcs, 0:-1], forecasts[:fcs,-1]
        X_fc = X_fc.reshape(X_fc.shape[0], 1, X_fc.shape[1])
        fc_predictions = model.predict(X_fc, batch_size = 1)
        
        fc_predictions =   np.ravel(min_max_scaler.inverse_transform(fc_predictions))
        y_fc = np.ravel(min_max_scaler.inverse_transform(y_fc.reshape(-1, 1)))
        
        fc_rmse = np.round(mean_squared_error(fc_predictions, y_fc)**0.5, 4)
        fc_mape = np.round(get_mape(fc_predictions, y_fc), 4)
        print(fc_rmse, fc_mape)
        forecasts_rmse_list.append(fc_rmse)
        forecasts_mape_list.append(fc_mape)
    print(f'forecast rmse: {fc_rmse_list}')
    print(f'forecast smape: {fc_mape_list}')
    
    return train_rmse, train_mape, test_rmse, test_mape, forecasts_rmse_list, forecasts_mape_list
    
    

In [88]:
def save_cv(filepath, train_rmse, train_mape, test_rmse, test_mape, fc_rmse, fc_mape):
    
    df = pd.DataFrame({'Data': name_list, 'Train RMSE': train_rmse,'Train MAPE': train_mape,
                       'Test RMSE': test_rmse, 'Test MAPE': test_mape, '1-step RMSE': [x[0] for x in fc_rmse],
                       '3-step RMSE': [x[1] for x in fc_rmse], '5-step RMSE': [x[2] for x in fc_rmse],
                        '10-step RMSE': [x[3] for x in fc_rmse],'1-step MAPE': [x[0] for x in fc_mape],
                       '3-step MAPE': [x[1] for x in fc_mape],'5-step MAPE': [x[2] for x in fc_mape],
                       '10-step MAPE': [x[3] for x in fc_mape] })
    df.to_csv(filepath, index = False)
    return df

In [89]:
ff

NameError: name 'ff' is not defined

In [90]:
train_rmse_list, train_mape_list, test_rmse_list, test_mape_list, fc_rmse_list, fc_mape_list = [], [], [], [], [], []
nr_of_forecasts = [1, 3, 5, 10]
for df in dfs_list:
    print(df.name)
    train_rmse, train_mape, test_rmse, test_mape, fc_rmse, fc_mape = get_accuracy(df, 0.75, fcs_nr_list= nr_of_forecasts)
    train_rmse_list.append(train_rmse)
    train_mape_list.append(train_mape)
    test_rmse_list.append(test_rmse)
    test_mape_list.append(test_mape)
    fc_rmse_list.append(fc_rmse)
    fc_mape_list.append(fc_mape)
results = save_cv("results/LSTM_results.csv", train_rmse_list, train_mape_list, test_rmse_list, test_mape_list, fc_rmse_list, fc_mape_list)


passengers_df
train rmse: 38.2594
train mape: 10.0177
test rmse: 62.384
test mape: 11.9554
26.7092 6.8085
50.8245 12.0849
109.6934 21.6098
102.3205 19.5118
forecast rmse: []
forecast smape: []
alcohol_df


KeyboardInterrupt: 

In [None]:
ff

NameError: name 'ff' is not defined