<a href="https://colab.research.google.com/github/Pasewark/TimeSeries/blob/main/Default_time_series_prediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install statsmodels --upgrade

Collecting statsmodels
[?25l  Downloading https://files.pythonhosted.org/packages/0d/7b/c17815648dc31396af865b9c6627cc3f95705954e30f61106795361c39ee/statsmodels-0.12.2-cp36-cp36m-manylinux1_x86_64.whl (9.5MB)
[K     |████████████████████████████████| 9.5MB 5.2MB/s 
Installing collected packages: statsmodels
  Found existing installation: statsmodels 0.10.2
    Uninstalling statsmodels-0.10.2:
      Successfully uninstalled statsmodels-0.10.2
Successfully installed statsmodels-0.12.2


In [8]:
import pandas as pd
from fbprophet import Prophet
from statsmodels.tsa.ar_model import AutoReg
from statsmodels.tsa.arima.model import ARIMA
import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics import mean_squared_error, median_absolute_error
from sklearn.preprocessing import MinMaxScaler
from xgboost import XGBRegressor
import torch
import torch.nn as nn
import torch.nn.functional as F

In [3]:
data=pd.read_csv('Bitfinex_BTCUSD_d.csv',skiprows=1)
data.head(10)

Unnamed: 0,unix,date,symbol,open,high,low,close,Volume USD,Volume BTC
0,1613347200000,2021-02-15 00:00:00,BTC/USD,48597.0,48775.0,48430.0,48652.0,3957368.0,81.340299
1,1613260800000,2021-02-14 00:00:00,BTC/USD,47177.861058,49659.0,47030.0,48584.0,356489500.0,7337.590673
2,1613174400000,2021-02-13 00:00:00,BTC/USD,47365.0,48209.0,46255.0,47177.0,246511600.0,5225.250628
3,1613088000000,2021-02-12 00:00:00,BTC/USD,48001.0,48945.0,45851.953566,47397.0,511380400.0,10789.298784
4,1613001600000,2021-02-11 00:00:00,BTC/USD,44840.0,48598.0,43917.419356,48008.0,494500400.0,10300.375218
5,1612915200000,2021-02-10 00:00:00,BTC/USD,46485.105745,47383.0,43777.0,44841.0,449268700.0,10019.14962
6,1612828800000,2021-02-09 00:00:00,BTC/USD,46444.0,48277.0,45060.0,46488.0,739299800.0,15903.023966
7,1612742400000,2021-02-08 00:00:00,BTC/USD,38841.0,46666.0,37947.0,46444.0,1151285000.0,24788.664024
8,1612656000000,2021-02-07 00:00:00,BTC/USD,39237.0,39725.715542,37350.0,38837.0,296232200.0,7627.576274
9,1612569600000,2021-02-06 00:00:00,BTC/USD,38260.879932,40985.0,38205.0,39234.0,399442000.0,10181.017289


In [9]:
def Metrics(preds,true): #in case the predictions len is shorter, assume that predictions are not for first few true values
    #print(preds,true)
    #print(preds.shape)
    #print(true.shape)
    mse=mean_squared_error(preds,true[len(true)-len(preds):])
    mae=median_absolute_error(preds,true[len(true)-len(preds):])
    #maybe add something about predicting outliers
    return mse,mae

def add_to_dicts(results,dicts,name):
    for i in range(len(results)):
        dicts[i][name]=results[i]

#simplest prediction method
def last_value(x_test):
    return x_test[:-1]

def last_few_values(x_test):
    return (x_test[:-3]+x_test[1:-2]+x_test[2:-1])/3

def prophet(x_train,x_test):
    df=pd.DataFrame(x_train,columns=['y'])
    df['ds']=pd.date_range(start='1/1/2018',periods=len(x_train))
    m=Prophet()
    m.fit(df)
    future=m.make_future_dataframe(periods=len(x_test))
    forecast=m.predict(future)
    return forecast['yhat'][-438:]

def AR(x_train,x_test,lags):
    model=AutoReg(x_train,lags=lags)
    model_fit=model.fit()

    #code adapted from https://machinelearningmastery.com/autoregression-models-time-series-forecasting-python/
    coef = model_fit.params
    history = x_train[len(x_train)-lags:]
    history = [history[i] for i in range(len(history))]
    predictions = list()
    for t in range(len(x_test)):
        length = len(history)
        lag = [history[i] for i in range(length-lags,length)]
        yhat = coef[0]
        for d in range(lags):
            yhat += coef[d+1] * lag[lags-d-1]
        predictions.append(yhat)
    return np.array(predictions)

def Arima(x_train,x_test,order):
    model=ARIMA(x_train,order=order)
    model_fit=model.fit()
    preds=[]
    preds.append(model_fit.forecast())
    for t in range(len(x_test)):
        model_fit.append(x_test[t],refit=False) #see if true takes too long
        preds.append(model_fit.forecast())
    return np.array(preds)[:-1]

def xgboost(x_train,x_test,lag):
    #adapted from https://www.kaggle.com/kashnitsky/topic-9-part-1-time-series-analysis-in-python
    x_train_df=pd.DataFrame(x_train,columns=['col'])
    x_test_df=pd.DataFrame(x_test,columns=['col'])
    for i in range(1, lag):
        x_train_df["lag_{}".format(i)] = x_train_df['col'].shift(i)
    for i in range(1, lag):
        x_test_df["lag_{}".format(i)] = x_test_df['col'].shift(i)
    xgb=XGBRegressor()
    x_train_df=np.array(x_train_df)
    x_test_df=np.array(x_test_df)
    x_train_df[:-1,:]
    x_train_df[1:,0]
    xgb.fit(x_train_df[:-1,:],x_train_df[1:,0])
    return xgb.predict(x_test_df)

class LSTM(nn.Module):
    def __init__(self, input_dim, hidden_dim, num_layers, output_dim):
        super(LSTM, self).__init__()
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers
        
        self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_dim).requires_grad_()
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_dim).requires_grad_()
        out, (hn, cn) = self.lstm(x, (h0.detach(), c0.detach()))
        out = self.fc(out[:, -1, :]) 
        return out

class RNN(nn.Module):
    def __init__(self, input_dim, hidden_dim, num_layers, output_dim):
        super(RNN, self).__init__()
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers
        
        self.lstm = nn.RNN(input_dim, hidden_dim, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_dim).requires_grad_()
        #c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_dim).requires_grad_()
        out, hn = self.lstm(x)
        out = self.fc(out) 
        return out

class FC(nn.Module):
    def __init__(self,input_dim,hidden_dim,output_dim):
        super(FC,self).__init__()
        self.hidden_dim=hidden_dim
        self.fc1=nn.Linear(input_dim,hidden_dim)
        self.fc2=nn.Linear(hidden_dim,hidden_dim)
        self.fc3=nn.Linear(hidden_dim,hidden_dim)
        self.fc4=nn.Linear(hidden_dim,output_dim)
    def forward(self,x):
        return torch.tanh(self.fc4(F.relu(self.fc3(F.relu(self.fc1(x)))))).squeeze(-1)

class Conv(nn.Module):
    def __init__(self,input_dim):
        super(Conv,self).__init__()
        self.conv1=nn.Conv1d(1,1,3,stride=1)
        #self.conv2=nn.Conv1d(1,1,3,stride=1)
        self.fc1=nn.Linear(17,5)
        self.fc2=nn.Linear(5,1)
    def forward(self,x):
        out=self.conv1(x)
        #print(out.shape)
        #out=self.conv2(out)
        return torch.tanh(self.fc2(F.relu(self.fc1(F.relu(out))))).squeeze(-1)

def train_net(model,x_train,y_train):
    criterion = torch.nn.MSELoss(reduction='mean')
    optimiser = torch.optim.Adam(model.parameters(), lr=.007)
    for t in range(10):
        y_train_pred=model(x_train)
        loss=criterion(y_train_pred,y_train)
        optimiser.zero_grad()
        loss.backward()
        optimiser.step()



In [13]:
#data should be numpy array of time series that contains train and val data together
def run_all(data):
    mse_results={}
    mae_results={}
    dicts=[mse_results,mae_results]
    train_size=int(np.round(len(data)*.8))
    scaler = MinMaxScaler(feature_range=(-1, 1))
    X=scaler.fit_transform(pd.DataFrame(data,columns=['col']))

    x_train=np.array(X[:train_size])
    x_test=np.array(X[train_size:])

    current=Metrics(last_value(x_test),x_test)
    add_to_dicts(current,dicts,'last_value')

    current=Metrics(last_few_values(x_test),x_test)
    add_to_dicts(current,dicts,'last_few_values')

    current=Metrics(prophet(x_train,x_test),x_test)
    add_to_dicts(current,dicts,'Prophet')

    current=Metrics(AR(x_train,x_test,3),x_test)
    add_to_dicts(current,dicts,'AR-3')

    current=Metrics(AR(x_train,x_test,7),x_test)
    add_to_dicts(current,dicts,'AR-7')

    current=Metrics(AR(x_train,x_test,20),x_test)
    add_to_dicts(current,dicts,'AR-20')

    current=Metrics(Arima(x_train,x_test,(5,1,0)),x_test)
    add_to_dicts(current,dicts,'Arima (5,1,0)')

    current=Metrics(Arima(x_train,x_test,(7,1,3)),x_test)
    add_to_dicts(current,dicts,'Arima (7,1,3)')

    current=Metrics(Arima(x_train,x_test,(15,1,5)),x_test)
    add_to_dicts(current,dicts,'Arima (15,1,5)')

    current=Metrics(xgboost(x_train,x_test,5),x_test)
    add_to_dicts(current,dicts,'xgboost 5')

    current=Metrics(xgboost(x_train,x_test,10),x_test)
    add_to_dicts(current,dicts,'xgboost 10')

    current=Metrics(xgboost(x_train,x_test,30),x_test)
    add_to_dicts(current,dicts,'xgboost 30')

    current=Metrics(xgboost(x_train,x_test,100),x_test)
    add_to_dicts(current,dicts,'xgboost 100')

    for d in dicts:
        sorted_dict={k: v for k, v in sorted(d.items(), key=lambda item: item[1])}
        print(sorted_dict)

    return dicts

In [4]:
data=data.reindex(index=data.index[::-1])
print(data)

               unix                 date  ...    Volume USD    Volume BTC
2198     1423440000  2015-02-09 00:00:00  ...  6.493501e+06  29625.030000
2197     1423526400  2015-02-10 00:00:00  ...  6.402351e+06  29268.950000
2196     1423612800  2015-02-11 00:00:00  ...  4.268987e+06  19289.280000
2195     1423699200  2015-02-12 00:00:00  ...  2.468883e+06  11190.650000
2194     1423785600  2015-02-13 00:00:00  ...  6.806973e+06  29079.110000
...             ...                  ...  ...           ...           ...
4     1613001600000  2021-02-11 00:00:00  ...  4.945004e+08  10300.375218
3     1613088000000  2021-02-12 00:00:00  ...  5.113804e+08  10789.298784
2     1613174400000  2021-02-13 00:00:00  ...  2.465116e+08   5225.250628
1     1613260800000  2021-02-14 00:00:00  ...  3.564895e+08   7337.590673
0     1613347200000  2021-02-15 00:00:00  ...  3.957368e+06     81.340299

[2199 rows x 9 columns]


In [10]:
run_all(np.array(data['close']))

INFO:numexpr.utils:NumExpr defaulting to 2 threads.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.

The parameter names will change after 0.12 is released. Set old_names to False to use the new names now. Set old_names to True to use the old names. 


The parameter names will change after 0.12 is released. Set old_names to False to use the new names now. Set old_names to True to use the old names. 


The parameter names will change after 0.12 is released. Set old_names to False to use the new names now. Set old_names to True to use the old names. 



{'last_value': 0.0010030424653286814, 'last_few_values': 0.0015502543139608294, 'xgboost 10': 0.0685091923496665, 'xgboost 30': 0.07252654393702712, 'xgboost 5': 0.07398931845893779, 'xgboost 100': 0.0943633651620381, 'Prophet': 0.1055225381506767, 'Arima (7,1,3)': 0.21018146006557056, 'Arima (15,1,5)': 0.21023179156444632, 'AR-7': 0.21023478122104222, 'Arima (5,1,0)': 0.21028845651635428, 'AR-20': 0.2103770976870858, 'AR-3': 0.21131602467023541}
{'xgboost 10': 0.005403776071861821, 'xgboost 5': 0.005579700953814604, 'xgboost 30': 0.006496008685168142, 'last_value': 0.007101661313926977, 'xgboost 100': 0.007818181700079863, 'last_few_values': 0.009717990414271127, 'Arima (7,1,3)': 0.09968151584494123, 'Arima (15,1,5)': 0.09977941348939023, 'AR-7': 0.09978522736968387, 'Arima (5,1,0)': 0.09988958547487192, 'AR-20': 0.10006183329293927, 'AR-3': 0.10158514981517675, 'Prophet': 0.18286548977348963}


[{'AR-20': 0.2103770976870858,
  'AR-3': 0.21131602467023541,
  'AR-7': 0.21023478122104222,
  'Arima (15,1,5)': 0.21023179156444632,
  'Arima (5,1,0)': 0.21028845651635428,
  'Arima (7,1,3)': 0.21018146006557056,
  'Prophet': 0.1055225381506767,
  'last_few_values': 0.0015502543139608294,
  'last_value': 0.0010030424653286814,
  'xgboost 10': 0.0685091923496665,
  'xgboost 100': 0.0943633651620381,
  'xgboost 30': 0.07252654393702712,
  'xgboost 5': 0.07398931845893779},
 {'AR-20': 0.10006183329293927,
  'AR-3': 0.10158514981517675,
  'AR-7': 0.09978522736968387,
  'Arima (15,1,5)': 0.09977941348939023,
  'Arima (5,1,0)': 0.09988958547487192,
  'Arima (7,1,3)': 0.09968151584494123,
  'Prophet': 0.18286548977348963,
  'last_few_values': 0.009717990414271127,
  'last_value': 0.007101661313926977,
  'xgboost 10': 0.005403776071861821,
  'xgboost 100': 0.007818181700079863,
  'xgboost 30': 0.006496008685168142,
  'xgboost 5': 0.005579700953814604}]