<a href="https://colab.research.google.com/github/Patriol-LLC/0625/blob/main/Crash_1000_AUTO_ARIMA_Model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
#Announce the start of the analysis run
print("Beginning Crash 1000 ARMA Model Analysis")

In [None]:
#Loading dependencies
print("\nLoading depedencies and building functions")

#Function definitions
def make_plot(time_data,value_data,label_data,x_label_data='Days',y_label_data='USD'):
    """Draw one labelled line plot and display it.

    Parameters:
        time_data: values plotted along the x-axis.
        value_data: values plotted along the y-axis.
        label_data: legend label for the plotted line.
        x_label_data: x-axis label (default 'Days').
        y_label_data: y-axis label (default 'USD').

    Returns:
        None. The figure is shown via plt.show().
    """
    #Imported locally so the helper does not depend on a later notebook cell
    #having already imported matplotlib at module level
    import matplotlib.pyplot as plt

    plt.plot(time_data,value_data,label=label_data)
    #Legend is pinned to the upper-left corner
    plt.legend(loc='upper left')
    plt.xlabel(x_label_data)
    plt.ylabel(y_label_data)
    plt.show()

#We're going to build an ARIMA model
#Dependencies
import numpy as np
import pandas as pd
#Location of the Crash 1000 CSV file
#NOTE(review): path is an empty string here - it must be set to the real CSV location before this cell is run
path = ''

#Announce the source, then load the CSV into a DataFrame
print('\nReading in the data from ', path)
print(80 * '=')
crash_1000 = pd.read_csv(filepath_or_buffer=path)

In [None]:
#Explore the data
print('\nPreprocessing the data')
print('=' * 80)

#Add time element: a running candle counter (0 .. number of rows - 1)
crash_1000['candle'] = np.arange(len(crash_1000))

#Drop any unused column
#One list-based call: column labels are passed as an ordered list rather than
#set literals, and a missing column raises before any column has been removed
crash_1000.drop(columns=['Unnamed: 0','real_volume','time'],inplace=True)

#Feature engineering
#Per-candle spreads: open minus close, and high minus low
crash_1000['open_close_spread'] = crash_1000['open'] - crash_1000['close']
crash_1000['high_low_spread'] = crash_1000['high'] - crash_1000['low']

#Output
print('\nPlotting Crash 1000 close price')
print('=' * 80)

In [None]:
#Plot the data
import matplotlib.pyplot as plt

#Plot the close price against the candle counter
make_plot(
    time_data=crash_1000['candle'],
    value_data=crash_1000['close'],
    label_data='Crash 1000 Close USD',
    x_label_data='Time in Days',
    y_label_data='USD',
)

#Before using an ARMA model, the data should be stationary

#Announce the next step
print("\nPerforming stationarity test")

In [None]:
#Import statsmodels dependencies
from statsmodels.tsa.stattools import adfuller

#Run the ADF test on the close price and unpack its six-element result
close_prices = crash_1000['close']
adf, p, lags, nobs, cv, ic = adfuller(close_prices)

#Report the test statistic and its p-value
print(f'\nADF: {adf} || P-value: {p}')
print(80 * '-')

#Announce the decomposition step
print('\nDecomposing the series into its 3 components')
print(80 * '-')

#Announce the library load
print('\nLoading libraries: ')

In [None]:
#Decompose the trend, seasonality and residual
from statsmodels.tsa.seasonal import seasonal_decompose

#Announce the decomposition
print('\nPerforming decmposition: ')

#Additive decomposition of the close price
#NOTE(review): period=7 here, while the auto_arima search in this file uses m=12 - confirm which seasonal length is intended
ss_decompose = seasonal_decompose(crash_1000['close'],model='additive',period=7)

#Announce the isolation step
print('\nIsolating the terms of the time series')

#Separate the terms of the time series
ss_trend , ss_seasonal , ss_residual = ss_decompose.trend , ss_decompose.seasonal , ss_decompose.resid

#Announce the plot
print('\nPlotting Decomposed Close Price: ')

#Four stacked panels sharing the x-axis only
fig , axes = plt.subplots(4,1,sharex=True,sharey=False)

#Figure size
fig.set_figheight(15)
fig.set_figwidth(10)

#One panel per series, each with its own legend
panels = (
    (crash_1000['close'],'Original'),
    (ss_trend,'Trend'),
    (ss_seasonal,'Seasonal'),
    (ss_residual,'Residual'),
)
for axis , (series , series_label) in zip(axes,panels):
    axis.plot(crash_1000['candle'],series,label=series_label)
    axis.legend(loc='upper left')
plt.show()

print(80 * '=')

print('\nLoading autoarima dependencies')
print(80 * '-')

In [None]:
#Autoarima
from pmdarima.arima import auto_arima

#auto_arima returns the model best suited for the data
#p , d , q ~ non-seasonal components
#  p ~ number of autoregressive terms
#  d ~ number of non-seasonal differences
#  q ~ number of moving average terms
#P , D , Q ~ seasonal components

#Finding optimal model
#NOTE(review): the search is run on the full close series
print('\nPerforming Autoarima search ')
arima_model = auto_arima(
    crash_1000['close'],
    #Non-seasonal search space
    start_p=1, max_p=5,
    d=1, max_d=5,
    start_q=1, max_q=5,
    #Seasonal search space
    seasonal=True, m=12,
    start_P=0, max_P=5,
    D=1, max_D=5,
    start_Q=0, max_Q=5,
    #Search behaviour and reporting
    stepwise=True, n_fits=50,
    trace=True, error_action='ignore', suppress_warnings=True,
)

#Arima model summary
print('\nModel Summary: ')
print(arima_model.summary())
#Presumably the order chosen by an earlier run: SARIMAX(0,1,1)x(5,1,1,12)
print(60 * '-')

#Output
print('\nTrain Test Split ')

original_length = len(crash_1000)

#Split into train & test sets: the first two thirds train, the remainder tests
size = int(original_length * (2/3))

#Positional half-open slices keep the two sets disjoint.
#A label-based .loc[0:size] slice is end-inclusive, so the row at `size` would
#land in both sets and the length check below would always report an error.
close_prices = crash_1000['close']
x_train , x_test = close_prices.iloc[:size] , close_prices.iloc[size:]

#Sanity check: the two parts must add up to the original number of rows
print("\nSuccuefully performed train and test split" if ( original_length == (len(x_train) + len(x_test)) ) else "\n[ERROR]: Original length and sum of train test split do not tally!")
print('-'*60)


#Output SARIMAX
print('\nLoading SARIMAX Libraries ')
print('-'*60)

In [None]:
#Dependency
from statsmodels.tsa.statespace.sarimax import SARIMAX

#Train the model
print('\nTraining the model ')
#x_train is already the close-price Series, so it is passed directly;
#indexing it with ['close'] would look up a row label that does not exist
#Orders are hard-coded: non-seasonal (0,1,1), seasonal (5,1,1) with period 12
sarimax_model = SARIMAX(x_train,order = (0,1,1),seasonal_order = (5,1,1,12))

print('\nFitting the model ')
result = sarimax_model.fit()

print('\nModel summary ')
#Printed explicitly: a bare expression that is not the last statement of a
#cell produces no output
print(result.summary())

#End of file
print('\nEnd of Crash 1000 ARMA Model')
print('-' * 80)