# Data Ingestion

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

import os
import sys

from statsmodels.tsa.stattools import adfuller
from statsmodels.tsa.seasonal import seasonal_decompose

from statsmodels.graphics.tsaplots import plot_acf, plot_pacf

import statsmodels.api as sm
from statsmodels.tsa.arima.model import ARIMA
from sklearn. metrics import mean_squared_error

from statsmodels.graphics.tsaplots import plot_predict

import warnings
warnings.filterwarnings('ignore')

In [None]:
# Load TSLA.csv (resolved relative to the current working directory) into a
# DataFrame and preview the first rows.
data = pd.read_csv('TSLA.csv')
data.head()

In [None]:
# Keep only the two columns used downstream. The explicit .copy() makes
# stock_data an independent frame rather than a slice of `data`, so assigning
# to one of its columns later is unambiguous and does not raise pandas'
# SettingWithCopyWarning.
stock_data = data[['Date', 'Close']].copy()

stock_data.head(7)

In [None]:
stock_data.info()

In [None]:
# Convert the 'Date' column to pandas datetime values so it can be used as a
# time index.
stock_data['Date'] = pd.to_datetime(stock_data['Date'])

In [None]:
stock_data.info()

In [None]:
stock_data.head(2)

In [None]:
# Make 'Date' the index, leaving 'Close' as the only column, then preview.
stock_data = stock_data.set_index('Date')
stock_data.head(2)

# EDA

In [None]:
stock_data.describe()

In [None]:
plt.plot(stock_data.Close)

In [None]:
plt.hist(stock_data.Close)

In [None]:
# Line chart of the closing price over time.
plt.style.use('ggplot')
plt.figure(figsize=(18, 8))
plt.title('Tesla Stock Closing Price', fontsize=30)
plt.xlabel('Dates', fontsize=20)
plt.ylabel('Close Prices', fontsize=20)
plt.grid(True)
# Tick font sizes are set before the data is drawn, as in the original cell.
plt.xticks(fontsize=15)
plt.yticks(fontsize=15)
plt.plot(stock_data['Close'], color='blue', linewidth=3)
plt.show()

In [None]:
# Histogram of the closing price. The x-axis holds the price bins and the
# y-axis the number of observations in each bin, so the axes are labelled
# with those quantities.
plt.style.use('ggplot')
plt.figure(figsize=(18,8))
plt.grid(True)
plt.xlabel('Close Prices', fontsize = 20)
plt.xticks(fontsize = 15)
plt.ylabel('Frequency', fontsize = 20)
plt.yticks(fontsize = 15)
plt.hist(stock_data.Close, linewidth = 3, color = 'blue')
plt.title('Tesla Stock Closing Price', fontsize = 30)
plt.show()

In [None]:
df_close = stock_data.Close

# Kernel density estimate of the closing price: the x-axis is the price and
# the y-axis the estimated density.
df_close.plot(kind = 'kde', figsize = (18,8), linewidth = 3)

# The first positional argument of plt.grid is the on/off flag, so pass a bool.
plt.grid(True)
plt.xlabel('Close Prices', fontsize = 20)
plt.xticks(fontsize = 15)
plt.ylabel('Density', fontsize = 20)
plt.yticks(fontsize = 15)
plt.title('Tesla Stock Closing Price', fontsize = 30)
plt.show()

In [None]:
roll_mean = stock_data.Close.rolling(48).mean()

In [None]:
roll_std = stock_data.Close.rolling(48).std()

In [None]:
# Closing price together with its rolling mean and rolling standard
# deviation. Each line carries a label and a legend is drawn so the three
# curves can be told apart.
plt.style.use('ggplot')
plt.figure(figsize=(18,8))
plt.grid(True)
plt.xlabel('Dates', fontsize = 20)
plt.xticks(fontsize = 15)
plt.ylabel('Close Prices', fontsize = 20)
plt.yticks(fontsize = 15)
plt.plot(stock_data.Close, linewidth = 3, color = 'blue', label = 'Original')
plt.plot(roll_mean, linewidth = 3, color = 'red', label = 'Rolling Mean')
plt.plot(roll_std, linewidth = 3, color = 'green', label = 'Rolling Std')
plt.legend(loc = 'best', fontsize = 20)
plt.title('Tesla Stock Closing Price', fontsize = 30)
plt.show()

In [None]:
# ADF (Augmented Dickey-Fuller) test on the Close series.
# The result is a tuple; later cells read its first four entries and the
# dict of critical values stored at index 4.

from statsmodels.tsa.stattools import adfuller
adft = adfuller(stock_data['Close'])
adft

In [None]:
pd.Series(adft[0:4], index = ['test stats', 'p-value', 'lag', 'data points'])

In [None]:
# adft[4] maps each significance level to its critical value; print them.
# (The loop needs a body to be valid Python.)
for key, values in adft[4].items():
    print('critical value (%s): %s' % (key, values))

In [None]:
# Test for stationarity


def test_stationarity(timeseries):
    """Plot rolling statistics of a series and print its ADF test results.

    Draws the series with its 48-observation rolling mean and rolling
    standard deviation, then runs the Augmented Dickey-Fuller test (lag
    selection by AIC) and prints the first four result entries followed by
    the critical values.

    Parameters:
        timeseries: series to examine; it must provide ``.rolling`` and be
            accepted by ``adfuller`` (the notebook passes pandas Series).

    Returns:
        None. The output is the figure and the printed summary.
    """
    # Rolling statistics over a 48-observation window.
    roll_mean = timeseries.rolling(48).mean()
    roll_std = timeseries.rolling(48).std()

    # Plot the series and its rolling statistics.
    plt.figure(figsize=(18,8))
    # plt.grid's first positional argument is the on/off flag, so pass a bool
    # rather than the string 'both'.
    plt.grid(True)
    plt.xlabel('Dates', fontsize = 20)
    plt.xticks(fontsize = 15)
    plt.ylabel('Close Prices', fontsize = 20)
    plt.yticks(fontsize = 15)

    plt.plot(timeseries, linewidth = 3, color = 'blue', label='Original')
    plt.plot(roll_mean, linewidth = 3, color = 'red', label='Rolling Mean')
    plt.plot(roll_std, linewidth = 4, color = 'black',  label='Rolling Std')

    plt.legend(loc='best', fontsize=20, shadow=True, facecolor='lightpink', edgecolor = 'k')

    plt.title('Rolling Mean and Standard Deviation', fontsize = 30)
    plt.show(block=False)

    # Augmented Dickey-Fuller test.
    print("Results of dickey fuller test")
    adft = adfuller(timeseries, autolag='AIC')
    output = pd.Series(adft[0:4], index = ['test stats', 'p-value', 'lag', 'data points'])
    # adft[4] is a dict of critical values keyed by significance level.
    for key, values in adft[4].items():
        output['critical value (%s)'%key] = values
    print(output)
    

In [None]:
test_stationarity(stock_data.Close)

# DATA DECOMPOSITION

In [None]:
from statsmodels.tsa.seasonal import seasonal_decompose

# Decompose Close with the library's default model and a 12-observation
# period, then show the seasonal component.
# NOTE(review): period=12 is a common choice for monthly data; confirm it is
# the intended cycle length for this series' sampling frequency.
result = seasonal_decompose(stock_data.Close, period=12)
result.seasonal

In [None]:
# result.plot() creates and returns its own figure, so no plt.figure() call
# is made first (it would only leave an extra, empty figure behind). The
# returned figure is resized afterwards.
fig = result.plot()
fig.set_size_inches(17,10)

In [None]:
result = seasonal_decompose(stock_data.Close, period=12, model = 'multiplicative')

In [None]:
# result.plot() creates and returns its own figure, so no plt.figure() call
# is made first (it would only leave an extra, empty figure behind). The
# returned figure is resized afterwards.
fig = result.plot()
fig.set_size_inches(17,10)

# Check Outliers 

In [None]:
import seaborn as sns

sns.boxplot(stock_data.Close)

In [None]:
# Box plot of the closing price on an explicitly sized axes.
fig, ax = plt.subplots(figsize=(20, 10))
ax.boxplot(stock_data.Close)

# Axis labels and title set one after another, in the original order.
ax.set_xlabel('Variables')
ax.set_ylabel('Values')
ax.set_title('Box Plot')

plt.show()

# Check Autocorrelation

In [None]:
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf

plot_acf(stock_data.Close)
plot_pacf(stock_data.Close)

# Preprocessing of the data

##### Convert data into stationary time series

In [None]:
# differencing

df_close = stock_data['Close']
df_close = df_close.diff(1)
df_close

In [None]:
df_close = df_close.dropna()
df_close

In [None]:
test_stationarity(df_close)

In [None]:
fig = plt.figure(figsize=(12,12))

ax1 = fig.add_subplot(211)
fig = plot_acf(df_close, ax=ax1)

ax2 = fig.add_subplot(212)
fig = plot_pacf(df_close, ax=ax2)

In [None]:
# Split the series into train and test sets: the last 60 observations are
# held out for testing. .iloc makes the positional slicing explicit.
# At this point df_close holds the first-differenced Close series, so the
# plot is labelled as differenced values rather than raw prices.

train_data = df_close.iloc[:-60]
test_data = df_close.iloc[-60:]

plt.style.use('ggplot')
plt.figure(figsize=(18,8))
plt.grid(True)
plt.xlabel('Dates', fontsize = 20)
plt.xticks(fontsize = 15)
plt.ylabel('Differenced Close Prices', fontsize = 20)
plt.yticks(fontsize = 15)

plt.plot(train_data, linewidth = 5, color = 'green', label = 'Train data')

plt.plot(test_data, linewidth = 5, color = 'blue', label = 'Test data')

plt.legend(fontsize = 20, shadow = True, facecolor='lightpink', edgecolor = 'k')

plt.title('Tesla Stock Closing Price (1st Difference)', fontsize = 30)
plt.show()

# Model Building

In [None]:
import statsmodels.api as sm
from statsmodels.tsa.arima.model import ARIMA
from sklearn. metrics import mean_squared_error

In [None]:
history = [x for x in train_data]

In [None]:
model = ARIMA(history, order=(1,1,1))
model

In [None]:
model = model.fit()

In [None]:
model.summary()

In [None]:
model.forecast()

In [None]:
# First held-out observation. test_data is indexed by date, so positional
# access goes through .iloc (an integer key in [] is treated as a label).
test_data.iloc[0]

In [None]:
# Squared error of the one-step forecast against the first held-out value
# (.iloc because test_data is indexed by date, not by position).
mean_squared_error([test_data.iloc[0]], model.forecast())

In [None]:
import numpy as np
# RMSE of the one-step forecast against the first held-out value
# (.iloc because test_data is indexed by date, not by position).
np.sqrt(mean_squared_error([test_data.iloc[0]], model.forecast()))

In [None]:
p_values = range(0,3)

d_values = range(0,3)

q_values = range(0,3)

In [None]:
for i in p_values:
    for j in d_values:
        for k in q_values:
            print(i,j,k)

In [None]:
def train_arima_model(X, y, arima_order):
    """Walk-forward evaluation of one ARIMA order.

    For each value in ``y`` an ARIMA model of the given order is fitted on
    the history seen so far, a one-step forecast is recorded, and the true
    value is then appended to the history.

    Parameters:
        X: iterable of training observations that seeds the history.
        y: iterable of held-out observations, in time order.
        arima_order: ``(p, d, q)`` tuple passed to ``ARIMA(order=...)``.

    Returns:
        Root mean squared error between ``y`` and the one-step forecasts.

    Raises:
        ValueError: if ``y`` is empty, since there is nothing to score.
    """
    history = list(X)
    # Materialise y once and iterate its values. This avoids y[t], which on a
    # date-indexed Series is a label lookup rather than a positional one.
    actuals = list(y)
    if not actuals:
        raise ValueError("y must contain at least one observation")

    predictions = []
    for actual in actuals:
        model_fit = ARIMA(history, order=arima_order).fit()
        # forecast() returns the one-step-ahead values; keep the scalar.
        predictions.append(model_fit.forecast()[0])
        history.append(actual)

    # Out-of-sample error.
    return np.sqrt(mean_squared_error(actuals, predictions))

In [None]:
def evaluate_models(dataset, test, p_values, d_values, q_values):
    """Grid-search ARIMA orders and print the RMSE of each and the best one.

    Every ``(p, d, q)`` combination from the three iterables is scored with
    ``train_arima_model``. Orders that fail are reported and skipped so a
    single bad configuration does not abort the search.

    Parameters:
        dataset: training series; it is cast to float32 before fitting.
        test: held-out observations passed through to ``train_arima_model``.
        p_values, d_values, q_values: iterables of candidate orders.

    Returns:
        None. Results are printed.
    """
    dataset = dataset.astype('float32')
    best_score, best_cfg = float('inf'), None
    for p in p_values:
        for d in d_values:
            for q in q_values:
                order = (p, d, q)
                try:
                    rmse = train_arima_model(dataset, test, order)
                except Exception as exc:
                    # Best effort: report the failing order instead of hiding
                    # it. A bare except would also swallow KeyboardInterrupt.
                    print('ARIMA%s skipped: %s' % (order, exc))
                    continue
                if rmse < best_score:
                    best_score, best_cfg = rmse, order
                print('ARIMA%s RMSE=%.3f' % (order, rmse))
    print('Best ARIMA%s RMSE=%.3f' % (best_cfg, best_score))

In [None]:
import warnings
warnings.filterwarnings('ignore')

p_values = range(0,3)
d_values = range(0,3)
q_values = range(0,3)


evaluate_models(train_data, test_data, p_values, d_values, q_values)

In [None]:
# Walk-forward ARIMA(1,2,2) forecast over the test set: refit on the history
# seen so far, predict one step ahead, then append the true value.
history = list(train_data)

predictions = []

# Iterate over the test values directly; test_data[i] with an integer key is
# a label lookup on a date-indexed Series, not a positional one.
for actual in test_data:
    model = ARIMA(history, order=(1,2,2))
    model = model.fit()
    # forecast() returns the one-step-ahead values; store the scalar so that
    # `predictions` is a flat list of floats. The `alpha` keyword is not
    # passed: it is not a documented forecast() argument (confidence levels
    # belong to get_forecast(...).conf_int(alpha=...)).
    fc = model.forecast()[0]
    predictions.append(fc)
    history.append(actual)
print(f"RMSE is {np.sqrt(mean_squared_error(test_data,predictions))}")

In [None]:
# Compare the true test values with the walk-forward predictions, both
# plotted against their position in the test set.
plt.style.use('ggplot')
plt.figure(figsize=(18,8))
plt.grid(True)

plt.plot(range(len(test_data)), test_data, label= 'True Test Close Value', linewidth = 5)
plt.plot(range(len(predictions)), predictions, label= 'Prediction on Test Data', linewidth = 5)


# Set the tick font size on both axes (xticks was previously called twice,
# leaving the y-axis ticks at the default size).
plt.xticks(fontsize = 15)
plt.yticks(fontsize = 15)

plt.legend(fontsize = 20, shadow = True, facecolor='lightpink', edgecolor = 'k')
plt.show()

In [None]:
# Align the forecasts with the test dates. np.ravel flattens `predictions`
# first, so the Series is numeric whether each entry is a scalar or a
# length-1 array.
fc_series = pd.Series(np.ravel(predictions), index=test_data.index)
fc_series

In [None]:
plt.figure(figsize=(12,5), dpi=100)
plt.grid(True)

plt.plot(train_data, label= 'Training', color='blue')
plt.plot(test_data, label= 'Test', color='green', linewidth = 5)
plt.plot(fc_series, label= 'Forecast', color='red')

plt.title('Forecast Vs Actuals on test data')

plt.legend(loc='upper left' , fontsize = 20)
plt.show()

In [None]:
from statsmodels.graphics.tsaplots import plot_predict
fig = plt.figure(figsize=(18,8))
ax1 = fig.add_subplot(111)
# Plot the fitted model's predictions from position 1 up to position
# len(df_close)+60, drawn on ax1.
# NOTE(review): `model` is whatever fitted result was last bound to that name
# by an earlier cell; confirm it is the intended one before trusting the plot.
plot_predict(result=model, start=1, end=len(df_close)+60, ax=ax1)
# NOTE(review): 'both' is received as plt.grid's first positional argument,
# the on/off flag; presumably plt.grid(True) was meant - confirm.
plt.grid('both')
# NOTE(review): legend entries are matched to plotted artists by order;
# verify these names line up with what plot_predict draws.
plt.legend(['Forecast', 'Close', '95% confidence interval'],fontsize = 20, shadow = True, facecolor='lightpink', edgecolor = 'k')
plt.show()

In [None]:
import warnings
warnings.filterwarnings('ignore')


# Walk-forward SARIMAX forecast over the test set with order (0,1,0) and
# seasonal order (1,1,1,3): refit on the history seen so far, predict one
# step ahead, then append the true value.
history = list(train_data)

predictions = []

# Iterate over the test values directly; test_data[t] with an integer key is
# a label lookup on a date-indexed Series, not a positional one.
for actual in test_data:
    model = sm.tsa.statespace.SARIMAX(history, order=(0,1,0), seasonal_order=(1,1,1,3))
    model = model.fit()
    # forecast() returns the one-step-ahead values; store the scalar so that
    # `predictions` is a flat list of floats. The `alpha` keyword is not
    # passed: it is not a documented forecast() argument (confidence levels
    # belong to get_forecast(...).conf_int(alpha=...)).
    fc = model.forecast()[0]
    predictions.append(fc)
    history.append(actual)
print(f"RMSE of SARIMA Model: {np.sqrt(mean_squared_error(test_data,predictions))}")

In [None]:
# Compare the true test values with the walk-forward predictions, both
# plotted against their position in the test set.
plt.style.use('ggplot')
plt.figure(figsize=(18,8))
plt.grid(True)

plt.plot(range(len(test_data)), test_data, label= 'True Test Close Value', linewidth = 5)
plt.plot(range(len(predictions)), predictions, label= 'Prediction on Test Data', linewidth = 5)


# Set the tick font size on both axes (xticks was previously called twice,
# leaving the y-axis ticks at the default size).
plt.xticks(fontsize = 15)
plt.yticks(fontsize = 15)

plt.legend(fontsize = 20, shadow = True, facecolor='lightpink', edgecolor = 'k')
plt.show()