<a href="https://www.kaggle.com/code/aerospacer/time-series?scriptVersionId=126287374" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

In [None]:
#https://www.kaggle.com/datasets/borismarjanovic/price-volume-data-for-all-us-stocks-etfs/code?datasetId=4538&sortBy=voteCount

#https://www.kaggle.com/code/janiobachmann/s-p-500-time-series-forecasting-with-prophet

#https://www.kaggle.com/code/faressayah/stock-market-analysis-prediction-using-lstm

#https://www.kaggle.com/code/prashant111/complete-guide-on-time-series-analysis-in-python

#https://www.kaggle.com/code/shreyasajal/pytorch-forecasting-for-time-series-forecasting

In [None]:
"""
Encoder input layer : nn.Linear()
Positional encoding layer : custom class inheriting from nn.Module
Encoder layer : nn.TransformerEncoderLayer()
Encoder : nn.TransformerEncoder()
Decoder input layer : nn.Linear()
Decoder layer : nn.TransformerDecoderLayer()
Decoder : nn.TransformerDecoder()
Linear mapping : nn.Linear()

"""

In [None]:
!pip install -q yfinance

In [None]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

# For reading stock data from yahoo
from pandas_datareader.data import DataReader
import yfinance as yf
from pandas_datareader import data as pdr

yf.pdr_override()

# For time stamps
from datetime import datetime

In [None]:
# Bare expression: shows the repr of the imported yfinance module.
# NOTE(review): looks like a leftover sanity check — consider removing.
yf


In [None]:
# The tech stocks we'll use for this analysis, with a display name for each
tech_list = ['AAPL', 'GOOG', 'MSFT', 'AMZN', 'TSLA']
company_name = ["APPLE", "GOOGLE", "MICROSOFT", "AMAZON", "TESLA"]

# One-year window ending now. DateOffset clamps 29 Feb to 28 Feb, whereas
# datetime(end.year - 1, end.month, end.day) raises ValueError on a leap day.
end = datetime.now()
start = (pd.Timestamp(end) - pd.DateOffset(years=1)).normalize().to_pydatetime()

# Keep the downloads in a dict keyed by ticker instead of writing through
# globals(), so it is visible which names this cell creates.
stock_frames = {stock: yf.download(stock, start, end) for stock in tech_list}

# Later cells refer to the frames by ticker name (e.g. TSLA), so bind them
# explicitly, in the same order as tech_list.
AAPL, GOOG, MSFT, AMZN, TSLA = (stock_frames[stock] for stock in tech_list)
company_list = [AAPL, GOOG, MSFT, AMZN, TSLA]

# Tag every frame with its company name before stacking them row-wise.
for company, com_name in zip(company_list, company_name):
    company["company_name"] = com_name

df = pd.concat(company_list, axis=0)
df.tail(10)

In [None]:
# Display 10 randomly sampled rows of the combined frame.
df.sample(10)

In [None]:
# Print the combined frame's column summary.
df.info()

In [None]:
# Descriptive statistics over all five companies stacked together.
df.describe()

In [None]:
# Descriptive statistics for the TSLA frame on its own.
TSLA.describe()
# Only about 255 rows cover the one-year window because the data has no rows for weekends.

In [None]:
ma_day = [10, 20, 50]

# Add one rolling-mean column per window length to every company frame.
# The frames are modified in place, so TSLA gains the columns plotted below.
for ma in ma_day:
    column_name = f"MA for {ma} days"
    for company in company_list:
        company[column_name] = company['Adj Close'].rolling(ma).mean()

# Draw on an explicit Axes. DataFrame.plot() without ax= opens a figure of its
# own, which left the 15x10 figure created here empty and unsized the real plot.
fig, ax = plt.subplots(figsize=(15, 10))
TSLA[['Adj Close', 'MA for 10 days', 'MA for 20 days', 'MA for 50 days']].plot(ax=ax)
ax.set_title('TESLA')

fig.tight_layout()

In [None]:
# prediction

# Get the stock quote. yf.download is called directly so this cell does not
# depend on the yf.pdr_override() patch of pandas_datareader; the earlier
# download cell already uses yf.download the same way.
df = yf.download('TSLA', start='2018-01-01', end='2023-01-01')
# Show the data
df

In [None]:
# Line chart of the closing price over the whole downloaded period.
close_fig, close_ax = plt.subplots(figsize=(16, 6))
close_ax.plot(df['Close'])
close_ax.set_title('Close Price History')
close_ax.set_xlabel('Date', fontsize=18)
close_ax.set_ylabel('Close Price USD ($)', fontsize=18)
plt.show()

In [None]:
# Keep only the 'Close' column (the result is still a DataFrame)
data = df.filter(items=['Close'])
# Raw prices as a NumPy array
dataset = data.to_numpy()
# 80 % of the rows, rounded up, are used for training
training_data_len = int(np.ceil(0.80 * len(dataset)))

training_data_len

In [None]:
# Display the one-column Close frame.
data

In [None]:
# Display the NumPy array of closing prices taken from `data`.
dataset

In [None]:
# Scale the data
from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler(feature_range=(0, 1))
# Learn the min/max from the training rows only. Fitting on the full series
# would leak the hold-out period's price range into training. Hold-out values
# may therefore fall outside [0, 1] after the transform.
scaler.fit(dataset[:training_data_len])
scaled_data = scaler.transform(dataset)

scaled_data

In [None]:
# Display the unscaled price array again.
# NOTE(review): presumably a check that scaling left `dataset` untouched — confirm or remove.
dataset

In [None]:
# Build the supervised training set from the scaled series.
# The training rows are the first `training_data_len` rows of the scaled data.
train_data = scaled_data[:int(training_data_len), :]

# Each sample is the 60 values that precede position `stop`; its target is the
# value at `stop` itself.
target_positions = range(60, len(train_data))
x_train = np.array([train_data[stop - 60:stop, 0] for stop in target_positions])
y_train = np.array([train_data[stop, 0] for stop in target_positions])

# Append a trailing feature axis: (samples, 60) -> (samples, 60, 1)
x_train = x_train.reshape(x_train.shape[0], x_train.shape[1], 1)

In [None]:
from keras.models import Sequential
from keras.layers import Dense, LSTM

# Two stacked LSTM layers followed by two dense layers; the last dense layer
# emits the single predicted value.
lookback_steps = x_train.shape[1]
model = Sequential([
    LSTM(128, return_sequences=True, input_shape=(lookback_steps, 1)),
    LSTM(64, return_sequences=False),
    Dense(25),
    Dense(1),
])

# Mean squared error loss, Adam optimizer
model.compile(loss='mean_squared_error', optimizer='adam')

# Fit on the training windows
model.fit(x_train, y_train, batch_size=50, epochs=10)

In [None]:
# Hold-out inputs: start 60 rows before the split so that the first hold-out
# row already has a full 60-value history.
test_data = scaled_data[training_data_len - 60:, :]
# Targets stay in the original (unscaled) units
y_test = dataset[training_data_len:, :]

# One 60-value window per hold-out row, with a trailing feature axis
x_test = np.array([test_data[stop - 60:stop, 0] for stop in range(60, len(test_data))])
x_test = x_test.reshape(x_test.shape[0], x_test.shape[1], 1)

# Predict in scaled space, then map back to the original units
predictions = scaler.inverse_transform(model.predict(x_test))

# Root mean squared error (RMSE) between predictions and actual values
squared_errors = (predictions - y_test) ** 2
rmse = np.sqrt(np.mean(squared_errors))
rmse

In [None]:
# Print the column summary of the one-column Close frame.
data.info()

In [None]:
# Plot the data
train = data[:training_data_len]
# Take an explicit copy: adding a column to a plain slice of `data` triggers
# pandas' SettingWithCopyWarning and is not guaranteed to stick.
valid = data[training_data_len:].copy()
valid['Predictions'] = predictions
# Visualize the data
plt.figure(figsize=(16,6))
plt.title('Model')
plt.xlabel('Date', fontsize=18)
plt.ylabel('Close Price USD ($)', fontsize=18)
plt.plot(train['Close'])
plt.plot(valid[['Close', 'Predictions']])
plt.legend(['Train', 'Val', 'Predictions'], loc='lower right')
plt.show()

# ARIMA

In [None]:
import statsmodels.api as sm

# ARIMA(1, 1, 1) fitted on the scaled training series.
model = sm.tsa.arima.ARIMA(train_data, order=(1, 1, 1))
fitted = model.fit()

# Forecast exactly the rows that follow the training slice. test_data starts
# 60 rows *before* the split (the LSTM look-back), so len(test_data) would
# forecast 60 steps past the end of the data.
n_holdout = len(scaled_data) - len(train_data)
arima_preds = fitted.forecast(steps=n_holdout)

print(fitted.summary())

In [None]:
# Forecast with no `steps` argument, i.e. the library's default horizon
# (NOTE(review): presumably one step ahead — confirm against the statsmodels docs).
fitted.forecast()

In [None]:
from sklearn.metrics import mean_absolute_error, mean_squared_error
from statsmodels.tsa.arima.model import ARIMA

# ARIMA(5, 1, 0) fitted on the scaled training series.
model = ARIMA(train_data, order=(5, 1, 0))
model_fit = model.fit()
#print(model_fit.summary())

# Actual hold-out prices: the rows of `dataset` that follow the training slice.
# (test_data cannot be used here: it is a NumPy array with no 'Close' key and
# it also contains the 60 look-back rows that belong to the training period.)
holdout_prices = dataset[len(train_data):, 0]

# Make predictions using ARIMA. forecast() lives on the fitted results object,
# not on the unfitted model, and already returns the whole horizon, so the
# result must not be indexed with [0].
arima_preds = model_fit.forecast(steps=len(holdout_prices))

# The model was fitted on scaled values; map the forecast back to price units
# so the errors are comparable with the LSTM RMSE computed on unscaled prices.
arima_preds_usd = scaler.inverse_transform(
    np.asarray(arima_preds, dtype=float).reshape(-1, 1)
).ravel()

# Calculate RMSE and MAE for ARIMA
arima_rmse = np.sqrt(mean_squared_error(holdout_prices, arima_preds_usd))
arima_mae = mean_absolute_error(holdout_prices, arima_preds_usd)
print(f"ARIMA RMSE: {arima_rmse:.2f}")
print(f"ARIMA MAE: {arima_mae:.2f}")


# plot residual errors
residuals = pd.DataFrame(model_fit.resid)
residuals.plot()
plt.show()
residuals.plot(kind='kde')
plt.show()
print (residuals.describe())

In [None]:
# Plot the train data, test data, and ARIMA predictions
# train_data / test_data are scaled NumPy arrays, so the prices are taken from
# the `data` frame and split at the length of the training slice.
train_size = len(train_data)
close_prices = data['Close']
n_holdout = len(close_prices) - train_size

# arima_preds is in scaled units and may be longer than the hold-out period;
# keep the first n_holdout steps and map them back to price units.
forecast_scaled = np.asarray(arima_preds, dtype=float).reshape(-1, 1)[:n_holdout]
arima_preds_usd = scaler.inverse_transform(forecast_scaled).ravel()
forecast_index = data.index[train_size:train_size + len(arima_preds_usd)]

plt.figure(figsize=(14, 6))
plt.plot(data.index[:train_size], close_prices.iloc[:train_size], label='Train Data', color='blue')
plt.plot(data.index[train_size:], close_prices.iloc[train_size:], label='Test Data', color='green')
plt.plot(forecast_index, arima_preds_usd, label='ARIMA Predictions', color='red', linestyle='--')
plt.xlabel('Date')
plt.ylabel('Closing Price')
plt.title('Tesla Closing Price with ARIMA Predictions')
plt.legend()
plt.show()


In [None]:
import statsmodels.api as sm
from statsmodels.tsa.stattools import adfuller
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf

In [None]:
# Baseline ARIMA: no `order` is passed, so the library default is used.
arima_model = ARIMA(train_data)#, order=best_pdq)
arima_model_fit = arima_model.fit()

# Make predictions using ARIMA. forecast() returns the whole horizon, so the
# result is kept as-is (indexing with [0] kept only the first step). The horizon
# is the number of rows after the training slice; len(test_data) would add the
# 60 look-back rows that overlap the training period.
arima_preds = arima_model_fit.forecast(steps=len(scaled_data) - len(train_data))


In [None]:
# Rows are daily, but the data has no rows for weekends, so one calendar year
# spans roughly 252 rows rather than 365. Use that as the yearly period.
result = seasonal_decompose(data, model='additive', period=252)
figure = result.plot()

In [None]:
#valid = np.reshape(valid.Close.values, (valid.shape[0], valid.Close.shape[0], 1))

In [None]:
# Display the validation frame.
valid

In [None]:
# Display the Close column of the validation frame (attribute access).
valid.Close

In [None]:
# Same column as the previous cell, selected by key instead of attribute.
valid['Close']

In [None]:
# Print the column summary of the training frame.
train.info()

In [None]:
# Print the column summary of the validation frame.
valid.info()

In [None]:
# Walk-forward ARIMA evaluation on the unscaled closing prices.
# Work on plain arrays: iterating a DataFrame yields its column names, not its
# rows, so the history has to be built from the 'Close' values.
train_close = train['Close'].to_numpy()
# `test` holds the observed validation prices the loop steps through; `valid`
# itself is left untouched so this cell can be re-run.
test = valid['Close'].to_numpy()

history = list(train_close)
predictions = []

# walk-forward validation: refit on everything seen so far, forecast one step,
# then reveal the true observation. One model fit per validation row, so this
# cell is slow.
for obs in test:
    model = ARIMA(history, order=(3,1,3))
    model_fit = model.fit()
    yhat = model_fit.forecast()[0]
    predictions.append(yhat)
    history.append(obs)

In [None]:
# mean_squared_error is not imported anywhere earlier in the notebook.
from sklearn.metrics import mean_squared_error

# evaluate forecasts
rolling_mse = mean_squared_error(test, predictions)
print('Test MSE: %.3f' % rolling_mse)

In [None]:
# df_train / df_valid are only created in a later cell, so referencing them
# here raises NameError on a fresh run. Build the arrays from the positional
# split that is already available at this point instead.
close_values = data['Close'].to_numpy()
train = close_values[:training_data_len]
test = close_values[training_data_len:]

In [None]:
# Positional split of the Close frame: the first `training_data_len` rows are
# the training part, the remaining rows the validation part.
train = data.iloc[:training_data_len]
valid = data.iloc[training_data_len:]

In [None]:
stocks_data = data
# `data` holds only the 'Close' column; the dates are its index, so there is no
# `Date` column to filter on. Split on the index year instead of comparing a
# date with the integer 2021.
# NOTE(review): assumes the index is a DatetimeIndex, as the date-axis plots above suggest — confirm.
is_before_2021 = stocks_data.index.year < 2021
df_train = stocks_data[is_before_2021]
df_valid = stocks_data[~is_before_2021]

In [None]:
# Closing prices of each split as plain NumPy arrays
train = df_train['Close'].to_numpy()
test = df_valid['Close'].to_numpy()

In [None]:
history = list(train)
predictions = []

# walk-forward validation: refit on everything seen so far, forecast one step,
# then reveal the true observation.
# Iterate over `test` itself. The observations come from `test`, while `valid`
# is a different split, so looping over range(len(valid)) left `predictions`
# with a different length than `test`.
for obs in test:
    model = ARIMA(history, order=(3,1,3))
    model_fit = model.fit()
    output = model_fit.forecast()
    yhat = output[0]
    predictions.append(yhat)
    history.append(obs)

## TimeSeriesTransformer

In [None]:
# Encoder input layer: a linear projection from input_size features to dim_val.
# NOTE(review): this reads like a fragment of a model's __init__ — `self`,
# `input_size` and `dim_val` are not defined in the visible notebook, and `nn`
# is only imported in a later cell, so this cell cannot run on its own.
self.encoder_input_layer = nn.Linear(
  in_features=input_size, 
  out_features=dim_val
)

In [None]:
# Positional Encoder

import torch
import torch.nn as nn 
import math
from torch import nn, Tensor

class PositionalEncoder(nn.Module):
    """
    Adds fixed sinusoidal positional encodings to a sequence tensor and applies
    dropout to the result.

    The authors of the original transformer paper describe very succinctly what 
    the positional encoding layer does and why it is needed:
    
    "Since our model contains no recurrence and no convolution, in order for the 
    model to make use of the order of the sequence, we must inject some 
    information about the relative or absolute position of the tokens in the 
    sequence." (Vaswani et al, 2017)
    Adapted from: 
    https://pytorch.org/tutorials/beginner/transformer_tutorial.html
    """

    def __init__(
        self, 
        dropout: float=0.1, 
        max_seq_len: int=5000, 
        d_model: int=512,
        batch_first: bool=False
        ):

        """
        Parameters:
            dropout: the dropout rate
            max_seq_len: the maximum length of the input sequences
            d_model: The dimension of the output of sub-layers in the model 
                     (Vaswani et al, 2017)
            batch_first: True if inputs are [batch_size, seq_len, d_model],
                         False if they are [seq_len, batch_size, d_model]
        """

        super().__init__()

        self.d_model = d_model
        
        self.dropout = nn.Dropout(p=dropout)

        self.batch_first = batch_first

        # Index of the sequence dimension in the input tensor
        self.x_dim = 1 if batch_first else 0

        # Sinusoid table, stored sequence-first as [max_seq_len, 1, d_model]
        position = torch.arange(max_seq_len).unsqueeze(1)
        
        div_term = torch.exp(torch.arange(0, d_model, 2) * (-math.log(10000.0) / d_model))
        
        pe = torch.zeros(max_seq_len, 1, d_model)
        
        # Even feature indices carry the sine terms
        pe[:, 0, 0::2] = torch.sin(position * div_term)
        
        # Odd feature indices carry the cosine terms. For an odd d_model there
        # is one cosine slot fewer than sine slots, so trim div_term to fit.
        pe[:, 0, 1::2] = torch.cos(position * div_term[: d_model // 2])
        
        # Buffer: moves with the module (device / state_dict) but is not trained
        self.register_buffer('pe', pe)
        
    def forward(self, x: Tensor) -> Tensor:
        """
        Args:
            x: Tensor, shape [batch_size, enc_seq_len, dim_val] or 
               [enc_seq_len, batch_size, dim_val]

        Returns:
            Tensor of the same shape as x: x plus the positional encoding,
            passed through dropout.
        """

        # Encodings for the first seq_len positions: [seq_len, 1, d_model]
        pos_encoding = self.pe[:x.size(self.x_dim)]

        if self.batch_first:
            # Move the sequence axis to dim 1 -> [1, seq_len, d_model] so the
            # table broadcasts over the batch axis instead of being matched
            # against it.
            pos_encoding = pos_encoding.transpose(0, 1)

        x = x + pos_encoding

        return self.dropout(x)

In [None]:
# Positional Encoder

# NOTE(review): a PositionalEncoder class is defined in a cell of this notebook,
# while this import expects a separate `positional_encoder` module — confirm
# that module exists in the runtime environment.
import positional_encoder as pe

# Create positional encoder
# NOTE(review): fragment of a model's __init__ — `self`, `dim_val`,
# `dropout_pos_enc` and `max_seq_len` are not defined in the visible notebook.
# NOTE(review): batch_first is not passed here, while the encoder/decoder
# layers in the following cells are built with batch_first=True — confirm the
# intended tensor layout.
self.positional_encoding_layer = pe.PositionalEncoder(
    d_model=dim_val,
    dropout=dropout_pos_enc,
    max_seq_len=max_seq_len
    )

# The encoder input layer produces an output of size dim_val.

In [None]:
# Create an encoder layer
# NOTE(review): fragment of a model's __init__ — `self`, `dim_val`, `n_heads`,
# `n_encoder_layers` and `num_predicted_features` are not defined in the
# visible notebook, so this cell cannot run on its own.

encoder_layer = nn.TransformerEncoderLayer(
    d_model=dim_val,
    nhead=n_heads, 
    batch_first=True
    )

# Stack the encoder layer num_layers times in nn.TransformerEncoder
self.encoder = nn.TransformerEncoder(
    encoder_layer=encoder_layer,
    num_layers=n_encoder_layers, 
    norm=None
)

# Decoder input layer: a linear projection from the predicted features to dim_val
self.decoder_input_layer = nn.Linear(
  in_features=num_predicted_features, # the number of features you want to predict. Usually just 1 
  out_features=dim_val
) 

In [None]:

# Create the decoder layer
# NOTE(review): fragment of a model's __init__ — `self`, `dim_val`, `n_heads`,
# `n_decoder_layers` and `num_predicted_features` are not defined in the
# visible notebook, so this cell cannot run on its own.
decoder_layer = nn.TransformerDecoderLayer(
  d_model=dim_val, 
  nhead=n_heads,
  batch_first=True
  )

# Stack the decoder layer n times
self.decoder = nn.TransformerDecoder(
  decoder_layer=decoder_layer,
  num_layers=n_decoder_layers, 
  norm=None
  )

# Final linear mapping from dim_val back to the number of predicted features
self.linear_mapping = nn.Linear(
  in_features=dim_val,
  out_features=num_predicted_features
  )