# RNN - AAPL

## `Pandas Reader`

In [1]:
''' Linear Algebra '''
import pandas as pd
import numpy as np

'''Data visualization'''
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_style('whitegrid')
plt.style.use("fivethirtyeight")

In [3]:
''' Read financial data'''
from pandas_datareader.data import DataReader

''' Handling time'''
from datetime import datetime

In [4]:
# tech companies

tech_list = ['AAPL','GOOG','MSFT', 'AMZN']

In [5]:
# today

end = datetime.now()
end

datetime.datetime(2021, 6, 29, 21, 46, 35, 519407)

In [7]:
end.year

2021

In [8]:
# Two years before `end`, truncated to midnight.
# pd.DateOffset clamps the day when the target month is shorter (Feb 29 -> Feb 28),
# whereas datetime(end.year - 2, end.month, end.day) raises ValueError on a leap day.
start = (pd.Timestamp(end).normalize() - pd.DateOffset(years=2)).to_pydatetime()
start

datetime.datetime(2019, 6, 29, 0, 0)

In [9]:
# One price-history frame per ticker, keyed by ticker symbol.
stock_prices = {}

for company in tech_list:
    stock_prices[company] = DataReader(
        company,
        data_source="yahoo",
        start=start,
        end=end,
    )

In [10]:
type(stock_prices)

dict

In [11]:
# For each company: a separator line, the ticker, the frame's shape and its first three rows.
for ticker, prices in stock_prices.items():
    print("-" * 50)
    print(ticker)
    print(prices.shape)
    display(prices.head(3))

--------------------------------------------------
AAPL
(504, 6)


Unnamed: 0_level_0,High,Low,Open,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2019-07-01,51.122501,50.162498,50.7925,50.387501,109012000.0,49.459972
2019-07-02,50.782501,50.34,50.352501,50.682499,67740800.0,49.749546
2019-07-03,51.110001,50.672501,50.82,51.102501,45448000.0,50.161819


--------------------------------------------------
GOOG
(504, 6)


Unnamed: 0_level_0,High,Low,Open,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2019-07-01,1107.579956,1093.703003,1098.0,1097.949951,1436300,1097.949951
2019-07-02,1111.77002,1098.170044,1102.23999,1111.25,991600,1111.25
2019-07-03,1126.76001,1113.859985,1117.410034,1121.579956,767000,1121.579956


--------------------------------------------------
MSFT
(504, 6)


Unnamed: 0_level_0,High,Low,Open,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2019-07-01,136.699997,134.970001,136.630005,135.679993,22613500.0,132.745392
2019-07-02,136.589996,135.339996,136.119995,136.580002,15237800.0,133.6259
2019-07-03,137.740005,136.289993,136.800003,137.460007,13629300.0,134.486893


--------------------------------------------------
AMZN
(504, 6)


Unnamed: 0_level_0,High,Low,Open,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2019-07-01,1929.819946,1914.660034,1922.97998,1922.189941,3192100,1922.189941
2019-07-02,1934.790039,1906.630005,1919.380005,1934.310059,2645900,1934.310059
2019-07-03,1941.589966,1930.5,1935.890015,1939.0,1690300,1939.0


In [None]:
# Tag every frame (in place) with the ticker it is stored under.
for ticker, frame in stock_prices.items():
    frame["company_name"] = ticker

In [None]:
# Stack the per-company frames row-wise, in tech_list order. Iterating over
# tech_list keeps this in sync with the tickers that were downloaded instead of
# repeating the four symbols by hand.
data = pd.concat([stock_prices[company] for company in tech_list], axis=0)

In [None]:
data.sample(10)

## Data Visualisation

### Historical Prices

In [None]:
# Adjusted closing price over time, one panel per company on a 2x2 grid.
plt.figure(figsize=(15, 6))

for position, company in enumerate(tech_list, start=1):
    plt.subplot(2, 2, position)
    stock_prices[company]['Adj Close'].plot()
    plt.ylabel('Adj Close')
    plt.xlabel(None)
    plt.title(f"Closing Price of {company}")

plt.tight_layout()

### Volumes Traded

In [None]:
# Traded volume over time, one panel per company on a 2x2 grid.
plt.figure(figsize=(15, 6))

for position, company in enumerate(tech_list, start=1):
    plt.subplot(2, 2, position)
    stock_prices[company]['Volume'].plot()
    plt.ylabel('Volume')
    plt.xlabel(None)
    plt.title(f"Volume {company}")

plt.tight_layout()

### Daily returns

In [None]:
stock_prices["AAPL"]

In [None]:
(44.083302-42.527481)/42.527481

In [None]:
stock_prices["AAPL"]["Adj Close"].pct_change()

In [None]:
# Add (in place) the day-over-day relative change of the adjusted close to every frame.
for prices in stock_prices.values():
    prices["daily_pct_change"] = prices["Adj Close"].pct_change()

In [None]:
# One distribution plot of daily returns per company, each preceded by a text banner.
for ticker, prices in stock_prices.items():
    print("-" * 50)
    print(f"pct_change distribution for {ticker}")
    sns.displot(prices["daily_pct_change"])
    plt.show()

### Comparing tech companies

In [None]:
DataReader(tech_list, "yahoo", start,end)

In [None]:
new_data = DataReader(tech_list, "yahoo", start,end)["Adj Close"]
new_data

In [None]:
daily_returns = new_data.pct_change()
daily_returns

In [None]:
sns.pairplot(daily_returns)

In [None]:
daily_returns.corr()

In [None]:
sns.heatmap(daily_returns.corr(),annot=True, cmap="RdYlBu")

## AAPL Stock Price

In [None]:
apple = stock_prices['AAPL']
apple.tail(5)

In [None]:
apple = stock_prices['AAPL']["Adj Close"]
apple

In [None]:
apple = stock_prices['AAPL']["Adj Close"]
apple

In [None]:
apple.shape

In [None]:
apple = stock_prices['AAPL'][["Adj Close"]]
apple

In [None]:
apple.shape

In [None]:
# AAPL adjusted close over the whole downloaded period.
plt.figure(figsize=(16, 6))
plt.plot(apple['Adj Close'])
plt.title('Close Price History')
plt.xlabel('Date', fontsize=18)
plt.ylabel('Close Price USD ($)', fontsize=18)
plt.show()

### Temporal Train-Test Split

In [None]:
percentage_split = 0.80

In [None]:
len(apple)*percentage_split

In [None]:
int(len(apple)*percentage_split)

In [None]:
# Chronological split: the leading `percentage_split` share of rows is the
# training period, the remaining rows are held out for testing.
split_index = int(len(apple) * percentage_split)
aapl_train = apple.iloc[:split_index]
aapl_test = apple.iloc[split_index:]

### Scaling

In [None]:
from sklearn.preprocessing import MinMaxScaler

# The scaler learns its min/max from the training period only; the test period
# is transformed with those same training statistics.
minmax_scaler = MinMaxScaler()
aapl_train_transformed = minmax_scaler.fit_transform(aapl_train)
aapl_test_transformed = minmax_scaler.transform(aapl_test)

In [None]:
minmax_scaler.data_min_

In [None]:
minmax_scaler.data_max_

### Creating subsequences...

**Draft...**

In [None]:
length = 10
horizon = 5

In [None]:
last_possible_index = len(aapl_train) - length - horizon
last_possible_index

In [None]:
random_start = np.random.choice(last_possible_index)
random_start

In [None]:
random_start + length

In [None]:
random_start + length + horizon

In [None]:
# Window of `length` rows starting at random_start; the target is the row
# `horizon` positions past the end of that window.
window_end = random_start + length
one_subsequence = aapl_train.iloc[random_start:window_end]
one_target = aapl_train.iloc[window_end + horizon]

In [None]:
one_subsequence

In [None]:
one_target

`subsequence`

In [None]:
def subsequence(df, length, horizon):
    """Draw one random window and its forecasting target from a time series.

    A start position is drawn with ``np.random.choice``. The window is the
    ``length`` consecutive rows beginning there; the target is the first
    column of the row at ``start + length + horizon``, i.e. ``horizon + 1``
    rows after the last row of the window.

    Parameters
    ----------
    df : 2-D numpy array or pandas DataFrame
        Time-ordered observations, one row per time step.
    length : int
        Number of rows in the returned window.
    horizon : int
        Offset added to the end of the window to locate the target row.

    Returns
    -------
    X : numpy.ndarray
        The ``length`` selected rows.
    y : scalar
        First-column value of the target row.

    Raises
    ------
    ValueError
        If ``df`` has too few rows to fit a window plus its target.
    """
    # Plain [] on a DataFrame selects columns by label, so index the underlying
    # array instead; ndarray input is passed through unchanged.
    values = df.to_numpy() if hasattr(df, "to_numpy") else np.asarray(df)

    last_possible_index = len(values) - length - horizon
    if last_possible_index <= 0:
        raise ValueError(
            f"series too short: {len(values)} rows cannot hold a window of "
            f"{length} rows plus a target {horizon} rows past its end"
        )

    random_start = np.random.choice(last_possible_index)
    X = values[random_start:random_start + length]
    y = values[random_start + length + horizon][0]
    return X, y
    
    

`multiple subsequences`

In [None]:
def multiple_subsequences(df, nb_subsequences, horizon = 5, min_length = 10, max_length = 15):
    """Sample several random windows of varying length, each with its target.

    Window lengths are drawn with ``np.random.randint(min_length, max_length)``,
    so ``min_length`` is inclusive and ``max_length`` is exclusive. Each window
    and target then comes from one call to ``subsequence``.

    Parameters
    ----------
    df : array-like
        Time-ordered observations, forwarded unchanged to ``subsequence``.
    nb_subsequences : int
        Number of (window, target) pairs to draw.
    horizon : int, default 5
        Forwarded to ``subsequence``.
    min_length : int, default 10
        Smallest window length that can be drawn (inclusive).
    max_length : int, default 15
        Upper bound on the window length (exclusive).

    Returns
    -------
    list_of_X : list
        The sampled windows; their lengths generally differ.
    list_of_y : list
        The target value matching each window.
    """
    list_of_lengths = np.random.randint(min_length, max_length, nb_subsequences)

    list_of_X = []
    list_of_y = []

    for length in list_of_lengths:
        tmp_X, tmp_y = subsequence(df, length, horizon)
        list_of_X.append(tmp_X)
        list_of_y.append(tmp_y)

    return list_of_X, list_of_y

In [None]:
multiple_subsequences(aapl_train_transformed, 3, horizon = 5)[1]

## LSTM

### Creating train_test_data

In [None]:
# 2000 random training windows and 1000 random test windows, each drawn from its own scaled split.
X_train, y_train = multiple_subsequences(
    df=aapl_train_transformed, nb_subsequences=2000, horizon=5
)
X_test, y_test = multiple_subsequences(
    df=aapl_test_transformed, nb_subsequences=1000, horizon=5
)

In [None]:
type(X_train)

In [None]:
len(X_train)

In [None]:
# [len(X_train[k]) for k in range(len(X_train)]

sns.histplot([len(_) for _ in X_train],bins=5);

In [None]:
sns.histplot([len(_) for _ in X_test],bins=5);

### Padding

In [None]:
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [None]:
# Pad at the end of each sequence with -999 as the filler value.
padding_options = {"padding": "post", "dtype": "float32", "value": -999}
X_train_pad = pad_sequences(X_train, **padding_options)
X_test_pad = pad_sequences(X_test, **padding_options)

In [None]:
sns.histplot([len(_) for _ in X_train_pad],bins=5);

In [None]:
sns.histplot([len(_) for _ in X_test_pad],bins=5);

In [None]:
X_train_pad.shape

In [None]:
X_test_pad.shape

In [None]:
X_train_pad[0]

### Architecture

In [None]:
from tensorflow.keras import models, layers


model = models.Sequential()

# Time steps equal to the padding value (-999) are masked out for the recurrent layers.
model.add(layers.Masking(mask_value = -999))

# Three stacked LSTMs; only the last one collapses the sequence to a single vector.
model.add(layers.LSTM(units = 32, activation="tanh", return_sequences= True))
model.add(layers.LSTM(units = 16, activation="tanh", return_sequences= True))
model.add(layers.LSTM(units = 8, activation="tanh", return_sequences= False))

model.add(layers.Dense(8,activation="relu"))

# Regression head: a linear output leaves the prediction unbounded. A ReLU here
# clips negative values and, once the unit outputs 0, passes no gradient back,
# which can freeze the model on a constant prediction.
model.add(layers.Dense(1,activation="linear"))

In [None]:
# model.summary()

### Compile

In [None]:
from tensorflow.keras import metrics

my_mae  = metrics.MeanAbsoluteError()

In [None]:
# Train on mean squared error and report mean absolute error alongside it.
model.compile(optimizer="rmsprop", loss="mse", metrics=[my_mae])

### Fit

In [None]:
type(y_train)

In [None]:
from tensorflow.keras.callbacks import EarlyStopping

# Early stopping with a patience of 10 epochs; the best weights seen are restored at the end.
es = EarlyStopping(patience = 10, restore_best_weights = True)

fit_options = dict(
    validation_split=0.3,
    batch_size=8,
    epochs=100,
    callbacks=[es],
    verbose=1,
)
history = model.fit(X_train_pad, np.array(y_train), **fit_options)

### Evaluate

In [None]:
res = model.evaluate(X_test_pad, np.array(y_test), verbose = 1)

### Predictions

In [None]:
predictions = model.predict(X_test_pad)

A `MinMaxScaler` applies the following transformation to each value:

$$x_{scaled} = \frac{x - x_{min}}{x_{max}- x_{min}}$$

To recover the original scale, invert the transformation:

$$x = x_{scaled} * (x_{max}- x_{min}) + x_{min}$$

In [None]:
x_min = minmax_scaler.data_min_
x_min

In [None]:
x_max = minmax_scaler.data_max_
x_max

In [None]:
unscaled_predictions = predictions * (x_max - x_min) + x_min

In [None]:
unscaled_predictions = minmax_scaler.inverse_transform(predictions)

In [None]:
unscaled_predictions

In [None]:
# The scaler was fitted on a single feature, so inverse_transform needs one row
# per sample and one column: shape (n_samples, 1). Wrapping the list as
# np.array([y_test]) would instead give one row of n_samples "features".
unscaled_y_test = minmax_scaler.inverse_transform(np.array(y_test).reshape(-1, 1))

In [None]:
unscaled_y_test

In [None]:
# Mean absolute error in original price units. Both arrays are flattened first:
# subtracting a (1, n) array from an (n, 1) array would broadcast to an n x n
# matrix of all pairwise differences, and its mean is not the MAE.
np.mean(np.abs(np.ravel(unscaled_y_test) - np.ravel(unscaled_predictions)))