# Predicting stocks 📈 using RNN and earning money

In [None]:
''' 
In this project our aim is to train an RNN on Nifty50 stock data from the years 2000-2021 and then use it to predict stock prices,
and to evaluate the model on the held-out trading part of the data by employing a trading strategy and seeing if we make any money or not.

'''

In [None]:
# Importing relevant libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import datetime as dt
# Matplotlib 3.6 renamed the bundled seaborn styles to "seaborn-v0_8-*" and 3.8 removed the
# old names, so use whichever spelling this installation provides (falling back to "default").
_whitegrid_style = next(
    (name for name in ("seaborn-v0_8-whitegrid", "seaborn-whitegrid") if name in plt.style.available),
    "default",
)
plt.style.use(_whitegrid_style)

from sklearn.preprocessing import StandardScaler
from sklearn.metrics import r2_score

from tensorflow import keras
from keras.models import Sequential 
from keras.layers import LSTM, Dropout, Dense

In [None]:
def mean_absolute_percent_error(y_true,y_pred):
    """Return the mean absolute percentage error (MAPE) between two arrays, in percent.

    Computed as ``100 * mean(|y_true - y_pred| / |y_true|)`` over every element.

    Parameters
    ----------
    y_true : array-like
        Reference values. Must not contain zeros (the error is relative to them).
    y_pred : array-like
        Predicted values, with the same shape as ``y_true``.

    Returns
    -------
    float
        The MAPE as a percentage (e.g. ``10.0`` means 10 %).

    Raises
    ------
    ValueError
        If the two inputs do not have the same shape.
    """
    actual = np.asarray(y_true, dtype=float)
    predicted = np.asarray(y_pred, dtype=float)
    if actual.shape != predicted.shape:
        # Broadcasting e.g. (N, 1) against (N,) would silently compare every pair of elements.
        raise ValueError(
            f"y_true and y_pred must have the same shape, got {actual.shape} and {predicted.shape}"
        )
    # Take the absolute value per element *before* averaging; averaging first lets
    # over- and under-predictions cancel each other out.
    return 100 * np.mean(np.abs((actual - predicted) / actual))

In [None]:
# Read the LT.csv price history and index it by calendar date (datetime.date objects)
csv_path = r'/kaggle/input/nifty50-stock-market-data-2000-2021/LT.csv'
data = (
    pd.read_csv(csv_path, sep=",", parse_dates=["Date"])
    .assign(date=lambda frame: frame["Date"].dt.date)  # date-only copy of the timestamp column
    .set_index("date")
    .drop(columns=["Date"])
)
data.head()

In [None]:
# Summary statistics of the loaded columns, as a quick sanity check on the raw data
data.describe()

In [None]:
# DataFrame.info() writes its report to stdout itself and returns None, so wrapping it in
# print() only appended a stray "None" line to the output.
data.info()

In [None]:
# Remove the columns that are not used as model inputs, keeping only the price/volume columns
unused_columns = ['Symbol', 'Series', 'Trades', 'Deliverable Volume', '%Deliverble', 'Turnover']
data.drop(columns=unused_columns, inplace=True)
data.head()

In [None]:
''' 
The columns are shown in the plots below. The Prev Close, Open, High, Low, Last, Close, and VWAP columns look
very similar because there is usually not much difference between them within a single day.

'''

In [None]:
# One stacked panel per column. The grid is sized from the data instead of a hard-coded 8,
# so the cell keeps working if the set of columns changes (5.5 in of height per panel
# reproduces the original 15x44 figure for eight columns).
n_panels = len(data.columns)
fig, axes = plt.subplots(n_panels, 1, figsize=(15, 5.5 * n_panels), squeeze=False)
for axis, name in zip(axes[:, 0], data.columns):
    # Draw on the axes created above rather than re-selecting them through plt.subplot
    data[name].plot(ax=axis, label=name)
    axis.set_xlabel('Time')
    axis.set_ylabel(name)
    axis.set_title(name + ' vs time')
    axis.legend()
fig.subplots_adjust(hspace=0.4)

In [None]:
# Preparing data for training and testing it by applying trading strategy
# Rows dated before `startdate` form the training split; rows from `startdate` onwards are
# held out and only used for the trading simulation.
startdate = dt.date(2018, 1, 1)

training_data = data.loc[data.index < startdate]
trading_data = data.loc[data.index >= startdate]

# One stacked panel per column, sized from the data instead of a hard-coded 8 rows
# (5.5 in of height per panel reproduces the original 15x44 figure for eight columns).
n_panels = len(data.columns)
fig, axes = plt.subplots(n_panels, 1, figsize=(15, 5.5 * n_panels), squeeze=False)
for axis, name in zip(axes[:, 0], data.columns):
    training_data[name].plot(ax=axis, label="Training part of the data")
    trading_data[name].plot(ax=axis, label="Trading part of the data")
    # Dashed line marks where the training period ends and the trading period begins
    axis.axvline(startdate, color='black', ls='--')
    axis.set_xlabel('Time')
    axis.set_ylabel(name)
    axis.legend()
    axis.set_title(name + ' vs time (with training-trading split)')
fig.subplots_adjust(hspace=0.3)

In [None]:
# Zoom in on the trading split: one stacked panel per column, sized from the data instead
# of a hard-coded 8 rows (5.5 in per panel reproduces the original 15x44 figure).
n_panels = len(data.columns)
fig, axes = plt.subplots(n_panels, 1, figsize=(15, 5.5 * n_panels), squeeze=False)
for axis, name in zip(axes[:, 0], data.columns):
    trading_data[name].plot(ax=axis)
    axis.set_xlabel('Time')
    axis.set_ylabel(name)
    axis.set_title(name + ' vs time (Trading split)')
fig.subplots_adjust(hspace=0.4)

In [None]:
# We are using the eight columns of our data to do multivariate timeseries forecasting using RNN

# Rescaling the data because LSTM uses sigmoid and tanh that are sensitive to magnitude so the values need to be normalized.
# The scaler is fitted on the training period only: fitting it on the full dataset would leak the
# mean and variance of the trading period (the data the model is evaluated on) into the inputs.
scaler = StandardScaler()
scaler = scaler.fit(training_data)
# The whole series is still transformed, so it can be sliced into training/trading parts afterwards
data_scaled = scaler.transform(data)
data_scaled.shape

In [None]:
# Row position at which the trading period begins inside data_scaled
start_index = training_data.shape[0]

# Everything before that position is the (scaled) training split
training_data_scaled = data_scaled[:start_index, :]

In [None]:
''' 
We are choosing the closing price of our stocks as the parameter to be predicted.

'''

In [None]:
n_future = 1   # Number of days we want to look into the future based on the past days.
n_past = 14  # Number of past days we want to use to predict the future.

# Look the target column up by name instead of hard-coding position 5, so the windows stay
# correct if the column order of `data` ever changes.
close_col = data.columns.get_loc('Close')

trainX = []
trainY = []

# Each sample is a window of the previous n_past rows (all features); its target is the
# scaled closing price n_future days after the end of that window.
for i in range(n_past, len(training_data_scaled) - n_future + 1):
    trainX.append(training_data_scaled[i - n_past:i, :])
    trainY.append(training_data_scaled[i + n_future - 1:i + n_future, close_col])

trainX, trainY = np.array(trainX), np.array(trainY)

print('trainX shape ',trainX.shape)
print('trainY shape ',trainY.shape)

In [None]:
# Stacked LSTM regressor: three LSTM layers (64 -> 32 -> 16 units), dropout, then a Dense
# layer with one output per target column.
# NOTE(review): the scaling cell's comment mentions sigmoid/tanh, but these LSTM layers are
# configured with activation='relu' — confirm this is intended.
window_shape = (trainX.shape[1], trainX.shape[2])  # (time steps per window, features per step)
model = Sequential([
    LSTM(64, activation='relu', input_shape=window_shape, return_sequences=True),
    LSTM(32, activation='relu', return_sequences=True),
    LSTM(16, activation='relu', return_sequences=False),
    Dropout(0.2),
    Dense(trainY.shape[1]),
])

model.compile(
    optimizer=keras.optimizers.Adam(),
    loss=keras.losses.MeanSquaredError(),
)
model.summary()

# Write a diagram of the architecture to model.png
from keras.utils import plot_model
plot_model(model, to_file='model.png', show_shapes=True, show_layer_names=True)

In [None]:
# Train for 5 epochs in mini-batches of 16, holding out 20% of the training windows for validation;
# `history` keeps the per-epoch losses that are plotted in the next cell.
history = model.fit(trainX, trainY, epochs=5, batch_size=16, validation_split=0.2, verbose=1)

In [None]:
# Training vs. validation loss for each epoch
fig, ax = plt.subplots(figsize=(10,6))
for key, curve_label in (('loss', 'Training loss'), ('val_loss', 'Validation loss')):
    losses = history.history[key]
    ax.plot(range(len(losses)), losses, label=curve_label)
ax.set_xlabel('epoch')
ax.set_ylabel('loss')
ax.set_title('Loss vs Epochs')
ax.legend()

In [None]:
# Save the trained model to the Kaggle working directory as an .h5 file
model.save('/kaggle/working/RNN_stock_predictor.h5')

In [None]:
# Scaled rows of the trading period (everything from start_index onwards)
trading_data_scaled = data_scaled[start_index:]

n_future = 1   # Number of days we want to look into the future based on the past days.
n_past = 14  # Number of past days we want to use to predict the future.

# Look the target column up by name instead of hard-coding position 5
close_col = data.columns.get_loc('Close')

testX = []
testY = []

# Same windowing as for training: n_past rows of all features as input, the scaled closing
# price n_future days after the window as target. The windows are built from the trading
# rows only, so the first n_past + n_future - 1 trading days have no prediction.
for i in range(n_past, len(trading_data_scaled) - n_future + 1):
    testX.append(trading_data_scaled[i - n_past:i, :])
    testY.append(trading_data_scaled[i + n_future - 1:i + n_future, close_col])

testX, testY = np.array(testX,dtype='float32'), np.array(testY,dtype='float32')

# These are the test arrays; the labels previously said "trainX"/"trainY"
print('testX shape ',testX.shape)
print('testY shape ',testY.shape)

In [None]:
# Predict the (scaled) closing price for every window of the trading period
testY_predicted = model.predict(testX)

In [None]:
# NOTE(review): testY and testY_predicted are both in standardized (scaled) units here, so true
# values close to zero can inflate this percentage; the error on actual prices is printed further below.
print(mean_absolute_percent_error(testY,testY_predicted))

In [None]:
# The scaler was fitted on all feature columns, so inverse_transform needs a full-width array:
# repeat the single predicted column across every feature, then read back the 'Close' column.
# Reading column 0 instead would un-scale the prediction with the mean/std of 'Prev Close'.
close_col = data.columns.get_loc('Close')
prediction_copies = np.repeat(testY_predicted, data_scaled.shape[1], axis=-1)
y_pred_future = scaler.inverse_transform(prediction_copies)[:, close_col]
# The first target sits n_past + n_future - 1 rows into the trading period (14 with the current
# settings), so skip that many actual prices to line them up with the predictions.
real_future = np.array(trading_data['Close'].iloc[n_past + n_future - 1:])

In [None]:
# Error measured on actual (un-scaled) closing prices over the trading period
print('The mean absolute percent error of your model is ',mean_absolute_percent_error(real_future,y_pred_future),'%')

In [None]:
# True vs. predicted closing price over the trading period; the x axis is the position within
# real_future / y_pred_future, not a calendar date.
fig, ax = plt.subplots(figsize=(15,5))
ax.plot(real_future, label="True stock price")
ax.plot(y_pred_future, label="Predicted price")
# Label the axes and title so the figure can be read on its own
ax.set_xlabel('Day of the trading period (position in the prediction array)')
ax.set_ylabel('Close')
ax.set_title('True vs predicted closing price (Trading split)')
ax.legend()
plt.show()

In [None]:
def calculate_profit(predicted_prices, actual_prices, initial_amount):
    """Simulate an all-in / all-out trading strategy and return the final portfolio value.

    For each day ``i`` the predicted price is compared with the actual price of that day:

    * predicted > actual: spend all available money on shares at the actual price;
    * predicted < actual: sell all held shares at the actual price;
    * equal: do nothing.

    Shares still held after the last day are valued at the last actual price.

    Parameters
    ----------
    predicted_prices : sequence of float
        Predicted price per day; its length determines the number of simulated days.
    actual_prices : sequence of float
        Actual price per day; must have at least as many entries as ``predicted_prices``.
    initial_amount : float
        Cash available at the start.

    Returns
    -------
    float
        The final amount (cash plus value of held shares). Note that, despite the function's
        name, this is not the profit: subtract ``initial_amount`` to get it.
    """
    num_days = len(predicted_prices)
    if num_days == 0:
        # No signals means no trades; also avoids indexing actual_prices[-1] on empty input.
        return initial_amount

    shares = 0  # Number of shares held
    money = initial_amount  # Amount of money available

    for i in range(num_days):
        price = actual_prices[i]
        if predicted_prices[i] > price:
            # Buy signal: convert all cash into (fractional) shares
            shares += money / price
            money = 0
        elif predicted_prices[i] < price:
            # Sell signal: convert all shares back into cash
            money += shares * price
            shares = 0

    # Value any remaining position at the last actual price
    return money + shares * actual_prices[-1]

In [None]:
# Run the strategy over the trading period and report the gain relative to the starting cash
initial_cash = 500_000
final_cash = calculate_profit(y_pred_future, real_future, initial_cash)
profit = final_cash - initial_cash
print("Profit earned is : ", profit)