<a href="https://colab.research.google.com/github/RajeevBhadola/myrepo/blob/master/ML_Stock_Return_28_Aug_21.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns


In [None]:
# Download two prebuilt .deb packages (-q: quiet, -O: write to the given file name).
# Judging by the file names these are presumably the TA-Lib C library and its dev headers.
!wget https://launchpad.net/~mario-mariomedina/+archive/ubuntu/talib/+files/libta-lib0_0.4.0-oneiric1_amd64.deb -qO libta.deb
!wget https://launchpad.net/~mario-mariomedina/+archive/ubuntu/talib/+files/ta-lib0-dev_0.4.0-oneiric1_amd64.deb -qO ta.deb
# Install both downloaded packages, then the Python `ta-lib` package from PyPI.
!dpkg -i libta.deb ta.deb
!pip install ta-lib

In [None]:
import talib

In [None]:
# Load the RIL data from a hard-coded path under /content/sample_data;
# RIL.csv has to exist at that location before this cell is run.
ril = pd.read_csv('/content/sample_data/RIL.csv')

In [None]:
# Preview the first two rows of the freshly loaded frame.
ril.head(2)

In [None]:
import datetime

# Parse the day-month-year date strings, then promote the column to the index.
ril['Date'] = pd.to_datetime(ril['Date'], format='%d-%m-%Y')
ril = ril.set_index('Date')

In [None]:
# Check that Date is now the index.
ril.head(2)

In [None]:
#Compute Returns. It is easier to forecast returns than stock prices.
# Work with daily returns: they are easier to forecast than price levels.
# Rows containing any missing value (including the first, return-less row) are dropped.
ril = ril.assign(ret=ril['Adj Close'].pct_change()).dropna()
ril.shape

In [None]:
# We can also use tensorflow library
import tensorflow as tf
from tensorflow.keras.preprocessing.sequence import TimeseriesGenerator

In [None]:
def split_sequence(series, k):
  """Split a univariate sequence into sliding-window samples.

  Each sample is `k` consecutive values of `series`; its target is the value
  that immediately follows the window.

  Args:
    series: Sequence supporting `len()`, integer indexing and slicing
      (e.g. a list or a NumPy array).
    k: Window length; must be at least 1.

  Returns:
    A tuple `(X, y)` of NumPy arrays. `X` holds `len(series) - k` windows of
    length `k`, `y` holds the matching next values. Both are empty when the
    sequence is not longer than `k`.

  Raises:
    ValueError: If `k` is smaller than 1.
  """
  if k < 1:
    raise ValueError("k must be a positive window length")
  X, y = [], []
  # The last usable window must leave one element after it to act as the target.
  for start in range(len(series) - k):
    end_id = start + k
    # The window begins at `start` (not at a fixed offset) so that it slides.
    X.append(series[start:end_id])
    y.append(series[end_id])
  return np.array(X), np.array(y)

In [None]:
# Toy sequence 1..10 for checking the windowing helper by eye.
x = list(range(1, 11))
split_sequence(x, 3)

In [None]:
# Same toy sequence through Keras' generator: windows of 3 values, one sample per batch.
generator = TimeseriesGenerator(data=x, targets=x, length=3, batch_size=1)
generator[0]

In [None]:
# Inspect the second batch for comparison with the first.
generator[1]

In [None]:
# Divide into training and test dataset
# Date-based split: fit on 2016-08-19 through 2020-12-31, hold out 2021-01-01 onwards.
ril_train = ril.loc['2016-08-19':'2020-12-31']
ril_test = ril.loc['2021-01-01':]

In [None]:
# First two rows of the training window.
ril_train.head(2)

In [None]:
# First two rows of the hold-out window.
ril_test.head(2)

In [None]:
# Training samples: 3 consecutive adjusted closes -> the next adjusted close.
# Plain NumPy arrays are passed on purpose: the generator indexes its inputs by
# integer position, which on a date-indexed Series depends on pandas'
# deprecated positional fallback for `series[int]`.
train_prices = ril_train['Adj Close'].to_numpy()
generator_train = TimeseriesGenerator(train_prices, train_prices,
                                      length=3, batch_size=1)
generator_train[0]

In [None]:
# Hold-out samples, built with the same window length as the training generator.
# Plain NumPy arrays are passed on purpose: the generator indexes its inputs by
# integer position, which on a date-indexed Series depends on pandas'
# deprecated positional fallback for `series[int]`.
test_prices = ril_test['Adj Close'].to_numpy()
generator_test = TimeseriesGenerator(test_prices, test_prices,
                                     length=3, batch_size=1)

In [None]:
# Show four rows so the first generated sample (3 inputs + 1 target) can be checked by eye.
ril_train.head(4)

In [None]:
# Build the first model
# First model: a 3-value window feeds one hidden layer of 64 ReLU units,
# followed by a single linear output unit.
tf.random.set_seed(13)
model_1 = tf.keras.Sequential()
model_1.add(tf.keras.layers.Dense(64, activation='relu', input_dim=3))
model_1.add(tf.keras.layers.Dense(1))

In [None]:
# Print the layer-by-layer overview of the first model.
model_1.summary()

In [None]:
# Mean squared error loss, tracking mean absolute error as well.
# `metrics` is given as a list: newer Keras releases reject a bare string here,
# and the feature-based model further down this notebook already uses the list form.
model_1.compile(optimizer='adam', loss='mse', metrics=['mae'])

In [None]:
# Train for 10 epochs and keep the per-epoch metrics.
# NOTE(review): the hold-out generator is also used as validation data, so it is
# no longer an untouched test set when the model is evaluated on it later.
history=model_1.fit(generator_train, epochs=10, validation_data= generator_test)

In [None]:
# Raw dictionary of the metrics recorded during training.
history.history

In [None]:
# Put the recorded metrics in a DataFrame (one column per metric) for plotting.
results = pd.DataFrame(history.history)
results.head(2)

In [None]:
# Training vs. validation loss; the trailing semicolon hides the Axes repr.
results[['loss', 'val_loss']].plot();

In [None]:
# Predict for every window of the hold-out generator and check the output shape.
ril_pred = model_1.predict(generator_test)
ril_pred.shape

In [None]:
# Remove size-1 dimensions so the predictions form a flat vector.
ril_pred = tf.squeeze(ril_pred)
ril_pred.numpy().shape

In [None]:
# Number of rows in the hold-out price series, for comparison with the predictions.
ril_test['Adj Close'].shape

In [None]:
# Skip the first 3 rows: with length=3 they only form the first input window,
# so the generator produces no target (and the model no prediction) for them.
ril_test['Adj Close'][3:].shape

In [None]:
# Predicted vs. actual adjusted close; the first 3 hold-out rows are skipped
# because they only serve as the initial input window.
fig, ax = plt.subplots()
ax.scatter(ril_pred, ril_test['Adj Close'][3:])
ax.set_xlabel('Predicted Value')
ax.set_ylabel('Actual Value')
ax.set_title('Prediction of RIL Prices using Deep Learning');

In [None]:
# Overlay predictions and actual prices along the hold-out period.
fig, ax = plt.subplots()
ax.plot(ril_pred, label='Predicted Price')
ax.plot(ril_test['Adj Close'][3:].values, label='Actual Prices')
ax.legend();

In [None]:
# Loss and metric values of the first model on the hold-out generator.
model_1.evaluate(generator_test)

In [None]:
# Second model: same 3-value input, but two hidden layers of 64 ReLU units
# before the single linear output unit.
tf.random.set_seed(13)
model_2 = tf.keras.Sequential()
model_2.add(tf.keras.layers.Dense(64, input_dim=3, activation='relu'))
model_2.add(tf.keras.layers.Dense(64, activation='relu'))
model_2.add(tf.keras.layers.Dense(1))

In [None]:
# Print the layer-by-layer overview of the second model.
model_2.summary()

In [None]:

# Sanity check of the second hidden layer's parameter count:
# 64 units, each with 64 incoming weights plus 1 bias.
64*65

In [None]:
# Adam with a reduced learning rate of 0.0005, MSE loss, MAE tracked as a metric.
# `metrics` is given as a list: newer Keras releases reject a bare string here,
# and the feature-based model further down this notebook already uses the list form.
model_2.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.0005),
                loss='mse', metrics=['mae'])

In [None]:
# Train the deeper model for 50 epochs; verbose=0 suppresses the per-epoch log.
# NOTE(review): the hold-out generator again serves as validation data.
history_2 = model_2.fit(generator_train, epochs=50, validation_data= generator_test,verbose=0)

In [None]:
# Put the second model's recorded metrics in a DataFrame for plotting.
df_model_2 = pd.DataFrame(history_2.history)
df_model_2.head(2)

In [None]:
# Loss and metric values of the second model on the hold-out generator.
model_2.evaluate(generator_test)

In [None]:

# Training vs. validation loss of the second model.
df_model_2[['loss', 'val_loss']].plot()

In [None]:
# Training vs. validation mean absolute error of the second model.
df_model_2[['mae','val_mae']].plot()

## Feature Engineering

In [None]:
# List the columns currently available before adding features.
ril.columns

In [None]:

# What does shift do? Put the shifted series next to the original to see it.
ril['ret_shift']= ril['ret'].shift(1)
ril[['ret', 'ret_shift']].head()
# A positive argument to shift() moves values down, so each row shows the
# value of the previous period (and the first row becomes NaN).

In [None]:
#5 day MA, 21 day MA and 20 day RSI
# 5-day MA, 21-day MA and 20-day RSI, each lagged by one session and scaled by
# the previous close.
# The indicator at row t includes that day's close, which also determines `ret`
# at row t. Lagging only the denominator therefore left the target inside the
# features; shifting the indicator as well restricts every feature to data
# available up to the previous session.
adj_close = ril['Adj Close']
prev_close = adj_close.shift(1)
# Wrapping in pd.Series keeps .shift() available even if talib returns a bare array.
ril['MA5'] = pd.Series(talib.SMA(adj_close, timeperiod=5), index=ril.index).shift(1) / prev_close
ril['MA21'] = pd.Series(talib.SMA(adj_close, timeperiod=21), index=ril.index).shift(1) / prev_close
# NOTE(review): dividing RSI by the price level is kept from the original cell,
# but RSI is already a ratio-type indicator; confirm this scaling is intended.
ril['RSI20'] = pd.Series(talib.RSI(adj_close, timeperiod=20), index=ril.index).shift(1) / prev_close

In [None]:
# Early rows are NaN for the new columns until enough history has accumulated.
ril.head(5)

In [None]:
#Returns observed 5 sessions back. 
# Daily return observed 5 sessions earlier (daily pct change shifted down 5 rows).
ril['return_5']=ril['Adj Close'].pct_change().shift(5)

In [None]:
# Are these new features correlated with returns?
# Pairwise correlation matrix of the return and the engineered features.
ril[['ret','MA5','MA21','RSI20','return_5']].corr()

In [None]:
#new dataframe containing relevant features
# Keep only the modelling columns, then drop the rows that the rolling and
# lagged features left incomplete.
model_columns = ['Adj Close', 'Volume', 'ret', 'MA5', 'MA21', 'RSI20', 'return_5']
ril = ril[model_columns].dropna()
ril.shape


In [None]:
# Model inputs (five feature columns) and the target (daily return).
feature_names = ['Volume', 'MA5', 'MA21', 'RSI20', 'return_5']
features = ril.loc[:, feature_names]
target = ril.loc[:, 'ret']

In [None]:
# Preview the first two rows of the feature matrix.
features.head(2)

In [None]:
#Divide into train and test dataset
# 75/25 split by row position, without shuffling: the first 75% of rows are
# used for training and the remaining rows for testing.
train_size = int(0.75 * len(features))
X_train, X_test = features.iloc[:train_size], features.iloc[train_size:]
y_train, y_test = target.iloc[:train_size], target.iloc[train_size:]

In [None]:
import tensorflow as tf

In [None]:
# Feature-based model: two hidden layers of 64 ReLU units and one linear output.
# TensorFlow is seeded first so the weight initialisation is repeatable, using
# the same seed as the two price models built earlier in this notebook.
tf.random.set_seed(13)
model1 = tf.keras.Sequential([
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(1)
])

In [None]:
# Adam optimiser, mean squared error loss, mean absolute error as the tracked metric.
model1.compile(optimizer='adam', loss="mse", metrics=['mae'])

In [None]:
# Train on the engineered features for 20 epochs; no validation data is passed.
# NOTE: this rebinds `history`, replacing the History object of the first price model.
history = model1.fit(X_train,y_train,epochs=20)

In [None]:
# Training mean absolute error per epoch.
pd.Series(history.history['mae']).plot()