In [1]:
import pandas as pd
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow_hub as hub
import tensorflow_text as text
df_stock = pd.read_csv("maruti_suzuki/MARUTI.NS.csv",
                 parse_dates = ["Date"],
                 index_col = ["Date"])
df_posts = pd.read_excel("maruti_suzuki/maruti_suzuki_final_posts.xlsx")
# removing the unnecessary columns
df_posts.drop(["Unnamed: 0"], axis=1,inplace=True)
# removing spam posts
df_posts = df_posts[df_posts.Spam==0.0]
df_posts.drop(["Spam"],axis=1,inplace=True)
# sliding a window of 7 days and adding all the TIs
from stock_helper import prepare_data
x,y = prepare_data(df_stock)
final_x = x[np.datetime64("2021-11-13"):]
final_y = y[np.datetime64("2021-11-13"):]
# reversing the posts data
df_posts = df_posts[::-1]
final_posts = df_posts[7:]
# loading the sentiment analysis model
sent_model = tf.keras.models.load_model("final_bert")
# removing duplicates from the data
final_posts.drop_duplicates(subset=['Messages'])
# calculating the sentiments score
sentiments = []
prev = np.datetime64("2015-11-12 21:31:26")
for i in final_y.index:
    total=0
    cnt=0
    for j in final_posts.itertuples():
        _,msg,time = j
        if np.datetime64(time)<np.datetime64(i) and np.datetime64(time)>prev:
            total += tf.squeeze(sent_model.predict([msg])).numpy()
            cnt+=1
    prev = np.datetime64(i)
    if(cnt==0):
        sentiments.append(0)
    else:
        sentiments.append(total/cnt)
# getting indices where sentiments score is 0
zero_index = []
for i,j in enumerate(sentiments):
    if(j==0):
        zero_index.append(i)
# removing all the zero values indices
sentiments = np.delete(sentiments,zero_index)
final_x_zeros = final_x.copy()
final_y_zeros = final_y.copy()
final_y_zeros = final_y_zeros.to_frame()
final_x_zeros['removal_assist'] = np.arange(0,len(final_x),1)
final_y_zeros['removal_assist'] = np.arange(0,len(final_x),1)
final_y_zeros = final_y_zeros[final_y_zeros.removal_assist.isin(zero_index)==False]
final_x_zeros = final_x_zeros[final_x_zeros.removal_assist.isin(zero_index)==False]
# removing the added helper column
final_x_zeros.drop(["removal_assist"], axis=1,inplace=True)
final_y_zeros.drop(["removal_assist"], axis=1, inplace=True)

In [2]:
from sklearn.preprocessing import MinMaxScaler
mms = MinMaxScaler()
final_x_zeros_scaled = mms.fit_transform(final_x_zeros)
final_x_zeros_scaled

array([[0.41980076, 0.19144392, 0.17350197, ..., 0.57974905, 0.70709467,
        0.54999995],
       [0.36957264, 0.20892048, 0.2366209 , ..., 0.60375047, 0.7176312 ,
        0.54999995],
       [0.2905264 , 0.23642635, 0.33595848, ..., 0.5883486 , 0.6303451 ,
        0.54999995],
       ...,
       [0.95180225, 0.9424734 , 0.94618464, ..., 0.7914287 , 0.7590643 ,
        0.45      ],
       [0.96364975, 0.9706726 , 0.98105717, ..., 0.73870564, 0.7252364 ,
        0.45      ],
       [1.        , 1.        , 1.0000005 , ..., 0.668807  , 0.68463016,
        0.45      ]], dtype=float32)

In [3]:
k = final_y_zeros.max()
final_y_zeros= final_y_zeros/k

In [7]:
model_nbeats = tf.keras.models.load_model("nbeats_maruti suzuki")
model_nbeats.evaluate(final_x_zeros[50:],final_y_zeros[50:]*k)



[310.599365234375, 310.599365234375, 127680.1328125]

In [6]:
tf.random.set_seed(42)
inputs = tf.keras.layers.Input(shape=(71))
x = tf.keras.layers.Lambda(lambda x : tf.expand_dims(x,axis=1))(inputs)
x = tf.keras.layers.LSTM(128,return_sequences=True,dropout=0.1)(x)
x = tf.keras.layers.LSTM(128,return_sequences=True, dropout=0.1)(x)
x = tf.keras.layers.LSTM(128,return_sequences=True,dropout=0.1)(x)
x = tf.keras.layers.LSTM(128,return_sequences=True, dropout=0.1)(x)
x = tf.keras.layers.LSTM(128)(x)
output = tf.keras.layers.Dense(1)(x)
model = tf.keras.models.Model(inputs = inputs, outputs = output)
model.summary()

Model: "model_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_2 (InputLayer)        [(None, 71)]              0         
                                                                 
 lambda_1 (Lambda)           (None, 1, 71)             0         
                                                                 
 lstm_3 (LSTM)               (None, 1, 128)            102400    
                                                                 
 lstm_4 (LSTM)               (None, 1, 128)            131584    
                                                                 
 lstm_5 (LSTM)               (None, 1, 128)            131584    
                                                                 
 lstm_6 (LSTM)               (None, 1, 128)            131584    
                                                                 
 lstm_7 (LSTM)               (None, 128)               1315

In [8]:
model.compile(loss="mae",optimizer = tf.keras.optimizers.Adam(), metrics=["mae","mse"])
history = model.fit(final_x_zeros_scaled[:50],final_y_zeros[:50],
                    epochs = 1000,
                    validation_data = (final_x_zeros_scaled[50:], final_y_zeros[50:]),
                   callbacks=[tf.keras.callbacks.EarlyStopping(monitor="val_loss", 
                                                        patience=200, 
                                                        restore_best_weights=True),
                      tf.keras.callbacks.ReduceLROnPlateau(monitor="val_loss", 
                                                           patience=100, 
                                                           verbose=1)])

Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
Epoch 28/1000
Epoch 29/1000
Epoch 30/1000
Epoch 31/1000
Epoch 32/1000
Epoch 33/1000
Epoch 34/1000
Epoch 35/1000
Epoch 36/1000
Epoch 37/1000
Epoch 38/1000
Epoch 39/1000
Epoch 40/1000
Epoch 41/1000
Epoch 42/1000
Epoch 43/1000
Epoch 44/1000
Epoch 45/1000
Epoch 46/1000
Epoch 47/1000


Epoch 48/1000
Epoch 49/1000
Epoch 50/1000
Epoch 51/1000
Epoch 52/1000
Epoch 53/1000
Epoch 54/1000
Epoch 55/1000
Epoch 56/1000
Epoch 57/1000
Epoch 58/1000
Epoch 59/1000
Epoch 60/1000
Epoch 61/1000
Epoch 62/1000
Epoch 63/1000
Epoch 64/1000
Epoch 65/1000
Epoch 66/1000
Epoch 67/1000
Epoch 68/1000
Epoch 69/1000
Epoch 70/1000
Epoch 71/1000
Epoch 72/1000
Epoch 73/1000
Epoch 74/1000
Epoch 75/1000
Epoch 76/1000
Epoch 77/1000
Epoch 78/1000
Epoch 79/1000
Epoch 80/1000
Epoch 81/1000
Epoch 82/1000
Epoch 83/1000
Epoch 84/1000
Epoch 85/1000
Epoch 86/1000
Epoch 87/1000
Epoch 88/1000
Epoch 89/1000
Epoch 90/1000
Epoch 91/1000
Epoch 92/1000


Epoch 93/1000
Epoch 94/1000
Epoch 95/1000
Epoch 96/1000
Epoch 97/1000
Epoch 98/1000
Epoch 99/1000
Epoch 100/1000
Epoch 101/1000
Epoch 102/1000
Epoch 103/1000
Epoch 104/1000
Epoch 105/1000
Epoch 106/1000
Epoch 107/1000
Epoch 108/1000
Epoch 109/1000
Epoch 110/1000
Epoch 111/1000
Epoch 112/1000
Epoch 113/1000
Epoch 114/1000
Epoch 115/1000
Epoch 116/1000
Epoch 117/1000
Epoch 118/1000
Epoch 119/1000
Epoch 120/1000
Epoch 121/1000
Epoch 122/1000
Epoch 123/1000
Epoch 124/1000
Epoch 125/1000
Epoch 126/1000
Epoch 127/1000
Epoch 128/1000
Epoch 129/1000
Epoch 130/1000
Epoch 131/1000
Epoch 132/1000
Epoch 133/1000
Epoch 134/1000
Epoch 00134: ReduceLROnPlateau reducing learning rate to 0.00010000000474974513.
Epoch 135/1000
Epoch 136/1000
Epoch 137/1000


Epoch 138/1000
Epoch 139/1000
Epoch 140/1000
Epoch 141/1000
Epoch 142/1000
Epoch 143/1000
Epoch 144/1000
Epoch 145/1000
Epoch 146/1000
Epoch 147/1000
Epoch 148/1000
Epoch 149/1000
Epoch 150/1000
Epoch 151/1000
Epoch 152/1000
Epoch 153/1000
Epoch 154/1000
Epoch 155/1000
Epoch 156/1000
Epoch 157/1000
Epoch 158/1000
Epoch 159/1000
Epoch 160/1000
Epoch 161/1000
Epoch 162/1000
Epoch 163/1000
Epoch 164/1000
Epoch 165/1000
Epoch 166/1000
Epoch 167/1000
Epoch 168/1000
Epoch 169/1000
Epoch 170/1000
Epoch 171/1000
Epoch 172/1000
Epoch 173/1000
Epoch 174/1000
Epoch 175/1000
Epoch 176/1000
Epoch 177/1000
Epoch 178/1000
Epoch 179/1000
Epoch 180/1000
Epoch 181/1000


Epoch 182/1000
Epoch 183/1000
Epoch 184/1000
Epoch 185/1000
Epoch 186/1000
Epoch 187/1000
Epoch 188/1000
Epoch 189/1000
Epoch 190/1000
Epoch 191/1000
Epoch 192/1000
Epoch 193/1000
Epoch 194/1000
Epoch 195/1000
Epoch 196/1000
Epoch 197/1000
Epoch 198/1000
Epoch 199/1000
Epoch 200/1000
Epoch 201/1000
Epoch 202/1000
Epoch 203/1000
Epoch 204/1000
Epoch 205/1000
Epoch 206/1000
Epoch 207/1000
Epoch 208/1000
Epoch 209/1000
Epoch 210/1000
Epoch 211/1000
Epoch 212/1000
Epoch 213/1000
Epoch 214/1000
Epoch 215/1000
Epoch 216/1000
Epoch 217/1000
Epoch 218/1000
Epoch 219/1000
Epoch 220/1000
Epoch 221/1000
Epoch 222/1000
Epoch 223/1000
Epoch 224/1000
Epoch 225/1000


Epoch 226/1000
Epoch 227/1000
Epoch 228/1000
Epoch 229/1000
Epoch 230/1000
Epoch 231/1000
Epoch 232/1000
Epoch 233/1000
Epoch 234/1000
Epoch 00234: ReduceLROnPlateau reducing learning rate to 1.0000000474974514e-05.


In [9]:
model.compile(loss="mae",optimizer = tf.keras.optimizers.Adam(learning_rate=0.0001), metrics=["mae","mse"])
history = model.fit(final_x_zeros_scaled[:50],final_y_zeros[:50],
                    epochs = 2000,
                    initial_epoch = history.epoch[-1],
                    validation_data = (final_x_zeros_scaled[50:], final_y_zeros[50:]),
                   callbacks=[tf.keras.callbacks.EarlyStopping(monitor="val_loss", 
                                                        patience=200, 
                                                        restore_best_weights=True),
                      tf.keras.callbacks.ReduceLROnPlateau(monitor="val_loss", 
                                                           patience=100, 
                                                           verbose=1)])

Epoch 234/2000
Epoch 235/2000
Epoch 236/2000
Epoch 237/2000
Epoch 238/2000
Epoch 239/2000
Epoch 240/2000
Epoch 241/2000
Epoch 242/2000
Epoch 243/2000
Epoch 244/2000
Epoch 245/2000
Epoch 246/2000
Epoch 247/2000
Epoch 248/2000
Epoch 249/2000
Epoch 250/2000
Epoch 251/2000
Epoch 252/2000
Epoch 253/2000
Epoch 254/2000
Epoch 255/2000
Epoch 256/2000
Epoch 257/2000
Epoch 258/2000
Epoch 259/2000
Epoch 260/2000
Epoch 261/2000
Epoch 262/2000
Epoch 263/2000
Epoch 264/2000
Epoch 265/2000
Epoch 266/2000
Epoch 267/2000
Epoch 268/2000
Epoch 269/2000
Epoch 270/2000
Epoch 271/2000
Epoch 272/2000
Epoch 273/2000
Epoch 274/2000
Epoch 275/2000
Epoch 276/2000
Epoch 277/2000
Epoch 278/2000


Epoch 279/2000
Epoch 280/2000
Epoch 281/2000
Epoch 282/2000
Epoch 283/2000
Epoch 284/2000
Epoch 285/2000
Epoch 286/2000
Epoch 287/2000
Epoch 288/2000
Epoch 289/2000
Epoch 290/2000
Epoch 291/2000
Epoch 292/2000
Epoch 293/2000
Epoch 294/2000
Epoch 295/2000
Epoch 296/2000
Epoch 297/2000
Epoch 298/2000
Epoch 299/2000
Epoch 300/2000
Epoch 301/2000
Epoch 302/2000
Epoch 303/2000
Epoch 304/2000
Epoch 305/2000
Epoch 306/2000
Epoch 307/2000
Epoch 308/2000
Epoch 309/2000
Epoch 310/2000
Epoch 311/2000
Epoch 312/2000
Epoch 313/2000
Epoch 314/2000
Epoch 315/2000
Epoch 316/2000
Epoch 317/2000
Epoch 318/2000
Epoch 319/2000
Epoch 320/2000
Epoch 321/2000
Epoch 322/2000


Epoch 323/2000
Epoch 324/2000
Epoch 325/2000
Epoch 326/2000
Epoch 327/2000
Epoch 328/2000
Epoch 329/2000
Epoch 330/2000
Epoch 331/2000
Epoch 332/2000
Epoch 333/2000
Epoch 334/2000
Epoch 335/2000
Epoch 336/2000
Epoch 337/2000
Epoch 338/2000
Epoch 00338: ReduceLROnPlateau reducing learning rate to 9.999999747378752e-06.
Epoch 339/2000
Epoch 340/2000
Epoch 341/2000
Epoch 342/2000
Epoch 343/2000
Epoch 344/2000
Epoch 345/2000
Epoch 346/2000
Epoch 347/2000
Epoch 348/2000
Epoch 349/2000
Epoch 350/2000
Epoch 351/2000
Epoch 352/2000
Epoch 353/2000
Epoch 354/2000
Epoch 355/2000
Epoch 356/2000
Epoch 357/2000
Epoch 358/2000
Epoch 359/2000
Epoch 360/2000
Epoch 361/2000
Epoch 362/2000
Epoch 363/2000
Epoch 364/2000
Epoch 365/2000
Epoch 366/2000


Epoch 367/2000
Epoch 368/2000
Epoch 369/2000
Epoch 370/2000
Epoch 371/2000
Epoch 372/2000
Epoch 373/2000
Epoch 374/2000
Epoch 375/2000
Epoch 376/2000
Epoch 377/2000
Epoch 378/2000
Epoch 379/2000
Epoch 380/2000
Epoch 381/2000
Epoch 382/2000
Epoch 383/2000
Epoch 384/2000
Epoch 385/2000
Epoch 386/2000
Epoch 387/2000
Epoch 388/2000
Epoch 389/2000
Epoch 390/2000
Epoch 391/2000
Epoch 392/2000
Epoch 393/2000
Epoch 394/2000
Epoch 395/2000
Epoch 396/2000
Epoch 397/2000
Epoch 398/2000
Epoch 399/2000
Epoch 400/2000
Epoch 401/2000
Epoch 402/2000
Epoch 403/2000
Epoch 404/2000
Epoch 405/2000
Epoch 406/2000
Epoch 407/2000
Epoch 408/2000
Epoch 409/2000
Epoch 410/2000


Epoch 411/2000
Epoch 412/2000
Epoch 413/2000
Epoch 414/2000
Epoch 415/2000
Epoch 416/2000
Epoch 417/2000
Epoch 418/2000
Epoch 419/2000
Epoch 420/2000
Epoch 421/2000
Epoch 422/2000
Epoch 423/2000
Epoch 424/2000
Epoch 425/2000
Epoch 426/2000
Epoch 427/2000
Epoch 428/2000
Epoch 429/2000
Epoch 430/2000
Epoch 431/2000
Epoch 432/2000
Epoch 433/2000
Epoch 434/2000
Epoch 435/2000
Epoch 436/2000
Epoch 437/2000
Epoch 438/2000
Epoch 00438: ReduceLROnPlateau reducing learning rate to 9.999999747378752e-07.


In [10]:
preds = tf.squeeze(model.predict(final_x_zeros_scaled[50:])).numpy()
preds = preds*k.values[0]
preds

array([8506.652, 8530.543, 8552.735, 8562.858, 8570.454, 8602.898,
       8630.671, 8642.096, 8650.431, 8694.767, 8715.952, 8726.964],
      dtype=float32)

In [12]:
mae = sum(abs(preds-(tf.squeeze(final_y_zeros[50:].to_numpy()).numpy())*k.values[0]))
mae

1062.3447265625

In [16]:
(tf.squeeze(final_y_zeros[50:].to_numpy()).numpy())*k.values[0]

array([8820.2 , 8550.95, 8597.3 , 8559.4 , 8515.25, 8593.65, 8550.55,
       8511.65, 8599.45, 8949.45, 8805.45, 8737.15], dtype=float32)

In [17]:
model.save("LSTM_maruti suzuki")



INFO:tensorflow:Assets written to: LSTM_maruti suzuki\assets


INFO:tensorflow:Assets written to: LSTM_maruti suzuki\assets
