In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
from keras.models import Sequential
from keras.layers import LSTM,Dense
from keras.preprocessing.sequence import TimeseriesGenerator

In [2]:
file = 'data/GOOG.csv'
df = pd.read_csv(file)

In [3]:
print(df.info())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 250 entries, 0 to 249
Data columns (total 7 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   Date       250 non-null    object 
 1   Open       250 non-null    float64
 2   High       250 non-null    float64
 3   Low        250 non-null    float64
 4   Close      250 non-null    float64
 5   Adj Close  250 non-null    float64
 6   Volume     250 non-null    int64  
dtypes: float64(5), int64(1), object(1)
memory usage: 13.8+ KB
None


In [4]:
df.head(10)

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2022-08-08,119.120003,120.860001,117.830002,118.139999,118.139999,17061100
1,2022-08-09,117.989998,118.199997,116.559998,117.5,117.5,15424300
2,2022-08-10,119.589996,121.779999,119.360001,120.650002,120.650002,20497000
3,2022-08-11,122.080002,122.339996,119.550003,119.82,119.82,16671600
4,2022-08-12,121.160004,122.650002,120.400002,122.650002,122.650002,16121100
5,2022-08-15,122.209999,123.260002,121.57,122.879997,122.879997,15525000
6,2022-08-16,122.32,123.227997,121.535004,122.510002,122.510002,15626200
7,2022-08-17,120.93,122.150002,120.199997,120.32,120.32,17589200
8,2022-08-18,120.230003,121.690002,119.550003,120.860001,120.860001,15652000
9,2022-08-19,119.870003,120.0,117.669998,118.120003,118.120003,20187000


In [5]:
df['Date'] = pd.to_datetime(df['Date'])
df.set_index(df['Date'],inplace=True)
df.drop(columns = ['Open','High','Low','Volume'],inplace = True)

In [6]:
import plotly.graph_objects as go

In [7]:
trace1 = go.Scatter(
    x = df['Date'],
    y = df['Close'],
    mode = 'lines',
    name = 'Data'
)
layout = go.Layout(
    title = "Google Stock",
    xaxis = {'title':"Date"},
    yaxis = {'title':"Close"}
)
fig = go.Figure(data = [trace1],layout = layout)
fig.show()

In [8]:
close_data = df['Close'].values

In [9]:
close_data

array([118.139999, 117.5     , 120.650002, 119.82    , 122.650002,
       122.879997, 122.510002, 120.32    , 120.860001, 118.120003,
       115.07    , 114.769997, 114.699997, 117.699997, 111.300003,
       110.339996, 109.910004, 109.150002, 110.550003, 108.68    ,
       107.480003, 110.480003, 109.419998, 111.779999, 111.870003,
       105.309998, 105.870003, 103.900002, 103.629997, 103.849998,
       101.830002, 100.010002, 100.57    ,  99.169998,  98.809998,
        98.089996, 100.739998,  98.089996,  96.150002,  99.300003,
       102.410004, 102.220001, 102.239998,  99.57    ,  98.709999,
        98.050003,  98.300003,  99.709999,  97.18    , 100.779999,
       101.389999, 100.290001, 100.529999, 101.480003, 102.970001,
       104.93    ,  94.82    ,  92.599998,  96.580002,  94.660004,
        90.5     ,  87.07    ,  83.489998,  86.699997,  88.650002,
        88.910004,  87.400002,  94.169998,  96.730003,  96.029999,
        98.720001,  98.989998,  98.5     ,  97.800003,  95.830

In [11]:
#reshape the array into a two-dimensional array with one column
close_data = close_data.reshape(-1,1)

In [12]:
close_data

array([[118.139999],
       [117.5     ],
       [120.650002],
       [119.82    ],
       [122.650002],
       [122.879997],
       [122.510002],
       [120.32    ],
       [120.860001],
       [118.120003],
       [115.07    ],
       [114.769997],
       [114.699997],
       [117.699997],
       [111.300003],
       [110.339996],
       [109.910004],
       [109.150002],
       [110.550003],
       [108.68    ],
       [107.480003],
       [110.480003],
       [109.419998],
       [111.779999],
       [111.870003],
       [105.309998],
       [105.870003],
       [103.900002],
       [103.629997],
       [103.849998],
       [101.830002],
       [100.010002],
       [100.57    ],
       [ 99.169998],
       [ 98.809998],
       [ 98.089996],
       [100.739998],
       [ 98.089996],
       [ 96.150002],
       [ 99.300003],
       [102.410004],
       [102.220001],
       [102.239998],
       [ 99.57    ],
       [ 98.709999],
       [ 98.050003],
       [ 98.300003],
       [ 99.7

In [13]:
#80% of your data for training
split_percentage = 0.80

split = int(split_percentage * len(close_data))

In [14]:
split

200

In [15]:
close_train = close_data[:split]
close_test = close_data[split:]

In [16]:
date_train = df['Date'][:split]
date_test = df['Date'][split:]

In [17]:
print(len(close_train))

200


In [18]:
print(len(close_test))

50


In [20]:
look_back = 15 #number of time steps to look back
train_generator = TimeseriesGenerator(close_train,close_train,length = look_back,batch_size = 20)
test_generator = TimeseriesGenerator(close_test,close_test,length = look_back,batch_size = 1)

In [22]:

model = Sequential()

model.add(
    LSTM(10,activation = 'relu',input_shape =(look_back,1))
)
model.add(
    Dense(1)
)



In [23]:
model.compile(optimizer='adam',loss='mse')

In [24]:
model.fit_generator(train_generator,epochs = 100,verbose=1)

Epoch 1/100



`Model.fit_generator` is deprecated and will be removed in a future version. Please use `Model.fit`, which supports generators.



Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 7

<keras.src.callbacks.History at 0x7b8ea5be7a60>

In [25]:
prediction = model.predict_generator(test_generator)


`Model.predict_generator` is deprecated and will be removed in a future version. Please use `Model.predict`, which supports generators.



In [26]:
model.save('stock_predictor.h5')


You are saving your model as an HDF5 file via `model.save()`. This file format is considered legacy. We recommend using instead the native Keras format, e.g. `model.save('my_model.keras')`.



In [27]:
close_train = close_train.reshape((-1))
close_test = close_test.reshape((-1))
prediction = prediction.reshape((-1))

In [28]:
prediction

array([124.16037 , 125.129524, 124.24587 , 123.94444 , 122.04706 ,
       123.31016 , 122.9005  , 120.31565 , 119.56241 , 120.498665,
       120.03036 , 120.62143 , 120.44545 , 121.84124 , 121.052536,
       120.43216 , 117.99793 , 117.93641 , 118.94493 , 122.74477 ,
       124.4466  , 124.69684 , 124.16875 , 123.16822 , 120.70077 ,
       120.54055 , 121.31061 , 122.16919 , 127.02527 , 128.59535 ,
       131.37045 , 132.24858 , 131.83192 , 129.57059 , 129.04541 ],
      dtype=float32)

In [29]:
trace1 = go.Scatter(
    x = date_train,
    y = close_train,
    mode = 'lines',
    name = 'Data'
)

trace2 = go.Scatter(
      x = date_test,
      y = close_test,
      mode = 'lines',
      name = 'test'

)

trace3 = go.Scatter(
    x = date_test,
    y= prediction,
    mode = 'lines',
    name = 'prediction'

)

layout = go.Layout(
    title = 'Google Stock Prediction',
    xaxis = {'title':'Date'},
    yaxis = {'title':"Close"}

)
fig = go.Figure(data = [trace1,trace2,trace3],layout = layout)
fig.show()
