# Stock price Prediction using LSTM

## 1. Importing Libraries

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import pandas_datareader as data
import yfinance as yf
%matplotlib notebook

In [None]:
pip install pandas_datareader

In [None]:
pip install yfinance

## 2. Loading our Dataset

In [15]:
s = '2010-01-01'
e = '2022-1-31'
name = input("Enter a name: ")
df = yf.download(name,s,e)

Enter a name: MSFT
[*********************100%***********************]  1 of 1 completed


In [16]:
df.head()

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2010-01-04,30.620001,31.1,30.59,30.950001,23.683071,38409100
2010-01-05,30.85,31.1,30.639999,30.959999,23.690727,49749600
2010-01-06,30.879999,31.08,30.52,30.77,23.54533,58182400
2010-01-07,30.629999,30.700001,30.190001,30.450001,23.300468,50559700
2010-01-08,30.280001,30.879999,30.24,30.66,23.461159,51197400


In [17]:
df.tail()

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2022-01-24,292.200012,297.109985,276.049988,296.369995,293.618744,85731500
2022-01-25,291.519989,294.98999,285.170013,288.48999,285.81189,72848600
2022-01-26,307.98999,308.5,293.029999,296.709991,293.955566,90428900
2022-01-27,302.660004,307.299988,297.929993,299.839996,297.056549,53481300
2022-01-28,300.230011,308.480011,294.450012,308.26001,305.398407,49743700


In [18]:
df = df.reset_index()
df.head()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2010-01-04,30.620001,31.1,30.59,30.950001,23.683071,38409100
1,2010-01-05,30.85,31.1,30.639999,30.959999,23.690727,49749600
2,2010-01-06,30.879999,31.08,30.52,30.77,23.54533,58182400
3,2010-01-07,30.629999,30.700001,30.190001,30.450001,23.300468,50559700
4,2010-01-08,30.280001,30.879999,30.24,30.66,23.461159,51197400


## 3. Dropping columns

In [19]:
df = df.drop(['Date','Adj Close'],axis=1)

In [20]:
df

Unnamed: 0,Open,High,Low,Close,Volume
0,30.620001,31.100000,30.590000,30.950001,38409100
1,30.850000,31.100000,30.639999,30.959999,49749600
2,30.879999,31.080000,30.520000,30.770000,58182400
3,30.629999,30.700001,30.190001,30.450001,50559700
4,30.280001,30.879999,30.240000,30.660000,51197400
...,...,...,...,...,...
3035,292.200012,297.109985,276.049988,296.369995,85731500
3036,291.519989,294.989990,285.170013,288.489990,72848600
3037,307.989990,308.500000,293.029999,296.709991,90428900
3038,302.660004,307.299988,297.929993,299.839996,53481300


In [21]:
plt.plot(df.Close,c='orange')
plt.show()

### 4. Calculating and plotting Moving Average of 100 and 200 days

In [22]:
ma100 = df.Close.rolling(100).mean()
ma100

0            NaN
1            NaN
2            NaN
3            NaN
4            NaN
          ...   
3035    316.5437
3036    316.4103
3037    316.3659
3038    316.3529
3039    316.4337
Name: Close, Length: 3040, dtype: float64

In [23]:
plt.plot(df.Close)
plt.plot(ma100)
plt.show()

In [24]:
ma200 = df.Close.rolling(200).mean()
ma200

0             NaN
1             NaN
2             NaN
3             NaN
4             NaN
          ...    
3035    292.78020
3036    292.94310
3037    293.13420
3038    293.35545
3039    293.59925
Name: Close, Length: 3040, dtype: float64

In [25]:
plt.figure(figsize=(12,6))
plt.plot(df.Close)
plt.plot(ma100,'r',label='Moving Average for 100 days')
plt.plot(ma200,'g',label='Moving Average for 200 days')
plt.legend()
plt.show()

<IPython.core.display.Javascript object>

In [26]:
df.shape

(3040, 5)

## 5. Splitting and Scaling the date

In [27]:
from sklearn.model_selection import train_test_split

In [28]:
data_train = pd.DataFrame(df['Close'][0:int(len(df)*0.70)])
data_test = pd.DataFrame(df['Close'][int(len(df)*0.70):int(len(df))])
print(data_train.shape)
print(data_test.shape)

(2128, 1)
(912, 1)


In [29]:
data_train.head()

Unnamed: 0,Close
0,30.950001
1,30.959999
2,30.77
3,30.450001
4,30.66


In [30]:
data_test.head()

Unnamed: 0,Close
2128,100.860001
2129,100.860001
2130,101.870003
2131,101.139999
2132,100.410004


In [31]:
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler(feature_range=(0,1))

In [32]:
data_training_array = scaler.fit_transform(data_train)
data_training_array

array([[0.09989936],
       [0.10002515],
       [0.09763463],
       ...,
       [0.97936589],
       [0.9865375 ],
       [0.97030699]])

In [33]:
x_train = []
y_train = []

for i in range(100, data_training_array.shape[0]):
    x_train.append(data_training_array[i-100:i])
    y_train.append(data_training_array[i,0])
x_train,y_train = np.array(x_train),np.array(y_train)

In [34]:
x_train.shape

(2028, 100, 1)

## 6. LSTM Model

In [35]:
from keras.layers import Dense,Dropout,LSTM
from keras.models import Sequential

In [36]:
model = Sequential()
model.add(LSTM(units = 50,activation='relu',return_sequences=True,
              input_shape = (x_train.shape[1],1)))  # If we work with open,and other factors we can include other factors as well
model.add(Dropout(0.2))


model.add(LSTM(units = 60,activation='relu',return_sequences=True,
              ))  
model.add(Dropout(0.3))


model.add(LSTM(units = 80,activation='relu',return_sequences=True,
              ))  
model.add(Dropout(0.4))


model.add(LSTM(units = 120,activation='relu'))  
model.add(Dropout(0.5))

model.add(Dense(units=1))

In [37]:
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, 100, 50)           10400     
                                                                 
 dropout (Dropout)           (None, 100, 50)           0         
                                                                 
 lstm_1 (LSTM)               (None, 100, 60)           26640     
                                                                 
 dropout_1 (Dropout)         (None, 100, 60)           0         
                                                                 
 lstm_2 (LSTM)               (None, 100, 80)           45120     
                                                                 
 dropout_2 (Dropout)         (None, 100, 80)           0         
                                                                 
 lstm_3 (LSTM)               (None, 120)               9

In [70]:
model.compile(optimizer='adam',loss='mean_squared_error',metrics=['mae'])
history=model.fit(x_train,y_train,epochs=5)
#batch_size=64,verbose=1,validation_data=(x_test,y_test))

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [40]:
model.save('keras_model.h5')

In [42]:
data_test.head()

Unnamed: 0,Close
2128,100.860001
2129,100.860001
2130,101.870003
2131,101.139999
2132,100.410004


In [43]:
past_100_days = data_train.tail(100)

In [44]:
past_100_days.shape

(100, 1)

In [45]:
data_train.tail(100)

Unnamed: 0,Close
2028,91.820000
2029,92.330002
2030,94.059998
2031,93.919998
2032,92.739998
...,...
2123,101.050003
2124,101.309998
2125,100.849998
2126,101.419998


In [46]:
final_df = past_100_days.append(data_test,ignore_index=True)

  final_df = past_100_days.append(data_test,ignore_index=True)


In [47]:
final_df.shape

(1012, 1)

In [48]:
input_data = scaler.fit_transform(final_df)

In [49]:
input_data.shape

(1012, 1)

In [50]:
x_test = []
y_test = []

for i in range(100,input_data.shape[0]):
  x_test.append(input_data[i-100:i])
  y_test.append(input_data[i,0])

In [51]:
x_test, y_test = np.array(x_test),np.array(y_test)

In [52]:
x_test.shape

(912, 100, 1)

In [53]:
y_test.shape

(912,)

## 8. Making predictions

In [54]:
y_predicted = model.predict(x_test)



In [55]:
y_predicted.shape

(912, 1)

In [56]:
y_test

array([0.0614103 , 0.0614103 , 0.06532352, 0.06249515, 0.05966681,
       0.05184036, 0.05451376, 0.04854707, 0.05277023, 0.05269275,
       0.05811701, 0.05439753, 0.0571484 , 0.06257266, 0.06524602,
       0.06629214, 0.06574972, 0.07431229, 0.07911662, 0.07710191,
       0.08113133, 0.07791554, 0.07512592, 0.08237116, 0.08895777,
       0.0877567 , 0.10003875, 0.09535065, 0.08783417, 0.07888416,
       0.08163503, 0.08240991, 0.08740798, 0.08922898, 0.08957767,
       0.09248352, 0.09484695, 0.09554435, 0.09294847, 0.08988763,
       0.09511816, 0.0877567 , 0.08767919, 0.08744673, 0.08469586,
       0.08124759, 0.08543199, 0.08736922, 0.09062379, 0.09527314,
       0.0978303 , 0.10464935, 0.10437813, 0.10585045, 0.10344827,
       0.09097248, 0.0919411 , 0.08988763, 0.09442075, 0.10162727,
       0.10344827, 0.10809765, 0.1098799 , 0.10511429, 0.10925997,
       0.10340952, 0.11065478, 0.11332817, 0.11491669, 0.1140643 ,
       0.11224333, 0.11390935, 0.11375437, 0.1185587 , 0.11677

In [57]:
y_predicted

array([[0.08029775],
       [0.08128957],
       [0.08210207],
       [0.08273283],
       [0.08319014],
       [0.08348048],
       [0.08358641],
       [0.08349241],
       [0.08318277],
       [0.08266228],
       [0.08195382],
       [0.08110161],
       [0.080156  ],
       [0.07917529],
       [0.07823162],
       [0.07740305],
       [0.07676423],
       [0.07637067],
       [0.07628521],
       [0.07657056],
       [0.07726286],
       [0.07837898],
       [0.07989906],
       [0.08176112],
       [0.08389948],
       [0.08626017],
       [0.08878731],
       [0.09145029],
       [0.09421376],
       [0.09701113],
       [0.09973152],
       [0.10226032],
       [0.10449612],
       [0.10637672],
       [0.10787551],
       [0.10898862],
       [0.10976369],
       [0.11027994],
       [0.11062483],
       [0.11086511],
       [0.11103068],
       [0.11115171],
       [0.11123274],
       [0.11126203],
       [0.1112159 ],
       [0.11106706],
       [0.110783  ],
       [0.110

In [58]:
scaler.scale_

array([0.00387447])

In [59]:
scale_factor = 1/0.02099517
y_predicted = y_predicted*scale_factor
y_test = y_test*scale_factor

In [60]:
plt.figure(figsize=(12,6))
plt.plot(y_test,'b',label='Original Price')
plt.plot(y_predicted,'r',label='Predicted Price')
plt.xlabel('Time')
plt.ylabel('Price')
plt.legend()
plt.show()

<IPython.core.display.Javascript object>

## 9. Calculating Accuracy and Loss

In [61]:
mae = history.history['mae'][-1]

In [62]:
mae

0.031618546694517136

In [63]:
mse=history.history['loss'][-1]

In [64]:
mse

0.00199929834343493

In [65]:
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
mse = mean_squared_error(y_test,y_predicted)
mse

3.0902595909733868

In [66]:
r2 = r2_score(y_test,y_predicted)

In [67]:
r2

0.9815824175865216

In [68]:
mae = mean_absolute_error(y_test,y_predicted)

In [69]:
mae

1.310675610572165