In [52]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Activation
from tensorflow.keras.layers import LSTM
import warnings
warnings.filterwarnings('ignore')
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler


In [2]:
df = pd.read_csv('D:\MY WORK\Turing Internship\DATA\Google_train_data.csv')
df.head()

Unnamed: 0,Date,Open,High,Low,Close,Volume
0,1/3/2012,325.25,332.83,324.97,663.59,7380500
1,1/4/2012,331.27,333.87,329.08,666.45,5749400
2,1/5/2012,329.83,330.75,326.89,657.21,6590300
3,1/6/2012,328.34,328.77,323.68,648.24,5405900
4,1/9/2012,322.04,322.29,309.46,620.76,11688800


In [3]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1258 entries, 0 to 1257
Data columns (total 6 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Date    1258 non-null   object 
 1   Open    1258 non-null   float64
 2   High    1258 non-null   float64
 3   Low     1258 non-null   float64
 4   Close   1258 non-null   object 
 5   Volume  1258 non-null   object 
dtypes: float64(3), object(3)
memory usage: 59.1+ KB


In [4]:
df.drop(['Date'],axis='columns',inplace=True)

In [5]:
df.dtypes

Open      float64
High      float64
Low       float64
Close      object
Volume     object
dtype: object

In [6]:
df['Volume'] = df['Volume'].str.replace(',','')
df['Volume'] = df['Volume'].astype('float32')
df['Close'] = df['Close'].str.replace(',','')
df['Close'] = df['Close'].astype('float32')
df['Open'] = df['Open'].astype('float32')
df['High'] = df['High'].astype('float32')
df['Low'] = df['Low'].astype('float32')

In [7]:
df.dtypes

Open      float32
High      float32
Low       float32
Close     float32
Volume    float32
dtype: object

In [8]:
df.shape

(1258, 5)

In [9]:
df.isnull().sum()

Open      0
High      0
Low       0
Close     0
Volume    0
dtype: int64

In [10]:
df.isna().sum()

Open      0
High      0
Low       0
Close     0
Volume    0
dtype: int64

In [11]:
correlation = df.corr()
print(correlation['Close'].sort_values(ascending=False))

Close     1.000000
Volume    0.192357
Low       0.129001
Open      0.125832
High      0.124351
Name: Close, dtype: float64


In [12]:
X = df[['Open', 'High', 'Low', 'Volume']]

y = df['Close']

In [13]:
#Running LSTM
scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X)

In [14]:
X_reshape = X_scaled.reshape((X_scaled.shape[0],1,X_scaled.shape[1]))

In [15]:
from sklearn.model_selection import KFold
fold = KFold(n_splits=10)

for train_index, test_index in fold.split(X_reshape, y):
    X_train, X_test = X_reshape[train_index], X_reshape[test_index]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]

In [16]:
X_train.shape,y_train.shape

((1133, 1, 4), (1133,))

In [17]:
model = tf.keras.Sequential([
    tf.keras.layers.LSTM(100,activation='relu',input_shape =(1, 4)),
    tf.keras.layers.Dense(80,activation='relu'),
    tf.keras.layers.Dense(60,activation='relu'),
    tf.keras.layers.Dense(40,activation='relu'),
    tf.keras.layers.Dense(20,activation='relu'),
    tf.keras.layers.Dense(1)
])

In [18]:
model.compile(optimizer='adam',loss='mae')

In [19]:
model.fit(X_train, y_train, epochs=100,validation_data=(X_test,y_test))

Epoch 1/100
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 10ms/step - loss: 715.8599 - val_loss: 768.5458
Epoch 2/100
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 708.2367 - val_loss: 704.5350
Epoch 3/100
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 594.9539 - val_loss: 295.9178
Epoch 4/100
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 241.9902 - val_loss: 68.5214
Epoch 5/100
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 195.6268 - val_loss: 19.8812
Epoch 6/100
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 164.8210 - val_loss: 14.4617
Epoch 7/100
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 143.4566 - val_loss: 20.4352
Epoch 8/100
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 122.2701 - val_loss: 74.5669
Epoch 9/100
[1m36/3

<keras.src.callbacks.history.History at 0x27c999c1640>

In [20]:
model.evaluate(X_test,y_test)

[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 10.4837


11.793651580810547

In [21]:
y_pred = model.predict(X_test)
y_pred = np.argmax(y_pred, axis=1)
y_pred

[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 61ms/step


array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=int64)

In [23]:
from sklearn.metrics import mean_absolute_error

from sklearn.metrics import mean_squared_error
mae = mean_absolute_error(y_test, y_pred)
print(f"Mean Absolute Error: {mae}")

Mean Absolute Error: 770.3359189453125


In [24]:
df1 = pd.read_csv('D:\MY WORK\Turing Internship\DATA\Google_test_data.csv')
df1

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2018-08-13,1236.979980,1249.272949,1233.640991,1235.010010,1235.010010,997300
1,2018-08-14,1235.189941,1245.869995,1225.109985,1242.099976,1242.099976,1348100
2,2018-08-15,1229.260010,1235.239990,1209.510010,1214.380005,1214.380005,1828800
3,2018-08-16,1224.729980,1226.000000,1202.550049,1206.489990,1206.489990,1343200
4,2018-08-17,1202.030029,1209.020020,1188.239990,1200.959961,1200.959961,1389600
...,...,...,...,...,...,...,...
247,2019-08-07,1156.000000,1178.444946,1149.624023,1173.989990,1173.989990,1444300
248,2019-08-08,1182.829956,1205.010010,1173.020020,1204.800049,1204.800049,1468000
249,2019-08-09,1197.989990,1203.880005,1183.603027,1188.010010,1188.010010,1065700
250,2019-08-12,1179.209961,1184.959961,1167.671997,1174.709961,1174.709961,1003000


In [25]:
df1.columns

Index(['Date', 'Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume'], dtype='object')

In [26]:
df1.drop(['Adj Close','Date'],axis='columns',inplace=True)

In [27]:
df1.head()

Unnamed: 0,Open,High,Low,Close,Volume
0,1236.97998,1249.272949,1233.640991,1235.01001,997300
1,1235.189941,1245.869995,1225.109985,1242.099976,1348100
2,1229.26001,1235.23999,1209.51001,1214.380005,1828800
3,1224.72998,1226.0,1202.550049,1206.48999,1343200
4,1202.030029,1209.02002,1188.23999,1200.959961,1389600


In [28]:
df1.dtypes

Open      float64
High      float64
Low       float64
Close     float64
Volume      int64
dtype: object

In [29]:
df1.isnull().sum()

Open      0
High      0
Low       0
Close     0
Volume    0
dtype: int64

In [30]:
df1.isna().sum()

Open      0
High      0
Low       0
Close     0
Volume    0
dtype: int64

In [31]:
X_new = df1[['Open', 'High', 'Low', 'Volume']]

y_new = df1['Close']

In [33]:
scaler = MinMaxScaler()
X_scaled_new = scaler.fit_transform(X_new)

In [34]:
X_reshape_new = X_scaled_new.reshape((X_scaled_new.shape[0],1,X_scaled_new.shape[1]))

In [38]:
foldN = KFold(n_splits=10)

for train_index_new, test_index_new in foldN.split(X_reshape_new, y_new):
    X_new_train, X_new_test = X_reshape_new[train_index_new], X_reshape_new[test_index_new]
    y_new_train, y_new_test = y_new.iloc[train_index_new], y_new.iloc[test_index_new]

In [39]:
X_new_train.shape,y_new_train.shape

((227, 1, 4), (227,))

In [41]:
modelN = tf.keras.Sequential([
    tf.keras.layers.LSTM(100,activation='relu',input_shape =(1, 4)),
    tf.keras.layers.Dense(80,activation='relu'),
    tf.keras.layers.Dense(60,activation='relu'),
    tf.keras.layers.Dense(40,activation='relu'),
    tf.keras.layers.Dense(20,activation='relu'),
    tf.keras.layers.Dense(1)
])

In [42]:
modelN.compile(optimizer='adam',loss='mae')

In [43]:
modelN.fit(X_new_train, y_new_train, epochs=100,validation_data=(X_new_test,y_new_test))

Epoch 1/100
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 33ms/step - loss: 1129.1823 - val_loss: 1172.1389
Epoch 2/100
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - loss: 1131.3036 - val_loss: 1171.9424
Epoch 3/100
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 1125.8390 - val_loss: 1171.5389
Epoch 4/100
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 1126.5186 - val_loss: 1170.6705
Epoch 5/100
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - loss: 1125.2086 - val_loss: 1168.7719
Epoch 6/100
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 1121.4915 - val_loss: 1164.6544
Epoch 7/100
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - loss: 1121.6742 - val_loss: 1155.9777
Epoch 8/100
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 1107.6555 - val_loss: 1138.2911
Epoch 9/100


<keras.src.callbacks.history.History at 0x27c9fff2210>

In [44]:
modelN.evaluate(X_new_test,y_new_test)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 55ms/step - loss: 5.5574


5.557392597198486

In [45]:
y_pred_new = modelN.predict(X_new_test)
y_pred_new = np.argmax(y_pred_new, axis=1)
y_pred_new

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 151ms/step


array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0], dtype=int64)

In [48]:
mae = mean_absolute_error(y_new_test, y_pred_new)
print(f"Mean Absolute Error: {mae}")

Mean Absolute Error: 1172.2475928


In [71]:
#Simple NN
foldNN = KFold(n_splits=10)

for train_index_NN, test_index_NN in foldNN.split(X_scaled, y):
    X_NN_train, X_NN_test = X_scaled[train_index_NN], X_scaled[test_index_NN]
    y_NN_train, y_NN_test = y.iloc[train_index_NN], y.iloc[test_index_NN]

In [75]:
X_NN_train.shape

(1133, 4)

In [76]:
modelNN = tf.keras.Sequential([
    
    tf.keras.layers.Dense(100,input_shape=(4,),activation='relu'),
    tf.keras.layers.Dropout(0.1),
    tf.keras.layers.Dense(80,activation='relu'),
    tf.keras.layers.Dropout(0.1),
    tf.keras.layers.Dense(60,activation='relu'),
    tf.keras.layers.Dropout(0.1),
    tf.keras.layers.Dense(40,activation='relu'),
    tf.keras.layers.Dropout(0.1),
    tf.keras.layers.Dense(20,activation='relu'),
    tf.keras.layers.Dense(1)
   
    
])

In [80]:
modelNN.compile(optimizer='adam',loss='mae')

In [81]:
modelNN.fit(X_NN_train, y_NN_train, epochs=50,validation_data=(X_NN_test,y_NN_test))

Epoch 1/50
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - loss: 115.7552 - val_loss: 68.3741
Epoch 2/50
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 112.7317 - val_loss: 38.6497
Epoch 3/50
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 110.8672 - val_loss: 56.0819
Epoch 4/50
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 118.2312 - val_loss: 34.2586
Epoch 5/50
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 107.0325 - val_loss: 27.4196
Epoch 6/50
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 111.6134 - val_loss: 58.8512
Epoch 7/50
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 111.4533 - val_loss: 28.0467
Epoch 8/50
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 107.9173 - val_loss: 67.7838
Epoch 9/50
[1m36/36[0m [32m━━

<keras.src.callbacks.history.History at 0x27ca8c1d1c0>

In [82]:
modelNN.evaluate(X_NN_test, y_NN_test)

[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 39.4727 


38.88019943237305

In [83]:
y_pred_NN = modelNN.predict(X_NN_test)
y_pred_NN = np.argmax(y_pred_NN,axis=1)
y_pred_NN

[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step


array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=int64)

In [84]:
mae = mean_absolute_error(y_NN_test, y_pred_NN)
print(f"Mean Absolute Error: {mae}")

Mean Absolute Error: 770.3359189453125


In [60]:
#Running ML models
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split

In [58]:
X_ML_train,X_ML_test,y_ML_train,y_ML_test = train_test_split(X,y,test_size=0.3,random_state=45)

In [61]:
cross_val_score(LinearRegression(),X_ML_train,y_ML_train)

array([-0.01913365,  0.22099164,  0.19525176,  0.2457807 ,  0.17957147])

In [62]:
cross_val_score(RandomForestRegressor(),X_ML_train,y_ML_train)

array([0.67324553, 0.51191769, 0.58656613, 0.78510882, 0.56423834])

In [63]:
cross_val_score(SVR(),X_ML_train,y_ML_train)

array([-0.00443972, -0.00780445, -0.014603  , -0.01500759, -0.02743259])

In [64]:
cross_val_score(DecisionTreeRegressor(),X_ML_train,y_ML_train)

array([0.57091339, 0.33792994, 0.26083133, 0.61299204, 0.11692612])

In [65]:
lr = LinearRegression()
lr.fit(X_ML_train,y_ML_train)
lr.score(X_ML_test,y_ML_test)

0.1992206927985316

In [66]:
rf = RandomForestRegressor()
rf.fit(X_ML_train,y_ML_train)
rf.score(X_ML_test,y_ML_test)

0.7386031258059611

In [67]:
svr = SVR()
svr.fit(X_ML_train,y_ML_train)
svr.score(X_ML_test,y_ML_test)

0.011322596814810848

In [68]:
dt = DecisionTreeRegressor()
dt.fit(X_ML_train,y_ML_train)
dt.score(X_ML_test,y_ML_test)

0.5435312550569387