In [1]:
# First we will import the necessary Library 

import os
import pandas as pd
import numpy as np
import math
import datetime as dt
import matplotlib.pyplot as plt

# For Evalution we will use these library

from sklearn.metrics import mean_squared_error, mean_absolute_error, explained_variance_score, r2_score 
from sklearn.metrics import mean_poisson_deviance, mean_gamma_deviance, accuracy_score
from sklearn.preprocessing import MinMaxScaler

# For model building we will use these library

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.layers import LSTM


# For PLotting we will use these library

import matplotlib.pyplot as plt
from itertools import cycle
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots

In [2]:
maindf = pd.read_csv("/content/oil_raw.csv")


In [3]:
maindf.head()

Unnamed: 0,date,Open,High,Low,Close,Vol,Change %
0,10/14/2022,92.81,93.45,89.81,90.15,153.89,-0.0467
1,10/13/2022,92.49,94.92,91.08,94.57,280.52,0.0229
2,10/12/2022,93.72,95.17,91.56,92.45,257.66,-0.0195
3,10/11/2022,95.93,96.46,93.11,94.29,259.03,-0.0198
4,10/10/2022,98.75,98.75,95.6,96.19,215.37,-0.0177


In [4]:
maindf.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3280 entries, 0 to 3279
Data columns (total 7 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   date      3280 non-null   object 
 1   Open      3280 non-null   float64
 2   High      3280 non-null   float64
 3   Low       3280 non-null   float64
 4   Close     3280 non-null   float64
 5   Vol       3277 non-null   float64
 6   Change %  3280 non-null   float64
dtypes: float64(6), object(1)
memory usage: 179.5+ KB


In [5]:
maindf.isnull().sum()


date        0
Open        0
High        0
Low         0
Close       0
Vol         3
Change %    0
dtype: int64

In [6]:
print('Total number of days present in the dataset: ',maindf.shape[0])
print('Total number of fields present in the dataset: ',maindf.shape[1])

Total number of days present in the dataset:  3280
Total number of fields present in the dataset:  7


In [7]:
maindf.shape


(3280, 7)

In [8]:
maindf.describe()


Unnamed: 0,Open,High,Low,Close,Vol,Change %
count,3280.0,3280.0,3280.0,3280.0,3277.0,3280.0
mean,77.794741,78.819476,76.723098,77.813015,220.073195,0.00033
std,26.021711,26.18117,25.853465,26.059086,93.84,0.022827
min,19.9,21.29,15.98,19.33,5.89,-0.244
25%,55.925,56.62,55.2,55.91,164.96,-0.0095
50%,73.735,74.67,72.565,73.595,217.75,0.00095
75%,106.17,107.45,105.005,106.2875,277.28,0.010725
max,130.28,139.13,125.0,127.98,779.72,0.2102


In [9]:
print('Null Values:',maindf.isnull().values.sum())


Null Values: 3


In [10]:
print('NA values:',maindf.isnull().values.any())


NA values: True


In [11]:
Date = maindf["date"]


In [12]:
Date = pd.to_datetime(Date)

In [13]:
Date

0      2022-10-14
1      2022-10-13
2      2022-10-12
3      2022-10-11
4      2022-10-10
          ...    
3275   2010-02-11
3276   2010-02-10
3277   2010-02-09
3278   2010-02-08
3279   2010-02-05
Name: date, Length: 3280, dtype: datetime64[ns]

In [14]:
maindf.drop("date",inplace= True,axis=1)

In [15]:
Date = pd.DataFrame(Date)
maindf = pd.concat([Date,maindf],axis = 1)

In [16]:
maindf.head()

Unnamed: 0,date,Open,High,Low,Close,Vol,Change %
0,2022-10-14,92.81,93.45,89.81,90.15,153.89,-0.0467
1,2022-10-13,92.49,94.92,91.08,94.57,280.52,0.0229
2,2022-10-12,93.72,95.17,91.56,92.45,257.66,-0.0195
3,2022-10-11,95.93,96.46,93.11,94.29,259.03,-0.0198
4,2022-10-10,98.75,98.75,95.6,96.19,215.37,-0.0177


In [17]:
# Printing the start date and End date of the dataset

ed=maindf.iloc[0][0]
sd=maindf.iloc[-1][0]


print('Starting Date',sd)
print('Ending Date',ed)

Starting Date 2010-02-05 00:00:00
Ending Date 2022-10-14 00:00:00


In [18]:
maindf['date'] = pd.to_datetime(maindf['date'], format='%Y-%m-%d')
y_2010 = maindf.loc[(maindf['date'] >= '2010-02-05')
                     & (maindf['date'] < '2010-12-31')]

y_2010.drop(y_2010[['Change %','Vol']],axis=1)

Unnamed: 0,date,Open,High,Low,Close
3046,2010-12-30,94.18,94.42,92.34,93.09
3047,2010-12-29,94.34,94.38,93.62,94.14
3048,2010-12-28,93.90,94.42,93.66,94.38
3049,2010-12-27,93.76,94.52,93.08,93.85
3050,2010-12-24,94.45,94.74,93.18,93.77
...,...,...,...,...,...
3275,2010-02-11,72.80,73.58,72.02,73.05
3276,2010-02-10,71.89,73.00,70.83,72.54
3277,2010-02-09,69.63,72.47,69.61,72.13
3278,2010-02-08,70.36,70.82,69.21,70.11


In [19]:
monthvise= y_2010.groupby(y_2010['date'].dt.strftime('%B'))[['Open','Close']].mean()
new_order = ['January', 'February', 'March', 'April', 'May', 'June', 'July', 'August', 
             'September', 'October', 'November', 'December']
monthvise = monthvise.reindex(new_order, axis=0)
monthvise

Unnamed: 0_level_0,Open,Close
date,Unnamed: 1_level_1,Unnamed: 2_level_1
January,,
February,74.589375,74.91125
March,79.759565,79.931304
April,85.552381,85.75381
May,77.60619,76.997619
June,75.618182,75.658636
July,75.186364,75.356364
August,77.247273,77.115
September,78.039091,78.417273
October,83.486667,83.53619


In [20]:
fig = go.Figure()

fig.add_trace(go.Bar(
    x=monthvise.index,
    y=monthvise['Open'],
    name='Stock Open Price',
    marker_color='crimson'
))
fig.add_trace(go.Bar(
    x=monthvise.index,
    y=monthvise['Close'],
    name='Stock Close Price',
    marker_color='lightsalmon'
))

fig.update_layout(barmode='group', xaxis_tickangle=-45, 
                  title='Monthwise comparision between Stock open and close price')
fig.show()

In [21]:
y_2010.groupby(y_2010['date'].dt.strftime('%B'))['Low'].min()
monthvise_high = y_2010.groupby(maindf['date'].dt.strftime('%B'))['High'].max()
monthvise_high = monthvise_high.reindex(new_order, axis=0)

monthvise_low = y_2010.groupby(y_2010['date'].dt.strftime('%B'))['Low'].min()
monthvise_low = monthvise_low.reindex(new_order, axis=0)

fig = go.Figure()
fig.add_trace(go.Bar(
    x=monthvise_high.index,
    y=monthvise_high,
    name='Stock high Price',
    marker_color='rgb(0, 153, 204)'
))
fig.add_trace(go.Bar(
    x=monthvise_low.index,
    y=monthvise_low,
    name='Stock low Price',
    marker_color='rgb(255, 128, 0)'
))

fig.update_layout(barmode='group', 
                  title=' Monthwise High and Low stock price')
fig.show()

In [22]:
names = cycle(['Stock Open Price','Stock Close Price','Stock High Price','Stock Low Price'])

fig = px.line(y_2010, x=y_2010.date, y=[y_2010['Open'], y_2010['Close'], 
                                          y_2010['High'], y_2010['Low']],
             labels={'Date': 'Date','value':'Stock value'})
fig.update_layout(title_text='Stock analysis chart', font_size=15, font_color='black',legend_title_text='Stock Parameters')
fig.for_each_trace(lambda t:  t.update(name = next(names)))
fig.update_xaxes(showgrid=False)
fig.update_yaxes(showgrid=False)

fig.show()

In [23]:
maindf['Date'] = pd.to_datetime(maindf['date'], format='%Y-%m-%d')

y_overall = maindf.loc[(maindf['date'] >= '2010-02-05')
                     & (maindf['date'] <= ' 2022-10-14')]

y_overall.drop(y_overall[['Change %','Vol']],axis=1)

Unnamed: 0,date,Open,High,Low,Close,Date
0,2022-10-14,92.81,93.45,89.81,90.15,2022-10-14
1,2022-10-13,92.49,94.92,91.08,94.57,2022-10-13
2,2022-10-12,93.72,95.17,91.56,92.45,2022-10-12
3,2022-10-11,95.93,96.46,93.11,94.29,2022-10-11
4,2022-10-10,98.75,98.75,95.60,96.19,2022-10-10
...,...,...,...,...,...,...
3275,2010-02-11,72.80,73.58,72.02,73.05,2010-02-11
3276,2010-02-10,71.89,73.00,70.83,72.54,2010-02-10
3277,2010-02-09,69.63,72.47,69.61,72.13,2010-02-09
3278,2010-02-08,70.36,70.82,69.21,70.11,2010-02-08


In [24]:
monthvise= y_overall.groupby(y_overall['date'].dt.strftime('%B'))[['Open','Close']].mean()
new_order = ['January', 'February', 'March', 'April', 'May', 'June', 'July', 'August', 
             'September', 'October', 'November', 'December']
monthvise = monthvise.reindex(new_order, axis=0)
monthvise

Unnamed: 0_level_0,Open,Close
date,Unnamed: 1_level_1,Unnamed: 2_level_1
January,75.313035,75.361868
February,77.830039,77.976667
March,79.311224,79.211014
April,79.799814,79.906543
May,80.288084,80.230139
June,79.167348,79.220968
July,78.750418,78.765401
August,77.615813,77.684014
September,77.557419,77.510681
October,77.8352,77.854945


In [25]:
names = cycle(['Stock Open Price','Stock Close Price','Stock High Price','Stock Low Price'])

fig = px.line(y_overall, x=y_overall.date, y=[y_overall['Open'], y_overall['Close'], 
                                          y_overall['High'], y_overall['Low']],
             labels={'Date': 'Date','value':'Stock value'})
fig.update_layout(title_text='Stock analysis chart', font_size=15, font_color='black',legend_title_text='Stock Parameters')
fig.for_each_trace(lambda t:  t.update(name = next(names)))
fig.update_xaxes(showgrid=False)
fig.update_yaxes(showgrid=False)

fig.show()

In [26]:
# Lets First Take all the Close Price 
closedf = maindf[['date','Close']]
print("Shape of close dataframe:", closedf.shape)

Shape of close dataframe: (3280, 2)


In [27]:
fig = px.line(closedf, x=closedf.date, y=closedf.Close,labels={'date':'date','close':'Close Stock'})
fig.update_traces(marker_line_width=2, opacity=0.8, marker_line_color='orange')
fig.update_layout(title_text='Whole period of timeframe of Bitcoin close price 2014-2022', plot_bgcolor='white', 
                  font_size=15, font_color='black')
fig.update_xaxes(showgrid=False)
fig.update_yaxes(showgrid=False)
fig.show()

In [28]:
closedf = closedf[closedf['date'] > '2021-10-14']
close_stock = closedf.copy()
print("Total data for prediction: ",closedf.shape[0])


Total data for prediction:  260


In [29]:
closedf


Unnamed: 0,date,Close
0,2022-10-14,90.15
1,2022-10-13,94.57
2,2022-10-12,92.45
3,2022-10-11,94.29
4,2022-10-10,96.19
...,...,...
255,2021-10-21,84.61
256,2021-10-20,85.82
257,2021-10-19,85.08
258,2021-10-18,84.33


In [30]:
fig = px.line(closedf, x=closedf.date, y=closedf.Close,labels={'date':'date','close':'Close Stock'})
fig.update_traces(marker_line_width=2, opacity=0.8, marker_line_color='orange')
fig.update_layout(title_text='Considered period to predict Bitcoin close price', 
                  plot_bgcolor='white', font_size=15, font_color='black')
fig.update_xaxes(showgrid=False)
fig.update_yaxes(showgrid=False)
fig.show()

In [31]:
# deleting date column and normalizing using MinMax Scaler

del closedf['date']
scaler=MinMaxScaler(feature_range=(0,1))
closedf=scaler.fit_transform(np.array(closedf).reshape(-1,1))
print(closedf.shape)

(260, 1)


In [32]:
# we keep the training set as 60% and 40% testing set

training_size=int(len(closedf)*0.60)
test_size=len(closedf)-training_size
train_data,test_data=closedf[0:training_size,:],closedf[training_size:len(closedf),:1]
print("train_data: ", train_data.shape)
print("test_data: ", test_data.shape)

train_data:  (156, 1)
test_data:  (104, 1)


In [33]:
def create_dataset(dataset, time_step=1):
    dataX, dataY = [], []
    for i in range(len(dataset)-time_step-1):
        a = dataset[i:(i+time_step), 0]   ###i=0, 0,1,2,3-----99   100 
        dataX.append(a)
        dataY.append(dataset[i + time_step, 0])
    return np.array(dataX), np.array(dataY)

In [34]:
time_step = 15
X_train, y_train = create_dataset(train_data, time_step)
X_test, y_test = create_dataset(test_data, time_step)

print("X_train: ", X_train.shape)
print("y_train: ", y_train.shape)
print("X_test: ", X_test.shape)
print("y_test", y_test.shape)

X_train:  (140, 15)
y_train:  (140,)
X_test:  (88, 15)
y_test (88,)


In [35]:
# reshape input to be [samples, time steps, features] which is required for LSTM
X_train =X_train.reshape(X_train.shape[0],X_train.shape[1] , 1)
X_test = X_test.reshape(X_test.shape[0],X_test.shape[1] , 1)

print("X_train: ", X_train.shape)
print("X_test: ", X_test.shape)

X_train:  (140, 15, 1)
X_test:  (88, 15, 1)


In [76]:
from keras.layers import Bidirectional
model = Sequential()
model.add(Bidirectional(LSTM(units=250, return_sequences=True, input_shape = (None,1)) ))
model.add(Dropout(0.2))
model.add(LSTM(units= 30 , return_sequences=True))
model.add(Dropout(0.2))
model.add(LSTM(units= 30 , return_sequences=True))
model.add(Dropout(0.2))
model.add(LSTM(units= 30))
model.add(Dropout(0.2))
model.add(Dense(units = 1))
model.compile(loss="mean_squared_error",optimizer="adam")


In [77]:
history = model.fit(X_train,y_train,validation_data=(X_test,y_test),epochs=200,batch_size=32,verbose=1)

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78