## Importing the libraries

In [1]:
import os
import pandas as pd
import numpy as np
import math
import datetime as dt
import matplotlib.pyplot as plt

In [2]:
from sklearn.metrics import mean_squared_error, mean_absolute_error, explained_variance_score, r2_score 
from sklearn.metrics import mean_poisson_deviance, mean_gamma_deviance, accuracy_score
from sklearn.preprocessing import MinMaxScaler

In [3]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.layers import LSTM
from tensorflow.compat.v1.losses import sparse_softmax_cross_entropy # For Deprecated Errors

In [4]:
import matplotlib.pyplot as plt
from itertools import cycle
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots

## Downloading and Unzipping the Dataset

To get the dataset, follow these steps (the commands are run in a terminal):

1- pip install kaggle

2- Create a new API token in your Kaggle account settings and place the downloaded `.json` file in `C:/Users/<username>/.kaggle/`

3- kaggle datasets download -d jkraak/bitcoin-price-dataset

In [None]:
import zipfile

# Unpack the Kaggle archive into ./dataset only when the CSV that the loading
# cell reads is not already on disk, so a Restart & Run All does not extract
# the archive again on every pass.
if not os.path.exists('dataset/bitcoin_2017_to_2023.csv'):
    with zipfile.ZipFile('bitcoin-price-dataset.zip', 'r') as zip_ref:
        zip_ref.extractall('dataset')

In [6]:
# Read the raw CSV and flip the row order in one step; after the flip the
# first row of `main_df` is the last row of the file and vice versa.
main_df = pd.read_csv('dataset/bitcoin_2017_to_2023.csv').iloc[::-1]

## Data Analysis

In [7]:
# Report the size of the raw table. Each row is a single record at one-minute
# spacing (see the timestamps in head()/tail()), so the row count is a number
# of records, not a number of days.
n_rows, n_fields = main_df.shape
print('Total number of rows (1-minute records) present in the dataset: ', n_rows)
print('Total number of fields present in the dataset: ', n_fields)

Total number of days present in the dataset:  3126000
Total number of fields present in the dataset:  10


In [8]:
# Preview the first five rows of the frame.
main_df.head(n=5)

Unnamed: 0,timestamp,open,high,low,close,volume,quote_asset_volume,number_of_trades,taker_buy_base_asset_volume,taker_buy_quote_asset_volume
3125999,2017-08-17 04:00:00,4261.48,4261.48,4261.48,4261.48,1.775183,7564.906851,3,0.075183,320.390851
3125998,2017-08-17 04:01:00,4261.48,4261.48,4261.48,4261.48,0.0,0.0,0,0.0,0.0
3125997,2017-08-17 04:02:00,4280.56,4280.56,4280.56,4280.56,0.261074,1117.542921,2,0.261074,1117.542921
3125996,2017-08-17 04:03:00,4261.48,4261.48,4261.48,4261.48,0.012008,51.171852,3,0.012008,51.171852
3125995,2017-08-17 04:04:00,4261.48,4261.48,4261.48,4261.48,0.140796,599.999338,1,0.140796,599.999338


In [9]:
# Preview the last five rows of the frame.
main_df.tail(n=5)

Unnamed: 0,timestamp,open,high,low,close,volume,quote_asset_volume,number_of_trades,taker_buy_base_asset_volume,taker_buy_quote_asset_volume
4,2023-08-01 13:15:00,28896.0,28907.42,28893.03,28907.41,37.74657,1090761.0,686,16.50452,476955.246611
3,2023-08-01 13:16:00,28907.41,28912.74,28907.41,28908.53,15.8961,459555.6,483,10.22981,295738.166916
2,2023-08-01 13:17:00,28908.52,28908.53,28902.48,28902.49,11.52263,333053.2,451,2.70873,78290.170121
1,2023-08-01 13:18:00,28902.48,28902.49,28902.48,28902.49,4.77589,138035.1,317,2.24546,64899.385195
0,2023-08-01 13:19:00,28902.48,28902.49,28902.48,28902.49,4.68658,135453.8,258,0.89391,25836.224836


In [10]:
# Print the frame summary: index range, column names, dtypes and memory usage.
main_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3126000 entries, 3125999 to 0
Data columns (total 10 columns):
 #   Column                        Dtype  
---  ------                        -----  
 0   timestamp                     object 
 1   open                          float64
 2   high                          float64
 3   low                           float64
 4   close                         float64
 5   volume                        float64
 6   quote_asset_volume            float64
 7   number_of_trades              int64  
 8   taker_buy_base_asset_volume   float64
 9   taker_buy_quote_asset_volume  float64
dtypes: float64(8), int64(1), object(1)
memory usage: 238.5+ MB


In [11]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
main_df.describe()

Unnamed: 0,open,high,low,close,volume,quote_asset_volume,number_of_trades,taker_buy_base_asset_volume,taker_buy_quote_asset_volume
count,3126000.0,3126000.0,3126000.0,3126000.0,3126000.0,3126000.0,3126000.0,3126000.0,3126000.0
mean,20089.47,20102.17,20076.66,20089.46,52.908,1155882.0,1018.583,26.32231,572721.1
std,16058.96,16069.26,16048.71,16058.96,97.74388,2335868.0,1817.813,49.7288,1193135.0
min,2830.0,2830.0,2817.0,2817.0,0.0,0.0,0.0,0.0,0.0
25%,7624.747,7629.6,7620.0,7624.798,11.20167,112233.5,150.0,5.100715,51250.57
50%,11699.99,11706.81,11692.49,11700.0,23.87539,370646.7,413.0,11.4899,175369.5
75%,29899.57,29907.24,29890.51,29899.57,53.9363,1276762.0,1026.0,26.93009,621025.0
max,69000.0,69000.0,68786.7,69000.0,5877.775,145955700.0,107315.0,3537.453,89475510.0


### Checking for null values

In [12]:
# Count missing cells across the whole frame.
null_count = main_df.isna().to_numpy().sum()
print('Null Values:', null_count)

Null Values: 0


In [13]:
# Boolean flag: is there at least one missing cell anywhere in the frame?
has_na = main_df.isna().to_numpy().any()
print('NA values:', has_na)

NA values: False


In [14]:
# First and last timestamp in the frame's current row order.
# The column is addressed by name instead of by position: a positional lookup
# (row 0, column 0) silently returns a different field as soon as the column
# layout changes, whereas a name lookup fails loudly with a KeyError.
timestamps = main_df['timestamp']
sd = timestamps.iloc[0]
ed = timestamps.iloc[-1]

print('Starting Date:', sd)
print('Ending Date:', ed)


Starting Date: 2017-08-17 04:00:00
Ending Date: 2023-08-01 13:19:00


In [15]:
# Separate exploration frame indexed by parsed timestamps; `main_df` itself is
# left untouched because assign() and set_index() both return new frames.
main_df_explore = (
    main_df
    .assign(timestamp=pd.to_datetime(main_df['timestamp']))
    .set_index('timestamp')
)

In [20]:
# Collapse the exploration frame to one row per calendar day.
# - close:  the last close of the day is the daily closing price.
# - volume: summed over the day. Taking the last value would plot only the
#           final record's volume, which is not the day's trading volume.
dashboard_data = (
    main_df_explore[['close', 'volume']]
    .resample('D')
    .agg({'close': 'last', 'volume': 'sum'})
)

# Daily closing price as a line chart.
fig1 = px.line(dashboard_data, x=dashboard_data.index, y='close', labels={'close':'Bitcoin Close Price'})
fig1.update_layout(title='Bitcoin Close Price Trends', xaxis_title='Date', yaxis_title='Price(USD)')

# Daily total traded volume as a bar chart.
fig2 = px.bar(dashboard_data, x=dashboard_data.index, y='volume', labels={'volume':'Trading Volume'})
fig2.update_layout(title='Bitcoin Trading Volume', xaxis_title='Date', yaxis_title='Volume')

fig1.show()
fig2.show()

## Data Preprocessing

In [None]:
# Derive calendar features from the timestamp and make the timestamp the index.
#
# The cell is written to be safely re-runnable: if a previous run already moved
# 'timestamp' into the index, it is restored as a column first, so a second
# execution rebuilds the same frame instead of failing with a KeyError.
if main_df.index.name == 'timestamp':
    main_df = main_df.reset_index()

# Parse once and reuse for every derived column.
ts = pd.to_datetime(main_df['timestamp'])

main_df = (
    main_df
    .assign(
        timestamp=ts,
        Year=ts.dt.year,
        Month=ts.dt.month,
        Day=ts.dt.day,
        Weekday=ts.dt.weekday,                 # Monday=0 ... Sunday=6
        WeekOfYear=ts.dt.isocalendar().week,   # ISO 8601 week number
        Quarter=ts.dt.quarter,
        DayOfYear=ts.dt.dayofyear,
        IsWeekend=ts.dt.weekday >= 5,          # Saturday (5) or Sunday (6)
        IsStartOfMonth=ts.dt.is_month_start,
        IsEndOfMonth=ts.dt.is_month_end,
    )
    .set_index('timestamp')
)

In [None]:
# Spot-check one randomly drawn row of the preprocessed frame.
main_df.sample(n=1)