<a href="https://colab.research.google.com/github/ARPIT-SINGHAL-22102003/BITCOIN-PRICE-PRDICTION/blob/main/main.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Importing Libraries**

In [1]:
import pandas as pd
import numpy as np
import math
import datetime as dt

import matplotlib.pyplot as plt
from itertools import cycle
import plotly.express as px
from plotly.subplots import make_subplots
import seaborn as sns

from xgboost import XGBRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.metrics import mean_poisson_deviance, mean_gamma_deviance, accuracy_score
from sklearn.preprocessing import MinMaxScaler

from plotly.offline import plot, iplot, init_notebook_mode
init_notebook_mode(connected = True)


# **LOADING DATA**

In [2]:
#loading dataset
data = pd.read_csv("/content/BTC-USD.csv")
data = data.rename(columns={'Date': 'date','Open':'open','High':'high','Low':'low','Close':'close',
                                'Adj Close':'adj_close','Volume':'volume'})

In [3]:
#displaying first 5 rows of data
data.head()

Unnamed: 0,date,open,high,low,close,adj_close,volume
0,2014-09-17,465.864014,468.174011,452.421997,457.334015,457.334015,21056800.0
1,2014-09-18,456.859985,456.859985,413.104004,424.440002,424.440002,34483200.0
2,2014-09-19,424.102997,427.834991,384.532013,394.79599,394.79599,37919700.0
3,2014-09-20,394.673004,423.29599,389.882996,408.903992,408.903992,36863600.0
4,2014-09-21,408.084991,412.425995,393.181,398.821014,398.821014,26580100.0


In [4]:
#displaying the last 5 rows of the dataset
data.tail()

Unnamed: 0,date,open,high,low,close,adj_close,volume
2529,2021-08-20,46723.121094,49342.152344,46650.707031,49339.175781,49339.175781,34706870000.0
2530,2021-08-21,49327.074219,49717.019531,48312.199219,48905.492188,48905.492188,40585210000.0
2531,2021-08-22,48869.105469,49471.609375,48199.941406,49321.652344,49321.652344,25370980000.0
2532,2021-08-23,49291.675781,50482.078125,49074.605469,49546.148438,49546.148438,34305050000.0
2533,2021-08-24,49461.226563,49860.957031,48531.832031,48531.832031,48531.832031,35024990000.0


In [5]:
data.shape

(2534, 7)

In [6]:
#to view some basic statistical details like percentile, mean, std,etc. use describe()

data.describe()

Unnamed: 0,open,high,low,close,adj_close,volume
count,2530.0,2530.0,2530.0,2530.0,2530.0,2530.0
mean,8581.20879,8827.542899,8314.356161,8599.314657,8599.314657,13253950000.0
std,12672.856486,13066.98274,12225.14252,12695.366642,12695.366642,19793140000.0
min,176.897003,211.731003,171.509995,178.102997,178.102997,5914570.0
25%,572.634476,577.80925,566.011734,572.849243,572.849243,71785820.0
50%,5077.713868,5282.09668,4987.065674,5093.062745,5093.062745,4299460000.0
75%,9469.311767,9654.489258,9262.651612,9477.050537,9477.050537,20667140000.0
max,63523.753906,64863.097656,62208.964844,63503.457031,63503.457031,350967900000.0


# **CHECKING FOR NULL VALUES**

In [7]:
data.isnull().sum()

Unnamed: 0,0
date,0
open,4
high,4
low,4
close,4
adj_close,4
volume,4


In [8]:
#converting date field from string to date
data['date'] = pd.to_datetime(data.date)
data.head()

Unnamed: 0,date,open,high,low,close,adj_close,volume
0,2014-09-17,465.864014,468.174011,452.421997,457.334015,457.334015,21056800.0
1,2014-09-18,456.859985,456.859985,413.104004,424.440002,424.440002,34483200.0
2,2014-09-19,424.102997,427.834991,384.532013,394.79599,394.79599,37919700.0
3,2014-09-20,394.673004,423.29599,389.882996,408.903992,408.903992,36863600.0
4,2014-09-21,408.084991,412.425995,393.181,398.821014,398.821014,26580100.0


In [9]:
#finding the duration of the dataset
print("Starting date: ",data.iloc[0][0])
print("Ending date: ", data.iloc[-1][0])
print("Duration: ", data.iloc[-1][0]-data.iloc[0][0])

Starting date:  2014-09-17 00:00:00
Ending date:  2021-08-24 00:00:00
Duration:  2533 days 00:00:00



Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]`


Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]`


Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]`



# **DATA ANALYSIS**

In [10]:
y_2014 = data.loc[(data['date'] >= '2014-01-01')
                     & (data['date'] < '2015-01-01')]

y_2014.drop(y_2014[['adj_close','volume']],axis=1)

Unnamed: 0,date,open,high,low,close
0,2014-09-17,465.864014,468.174011,452.421997,457.334015
1,2014-09-18,456.859985,456.859985,413.104004,424.440002
2,2014-09-19,424.102997,427.834991,384.532013,394.795990
3,2014-09-20,394.673004,423.295990,389.882996,408.903992
4,2014-09-21,408.084991,412.425995,393.181000,398.821014
...,...,...,...,...,...
101,2014-12-27,327.583008,328.911011,312.630005,315.863007
102,2014-12-28,316.160004,320.028015,311.078003,317.239014
103,2014-12-29,317.700989,320.266998,312.307007,312.670013
104,2014-12-30,312.718994,314.808990,309.372986,310.737000


In [11]:
monthvise= y_2014.groupby(y_2014['date'].dt.strftime('%B'))[['open','close']].mean()
new_order = ['January', 'February', 'March', 'April', 'May', 'June', 'July', 'August',
             'September', 'October', 'November', 'December']
monthvise = monthvise.reindex(new_order, axis=0)
monthvise


Unnamed: 0_level_0,open,close
date,Unnamed: 1_level_1,Unnamed: 2_level_1
January,,
February,,
March,,
April,,
May,,
June,,
July,,
August,,
September,412.654003,407.182428
October,365.748,364.148873


In [15]:
import plotly.graph_objects as go
fig = go.Figure()

fig.add_trace(go.Bar(
    x=monthvise.index,
    y=monthvise['open'],
    name='Stock Open Price',
    marker_color='crimson'
))
fig.add_trace(go.Bar(
    x=monthvise.index,
    y=monthvise['close'],
    name='Stock Close Price',
    marker_color='lightsalmon'
))

fig.update_layout(barmode='group', xaxis_tickangle=-45,
                  title='Monthwise comparision between Stock open and close price')
fig.show()