# Exploratory Data Analysis (EDA) and Prediction on Bitcoin cryptocurrency

## Importing necessary libraries

---






In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.graph_objs as go
import plotly.express as px
import plotly.figure_factory as ff
from plotly.subplots import make_subplots
%matplotlib inline

# To hide warnings
import warnings
warnings.filterwarnings('ignore')

# Import for datareading:
import pandas_datareader as web
from pandas_datareader import data, wb
import requests
import datetime, dateutil.relativedelta
%pip install --upgrade mplfinance
import mplfinance as mpl

# Import tensorflow and keras:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
# Compare the major version numerically: a plain string comparison is
# lexicographic and would wrongly reject e.g. "10.0.0" ("1" sorts before "2").
assert int(tf.__version__.split(".")[0]) >= 2, "TensorFlow 2.x or newer is required"
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.layers import  Dropout, Dense, LSTM
from tensorflow.keras.models import Sequential

# Initialise the scaler used by the price-prediction cells
# (feature_range=(0, 1) is MinMaxScaler's default, spelled out for clarity).
scaler = MinMaxScaler(feature_range=(0, 1))

Collecting mplfinance
[?25l  Downloading https://files.pythonhosted.org/packages/44/fe/9a4018a4bed7264ecc630c373cbf3315711944bf93ebd8e2fa1279a04262/mplfinance-0.12.7a12-py3-none-any.whl (61kB)
[K     |█████▎                          | 10kB 12.1MB/s eta 0:00:01[K     |██████████▋                     | 20kB 17.9MB/s eta 0:00:01[K     |████████████████                | 30kB 10.8MB/s eta 0:00:01[K     |█████████████████████▎          | 40kB 9.1MB/s eta 0:00:01[K     |██████████████████████████▋     | 51kB 8.5MB/s eta 0:00:01[K     |████████████████████████████████| 61kB 8.9MB/s eta 0:00:01[K     |████████████████████████████████| 71kB 4.9MB/s 
Installing collected packages: mplfinance
Successfully installed mplfinance-0.12.7a12


## Reading recent data from Yahoo Finance


---








In [3]:
# Date range for the download: the two years up to today.
# A single timestamp is taken so `end` and `end_t` can never fall on
# different days when the cell runs around midnight.
end = datetime.datetime.now()
end_t = end.strftime('%Y-%m-%d')
try:
    start = end.replace(year=end.year - 2).strftime('%Y-%m-%d')
except ValueError:
    # Today is 29 February and the year two years back has no such day:
    # fall back to 28 February instead of crashing.
    start = end.replace(year=end.year - 2, day=28).strftime('%Y-%m-%d')

In [4]:
# Download the BTC-INR quotes from the 'yahoo' data source for [start, end_t]
# and move the index into a regular column.
bitcoin = web.DataReader(name='BTC-INR', data_source='yahoo', start=start, end=end_t).reset_index()
# The same frame is also registered under the global name 'BTC-INR'.
globals()['BTC-INR'] = bitcoin

In [5]:
# Show the last five rows
bitcoin.tail(5)


Unnamed: 0,Date,High,Low,Open,Close,Volume,Adj Close
570,2021-04-13,4874119.0,4619650.0,4775827.0,4734890.5,5810924000000.0,4734890.5
571,2021-04-14,4769128.5,4671974.0,4732302.0,4731307.5,4554978000000.0,4731307.5
572,2021-04-15,4750986.0,4480725.0,4727160.0,4591076.5,6285173000000.0,4591076.5
573,2021-04-16,4665594.0,4500174.0,4587880.0,4524215.5,4930902000000.0,4524215.5
574,2021-04-18,4546303.0,3977803.5,4212293.5,4130716.75,7058531000000.0,4130716.75


In [6]:
# Data type of each column
bitcoin.dtypes

Date         datetime64[ns]
High                float64
Low                 float64
Open                float64
Close               float64
Volume              float64
Adj Close           float64
dtype: object

In [7]:
# (number of rows, number of columns)
bitcoin.shape

(575, 7)

## Exploratory Data Analysis (EDA) on bitcoin data

---







In [8]:
# Show the first five rows
bitcoin.head(n=5)

Unnamed: 0,Date,High,Low,Open,Close,Volume,Adj Close
0,2019-09-17,731490.6875,725478.5,734975.875,725961.0,1151007000000.0,725961.0
1,2019-09-18,735039.75,703342.8125,726121.0,732951.1875,1423414000000.0,732951.1875
2,2019-09-19,732415.8125,721472.4375,732944.3125,724993.9375,1049163000000.0,724993.9375
3,2019-09-20,725453.6875,712110.4375,725136.875,713463.9375,955959500000.0,713463.9375
4,2019-09-21,717307.0625,706490.9375,713777.125,717018.5625,939823900000.0,717018.5625


In [9]:
# Number of missing values per column, as a plain array
bitcoin.isna().sum().to_numpy()

array([0, 0, 0, 0, 0, 0, 0])

In [10]:
# Number of distinct (non-null) values per column, as a plain array
bitcoin.nunique(dropna=True).to_numpy()

array([573, 575, 575, 574, 575, 575, 575])

In [11]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns
bitcoin.describe(percentiles=[0.25, 0.5, 0.75])


Unnamed: 0,High,Low,Open,Close,Volume,Adj Close
count,575.0,575.0,575.0,575.0,575.0,575.0
mean,1309500.0,1239724.0,1273880.0,1280027.0,2676410000000.0,1280027.0
std,1177475.0,1100001.0,1137872.0,1145088.0,1664462000000.0,1145088.0
min,397345.5,309974.4,371721.6,370008.6,864097200000.0,370008.6
25%,622896.1,597530.9,612896.4,612925.5,1563123000000.0,612925.5
50%,735039.8,712357.2,727280.2,727420.8,2345102000000.0,727420.8
75%,1380204.0,1313590.0,1346009.0,1349281.0,3382939000000.0,1349281.0
max,4874119.0,4671974.0,4775827.0,4774301.0,25829980000000.0,4774301.0


### Mean price of bitcoin in past 2 years


In [12]:
# Average closing price, rounded to two decimals
round(bitcoin['Close'].mean(), 2)

1280026.75

### Highest price of bitcoin in past 2 years

In [13]:
# Highest closing price, rounded to two decimals.
# Series.max() is vectorised and skips NaN, whereas the builtin max() iterates
# in Python and gives order-dependent results when NaN is present.
round(float(bitcoin['Close'].max()), 2)

4774301.0

### Lowest price of bitcoin in past 2 years

In [14]:
# Lowest closing price, rounded to two decimals.
# Series.min() is vectorised and skips NaN, whereas the builtin min() iterates
# in Python and gives order-dependent results when NaN is present.
round(float(bitcoin['Close'].min()), 2)

370008.56

### Line graph for Closing price of bitcoin

In [15]:
# Line graph of the adjusted closing price over time.
# make_subplots is already imported in the first cell, so it is not re-imported here.
# The figure is built with a secondary-y spec, but only the primary axis is used.
fig = make_subplots(specs=[[{'secondary_y': True}]])

fig.add_trace(
    go.Scatter(x=bitcoin['Date'], y=bitcoin['Adj Close'], name='Bitcoin'),
    secondary_y=False,
)

# Figure title
fig.update_layout(title_text='Bitcoin closing price')

# Axis titles
fig.update_xaxes(title_text='Date')
fig.update_yaxes(title_text='Bitcoin (INR)', secondary_y=False)

fig.show()

### Computing the moving average over a week, half a month and a full month, i.e. 7, 15 and 30 days

In [16]:
# Rolling mean of the adjusted close for each window length,
# stored in its own 'Moving avg <window>' column.
ma_day = [7,15,30]
for ma in ma_day:
    column_name = 'Moving avg {}'.format(ma)
    bitcoin[column_name] = bitcoin['Adj Close'].rolling(ma).mean()

In [17]:
# Column names, now including the three moving-average columns
bitcoin.columns

Index(['Date', 'High', 'Low', 'Open', 'Close', 'Volume', 'Adj Close',
       'Moving avg 7', 'Moving avg 15', 'Moving avg 30'],
      dtype='object')

### 15 Day moving average graph 

In [18]:
# Adjusted close together with its 15-day moving average
fig = go.Figure(data=[
    go.Scatter(x=bitcoin['Date'], y=bitcoin['Adj Close'],
               name='Adj Close', visible=True),
    go.Scatter(x=bitcoin['Date'], y=bitcoin['Moving avg 15'],
               name='15 days moving average', visible=True),
])

# Title and axis labels
fig.update_layout(title_text='15 Days Moving Average')
fig.update_xaxes(title_text='Date')
fig.update_yaxes(title_text='Price')

fig.show()

### 30 days moving average graph

In [19]:
# Adjusted close together with its 30-day moving average
fig = go.Figure(data=[
    go.Scatter(x=bitcoin['Date'], y=bitcoin['Adj Close'],
               name='Adj Close', visible=True),
    go.Scatter(x=bitcoin['Date'], y=bitcoin['Moving avg 30'],
               name='30 days moving average', visible=True),
])

# Title and axis labels
fig.update_layout(title_text='30 Days Moving Average')
fig.update_xaxes(title_text='Date')
fig.update_yaxes(title_text='Price')

fig.show()

### 7 days moving average graph

In [20]:
# Adjusted close together with its 7-day moving average
fig = go.Figure(data=[
    go.Scatter(x=bitcoin['Date'], y=bitcoin['Adj Close'],
               name='Adj Close', visible=True),
    go.Scatter(x=bitcoin['Date'], y=bitcoin['Moving avg 7'],
               name='7 days moving average', visible=True),
])

# Title and axis labels
fig.update_layout(title_text='7 Days Moving Average')
fig.update_xaxes(title_text='Date')
fig.update_yaxes(title_text='Price')

fig.show()

### Candlestick chart for bitcoin analysis 

In [21]:
# Candlestick chart; the adjusted close is used as the closing value
fig = go.Figure(data=[
    go.Candlestick(x=bitcoin['Date'],
                   open=bitcoin['Open'],
                   high=bitcoin['High'],
                   low=bitcoin['Low'],
                   close=bitcoin['Adj Close'],
                   visible=True,
                   increasing_line_color='cyan',
                   decreasing_line_color='gray'),
])

# Title and axis labels
fig.update_layout(title_text='Candlestick chart for bitcoin')
fig.update_xaxes(title_text='Date')
fig.update_yaxes(title_text='Price')

# No fig.show() here: the figure is the cell's last expression,
# so the notebook renders it as the cell output.
fig

### Difference between opening and closing price

In [22]:
# Difference between Close and Open:
bitcoin['Difference'] = bitcoin['Close'] - bitcoin['Open']
# Specifying colors for positive and negative difference
bitcoin['Color'] = np.where(bitcoin['Difference']<0, 'red', 'green')
bitcoin.head()

Unnamed: 0,Date,High,Low,Open,Close,Volume,Adj Close,Moving avg 7,Moving avg 15,Moving avg 30,Difference,Color
0,2019-09-17,731490.6875,725478.5,734975.875,725961.0,1151007000000.0,725961.0,,,,-9014.875,red
1,2019-09-18,735039.75,703342.8125,726121.0,732951.1875,1423414000000.0,732951.1875,,,,6830.1875,green
2,2019-09-19,732415.8125,721472.4375,732944.3125,724993.9375,1049163000000.0,724993.9375,,,,-7950.375,red
3,2019-09-20,725453.6875,712110.4375,725136.875,713463.9375,955959500000.0,713463.9375,,,,-11672.9375,red
4,2019-09-21,717307.0625,706490.9375,713777.125,717018.5625,939823900000.0,717018.5625,,,,3241.4375,green


### Bar chart for price difference

In [23]:
# One bar per day for the close-minus-open difference, coloured by the 'Color' column
fig = go.Figure(data=[
    go.Bar(x=bitcoin['Date'],
           y=bitcoin['Difference'],
           marker_color=bitcoin['Color'],
           visible=True),
])

# Axis labels
fig.update_xaxes(title_text='Date')
fig.update_yaxes(title_text='Price difference')

# No fig.show() here: the figure is the cell's last expression,
# so the notebook renders it as the cell output.
fig

### Histogram for price 

In [34]:
# Histogram of the adjusted closing price
fig = px.histogram(data_frame=bitcoin, x="Adj Close")
fig.update_layout(height=800, title_text='Distributions of Adj Close')
fig.show()

In [24]:
# Correlation between the numeric columns.
# Non-numeric columns ('Date', 'Color') are dropped explicitly: recent pandas
# versions no longer skip them silently in DataFrame.corr() and raise instead.
corr_matrix = bitcoin.select_dtypes(include=[np.number]).corr()
corr_matrix

Unnamed: 0,High,Low,Open,Close,Volume,Adj Close,Moving avg 7,Moving avg 15,Moving avg 30,Difference
High,1.0,0.998558,0.999187,0.99929,0.619995,0.99929,0.997669,0.993487,0.988159,0.110535
Low,0.998558,1.0,0.998732,0.998998,0.599167,0.998998,0.996822,0.993229,0.989568,0.113277
Open,0.999187,0.998732,1.0,0.998333,0.612268,0.998333,0.997976,0.993971,0.988916,0.080043
Close,0.99929,0.998998,0.998333,1.0,0.610607,1.0,0.996713,0.992459,0.987863,0.137442
Volume,0.619995,0.599167,0.612268,0.610607,1.0,0.610607,0.613341,0.600716,0.577637,0.037945
Adj Close,0.99929,0.998998,0.998333,1.0,0.610607,1.0,0.996713,0.992459,0.987863,0.137442
Moving avg 7,0.997669,0.996822,0.997976,0.996713,0.613341,0.996713,1.0,0.997632,0.992259,0.085905
Moving avg 15,0.993487,0.993229,0.993971,0.992459,0.600716,0.992459,0.997632,1.0,0.996483,0.079444
Moving avg 30,0.988159,0.989568,0.988916,0.987863,0.577637,0.987863,0.992259,0.996483,1.0,0.085581
Difference,0.110535,0.113277,0.080043,0.137442,0.037945,0.137442,0.085905,0.079444,0.085581,1.0


### Line graph of transaction volume over time

In [25]:
# Transaction volume over time
fig = go.Figure(data=[
    go.Scatter(x=bitcoin['Date'],
               y=bitcoin['Volume'],
               name='Volume of transactions',
               line=dict(color="#FF00FF"),
               visible=True),
])

# Title and axis labels
fig.update_layout(title_text='Transaction volume')
fig.update_xaxes(title_text='Date')
fig.update_yaxes(title_text='Volume')

# The figure is the cell's last expression, so the notebook renders it.
fig


## Price Prediction


---





### Data preprocessing

In [26]:
# Keep only the adjusted close, back-fill any missing values and sort by index.
# DataFrame.bfill() replaces fillna(method='backfill'), which is deprecated in recent pandas.
bitcoin_data = bitcoin[['Adj Close']].bfill().sort_index(ascending=True, axis=0)


In [27]:
# Reshape the Adj Close values into a single-column array.
# The prices are deliberately NOT scaled here. The shared `scaler` is fitted on
# the training split in the training cell; fitting it on the full series first
# would leak the test range into the scaler and, because the same scaler object
# is re-fitted afterwards, the later inverse_transform could no longer map the
# predictions back to INR (they would stay in the [0, 1] range).
adj_price = bitcoin_data['Adj Close'].values.reshape(-1, 1)

### Splitting into training and testing

In [28]:
# Hold out the last `prediction_days` rows for testing; everything before is training data
prediction_days = 13

training = bitcoin_data.iloc[:-prediction_days]
testing = bitcoin_data.iloc[-prediction_days:]


### Training Data

In [29]:
# Fit the shared scaler on the training values and scale them
training_set = scaler.fit_transform(training.values)

# Inputs: every value except the last, shaped (n, 1, 1)
X_train = training_set[:-1].reshape(-1, 1, 1)

# Targets: the value that follows each input
y_train = training_set[1:]

### LSTM recurrent neural network model 

In [30]:
# Sequential model: one LSTM layer (4 units, sigmoid activation) taking
# sequences of a single feature, followed by a single-unit dense output layer
model = Sequential([
    LSTM(units=4, activation='sigmoid', input_shape=(None, 1)),
    Dense(units=1),
])

# Adam optimiser minimising the mean squared error
model.compile(loss='mean_squared_error', optimizer='adam')

# Fit on the training set; the returned History object holds the per-epoch loss
history = model.fit(X_train, y_train, epochs=100, batch_size=5)
history


Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

<tensorflow.python.keras.callbacks.History at 0x7fb1eb5f59d0>

### Testing Data 

In [31]:
# Held-out values as a column vector, transformed with the already-fitted scaler
testing_set = testing.values
inputs = scaler.transform(testing_set.reshape(len(testing_set), 1))
# Same (n, 1, 1) layout as X_train
inputs = inputs.reshape(len(inputs), 1, 1)

# Predict, then undo the scaler's transform
predicted_price = scaler.inverse_transform(model.predict(inputs))

### Line graph showing the training loss per epoch

In [32]:
# Training loss recorded by Keras for each epoch
loss_train = history.history['loss']
# Derive the x-axis from the recorded losses instead of hard-coding 100 epochs,
# so the plot stays correct if the number of training epochs changes.
epochs = list(range(1, len(loss_train) + 1))
fig = go.Figure()

# Add the training-loss trace
fig.add_trace(go.Scatter(x=epochs
                         , y=loss_train
                         , name='Training loss'
                         , line = dict(color='orange')
                        ))

# Set title
fig.update_layout(title_text='Loss with epoch transition')

# Set x and y-axis titles
fig.update_xaxes(title_text='Epoch')
fig.update_yaxes(title_text='Loss')

fig.show()

## Line graph of Real and Predicted price


---







In [33]:
fig = go.Figure()

# Day numbers are derived from the number of points in each series rather than
# hard-coded, so x and y always have the same length.
real_days = list(range(1, len(testing_set) + 1))
predicted_days = list(range(1, len(predicted_price) + 1))

# Add Traces
fig.add_trace(go.Scatter(x=real_days
                         , y=testing_set[:, 0]
                         , name='Price real'
                         , line = dict(color='purple', width=2, dash='dash')
                         , visible=True
                        ))

fig.add_trace(go.Scatter(x=predicted_days
                         , y=predicted_price[:, 0]
                         , name='Price predicted'
                         , line = dict(color='gold', width=2, dash='dash')
                         , visible=True
                        ))
# Marker styling; it only applies to traces whose mode is 'markers'
# (neither trace above sets a mode explicitly).
fig.update_traces(marker=dict(size=12,
                              line=dict(width=2,
                                        color='DarkSlateGrey')),
                  selector=dict(mode='markers'))
# Set title
fig.update_layout(title_text='Bitcoin Price Prediction')

# Set x and y-axis titles
fig.update_xaxes(title_text='Number of Days')
fig.update_yaxes(title_text='Price')

fig.show()