In [None]:
import numpy as np
import pandas as pd
import statsmodels.api as sm
import plotly.graph_objs as go
from matplotlib import pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from sklearn import linear_model
from sklearn.model_selection import train_test_split
from sklearn import linear_model
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from math import sqrt
from sklearn import svm
import plotly.offline as py
# Switch plotly.offline into notebook mode before the py.iplot calls further down.
py.init_notebook_mode(connected=True)

In [None]:
# Read the CSV named below (resolved against the working directory) into `df`.
DATA_FILE = 'bitstampUSD_1-min_data_2012-01-01_to_2018-11-11.csv'
df = pd.read_csv(DATA_FILE)

In [None]:
# Preview the first five rows of the raw frame.
df.head(n=5)

In [None]:
# Preview the last five rows of the raw frame.
df.tail(n=5)

In [None]:
# Display the (rows, columns) dimensions of the loaded frame.
df.shape

In [None]:
# Count the missing values in each column.
df.isna().sum()

In [None]:
# Discard every row that contains at least one missing value.
df = df.dropna(axis=0, how='any')

In [None]:
# Preview the first five rows again, now that rows with nulls are gone.
df.head(n=5)

In [None]:
# Re-count missing values per column to confirm none remain after the drop.
df.isna().sum()

## Linear Regression model with single feature

In [None]:
# Keep only the Timestamp feature and the Weighted_Price target.
# .copy() makes df1 an independent frame: a later cell adds a 'minute' column to
# df1, and doing that on a bare df[[...]] selection triggers pandas'
# SettingWithCopyWarning.
df1 = df[['Timestamp', 'Weighted_Price']].copy()

In [None]:
# Features: every column of df1 except the target.
X = df1.drop(columns=['Weighted_Price'])
# Target: an independent copy of the Weighted_Price column.
y = df1.loc[:, 'Weighted_Price'].copy()

In [None]:
# Hold out 20% of the rows as a test set; the fixed random_state keeps the split repeatable.
# NOTE(review): train_test_split shuffles rows by default, so test timestamps are
# interleaved with training ones — confirm that is intended for time-ordered data.
(bitcoin_x_train, bitcoin_x_test,
 bitcoin_y_train, bitcoin_y_test) = train_test_split(
    X, y, test_size=0.20, random_state=41
)

In [None]:
# Print the shape of each split: train features, test features, train target, test target.
for split_part in (bitcoin_x_train, bitcoin_x_test, bitcoin_y_train, bitcoin_y_test):
    print(split_part.shape)

In [None]:
# Instantiate the linear regression estimator (intercept fitted, which is the default).
regr = linear_model.LinearRegression(fit_intercept=True)

In [None]:
# Fit the estimator on the single-feature training split.
regr.fit(X=bitcoin_x_train, y=bitcoin_y_train)

In [None]:
# Predict Weighted_Price for the held-out rows.
bitcoin_y_pred = regr.predict(X=bitcoin_x_test)

In [None]:
# Mean squared error between the held-out targets and the predictions.
single_feature_mse = mean_squared_error(bitcoin_y_test, bitcoin_y_pred)
print("Mean squared error for Linear Regression with single feature: %.3f" % single_feature_mse)

## Linear Regression with multiple features

In [None]:
# Select two features (Volume_(BTC) and Timestamp) together with the Weighted_Price target.
df2 = df.loc[:, ['Volume_(BTC)', 'Timestamp', 'Weighted_Price']]

In [None]:
# Features: the two non-target columns of df2.
X = df2.drop(columns=['Weighted_Price'])
# Target: an independent copy of the Weighted_Price column.
y = df2.loc[:, 'Weighted_Price'].copy()

In [None]:
# Same 80/20 split and seed as the single-feature experiment, now on the two-feature matrix.
# NOTE(review): train_test_split shuffles rows by default, so test timestamps are
# interleaved with training ones — confirm that is intended for time-ordered data.
(bitcoin_x_train, bitcoin_x_test,
 bitcoin_y_train, bitcoin_y_test) = train_test_split(
    X, y, test_size=0.20, random_state=41
)

In [None]:
# Print the shape of each split: train features, test features, train target, test target.
for split_part in (bitcoin_x_train, bitcoin_x_test, bitcoin_y_train, bitcoin_y_test):
    print(split_part.shape)

In [None]:
# Refit the existing `regr` estimator on the two-feature training split.
regr.fit(X=bitcoin_x_train, y=bitcoin_y_train)

In [None]:
# Predict Weighted_Price for the held-out rows using both features.
bitcoin_y_pred = regr.predict(X=bitcoin_x_test)

In [None]:
# Mean squared error between the held-out targets and the two-feature predictions.
multi_feature_mse = mean_squared_error(bitcoin_y_test, bitcoin_y_pred)
print("Mean squared error for Linear Regression with multiple features: %.3f" % multi_feature_mse)

## Data Analysis

In [None]:
# Turn each row's Timestamp (interpreted as seconds since the epoch) into a calendar
# date, store it in a new 'date' column, then average Weighted_Price per date.
row_dates = pd.to_datetime(df['Timestamp'], unit='s').dt.date
df['date'] = row_dates
group = df.groupby(by='date')
Real_Price = group['Weighted_Price'].mean()

In [None]:
# Display the length of the per-date mean price series.
Real_Price.shape

In [None]:
# Hold out the most recent `prediction_days` daily prices as the test set and
# train on everything before them.
prediction_days = 300
split_at = len(Real_Price) - prediction_days
assert split_at > 0, "prediction_days must be smaller than the number of daily prices"
# Both slices share one boundary so that no day is lost between them; the previous
# `-1` on the train slice left one observation in neither set. `.iloc` makes the
# positional intent explicit.
df_train = Real_Price.iloc[:split_at]
df_test = Real_Price.iloc[split_at:]

In [None]:
# Stitch the train and test series back together, convert the 'date' labels to
# pandas datetimes, and use them as the index.
working_data = (
    pd.concat([df_train, df_test])
    .reset_index()
    .assign(date=lambda frame: pd.to_datetime(frame['date']))
    .set_index('date')
)

In [None]:
# Decompose the daily price series with a 60-observation seasonal period and plot
# the trend, seasonal, residual and observed components on one chart.
# `period=` replaces the `freq=` keyword, which statsmodels deprecated in 0.11 and
# later removed; passing `freq=` fails on current releases.
s = sm.tsa.seasonal_decompose(working_data.Weighted_Price.values, period=60)

# One line trace per component; the x axis is simply the observation number.
trace1 = go.Scatter(x = np.arange(0, len(s.trend), 1),y = s.trend,mode = 'lines',name = 'Trend',
    line = dict(color = ('rgb(244, 146, 65)'), width = 4))
trace2 = go.Scatter(x = np.arange(0, len(s.seasonal), 1),y = s.seasonal,mode = 'lines',name = 'Seasonal',
    line = dict(color = ('rgb(66, 244, 155)'), width = 2))

trace3 = go.Scatter(x = np.arange(0, len(s.resid), 1),y = s.resid,mode = 'lines',name = 'Residual',
    line = dict(color = ('rgb(209, 244, 66)'), width = 2))

trace4 = go.Scatter(x = np.arange(0, len(s.observed), 1),y = s.observed,mode = 'lines',name = 'Observed',
    line = dict(color = ('rgb(66, 134, 244)'), width = 2))

data = [trace1, trace2, trace3, trace4]
layout = dict(title = 'Seasonal decomposition', xaxis = dict(title = 'Time'), yaxis = dict(title = 'Price, USD'))
fig = dict(data=data, layout=layout)
py.iplot(fig, filename='seasonal_decomposition')

## Prediction using Recurrent neural networks for day wise data

In [None]:
# Report how many daily observations ended up in each split.
for label, part in (("Train data length:", df_train), ("Test data length:", df_test)):
    print(label + str(len(part)))

In [None]:
# Shape the daily training prices for the LSTM.
# Column vector of prices, then fit a MinMaxScaler on it and scale it.
training_set = df_train.values.reshape(-1, 1)
sc = MinMaxScaler()
training_set = sc.fit_transform(training_set)
# One-step-ahead pairs: the input is the scaled price at step t, the target the one at t+1.
X_train = training_set[:-1]
y_train = training_set[1:]
# Give the inputs two trailing axes of length 1, i.e. shape (samples, 1, 1).
X_train = X_train.reshape(-1, 1, 1)

In [None]:
# Build the network: a 4-unit LSTM layer with sigmoid activation feeding a single-unit Dense output.
regressor = Sequential([
    LSTM(units=4, activation='sigmoid', input_shape=(None, 1)),
    Dense(units=1),
])

# Train with the Adam optimizer on mean squared error.
regressor.compile(loss='mean_squared_error', optimizer='adam')

regressor.fit(X_train, y_train, epochs=50, batch_size=5)

In [None]:
# Predict the daily test set one step ahead and line the predictions up with the
# prices they are meant to forecast.
#
# The model was trained on (price at t) -> (price at t+1) pairs, so the prediction
# made from test_set[i] must be scored against test_set[i + 1]. Comparing it with
# its own input, as was done before, rewards a model that merely echoes the input.
test_set = df_test.values
# Every test price except the last serves as an input; the last one has no
# following actual price and is used only as a target.
inputs = np.reshape(test_set[:-1], (len(test_set) - 1, 1))
inputs = sc.transform(inputs)
inputs = np.reshape(inputs, (len(inputs), 1, 1))
predicted_BTC_price = regressor.predict(inputs)
# Undo the scaling so predictions are back in the original price units.
prediction1_inverse = sc.inverse_transform(predicted_BTC_price)
# Ground truth: the unscaled prices one step after each input.
Y_test_inverse = np.reshape(test_set[1:], (len(test_set) - 1, 1))
prediction2_inverse = np.array(prediction1_inverse[:, 0])
Y_test2_inverse = np.array(Y_test_inverse[:, 0])

In [None]:
# Plot predicted versus actual test-set prices against the observation number.
predicted_axis = np.arange(len(prediction2_inverse))
actual_axis = np.arange(len(Y_test2_inverse))

trace1 = go.Scatter(
    x=predicted_axis,
    y=prediction2_inverse,
    mode='lines',
    name='Predicted labels',
    line={'color': 'rgb(244, 146, 65)', 'width': 2},
)
trace2 = go.Scatter(
    x=actual_axis,
    y=Y_test2_inverse,
    mode='lines',
    name='True labels',
    line={'color': 'rgb(66, 244, 155)', 'width': 2},
)

data = [trace1, trace2]
layout = {
    'title': 'Comparison of true prices (on the test dataset) with prices our model predicted',
    'xaxis': {'title': 'Day number'},
    'yaxis': {'title': 'Price, USD'},
}
fig = {'data': data, 'layout': layout}
py.iplot(fig, filename='results_demonstrating0')

In [None]:
# Root of the mean squared error between the actual and predicted daily prices.
daily_mse = mean_squared_error(Y_test2_inverse, prediction2_inverse)
RMSE = sqrt(daily_mse)
print('RMSE error for day wise data: %.3f' % RMSE)

## Prediction using Recurrent neural networks for minute wise data

In [None]:
# Label every row with its own Timestamp as 'minute' and average Weighted_Price per
# minute label.
# assign() builds a new frame instead of writing a column into df1 in place; the
# in-place write triggers SettingWithCopyWarning when df1 is a column selection of
# df, and rebuilding the frame keeps this cell safe to re-run.
df1 = df1.assign(minute=df['Timestamp'])
group1 = df1.groupby('minute')
Real_Price1 = group1['Weighted_Price'].mean()

In [None]:
# Display the length of the per-minute mean price series.
Real_Price1.shape

In [None]:
# Hold out the most recent `prediction_days` observations of the minute-level
# series as the test set (the name is kept from the daily experiment; here it
# counts minutes, not days).
prediction_days = 300000
# The boundary must come from the minute-level series itself. The earlier code used
# len(Real_Price) (the daily series), which made the bounds negative: it only ran
# thanks to negative indexing and produced a test set smaller than requested.
split_at = len(Real_Price1) - prediction_days
assert split_at > 0, "prediction_days must be smaller than the number of minute prices"
# One shared boundary, so no observation falls between train and test; `.iloc`
# keeps the slicing positional even though the index holds integer timestamps.
df_train = Real_Price1.iloc[:split_at]
df_test = Real_Price1.iloc[split_at:]

In [None]:
# Report how many minute-level observations ended up in each split.
for label, part in (("Train data length:", df_train), ("Test data length:", df_test)):
    print(label + str(len(part)))

In [None]:
# Shape the minute-level training prices for the LSTM.
# Column vector of prices, then fit a fresh MinMaxScaler on it and scale it.
training_set = df_train.values.reshape(-1, 1)
sc = MinMaxScaler()
training_set = sc.fit_transform(training_set)
# One-step-ahead pairs: the input is the scaled price at step t, the target the one at t+1.
X_train = training_set[:-1]
y_train = training_set[1:]
# Give the inputs two trailing axes of length 1, i.e. shape (samples, 1, 1).
X_train = X_train.reshape(-1, 1, 1)

In [None]:
# Build a fresh network: a 4-unit LSTM layer with sigmoid activation feeding a single-unit Dense output.
regressor = Sequential([
    LSTM(units=4, activation='sigmoid', input_shape=(None, 1)),
    Dense(units=1),
])

# Train with the Adam optimizer on mean squared error.
regressor.compile(loss='mean_squared_error', optimizer='adam')

regressor.fit(X_train, y_train, epochs=10, batch_size=50)

In [None]:
# Predict the minute-level test set one step ahead and line the predictions up
# with the prices they are meant to forecast.
#
# The model was trained on (price at t) -> (price at t+1) pairs, so the prediction
# made from test_set[i] must be scored against test_set[i + 1]. Comparing it with
# its own input, as was done before, rewards a model that merely echoes the input.
test_set = df_test.values
# Every test price except the last serves as an input; the last one has no
# following actual price and is used only as a target.
inputs = np.reshape(test_set[:-1], (len(test_set) - 1, 1))
inputs = sc.transform(inputs)
inputs = np.reshape(inputs, (len(inputs), 1, 1))
predicted_BTC_price = regressor.predict(inputs)
# Undo the scaling so predictions are back in the original price units.
prediction1_inverse = sc.inverse_transform(predicted_BTC_price)
# Ground truth: the unscaled prices one step after each input.
Y_test_inverse = np.reshape(test_set[1:], (len(test_set) - 1, 1))
prediction2_inverse = np.array(prediction1_inverse[:, 0])
Y_test2_inverse = np.array(Y_test_inverse[:, 0])

In [None]:
# Plot predicted versus actual test-set prices against the observation number.
predicted_axis = np.arange(len(prediction2_inverse))
actual_axis = np.arange(len(Y_test2_inverse))

trace1 = go.Scatter(
    x=predicted_axis,
    y=prediction2_inverse,
    mode='lines',
    name='Predicted labels',
    line={'color': 'rgb(244, 146, 65)', 'width': 2},
)
trace2 = go.Scatter(
    x=actual_axis,
    y=Y_test2_inverse,
    mode='lines',
    name='True labels',
    line={'color': 'rgb(66, 244, 155)', 'width': 2},
)

data = [trace1, trace2]
layout = {
    'title': 'Comparison of true prices (on the test dataset) with prices our model predicted',
    'xaxis': {'title': 'Minute number'},
    'yaxis': {'title': 'Price, USD'},
}
fig = {'data': data, 'layout': layout}
py.iplot(fig, filename='results_demonstrating0')

In [None]:
# Root of the mean squared error between the actual and predicted minute-level prices.
minute_mse = mean_squared_error(Y_test2_inverse, prediction2_inverse)
RMSE = sqrt(minute_mse)
print('RMSE error for minute wise data: %.3f' % RMSE)