In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
dataset = pd.read_csv("linear_interpolation_removed.csv")
dataset.head()

Unnamed: 0,snapped_at,price,market_cap,total_volume
0,2013-04-28 00:00:00,1019.1988,16387930000.0,4444828000.0
1,2013-04-29 00:00:00,1019.1988,16387930000.0,4444828000.0
2,2013-04-30 00:00:00,1019.1988,16387930000.0,4444828000.0
3,2013-05-01 00:00:00,1019.1988,16387930000.0,4444828000.0
4,2013-05-02 00:00:00,1019.1988,16387930000.0,4444828000.0


In [3]:
dataset['snapped_at'] = pd.to_datetime(dataset['snapped_at'])    
dataset['snapped_at'] = (dataset['snapped_at'] - dataset['snapped_at'].min())  / np.timedelta64(1,'D')
dataset.head()

Unnamed: 0,snapped_at,price,market_cap,total_volume
0,0.0,1019.1988,16387930000.0,4444828000.0
1,1.0,1019.1988,16387930000.0,4444828000.0
2,2.0,1019.1988,16387930000.0,4444828000.0
3,3.0,1019.1988,16387930000.0,4444828000.0
4,4.0,1019.1988,16387930000.0,4444828000.0


In [4]:
X = dataset[['snapped_at', 'market_cap', 'total_volume']]
y = dataset['price']

In [5]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size = 0.7, 
                                                    test_size = 0.3, random_state = 100)

In [6]:
from sklearn.linear_model import LinearRegression
#Fitting the Multiple Linear Regression model
mlr = LinearRegression()  
mlr.fit(X_train, y_train)

In [7]:
print("Intercept: ", mlr.intercept_)
print("Coefficients:")
list(zip(X, mlr.coef_))

Intercept:  483.984661684015
Coefficients:


[('snapped_at', -0.1564441158098337),
 ('market_cap', 5.220063669424338e-08),
 ('total_volume', 8.648283016693238e-09)]

In [8]:
#Prediction of test set
y_pred_mlr= mlr.predict(X_test)
#Predicted values
print("Prediction for test set: {}".format(y_pred_mlr))

Prediction for test set: [1179.20098041 1297.94850034 9569.95012158 ... 1265.38673169 1198.28716254
 7311.67375213]


In [9]:
#Actual value and the predicted value
mlr_diff = pd.DataFrame({'Actual value': y_test, 'Predicted value': y_pred_mlr})
mlr_diff.head()
mlr_diff.tail()

Unnamed: 0,Actual value,Predicted value
1952,7325.155801,6774.036169
2415,9134.306432,8976.951653
1396,1180.871,1265.386732
1146,1019.1988,1198.287163
1668,8117.664348,7311.673752


In [10]:
from sklearn import metrics
meanAbErr = metrics.mean_absolute_error(y_test, y_pred_mlr)
meanSqErr = metrics.mean_squared_error(y_test, y_pred_mlr)
rootMeanSqErr = np.sqrt(metrics.mean_squared_error(y_test, y_pred_mlr))
print('R squared: {:.2f}'.format(mlr.score(X,y)*100))
print('Mean Absolute Error:', meanAbErr)
print('Mean Square Error:', meanSqErr)
print('Root Mean Square Error:', rootMeanSqErr)

R squared: 99.92
Mean Absolute Error: 344.7488207457652
Mean Square Error: 213338.69904980191
Root Mean Square Error: 461.88602387364125
