In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
dataset = pd.read_csv("linear_interpolation_comp.csv")
dataset.head()

Unnamed: 0,snapped_at,price,market_cap,total_volume
0,2017-01-01 00:00:00 UTC,998.05,16045990000.0,3470830000.0
1,2017-01-02 00:00:00 UTC,1019.1988,16387930000.0,4444828000.0
2,2017-01-03 00:00:00 UTC,1035.5313,16652720000.0,4399421000.0
3,2017-01-04 00:00:00 UTC,1130.849516,18187660000.0,8705359000.0
4,2017-01-05 00:00:00 UTC,990.6675,15934890000.0,12102750000.0


In [3]:
dataset['snapped_at'] = pd.to_datetime(dataset['snapped_at'])    
dataset['snapped_at'] = (dataset['snapped_at'] - dataset['snapped_at'].min())  / np.timedelta64(1,'D')
dataset.head()

Unnamed: 0,snapped_at,price,market_cap,total_volume
0,0.0,998.05,16045990000.0,3470830000.0
1,1.0,1019.1988,16387930000.0,4444828000.0
2,2.0,1035.5313,16652720000.0,4399421000.0
3,3.0,1130.849516,18187660000.0,8705359000.0
4,4.0,990.6675,15934890000.0,12102750000.0


In [4]:
X = dataset[['snapped_at', 'market_cap', 'total_volume']]
y = dataset['price']

In [5]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size = 0.7, 
                                                    test_size = 0.3, random_state = 100)

In [6]:
from sklearn.linear_model import LinearRegression
#Fitting the Multiple Linear Regression model
mlr = LinearRegression()  
mlr.fit(X_train, y_train)

In [7]:
print("Intercept: ", mlr.intercept_)
print("Coefficients:")
list(zip(X, mlr.coef_))

Intercept:  921.9504157704432
Coefficients:


[('snapped_at', -0.7764506979848738),
 ('market_cap', 5.288577680364554e-08),
 ('total_volume', 9.660103976850616e-09)]

In [8]:
#Prediction of test set
y_pred_mlr= mlr.predict(X_test)
#Predicted values
print("Prediction for test set: {}".format(y_pred_mlr))

Prediction for test set: [60865.27992395 31574.12699015 26764.73122515 17439.36911197
 24779.08345446  7055.62017854  9014.25601902  4268.40335569
 20340.81797635 61336.18002483  6506.20825846 18756.45260796
  6540.06890457 23614.41004693 23068.72646998 10537.33734858
  8192.7063225   1685.61268645 47361.04925353 10702.40767817
 28512.95949922  6534.71792599 26217.49171541 46479.7426091
  4217.13416148 56957.79777409 26394.40802366 57026.58864323
 35004.25800409 26555.76042609  7947.97629107 55294.29141357
 27776.29165482 55603.35419964  6933.69992809  9241.72115405
  9001.64847633 11136.74181589 23513.89450051  1934.17322276
  6944.86925741  1632.61903129  2285.17972195 51173.44555331
  7177.95155026 10334.29060436 23068.45845477  9174.46351589
 47058.62599959 47495.92895825 23688.32627662  6907.85001199
 12459.84874278  6964.19849342  9134.97711664 10671.71545847
 13781.66464035  7200.36802214  9347.05214256 58341.10127243
 55287.5401627   6887.87468421  9016.76672742  9793.88370245


In [9]:
#Actual value and the predicted value
mlr_diff = pd.DataFrame({'Actual value': y_test, 'Predicted value': y_pred_mlr})
mlr_diff.head()
mlr_diff.tail()

Unnamed: 0,Actual value,Predicted value
2104,20161.830615,20005.399432
2203,18866.810331,18893.050152
1371,10572.674843,10410.430953
2227,22946.286579,22875.42102
2383,30620.814247,30638.980684


In [10]:
from sklearn import metrics
meanAbErr = metrics.mean_absolute_error(y_test, y_pred_mlr)
meanSqErr = metrics.mean_squared_error(y_test, y_pred_mlr)
rootMeanSqErr = np.sqrt(metrics.mean_squared_error(y_test, y_pred_mlr))
print('R squared: {:.2f}'.format(mlr.score(X,y)*100))
print('Mean Absolute Error:', meanAbErr)
print('Mean Square Error:', meanSqErr)
print('Root Mean Square Error:', rootMeanSqErr)

R squared: 99.94
Mean Absolute Error: 292.844624916723
Mean Square Error: 191260.66286804352
Root Mean Square Error: 437.33358305536467
