In [3]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.svm import SVR
from sklearn.linear_model import LinearRegression

In [10]:
## read the dataset
df = pd.read_csv('bitcoin.csv')

In [11]:
df.head()

Unnamed: 0,Date,Price
0,5/23/2019,7881.84668
1,5/24/2019,7987.371582
2,5/25/2019,8052.543945
3,5/26/2019,8673.21582
4,5/27/2019,8805.77832


In [13]:
df.drop(['Date'],1,inplace=True)

In [14]:
df.head()

Unnamed: 0,Price
0,7881.84668
1,7987.371582
2,8052.543945
3,8673.21582
4,8805.77832


In [29]:
predictionDays = 30

df['Prediction'] = df[['Price']].shift(-predictionDays)
df.head()

Unnamed: 0,Price,Prediction
0,7881.84668,10701.69141
1,7987.371582,10855.37109
2,8052.543945,11011.10254
3,8673.21582,11790.91699
4,8805.77832,13016.23145


In [30]:
df.tail()

Unnamed: 0,Price,Prediction
362,9729.038086,
363,9522.981445,
364,9081.761719,
365,9182.577148,
366,9180.045898,


In [31]:
# Here we will convert the data frame into a numpy array and drop the prediction column
x = np.array(df.drop(['Prediction'],1))
# Remove the last 'n' rows where 'n' is the predictionDays
x = x[:len(df)-predictionDays]
print(x)

[[ 7881.84668 ]
 [ 7987.371582]
 [ 8052.543945]
 [ 8673.21582 ]
 [ 8805.77832 ]
 [ 8719.961914]
 [ 8659.487305]
 [ 8319.472656]
 [ 8574.501953]
 [ 8564.016602]
 [ 8742.958008]
 [ 8208.995117]
 [ 7707.770996]
 [ 7824.231445]
 [ 7822.023438]
 [ 8043.951172]
 [ 7954.12793 ]
 [ 7688.077148]
 [ 8000.32959 ]
 [ 7927.714355]
 [ 8145.857422]
 [ 8230.923828]
 [ 8693.833008]
 [ 8838.375   ]
 [ 8994.488281]
 [ 9320.352539]
 [ 9081.762695]
 [ 9273.521484]
 [ 9527.160156]
 [10144.55664 ]
 [10701.69141 ]
 [10855.37109 ]
 [11011.10254 ]
 [11790.91699 ]
 [13016.23145 ]
 [11182.80664 ]
 [12407.33203 ]
 [11959.37109 ]
 [10817.15527 ]
 [10583.13477 ]
 [10801.67773 ]
 [11961.26953 ]
 [11215.4375  ]
 [10978.45996 ]
 [11208.55078 ]
 [11450.84668 ]
 [12285.95801 ]
 [12573.8125  ]
 [12156.5127  ]
 [11358.66211 ]
 [11815.98633 ]
 [11392.37891 ]
 [10256.05859 ]
 [10895.08984 ]
 [ 9477.641602]
 [ 9693.802734]
 [10666.48242 ]
 [10530.73242 ]
 [10767.13965 ]
 [10599.10547 ]
 [10343.10645 ]
 [ 9900.767578]
 [ 9811.

In [32]:
y = np.array(df['Prediction'])
# Get all the values except last 'n' rows
y = y[:-predictionDays]
print(y)

[10701.69141  10855.37109  11011.10254  11790.91699  13016.23145
 11182.80664  12407.33203  11959.37109  10817.15527  10583.13477
 10801.67773  11961.26953  11215.4375   10978.45996  11208.55078
 11450.84668  12285.95801  12573.8125   12156.5127   11358.66211
 11815.98633  11392.37891  10256.05859  10895.08984   9477.641602
  9693.802734 10666.48242  10530.73242  10767.13965  10599.10547
 10343.10645   9900.767578  9811.925781  9911.841797  9870.303711
  9477.677734  9552.860352  9519.145508  9607.423828 10085.62793
 10399.66895  10518.17481  10821.72656  10970.18457  11805.65332
 11478.16895  11941.96875  11966.40723  11862.93652  11354.02441
 11523.5791   11382.61621  10895.83008  10051.7041   10311.5459
 10374.33887  10231.74414  10345.81055  10916.05371  10763.23242
 10138.04981  10131.05566  10407.96484  10159.96094  10138.51758
 10370.82031  10185.5       9754.422852  9510.200195  9598.173828
  9630.664063  9757.970703 10346.76074  10623.54004  10594.49316
 10575.5332   10353.302

In [33]:
xtrain, xtest, ytrain, ytest = train_test_split(x,y, test_size = 0.2)

In [34]:
predictionDays_array = np.array(df.drop(['Prediction'],1))[-predictionDays:]
print(predictionDays_array)

[[7550.900879]
 [7569.936035]
 [7679.867188]
 [7795.601074]
 [7807.058594]
 [8801.038086]
 [8658.553711]
 [8864.766602]
 [8988.59668 ]
 [8897.46875 ]
 [8912.654297]
 [9003.070313]
 [9268.761719]
 [9951.518555]
 [9842.666016]
 [9593.896484]
 [8756.430664]
 [8601.795898]
 [8804.477539]
 [9269.987305]
 [9733.72168 ]
 [9328.197266]
 [9377.013672]
 [9670.739258]
 [9726.575195]
 [9729.038086]
 [9522.981445]
 [9081.761719]
 [9182.577148]
 [9180.045898]]


In [35]:
## SVR
svr_rbf = SVR(kernel='rbf', C=1e3, gamma=0.00001)
svr_rbf.fit(xtrain, ytrain)

SVR(C=1000.0, gamma=1e-05)

In [36]:
## testing
svr_rbf_confidence = svr_rbf.score(xtest,ytest)
print('SVR_RBF accuracy :',svr_rbf_confidence)

SVR_RBF accuracy : 0.22298786393432468


In [37]:
# print the predicted values
svm_prediction = svr_rbf.predict(xtest)
print(svm_prediction)
print()
print(ytest)

[10857.02978082  8421.15718012  9329.38957695  9959.50746552
  9255.93875849  8296.41721768  9404.50592278  9171.78534822
  9413.71424121  7321.14930995  7059.02420993  8285.00667518
  7699.42862488  8426.57269952  9246.45966989 10248.57382431
  7794.49258101  9298.4415402  10327.3087781   8408.83028457
  9132.75784386  8785.544435    8291.31424194  9240.35537408
  8968.75972168  9313.37341219  8296.28854714  9024.96379448
 10364.71054202  8921.37491621 10672.93174415  8819.78808154
  8787.6622835   7736.52332259  9938.00089852  9400.18837857
  8365.64327947  7626.53529529  9269.38968609  8647.3151929
  9601.18121432  9277.25023671  8961.09252729  8671.61674402
  8825.06017571  8295.99836366  8387.87633259 10398.36750739
  7334.58430158  8283.99046685  8580.21905803  9201.09868646
  8161.52674728  8656.89591699  8287.78746254  9209.50156527
  8397.16048446  8348.1746461   9339.83545285  8244.22906999
  7465.9621452   9197.97108475  8558.49872202  9735.87484401
  8861.11722671  8294.289

In [38]:
# Print the model predictions for the next 30 days
svm_prediction = svr_rbf.predict(predictionDays_array)
print(svm_prediction)
print()

[8161.27558095 8166.21767305 8406.92431465 8864.48323894 8907.42131188
 9289.85909772 9125.36765152 9070.62097761 8292.54683118 8895.17525388
 8802.98628349 8194.89184898 7559.24224012 8430.45404513 8347.601039
 8296.41841423 9329.1776316  8918.10960883 9282.80913859 7562.24538727
 8285.76804875 7738.39544806 7908.92160343 8286.49123146 8284.60283068
 8284.96207193 8260.89562364 7736.5278914  7467.80480482 7469.33128419]



In [39]:
#Print the actual price for bitcoin for last 30 days
print(df.tail(predictionDays))

           Price  Prediction
337  7550.900879         NaN
338  7569.936035         NaN
339  7679.867188         NaN
340  7795.601074         NaN
341  7807.058594         NaN
342  8801.038086         NaN
343  8658.553711         NaN
344  8864.766602         NaN
345  8988.596680         NaN
346  8897.468750         NaN
347  8912.654297         NaN
348  9003.070313         NaN
349  9268.761719         NaN
350  9951.518555         NaN
351  9842.666016         NaN
352  9593.896484         NaN
353  8756.430664         NaN
354  8601.795898         NaN
355  8804.477539         NaN
356  9269.987305         NaN
357  9733.721680         NaN
358  9328.197266         NaN
359  9377.013672         NaN
360  9670.739258         NaN
361  9726.575195         NaN
362  9729.038086         NaN
363  9522.981445         NaN
364  9081.761719         NaN
365  9182.577148         NaN
366  9180.045898         NaN


In [40]:
lr = LinearRegression(normalize=True)
lr.fit(xtrain, ytrain)

LinearRegression(normalize=True)

In [42]:
## testing
lr_confidence = lr.score(xtest,ytest)
print('lr accuracy :',lr_confidence)

lr accuracy : 0.09171914698093675


In [43]:
# print the predicted values
lr_prediction = lr.predict(xtest)
print(lr_prediction)
print()
print(ytest)

[9497.26849762 9126.36994334 8825.0156024  9608.26026703 8839.83789844
 9042.61845503 9802.34948461 8803.16070029 8669.73739985 8180.73893748
 7913.91322082 9073.83874333 8968.14231327 9235.50314242 8612.42283725
 9759.71165361 8975.24570029 8689.53051637 9348.48253653 8476.70402317
 8800.15012065 9915.65877801 9080.2668323  8695.43712498 8585.91950729
 8687.81325799 9214.05291901 8712.75727426 9362.23410997 8335.1351254
 9514.74321552 8863.98731406 8282.75424176 8907.98974721 9605.6138477
 8642.19261    9176.72743225 8962.22394933 8812.28848165 8430.86477854
 9296.60112493 8837.62216993 8585.35400775 8413.82950515 8778.07362077
 9044.15513369 9114.8426024  9533.37094184 7963.0738071  9063.80381138
 9247.64750365 9279.08989043 8517.2828195  8424.40630124 9056.89876195
 8607.81667805 9117.74763527 8483.48989498 8684.46252811 9017.40907151
 8939.73851903 8805.31740293 8456.81649003 9302.85913866 8725.14674598
 8489.64048641 8166.10785062 8420.00510757]

[10178.37207   5225.629395  7217.4

In [44]:
# Print the model predictions for the next 30 days
lr_prediction = lr.predict(predictionDays_array)
print(lr_prediction)
print()

#Print the actual price for bitcoin for last 30 days
print(df.tail(predictionDays))

[8515.91596585 8520.79111886 8548.94592839 8578.58689392 8581.52131501
 8836.09249442 8799.60037795 8852.41420288 8884.12870932 8860.78965172
 8864.6788694  8887.83559649 8955.8826489  9130.74562585 9102.8670635
 9039.15392488 8824.66794883 8785.06395792 8836.97338343 8956.19653754
 9074.96499075 8971.10487171 8983.6073933  9058.83436674 9073.13468224
 9073.76546092 9020.99165366 8907.98949725 8933.80965035 8933.16136404]

           Price  Prediction
337  7550.900879         NaN
338  7569.936035         NaN
339  7679.867188         NaN
340  7795.601074         NaN
341  7807.058594         NaN
342  8801.038086         NaN
343  8658.553711         NaN
344  8864.766602         NaN
345  8988.596680         NaN
346  8897.468750         NaN
347  8912.654297         NaN
348  9003.070313         NaN
349  9268.761719         NaN
350  9951.518555         NaN
351  9842.666016         NaN
352  9593.896484         NaN
353  8756.430664         NaN
354  8601.795898         NaN
355  8804.477539      