In [1]:
import pandas as  pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split

In [2]:
# Load the price history from a CSV in the working directory into `data`.
data = pd.read_csv('bitcoin.csv')

In [3]:
# Preview the first rows to check what was loaded.
data.head()

Unnamed: 0,Date,Price
0,5/23/2019,7881.84668
1,5/24/2019,7987.371582
2,5/25/2019,8052.543945
3,5/26/2019,8673.21582
4,5/27/2019,8805.77832


In [4]:
# Inspect column dtypes; Date comes in as `object`, not a datetime type.
data.dtypes

Date      object
Price    float64
dtype: object

In [5]:
# Drop the raw date strings; later cells only use the price series.
# Reassigning (instead of inplace=True) together with errors='ignore' lets this
# cell be re-run without raising KeyError once 'Date' has already been removed.
data = data.drop(columns=['Date'], errors='ignore')

In [6]:
# Confirm that only the Price column remains.
data.head()

Unnamed: 0,Price
0,7881.84668
1,7987.371582
2,8052.543945
3,8673.21582
4,8805.77832


In [7]:
# Forecast horizon, measured in rows of `data`.
prediction_days = 30

# Target column: the price `prediction_days` rows further down. Shifting the
# series upward leaves the final `prediction_days` rows without a target (NaN).
data['Prediction'] = data['Price'].shift(-prediction_days)

In [8]:
# Check the new Prediction column next to Price.
data.head()

Unnamed: 0,Price,Prediction
0,7881.84668,10701.69141
1,7987.371582,10855.37109
2,8052.543945,11011.10254
3,8673.21582,11790.91699
4,8805.77832,13016.23145


In [9]:
# Count missing values per column; the shift leaves `prediction_days` NaNs
# in Prediction.
data.isnull().sum()

Price          0
Prediction    30
dtype: int64

In [10]:
# (rows, columns) of the frame after adding the target column.
data.shape

(367, 2)

In [11]:
# Feature matrix: every column except the target, as a 2-D NumPy array.
# `columns=` replaces the positional axis argument (`drop([...], 1)`), which
# pandas deprecated in 1.3 and rejects with a TypeError from 2.0 onward.
X = np.array(data.drop(columns=['Prediction']))

# Remove the last `prediction_days` rows: their target is NaN because no price
# that far ahead exists in the data.
X = X[:len(data) - prediction_days]

In [12]:
# Display the feature array.
# NOTE(review): this prints the whole array; `X[:5]` and `X.shape` would keep
# the output short.
X

array([[ 7881.84668 ],
       [ 7987.371582],
       [ 8052.543945],
       [ 8673.21582 ],
       [ 8805.77832 ],
       [ 8719.961914],
       [ 8659.487305],
       [ 8319.472656],
       [ 8574.501953],
       [ 8564.016602],
       [ 8742.958008],
       [ 8208.995117],
       [ 7707.770996],
       [ 7824.231445],
       [ 7822.023438],
       [ 8043.951172],
       [ 7954.12793 ],
       [ 7688.077148],
       [ 8000.32959 ],
       [ 7927.714355],
       [ 8145.857422],
       [ 8230.923828],
       [ 8693.833008],
       [ 8838.375   ],
       [ 8994.488281],
       [ 9320.352539],
       [ 9081.762695],
       [ 9273.521484],
       [ 9527.160156],
       [10144.55664 ],
       [10701.69141 ],
       [10855.37109 ],
       [11011.10254 ],
       [11790.91699 ],
       [13016.23145 ],
       [11182.80664 ],
       [12407.33203 ],
       [11959.37109 ],
       [10817.15527 ],
       [10583.13477 ],
       [10801.67773 ],
       [11961.26953 ],
       [11215.4375  ],
       [109

In [13]:
# Target vector: the shifted prices, still including the trailing NaN entries.
y = np.array(data['Prediction'])

In [14]:
# Drop the last `prediction_days` entries (the NaN targets) so that y, the
# dependent-variable array, has one value per row of X.

y = y[:-prediction_days]

In [15]:
# Display the target array.
# NOTE(review): this prints the whole array; `y[:5]` and `y.shape` would keep
# the output short.
y

array([10701.69141 , 10855.37109 , 11011.10254 , 11790.91699 ,
       13016.23145 , 11182.80664 , 12407.33203 , 11959.37109 ,
       10817.15527 , 10583.13477 , 10801.67773 , 11961.26953 ,
       11215.4375  , 10978.45996 , 11208.55078 , 11450.84668 ,
       12285.95801 , 12573.8125  , 12156.5127  , 11358.66211 ,
       11815.98633 , 11392.37891 , 10256.05859 , 10895.08984 ,
        9477.641602,  9693.802734, 10666.48242 , 10530.73242 ,
       10767.13965 , 10599.10547 , 10343.10645 ,  9900.767578,
        9811.925781,  9911.841797,  9870.303711,  9477.677734,
        9552.860352,  9519.145508,  9607.423828, 10085.62793 ,
       10399.66895 , 10518.17481 , 10821.72656 , 10970.18457 ,
       11805.65332 , 11478.16895 , 11941.96875 , 11966.40723 ,
       11862.93652 , 11354.02441 , 11523.5791  , 11382.61621 ,
       10895.83008 , 10051.7041  , 10311.5459  , 10374.33887 ,
       10231.74414 , 10345.81055 , 10916.05371 , 10763.23242 ,
       10138.04981 , 10131.05566 , 10407.96484 , 10159.

In [16]:
# Hold out 20% of the (feature, target) pairs for evaluation; the fixed seed
# keeps the split reproducible.
# NOTE(review): train_test_split shuffles rows by default, so held-out rows are
# interleaved in time with training rows -- confirm that is intended for a
# time series (shuffle=False gives a chronological split).
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [17]:
# Features of the last `prediction_days` rows -- the rows that have no target
# yet -- used later as inputs for the forecast.
# `columns=` replaces the positional axis argument (`drop([...], 1)`), which
# pandas deprecated in 1.3 and rejects with a TypeError from 2.0 onward.
prediction_days_array = np.array(data.drop(columns=['Prediction']))[-prediction_days:]

In [18]:
from sklearn.svm import SVR

In [19]:
# Support vector regressor with an RBF kernel; C and gamma are hard-coded here.
model = SVR(
    kernel='rbf',
    C=1e3,
    gamma=0.00001,
)

In [20]:
# Fit the regressor on the training split.
model.fit(X_train, y_train)

SVR(C=1000.0, gamma=1e-05)

In [21]:
# Score on the held-out split; for scikit-learn regressors `score` returns the
# coefficient of determination (R^2).
model_score = model.score(X_test, y_test)

In [22]:
# Show the test-set score computed in the previous cell.
model_score

0.27199896523743483

In [23]:
# Predict targets for the held-out features and display them.
model_prediction = model.predict(X_test)
model_prediction

array([ 9669.5714263 ,  8114.93366243,  9444.50551932,  8093.03829365,
        8444.51250859,  8127.86341915,  8859.69081069,  8068.33037777,
        7904.82131212,  8914.37675434,  8289.06552763,  7892.35609633,
       10508.47917114, 10608.19435518, 10196.90576835,  8184.93388787,
        8515.00357718,  8092.53402934, 10550.28617998,  8275.61261638,
       10204.22247397,  8770.19191391,  8910.97159507,  8640.45735018,
        8643.73489523,  8212.79607023,  8198.81195442,  8251.02360716,
        8720.90437186,  8299.8782536 ,  8354.33187407,  8889.92874061,
        8352.70411864,  8460.85706387,  8012.3235133 ,  8241.08523612,
       10050.16406681,  8362.66741952,  8912.7768677 ,  8474.09324612,
       10145.05972247,  8890.22157404,  9909.94618303,  8379.60978697,
        8466.34385766, 10549.03654143,  9597.63357946,  8316.4042804 ,
        8263.53318019,  8460.90803189,  8862.1712958 ,  7963.2253527 ,
        9705.07118876, 10222.60979541,  8224.0364065 ,  7823.44115605,
      

In [24]:
# Forecast: feed the last `prediction_days` observed prices to the model; each
# output is the predicted price `prediction_days` rows after its input row.
model_array_prediction = model.predict(prediction_days_array)

In [25]:
# Show the forecast produced from the last `prediction_days` prices.
model_array_prediction

array([8106.10780127, 8190.05713994, 8918.94294816, 9650.90126419,
       9698.25792341, 8165.95971557, 8381.62898562, 8024.63671485,
       7677.8934262 , 7939.0406864 , 7896.84701358, 7638.29411374,
       7837.96783738, 8143.01679977, 7961.54824096, 8415.79895363,
       8244.93081072, 8457.63977887, 8159.22551664, 7843.72612801,
       7999.79153319, 8132.31151043, 8359.44831698, 8160.86160002,
       8013.58103782, 8008.67963566, 8584.25394225, 7477.90684036,
       7526.05003977, 7520.42280476])

In [27]:
# Show the last `prediction_days` rows: the prices used as forecast inputs,
# whose Prediction values are empty (NaN).
data.tail(prediction_days)

Unnamed: 0,Price,Prediction
337,7550.900879,
338,7569.936035,
339,7679.867188,
340,7795.601074,
341,7807.058594,
342,8801.038086,
343,8658.553711,
344,8864.766602,
345,8988.59668,
346,8897.46875,
