In [1]:
#Imports
import pandas as pd

In [2]:
#load data
df = pd.read_csv("BTC-USD.csv")
df

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2014-09-17,465.864014,468.174011,452.421997,457.334015,457.334015,2.105680e+07
1,2014-09-18,456.859985,456.859985,413.104004,424.440002,424.440002,3.448320e+07
2,2014-09-19,424.102997,427.834991,384.532013,394.795990,394.795990,3.791970e+07
3,2014-09-20,394.673004,423.295990,389.882996,408.903992,408.903992,3.686360e+07
4,2014-09-21,408.084991,412.425995,393.181000,398.821014,398.821014,2.658010e+07
...,...,...,...,...,...,...,...
2637,2021-12-06,49413.480469,50929.519531,47281.035156,50582.625000,50582.625000,3.770731e+10
2638,2021-12-07,50581.828125,51934.781250,50175.808594,50700.085938,50700.085938,3.367681e+10
2639,2021-12-08,50667.648438,51171.375000,48765.988281,50504.796875,50504.796875,2.847970e+10
2640,2021-12-09,50450.082031,50797.164063,47358.351563,47672.121094,47672.121094,2.960358e+10


In [3]:
#Create new df including only the date and close columns
df = df[['Date', 'Close']]
#Rename columns with corresponding values
df = df.rename(columns={'Date':'ds', 'Close':'y'})
#Print info
df

Unnamed: 0,ds,y
0,2014-09-17,457.334015
1,2014-09-18,424.440002
2,2014-09-19,394.795990
3,2014-09-20,408.903992
4,2014-09-21,398.821014
...,...,...
2637,2021-12-06,50582.625000
2638,2021-12-07,50700.085938
2639,2021-12-08,50504.796875
2640,2021-12-09,47672.121094


In [4]:
#Get last 90 rows of data to predict
last = df[len(df)-90:]
last

Unnamed: 0,ds,y
2552,2021-09-12,46063.269531
2553,2021-09-13,44963.074219
2554,2021-09-14,47092.492188
2555,2021-09-15,48176.347656
2556,2021-09-16,47783.359375
...,...,...
2637,2021-12-06,50582.625000
2638,2021-12-07,50700.085938
2639,2021-12-08,50504.796875
2640,2021-12-09,47672.121094


In [5]:
#Get all rows in dataset except last 90
df = df[:-90]
df

Unnamed: 0,ds,y
0,2014-09-17,457.334015
1,2014-09-18,424.440002
2,2014-09-19,394.795990
3,2014-09-20,408.903992
4,2014-09-21,398.821014
...,...,...
2547,2021-09-07,46811.128906
2548,2021-09-08,46091.390625
2549,2021-09-09,46391.421875
2550,2021-09-10,44883.910156


In [6]:
#Calling fbprophet library  
from fbprophet import Prophet

#Creating the Prophet object and fitting the model with the data seperated above
fbp = Prophet(daily_seasonality = True, changepoint_prior_scale = 0.5, seasonality_mode = 'multiplicative')
fbp.fit(df)
future = fbp.make_future_dataframe(periods=90)
forecast = fbp.predict(future)

INFO:numexpr.utils:NumExpr defaulting to 4 threads.


In [7]:
#Importing facebook inbuilt library for plotting data
from fbprophet.plot import plot_plotly
#Plot the data
plot_plotly(fbp, forecast)

In [8]:
#How to show the predicted price for a specific date
forecast[forecast.ds == '2021-12-10']['yhat']

2641    48156.416131
Name: yhat, dtype: float64

In [9]:
#Show acutal value for a data
last[last.ds == '2021-12-10']['y']

2641    48460.320313
Name: y, dtype: float64

In [10]:
#Calling libraries used to generate mist common regression metrics
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error

In [11]:
#Create New df wich contains the actual and predicted values including the dates to be used for the regression metrics
metric_df = pd.concat([forecast.set_index('ds')[['yhat']].reset_index(), df['y']], axis=1)
print(metric_df)

             ds          yhat           y
0    2014-09-17    497.293870  457.334015
1    2014-09-18    482.641082  424.440002
2    2014-09-19    474.084956  394.795990
3    2014-09-20    460.922141  408.903992
4    2014-09-21    445.834970  398.821014
...         ...           ...         ...
2637 2021-12-06  47379.144190         NaN
2638 2021-12-07  47155.216805         NaN
2639 2021-12-08  47467.769356         NaN
2640 2021-12-09  47529.051085         NaN
2641 2021-12-10  48156.416131         NaN

[2642 rows x 3 columns]


In [12]:
metric_df.tail()

Unnamed: 0,ds,yhat,y
2637,2021-12-06,47379.14419,
2638,2021-12-07,47155.216805,
2639,2021-12-08,47467.769356,
2640,2021-12-09,47529.051085,
2641,2021-12-10,48156.416131,


In [13]:
#remove values including nulls as these are the predicted values
metric_df = metric_df.dropna()

In [14]:
#Check the excess prediction values are removed
metric_df.tail()

Unnamed: 0,ds,yhat,y
2547,2021-09-07,44110.676313,46811.128906
2548,2021-09-08,43765.62698,46091.390625
2549,2021-09-09,43138.446355,46391.421875
2550,2021-09-10,42921.659656,44883.910156
2551,2021-09-11,42301.926306,45201.457031


In [15]:
print(forecast['yhat'][2642-90:])

2552    41474.289858
2553    40771.796374
2554    39759.634814
2555    39177.356331
2556    38334.661274
            ...     
2637    47379.144190
2638    47155.216805
2639    47467.769356
2640    47529.051085
2641    48156.416131
Name: yhat, Length: 90, dtype: float64


In [16]:
print(last['y'])

2552    46063.269531
2553    44963.074219
2554    47092.492188
2555    48176.347656
2556    47783.359375
            ...     
2637    50582.625000
2638    50700.085938
2639    50504.796875
2640    47672.121094
2641    48460.320313
Name: y, Length: 90, dtype: float64


In [17]:
print(last)

              ds             y
2552  2021-09-12  46063.269531
2553  2021-09-13  44963.074219
2554  2021-09-14  47092.492188
2555  2021-09-15  48176.347656
2556  2021-09-16  47783.359375
...          ...           ...
2637  2021-12-06  50582.625000
2638  2021-12-07  50700.085938
2639  2021-12-08  50504.796875
2640  2021-12-09  47672.121094
2641  2021-12-10  48460.320313

[90 rows x 2 columns]


In [18]:
forecast = forecast[['ds','yhat']]
forecast = forecast[2642-90:]
print(forecast)

             ds          yhat
2552 2021-09-12  41474.289858
2553 2021-09-13  40771.796374
2554 2021-09-14  39759.634814
2555 2021-09-15  39177.356331
2556 2021-09-16  38334.661274
...         ...           ...
2637 2021-12-06  47379.144190
2638 2021-12-07  47155.216805
2639 2021-12-08  47467.769356
2640 2021-12-09  47529.051085
2641 2021-12-10  48156.416131

[90 rows x 2 columns]


In [19]:
#Calculating the r squared
r2_score(last['y'], forecast['yhat'])

-6.671549275286925

In [20]:
df = df.dropna()
df

Unnamed: 0,ds,y
0,2014-09-17,457.334015
1,2014-09-18,424.440002
2,2014-09-19,394.795990
3,2014-09-20,408.903992
4,2014-09-21,398.821014
...,...,...
2547,2021-09-07,46811.128906
2548,2021-09-08,46091.390625
2549,2021-09-09,46391.421875
2550,2021-09-10,44883.910156


In [21]:
#Scale values between 0 and 1 for RMSE and MAE to be easier tocompare to eacother between models
import pandas as pd
from sklearn import preprocessing
from sklearn.preprocessing import MinMaxScaler

data = last.filter(['y'])

dataset = data.values

scaler = MinMaxScaler(feature_range=(0,1))
scaled_data = scaler.fit_transform(dataset)
scaled_data

array([[0.19981257],
       [0.15887226],
       [0.23811186],
       [0.27844414],
       [0.26382032],
       [0.24462496],
       [0.28224033],
       [0.24435328],
       [0.08001015],
       [0.        ],
       [0.10720112],
       [0.15634273],
       [0.07985941],
       [0.07527654],
       [0.09358274],
       [0.05738272],
       [0.0126843 ],
       [0.0323999 ],
       [0.11525327],
       [0.27623353],
       [0.26114586],
       [0.27932255],
       [0.31329509],
       [0.40267463],
       [0.54581514],
       [0.4879334 ],
       [0.49395663],
       [0.53118245],
       [0.52386494],
       [0.62482857],
       [0.57110467],
       [0.62171425],
       [0.61875315],
       [0.77773806],
       [0.75162391],
       [0.77623723],
       [0.79381838],
       [0.87702091],
       [0.94142882],
       [0.80066885],
       [0.74418474],
       [0.77028333],
       [0.75306238],
       [0.83154176],
       [0.73196166],
       [0.66195103],
       [0.74157511],
       [0.801

In [22]:
testerrrr = (forecast.set_index('ds')[['yhat']].reset_index())
realer = testerrrr.dropna()

realer = forecast.filter(['yhat'])


dataR = realer

datasetR = dataR.values

scalerR = MinMaxScaler(feature_range=(0,1))
scaled_dataR = scaler.fit_transform(datasetR)

scaled_dataR

array([[0.65519445],
       [0.61894496],
       [0.56671624],
       [0.53666999],
       [0.49318593],
       [0.47265579],
       [0.43277595],
       [0.38392523],
       [0.34367623],
       [0.28945921],
       [0.26003908],
       [0.21944194],
       [0.20457459],
       [0.17262628],
       [0.13408778],
       [0.10668953],
       [0.06747642],
       [0.05554548],
       [0.03428458],
       [0.04082342],
       [0.03155081],
       [0.01678185],
       [0.01413003],
       [0.        ],
       [0.01359292],
       [0.01746411],
       [0.04881146],
       [0.06311377],
       [0.07045044],
       [0.08831098],
       [0.09250793],
       [0.12245081],
       [0.14002208],
       [0.18270121],
       [0.20532041],
       [0.21803225],
       [0.23855024],
       [0.24247462],
       [0.26982645],
       [0.28223256],
       [0.31787527],
       [0.33135526],
       [0.33329644],
       [0.34200737],
       [0.33320995],
       [0.34782434],
       [0.3474547 ],
       [0.371

In [23]:
from sklearn.metrics import mean_squared_error
from math import sqrt

rms = sqrt(mean_squared_error(scaled_data, scaled_dataR))

print(rms)

0.40319321043059564


In [24]:
from sklearn.metrics import mean_absolute_error
mean_absolute_error(scaled_data, scaled_dataR)

0.3593653565974075

In [25]:
from sklearn.metrics import accuracy_score