In [71]:
from pycaret.regression import *

In [72]:
import pandas as pd
import numpy as np
import pandas_datareader as pdr
from datetime import datetime

In [73]:
# Ticker symbols to request from Yahoo; a single-element list is still a list.
COINS = ['GGG-USD']

# Build the daily reader for the requested window. The recorded output of the
# next cell starts at 2021-11-11, i.e. much later than the requested start.
data = pdr.yahoo.daily.YahooDailyReader(
    symbols=COINS,
    start=datetime(2008, 11, 9),
    end=datetime(2022, 8, 27),
    get_actions=True,
)
df = data.read()

# The result has two column levels (attribute, symbol); keep only the columns
# whose top-level attribute is 'Close'.
df = df.loc[:, df.columns.get_level_values(0) == 'Close']

In [74]:
# Preview the Close-only frame; columns still carry the (attribute, symbol) levels.
df

Attributes,Close
Symbols,GGG-USD
Date,Unnamed: 1_level_2
2021-11-11,3.778361
2021-11-12,5.495234
2021-11-13,8.327300
2021-11-14,6.822289
2021-11-15,5.918199
...,...
2022-08-24,0.076640
2022-08-25,0.076580
2022-08-26,0.071784
2022-08-27,0.070295


In [75]:
# Strip the inner (symbol) column level so the price column is plain 'Close'.
df = df.droplevel(1, axis='columns')

In [76]:
# 1-based running row counter, used later as the time feature and split key.
df['Series'] = np.arange(len(df)) + 1

In [77]:
# Move the Date index into a regular column and fall back to a 0..n-1 index.
df = df.reset_index(drop=False)

In [78]:
# Inspect the flattened frame: Date is now a column next to Close and Series.
df

Attributes,Date,Close,Series
0,2021-11-11,3.778361,1
1,2021-11-12,5.495234,2
2,2021-11-13,8.327300,3
3,2021-11-14,6.822289,4
4,2021-11-15,5.918199,5
...,...,...,...
286,2022-08-24,0.076640,287
287,2022-08-25,0.076580,288
288,2022-08-26,0.071784,289
289,2022-08-27,0.070295,290


In [79]:
# Month number of each row's Date, taken with the vectorized datetime accessor
# rather than a Python-level loop over individual timestamps.
df['Month'] = df['Date'].dt.month

In [80]:
# Calendar year of each row's Date, taken with the vectorized datetime accessor
# rather than a Python-level loop over individual timestamps.
df['Year'] = df['Date'].dt.year

In [81]:
# Check that the Month and Year columns were added alongside Date, Close and Series.
df

Attributes,Date,Close,Series,Month,Year
0,2021-11-11,3.778361,1,11,2021
1,2021-11-12,5.495234,2,11,2021
2,2021-11-13,8.327300,3,11,2021
3,2021-11-14,6.822289,4,11,2021
4,2021-11-15,5.918199,5,11,2021
...,...,...,...,...,...
286,2022-08-24,0.076640,287,8,2022
287,2022-08-25,0.076580,288,8,2022
288,2022-08-26,0.071784,289,8,2022
289,2022-08-27,0.070295,290,8,2022


In [82]:
# Drop the raw Date column. Rebinding the result (instead of inplace=True)
# leaves any previously displayed frame object unmodified.
df = df.drop(columns=['Date'])

In [83]:
# Put the three feature columns first and the Close price last.
df = df.loc[:, ['Series', 'Year', 'Month', 'Close']]

In [84]:
# Show the first five rows in the new column order.
df.head()

Attributes,Series,Year,Month,Close
0,1,2021,11,3.778361
1,2,2021,11,5.495234
2,3,2021,11,8.3273
3,4,2021,11,6.822289
4,5,2021,11,5.918199


In [85]:
# Chronological split on the running counter: rows up to 90% of the length go
# to training, the remaining tail becomes the test set.
split_at = len(df) * 0.90
train = df.loc[df['Series'] <= split_at]
test = df.loc[df['Series'] > split_at]

In [86]:
# Row/column counts of the two partitions, as a sanity check on the split.
train.shape, test.shape

((261, 4), (30, 4))

In [87]:
# Experiment configuration for PyCaret's regression setup().
setup_options = dict(
    data=train,                          # rows used for fitting / cross-validation
    test_data=test,                      # explicit hold-out: the chronological tail
    target="Close",                      # column to predict
    fold_strategy='timeseries',          # time-ordered CV folds
    numeric_features=['Year', 'Series'], # force numeric typing; Month is left to inference
    fold=3,
    transform_target=True,               # transform the target before fitting
    session_id=123,                      # fixed session id for repeatable runs
)
s = setup(**setup_options)

Unnamed: 0,Description,Value
0,session_id,123
1,Target,Close
2,Original Data,"(261, 4)"
3,Missing Values,False
4,Numeric Features,2
5,Categorical Features,1
6,Ordinal Features,False
7,High Cardinality Features,False
8,High Cardinality Method,
9,Transformed Train Set,"(261, 11)"


In [88]:
# Look at the full Close series (train and test rows together).
df['Close'] 

0      3.778361
1      5.495234
2      8.327300
3      6.822289
4      5.918199
         ...   
286    0.076640
287    0.076580
288    0.071784
289    0.070295
290    0.068806
Name: Close, Length: 291, dtype: float64

In [89]:
# Cross-validate the candidate regressors, rank the leaderboard by MAE, and keep
# the top-ranked model (Orthogonal Matching Pursuit in the recorded run).
best = compare_models(sort = 'MAE')

Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE,TT (Sec)
omp,Orthogonal Matching Pursuit,0.6672,1.2548,0.688,-3.1139,0.2212,0.5964,0.61
br,Bayesian Ridge,0.6825,1.3168,0.7031,-3.2428,0.2243,0.6036,0.5967
en,Elastic Net,0.6917,1.3519,0.712,-3.4219,0.227,0.6183,0.5967
lasso,Lasso Regression,0.7163,1.4502,0.7362,-3.743,0.233,0.6416,1.0233
dt,Decision Tree Regressor,0.789,1.3974,0.8431,-6.8033,0.321,1.2085,0.01
gbr,Gradient Boosting Regressor,0.7925,1.4179,0.8461,-6.6556,0.3206,1.1987,0.01
rf,Random Forest Regressor,0.8072,1.4829,0.8596,-6.8122,0.3225,1.201,0.0633
knn,K Neighbors Regressor,0.8507,1.6767,0.9003,-7.006,0.3286,1.2054,0.8067
huber,Huber Regressor,0.8511,2.0367,0.8704,-5.4349,0.2629,0.7289,0.8433
ada,AdaBoost Regressor,0.8807,1.7043,0.933,-9.2576,0.3479,1.3599,0.0267


In [90]:
# No data argument is given, so the selected model is scored on the hold-out
# set registered in setup(); the metrics table is displayed as a side effect.
prediction_holdout = predict_model(estimator=best)

Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE
0,Orthogonal Matching Pursuit,0.0476,0.0023,0.048,-17.8665,0.0451,0.5387


In [91]:
# Score every row of df (training and test periods alike) so that the whole
# history can be plotted; the returned frame gains a 'Label' prediction column.
predictions = predict_model(estimator=best, data=df)

Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE
0,Orthogonal Matching Pursuit,0.5422,1.6392,1.2803,0.823,0.1613,0.2633


In [92]:
# Compare actual Close values with the model's Label column row by row.
predictions

Attributes,Series,Year,Month,Close,Label
0,1,2021,11,3.778361,12.264879
1,2,2021,11,5.495234,12.015952
2,3,2021,11,8.327300,11.772057
3,4,2021,11,6.822289,11.533093
4,5,2021,11,5.918199,11.298959
...,...,...,...,...,...
286,287,2022,8,0.076640,0.032412
287,288,2022,8,0.076580,0.031738
288,289,2022,8,0.071784,0.031078
289,290,2022,8,0.070295,0.030432


In [93]:
import plotly.express as px

In [94]:
# Line chart of actual Close and predicted Label against the running counter.
fig = px.line(
    data_frame=predictions,
    x='Series',
    y=["Close", "Label"],
    template='plotly_dark',
)

In [95]:
# Render the actual-vs-predicted chart.
fig.show()

In [96]:
# Show the held-out rows (the tail of the series by Series number).
test

Attributes,Series,Year,Month,Close
261,262,2022,7,0.093432
262,263,2022,7,0.092688
263,264,2022,8,0.094416
264,265,2022,8,0.0975
265,266,2022,8,0.102488
266,267,2022,8,0.098033
267,268,2022,8,0.101625
268,269,2022,8,0.101264
269,270,2022,8,0.101721
270,271,2022,8,0.10368
