In [1]:
# Import libraries 
import yfinance as yf
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.kernel_ridge import KernelRidge
from sklearn.metrics import classification_report
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score, accuracy_score
from pandas.tseries.offsets import DateOffset
from sklearn.preprocessing import StandardScaler
from sklearn import svm
import hvplot.pandas
import plotly.express as px

In [2]:
# Use yfinance to gather historcial data of BTC
crypto_btc = yf.download(tickers='BTC-USD', start='2017-12-01', end='2023-01-30', interval='1d').drop(columns=['Adj Close']).round(2)

[*********************100%***********************]  1 of 1 completed


In [3]:
# Add percent change to dataframe
crypto_btc['Percent Change'] = crypto_btc['Close'].pct_change()

# Calculate EMA for last 9 days
crypto_btc['ema_9'] = crypto_btc['Close'].ewm(9).mean()

# Calculate SMA for last 5 days
crypto_btc['sma_5'] = crypto_btc['Close'].rolling(5).mean()

# Calculate SMA for last 10 days
crypto_btc['sma_10'] = crypto_btc['Close'].rolling(10).mean()

# Calculate SMA for last 15 days
crypto_btc['sma_15'] = crypto_btc['Close'].rolling(15).mean()

# Calculate SMA for last 30 days
crypto_btc['sma_30'] = crypto_btc['Close'].rolling(30).mean()

# Drop all NaN
crypto_btc = crypto_btc.dropna()

# Look at Datafraeme
crypto_btc

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Percent Change,ema_9,sma_5,sma_10,sma_15,sma_30
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2017-12-30 00:00:00+00:00,14681.90,14681.90,12350.10,12952.20,14452599808,-0.116265,15307.711376,14830.640,14643.950,15906.213333,15332.200000
2017-12-31 00:00:00+00:00,12897.70,14377.40,12755.60,14156.40,12136299520,0.092973,15188.013521,14441.960,14479.300,15550.146667,15438.226667
2018-01-01 00:00:00+00:00,14112.20,14112.20,13154.70,13657.20,10291200000,-0.035263,15029.488936,14005.700,14461.840,15184.573333,15524.313333
2018-01-02 00:00:00+00:00,13625.00,15444.60,13163.60,14982.10,16846600192,0.097011,15024.598925,14080.820,14490.130,14909.100000,15646.276667
2018-01-03 00:00:00+00:00,14978.20,15572.80,14844.50,15201.00,16871900160,0.014611,15042.743690,14189.780,14617.650,14737.386667,15764.403333
...,...,...,...,...,...,...,...,...,...,...,...
2023-01-23 00:00:00+00:00,22721.09,23126.49,22654.30,22934.43,26518700512,0.009419,20387.070211,22439.162,21707.284,20561.982667,18664.842667
2023-01-24 00:00:00+00:00,22929.63,23134.01,22549.74,22636.47,26405069715,-0.012992,20612.010190,22749.098,21873.301,20924.644000,18857.992000
2023-01-25 00:00:00+00:00,22639.27,23722.10,22406.08,23117.86,30685366709,0.021266,20862.595171,22837.360,22097.007,21302.748667,19064.594000
2023-01-26 00:00:00+00:00,23108.96,23237.08,22911.37,23032.78,26357839322,-0.003680,21079.613654,22888.392,22283.322,21642.607333,19275.114333


In [4]:
# Setup X data freatures
X = crypto_btc[['Open', 'High', 'Low', 'Volume', 'Percent Change', 'sma_5', 'sma_10', 'sma_15', 'sma_30', 'ema_9']].copy()
display(X.head())

Unnamed: 0_level_0,Open,High,Low,Volume,Percent Change,sma_5,sma_10,sma_15,sma_30,ema_9
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2017-12-30 00:00:00+00:00,14681.9,14681.9,12350.1,14452599808,-0.116265,14830.64,14643.95,15906.213333,15332.2,15307.711376
2017-12-31 00:00:00+00:00,12897.7,14377.4,12755.6,12136299520,0.092973,14441.96,14479.3,15550.146667,15438.226667,15188.013521
2018-01-01 00:00:00+00:00,14112.2,14112.2,13154.7,10291200000,-0.035263,14005.7,14461.84,15184.573333,15524.313333,15029.488936
2018-01-02 00:00:00+00:00,13625.0,15444.6,13163.6,16846600192,0.097011,14080.82,14490.13,14909.1,15646.276667,15024.598925
2018-01-03 00:00:00+00:00,14978.2,15572.8,14844.5,16871900160,0.014611,14189.78,14617.65,14737.386667,15764.403333,15042.74369


In [5]:
# Initialize y for prediction
y = crypto_btc['Close']

In [6]:
# Select the start of the training period
training_begin = X.index.min()

# Display the training begin date
print(training_begin)

2017-12-30 00:00:00+00:00


In [7]:
# Select the ending period for the training data with an offset of 3 months
training_end = X.index.min() + DateOffset(months=6)

# Display the training end date
print(training_end)

2018-06-30 00:00:00+00:00


In [8]:
# Generate the X_train and y_train DataFrames
X_train = X.loc[training_begin:training_end]
y_train = y.loc[training_begin:training_end]

# Display sample data
X_train.head()

Unnamed: 0_level_0,Open,High,Low,Volume,Percent Change,sma_5,sma_10,sma_15,sma_30,ema_9
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2017-12-30 00:00:00+00:00,14681.9,14681.9,12350.1,14452599808,-0.116265,14830.64,14643.95,15906.213333,15332.2,15307.711376
2017-12-31 00:00:00+00:00,12897.7,14377.4,12755.6,12136299520,0.092973,14441.96,14479.3,15550.146667,15438.226667,15188.013521
2018-01-01 00:00:00+00:00,14112.2,14112.2,13154.7,10291200000,-0.035263,14005.7,14461.84,15184.573333,15524.313333,15029.488936
2018-01-02 00:00:00+00:00,13625.0,15444.6,13163.6,16846600192,0.097011,14080.82,14490.13,14909.1,15646.276667,15024.598925
2018-01-03 00:00:00+00:00,14978.2,15572.8,14844.5,16871900160,0.014611,14189.78,14617.65,14737.386667,15764.403333,15042.74369


In [9]:
# Generate the X_test and y_test DataFrames
X_test = X.loc[training_end:]
y_test = y.loc[training_end:]

# Display sample data
X_test.head()

Unnamed: 0_level_0,Open,High,Low,Volume,Percent Change,sma_5,sma_10,sma_15,sma_30,ema_9
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2018-06-30 00:00:00+00:00,6214.22,6465.51,6214.22,4543860224,0.029863,6155.308,6217.486,6367.04,6793.507667,6476.518548
2018-07-01 00:00:00+00:00,6411.68,6432.85,6289.29,4788259840,-0.002839,6213.738,6183.094,6356.084,6754.986667,6467.448693
2018-07-02 00:00:00+00:00,6380.38,6683.86,6305.7,4396930048,0.03576,6305.148,6236.143,6363.744667,6720.677667,6482.121824
2018-07-03 00:00:00+00:00,6596.66,6671.37,6447.75,4672309760,-0.012789,6430.378,6272.854,6350.062667,6680.989,6486.868641
2018-07-04 00:00:00+00:00,6550.87,6771.92,6450.46,4176689920,0.010408,6506.228,6315.286,6338.57,6650.425,6497.936777


In [10]:
# Instantiate KernelRidge 
kr = KernelRidge(kernel='linear', alpha=5000)

In [11]:
# Fit data into model
kr.fit(X_train, y_train)

  "Singular matrix in solving dual problem. Using "


KernelRidge(alpha=5000)

In [12]:
# Predict the model
predict = kr.predict(X_train)
predict

array([13140., 13942., 13476., 14836., 15624., 15648., 17332., 17364.,
       16636., 15160., 15236., 14724., 14008., 14026., 14700., 13754.,
       14344., 11536., 10720., 12072., 11822., 12788., 11736., 10978.,
       10800., 11278., 11470., 11068., 11370., 11914., 11390., 10438.,
       10220.,  9422.,  8572.,  8930.,  8526.,  7454.,  7380.,  8090.,
        8498.,  8360.,  8756.,  8130.,  8744.,  8595.,  9310., 10038.,
       10046., 10832., 10690., 11008., 11794., 10804., 10398., 10242.,
        9946.,  9601., 10116., 10600., 10698., 10796., 11012., 11234.,
       11130., 11492., 10834., 10132.,  9692.,  8964.,  9082.,  9282.,
        9342.,  9234.,  8440.,  8114.,  8271.,  7924.,  7848.,  8522.,
        8754.,  9035.,  8749.,  8572.,  8796.,  8504.,  8139.,  7967.,
        8059.,  7400.,  7030.,  7137.,  6722.,  7056.,  7413.,  6976.,
        6846.,  6655.,  6907.,  7028.,  6870.,  6805.,  6952.,  7652.,
        8100.,  8014.,  8203.,  8046.,  8152.,  8152.,  8348.,  8776.,
      

In [13]:
# Review data for X_train
df = X_train
df['Predict'] = predict
df['Actual Close'] = crypto_btc['Close']

df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  after removing the cwd from sys.path.


Unnamed: 0_level_0,Open,High,Low,Volume,Percent Change,sma_5,sma_10,sma_15,sma_30,ema_9,Predict,Actual Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2017-12-30 00:00:00+00:00,14681.90,14681.90,12350.10,14452599808,-0.116265,14830.640,14643.950,15906.213333,15332.200000,15307.711376,13140.0,12952.20
2017-12-31 00:00:00+00:00,12897.70,14377.40,12755.60,12136299520,0.092973,14441.960,14479.300,15550.146667,15438.226667,15188.013521,13942.0,14156.40
2018-01-01 00:00:00+00:00,14112.20,14112.20,13154.70,10291200000,-0.035263,14005.700,14461.840,15184.573333,15524.313333,15029.488936,13476.0,13657.20
2018-01-02 00:00:00+00:00,13625.00,15444.60,13163.60,16846600192,0.097011,14080.820,14490.130,14909.100000,15646.276667,15024.598925,14836.0,14982.10
2018-01-03 00:00:00+00:00,14978.20,15572.80,14844.50,16871900160,0.014611,14189.780,14617.650,14737.386667,15764.403333,15042.743690,15624.0,15201.00
...,...,...,...,...,...,...,...,...,...,...,...,...
2018-06-26 00:00:00+00:00,6253.55,6290.16,6093.67,3279759872,-0.024885,6152.450,6427.257,6459.128000,6954.387667,6629.230500,6145.0,6093.67
2018-06-27 00:00:00+00:00,6084.40,6180.00,6052.85,3296219904,0.010414,6167.138,6393.043,6430.779333,6921.759000,6582.020450,6125.0,6157.13
2018-06-28 00:00:00+00:00,6153.16,6170.41,5873.05,3467800064,-0.041203,6115.330,6309.905,6401.015333,6869.454000,6514.162405,5942.0,5903.44
2018-06-29 00:00:00+00:00,5898.13,6261.66,5835.75,3966230016,0.053335,6124.344,6254.741,6370.545333,6829.846667,6484.576164,6114.0,6218.30


In [14]:
# Plot Train Data
df.reset_index(inplace=True)
fig = px.line(df, x='Date', y=['Actual Close' ,'Predict'], title='Close Price with Range Slider')
fig.update_layout(hovermode="x")

fig.update_xaxes(
    rangeslider_visible=True,
    rangeselector=dict(
        buttons=list([
            dict(count=1, label="1m", step="month", stepmode="backward"),
            dict(count=6, label="6m", step="month", stepmode="backward"),
            dict(count=1, label="1y", step="year", stepmode="backward"),
            dict(count=2, label="2y", step="year", stepmode="backward"),
            dict(step="all")
            ])
            )
            )
fig.show()

In [15]:
# Predict data
predict_test = kr.predict(X_test)
predict_test

array([ 6406.,  6375.,  6526., ..., 23976., 23672., 23384.])

In [16]:
# Review Test Dataframe
df_test = X_test
df_test['Predict'] = predict_test
df_test['Actual Close'] = crypto_btc['Close']

df_test



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



Unnamed: 0_level_0,Open,High,Low,Volume,Percent Change,sma_5,sma_10,sma_15,sma_30,ema_9,Predict,Actual Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2018-06-30 00:00:00+00:00,6214.22,6465.51,6214.22,4543860224,0.029863,6155.308,6217.486,6367.040000,6793.507667,6476.518548,6406.0,6404.00
2018-07-01 00:00:00+00:00,6411.68,6432.85,6289.29,4788259840,-0.002839,6213.738,6183.094,6356.084000,6754.986667,6467.448693,6375.0,6385.82
2018-07-02 00:00:00+00:00,6380.38,6683.86,6305.70,4396930048,0.035760,6305.148,6236.143,6363.744667,6720.677667,6482.121824,6526.0,6614.18
2018-07-03 00:00:00+00:00,6596.66,6671.37,6447.75,4672309760,-0.012789,6430.378,6272.854,6350.062667,6680.989000,6486.868641,6560.0,6529.59
2018-07-04 00:00:00+00:00,6550.87,6771.92,6450.46,4176689920,0.010408,6506.228,6315.286,6338.570000,6650.425000,6497.936777,6615.0,6597.55
...,...,...,...,...,...,...,...,...,...,...,...,...
2023-01-23 00:00:00+00:00,22721.09,23126.49,22654.30,26518700512,0.009419,22439.162,21707.284,20561.982667,18664.842667,20387.070211,23520.0,22934.43
2023-01-24 00:00:00+00:00,22929.63,23134.01,22549.74,26405069715,-0.012992,22749.098,21873.301,20924.644000,18857.992000,20612.010190,23368.0,22636.47
2023-01-25 00:00:00+00:00,22639.27,23722.10,22406.08,30685366709,0.021266,22837.360,22097.007,21302.748667,19064.594000,20862.595171,23976.0,23117.86
2023-01-26 00:00:00+00:00,23108.96,23237.08,22911.37,26357839322,-0.003680,22888.392,22283.322,21642.607333,19275.114333,21079.613654,23672.0,23032.78


In [17]:
# Plot Test Data
df_test.reset_index(inplace=True)
fig = px.line(df_test, x='Date', y=['Actual Close' ,'Predict'], title='Close Price with Range Slider')
fig.update_layout(hovermode="x")

fig.update_xaxes(
    rangeslider_visible=True,
    rangeselector=dict(
        buttons=list([
            dict(count=1, label="1m", step="month", stepmode="backward"),
            dict(count=6, label="6m", step="month", stepmode="backward"),
            dict(count=1, label="1y", step="year", stepmode="backward"),
            dict(count=2, label="2y", step="year", stepmode="backward"),
            dict(step="all")
            ])
            )
            )
fig.show()

In [18]:
# Calculate Mean Absolute Error
btc_mae = mean_absolute_error(y_train, predict)

# Calculate Mean Squared Error
btc_rmse = np.sqrt(mean_squared_error(y_train, predict))

# Calculate R2 score
btc_r_square = r2_score(y_train, predict)

#Print MAE,MSE, and R2
print(f'The Mean Absolute Error is {btc_mae:.2f}\n'
f'The Mean Squared Error is {btc_rmse:.2f}\n'
f'The R2 Score is {btc_r_square:.2f}')

The Mean Ablolute Error is 134.66
The Mean Squared Error is 187.08
The R2 Score is 0.99
