In [1]:
import yfinance as yf
from datetime import datetime, timedelta
from scipy import stats
import numpy as np
import pandas as pd
import statsmodels.api as sm

import plotly.graph_objects as go
from plotly.subplots import make_subplots
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error

In [2]:
# Market symbols and date window shared by the Bitcoin and Ethereum downloads
btc_ticker, eth_ticker = "BTC-USD", "ETH-USD"
start_date = "2024-01-01"
# The window closes on yesterday's date, rendered as YYYY-MM-DD
end_date = (datetime.now() - timedelta(days=1)).date().isoformat()

In [3]:
# Fetch both price histories over the same date window (Bitcoin first, then Ethereum)
btc_df, eth_df = (
    yf.download(symbol, start=start_date, end=end_date)
    for symbol in (btc_ticker, eth_ticker)
)

[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


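The Average True Range (ATR) measures market volatility. For each day, the true range is the largest of three values: the high minus the low, the absolute difference between the high and the previous close, and the absolute difference between the low and the previous close. The ATR is the rolling mean of the true range (here over 20 days), and the upper and lower bands are the close plus and minus twice the ATR.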

In [4]:
def calculate_atr(df, window=20, multiplier=2):
    """Add true-range, ATR and ATR-band columns to a copy of a price frame.

    The caller's frame is left untouched; all new columns are written to a
    copy, so calling this function never changes its input.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain 'High', 'Low' and 'Close' columns.
    window : int, default 20
        Number of rows in the rolling mean of the true range.
    multiplier : float, default 2
        How many ATRs the upper/lower bands sit above/below the close.

    Returns
    -------
    pandas.DataFrame
        Copy of ``df`` with 'High_Low', 'High_Closing', 'Low_Closing', 'ATR',
        'Upper_ATR' and 'Lower_ATR' added, with every row containing a NaN in
        any column removed (this includes the warm-up rows of the rolling mean).
    """
    out = df.copy()
    prev_close = out['Close'].shift(1)

    # The three candidates whose row-wise maximum is the true range.
    out['High_Low'] = out['High'] - out['Low']
    out['High_Closing'] = np.abs(out['High'] - prev_close)
    out['Low_Closing'] = np.abs(out['Low'] - prev_close)

    true_range = out[['High_Low', 'High_Closing', 'Low_Closing']].max(axis=1)
    out['ATR'] = true_range.rolling(window=window).mean()

    band_width = multiplier * out['ATR']
    out['Upper_ATR'] = out['Close'] + band_width
    out['Lower_ATR'] = out['Close'] - band_width
    return out.dropna()


In [5]:
# Replace each raw price frame with its ATR-enriched, NaN-free version
btc_df, eth_df = map(calculate_atr, (btc_df, eth_df))

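Kernel regression is a non-parametric method: it estimates a smooth price curve from historical data without assuming a fixed functional form. The fitted curve is then evaluated past the last observation to produce forecasts.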

In [6]:
def kernel_regression(df, bandwidth, horizon=30):
    """Fit a kernel regression of 'Close' on row position and extrapolate it.

    The regressor is the integer position of each row (0, 1, 2, ...), so one
    forecast step corresponds to one row after the last observation.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a 'Close' column; its index must support adding a
        ``pd.Timedelta`` (the last index value anchors the forecast dates).
    bandwidth : float
        Kernel bandwidth, passed to ``KernelReg`` as ``bw=[bandwidth]``.
    horizon : int, default 30
        Number of steps to forecast beyond the last row.

    Returns
    -------
    y_hat : numpy.ndarray
        Fitted values at the observed positions.
    future_dates : list
        ``horizon`` consecutive daily timestamps starting the day after
        ``df.index[-1]``; always the same length as ``future_predictions``.
    future_predictions : numpy.ndarray
        Model output at the ``horizon`` positions following the data.
    """
    X_kr = np.arange(len(df)).reshape(-1, 1)
    y_kr = df['Close'].values

    kernel_model = sm.nonparametric.KernelReg(endog=y_kr, exog=X_kr, var_type='c', bw=[bandwidth])
    # fit() returns a pair; only the first element (the estimate) is used.
    y_hat, _ = kernel_model.fit(X_kr)

    future_days = np.arange(len(df), len(df) + horizon).reshape(-1, 1)
    future_predictions, _ = kernel_model.fit(future_days)

    # One date per forecast step. Weekends are deliberately kept: filtering
    # them out left fewer dates than predictions, so forecasts were paired
    # with the wrong dates and the tail was dropped when plotted.
    future_dates = list(
        pd.date_range(start=df.index[-1] + pd.Timedelta(days=1), periods=horizon)
    )

    return y_hat, future_dates, future_predictions


In [7]:
# Fit and forecast each asset with the same kernel bandwidth
btc_y_hat, btc_future_dates, btc_future_predictions = kernel_regression(
    df=btc_df, bandwidth=5
)
eth_y_hat, eth_future_dates, eth_future_predictions = kernel_regression(
    df=eth_df, bandwidth=5
)

Display actual prices, ATR bands, and Kernel Regression forecasts along with future predictions.

In [8]:
def plot_results(df, y_hat, future_dates, future_predictions, title):
    """Show one figure with prices, ATR bands, the fitted curve and the forecast.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs 'Close', 'Upper_ATR' and 'Lower_ATR' columns; its index is the x-axis.
    y_hat : array-like
        Fitted values plotted against ``df.index``.
    future_dates, future_predictions : array-like
        Forecast x and y values; ``future_predictions`` must support
        ``.min()`` / ``.max()``.
    title : str
        Figure title.
    """
    dates = df.index
    closes = df['Close']
    upper_band = df['Upper_ATR']
    lower_band = df['Lower_ATR']

    # Traces are added in this order, which is also the legend order.
    traces = [
        go.Scatter(x=dates, y=closes, mode='markers', name='Actual Prices'),
        go.Scatter(x=dates, y=upper_band, mode='lines', name='Upper ATR Range'),
        go.Scatter(x=dates, y=lower_band, mode='lines', name='Lower ATR Range'),
        go.Scatter(x=dates, y=y_hat, mode='lines', name='Kernel Regression', line=dict(color='orange')),
        go.Scatter(x=future_dates, y=future_predictions, mode='markers', name='Future Predictions'),
    ]

    fig = make_subplots(rows=1, cols=1)
    for trace in traces:
        fig.add_trace(trace)

    fig.update_layout(
        title=title,
        xaxis_title='Date',
        yaxis_title='Price',
        legend_title='Legend',
        hovermode="closest",
    )

    # Pad the y-axis 10% beyond the extreme of prices, lower/upper band and forecast.
    lowest = min(closes.min(), lower_band.min(), future_predictions.min())
    highest = max(closes.max(), upper_band.max(), future_predictions.max())
    fig.update_yaxes(range=[lowest * 0.9, highest * 1.1])
    fig.show()


In [9]:
# One figure per asset; the title is the only argument that is not asset data.
plot_results(btc_df, btc_y_hat, btc_future_dates, btc_future_predictions, 'Bitcoin Price Prediction using ATR and Kernel Regression')
plot_results(eth_df, eth_y_hat, eth_future_dates, eth_future_predictions, 'Ethereum Price Prediction using ATR and Kernel Regression')

### Evaluate Model Performance

In [10]:
def evaluate_model(y_true, y_pred):
    """Score predictions against observed values.

    Both inputs are converted to float arrays first, so values are compared
    by position. (Subtracting two pandas Series aligns them by index label,
    which would corrupt the percentage error when the labels differ.)

    Parameters
    ----------
    y_true, y_pred : array-like
        Observed and predicted values with identical shapes.

    Returns
    -------
    tuple
        ``(r2, mae, rmse, mape)`` where ``mape`` is expressed in percent.
        ``mape`` is not finite if ``y_true`` contains zeros.

    Raises
    ------
    ValueError
        If the two inputs do not have the same shape.
    """
    actual = np.asarray(y_true, dtype=float)
    fitted = np.asarray(y_pred, dtype=float)
    if actual.shape != fitted.shape:
        # Guard against silent NumPy broadcasting, e.g. (n, 1) against (n,).
        raise ValueError(
            f"y_true and y_pred must have the same shape, got {actual.shape} and {fitted.shape}"
        )

    r2 = r2_score(actual, fitted)
    mae = mean_absolute_error(actual, fitted)
    rmse = np.sqrt(mean_squared_error(actual, fitted))
    mape = np.mean(np.abs((actual - fitted) / actual)) * 100
    return r2, mae, rmse, mape

In [11]:
# In-sample scores: fitted curve versus the closes it was fitted on
btc_r2, btc_mae, btc_rmse, btc_mape = evaluate_model(
    y_true=btc_df['Close'], y_pred=btc_y_hat
)
eth_r2, eth_mae, eth_rmse, eth_mape = evaluate_model(
    y_true=eth_df['Close'], y_pred=eth_y_hat
)

In [12]:
# Bitcoin metrics, one per line
print(
    "\nBitcoin Model Performance Metrics:",
    f"R-squared Score: {btc_r2:.4f}",
    f"Mean Absolute Error: ${btc_mae:.2f}",
    f"Root Mean Squared Error: ${btc_rmse:.2f}",
    f"Mean Absolute Percentage Error: {btc_mape:.2f}%",
    sep="\n",
)


Bitcoin Model Performance Metrics:
R-squared Score: 0.9496
Mean Absolute Error: $1427.32
Root Mean Squared Error: $1854.39
Mean Absolute Percentage Error: 2.34%


In [13]:
# Ethereum metrics, one per line
print(
    "\nEthereum Model Performance Metrics:",
    f"R-squared Score: {eth_r2:.4f}",
    f"Mean Absolute Error: ${eth_mae:.2f}",
    f"Root Mean Squared Error: ${eth_rmse:.2f}",
    f"Mean Absolute Percentage Error: {eth_mape:.2f}%",
    sep="\n",
)


Ethereum Model Performance Metrics:
R-squared Score: 0.9322
Mean Absolute Error: $90.42
Root Mean Squared Error: $117.52
Mean Absolute Percentage Error: 2.85%


Calculate a 95% margin of error from the in-sample residuals of the fitted curve, to understand the range around our predictions within which the actual values are likely to fall.

In [14]:
# Two-sided confidence level (0.95 = 95%) read by the margin-of-error calculation below
confidence_level = 0.95

In [15]:
def calculate_confidence_interval(df, y_hat, confidence=None):
    """Return the two-sided margin of error implied by the fit residuals.

    The margin is ``t * s`` where ``s = sqrt(sum(residual**2) / (n - 2))`` and
    ``t`` is the Student-t quantile at ``(1 + confidence) / 2`` with ``n - 2``
    degrees of freedom.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a 'Close' column with the observed values.
    y_hat : array-like
        Fitted values, compared with 'Close' by position.
    confidence : float, optional
        Confidence level in (0, 1). When omitted, the notebook-level
        ``confidence_level`` variable is used.

    Returns
    -------
    float
        Half-width of the interval, in the units of 'Close'.

    Raises
    ------
    ValueError
        If ``df`` has fewer than 3 rows, which leaves no degrees of freedom.
    """
    if confidence is None:
        confidence = confidence_level

    degrees_of_freedom = len(df) - 2
    if degrees_of_freedom <= 0:
        raise ValueError(
            "at least 3 observations are required to estimate the residual standard error"
        )

    t_value = stats.t.ppf((1 + confidence) / 2, degrees_of_freedom)
    # Convert to arrays so the subtraction is positional rather than
    # aligned on pandas index labels.
    residuals = np.asarray(df['Close'], dtype=float) - np.asarray(y_hat, dtype=float)
    std_error = np.sqrt(np.sum(residuals**2) / degrees_of_freedom)
    return t_value * std_error

In [16]:
# Margin of error around each asset's fitted curve
btc_margin_of_error = calculate_confidence_interval(df=btc_df, y_hat=btc_y_hat)
eth_margin_of_error = calculate_confidence_interval(df=eth_df, y_hat=eth_y_hat)

In [17]:
# Report both margins, one asset per line
print(
    f"\nBitcoin {confidence_level*100}% Confidence Interval: +/- ${btc_margin_of_error:.2f}",
    f"Ethereum {confidence_level*100}% Confidence Interval: +/- ${eth_margin_of_error:.2f}",
    sep="\n",
)


Bitcoin 95.0% Confidence Interval: +/- $3673.34
Ethereum 95.0% Confidence Interval: +/- $232.80
