In [1]:
# Import necessary libraries
import yfinance as yf
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_percentage_error, mean_squared_error
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# Downloading historical data from Yahoo Finance
btc_data = yf.download('BTC-USD', start='2010-03-31', end='2025-05-15')

YF.download() has changed argument auto_adjust default to True


[*********************100%***********************]  1 of 1 completed


In [3]:
btc_data.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 3893 entries, 2014-09-17 to 2025-05-14
Data columns (total 5 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   (Close, BTC-USD)   3893 non-null   float64
 1   (High, BTC-USD)    3893 non-null   float64
 2   (Low, BTC-USD)     3893 non-null   float64
 3   (Open, BTC-USD)    3893 non-null   float64
 4   (Volume, BTC-USD)  3893 non-null   int64  
dtypes: float64(4), int64(1)
memory usage: 182.5 KB


In [4]:
# Save the data to a CSV file
btc_data.to_csv('bitcoin_data.csv')

In [10]:
# Load the data
data = pd.read_csv('bitcoin_data.csv',header=[0,1,2],index_col=0)

In [13]:
# Flatten multi-index columns by joining them with '_'
data.columns = ['_'.join(col).strip() for col in data.columns.values]

In [14]:
data.isnull().sum()

Unnamed: 0,0
Close_BTC-USD_Unnamed: 1_level_2,0
High_BTC-USD_Unnamed: 2_level_2,0
Low_BTC-USD_Unnamed: 3_level_2,0
Open_BTC-USD_Unnamed: 4_level_2,0
Volume_BTC-USD_Unnamed: 5_level_2,0


In [15]:
data.head()

Unnamed: 0,Close_BTC-USD_Unnamed: 1_level_2,High_BTC-USD_Unnamed: 2_level_2,Low_BTC-USD_Unnamed: 3_level_2,Open_BTC-USD_Unnamed: 4_level_2,Volume_BTC-USD_Unnamed: 5_level_2
2014-09-17,457.334015,468.174011,452.421997,465.864014,21056800
2014-09-18,424.440002,456.859985,413.104004,456.859985,34483200
2014-09-19,394.79599,427.834991,384.532013,424.102997,37919700
2014-09-20,408.903992,423.29599,389.882996,394.673004,36863600
2014-09-21,398.821014,412.425995,393.181,408.084991,26580100


In [16]:
data.rename(columns={'Close_BTC-USD_Unnamed: 1_level_2':'Close','High_BTC-USD_Unnamed: 2_level_2':'High','Low_BTC-USD_Unnamed: 3_level_2':'Low','Open_BTC-USD_Unnamed: 4_level_2':'Open','Volume_BTC-USD_Unnamed: 5_level_2':'Volume'},inplace=True)

In [17]:
data.head()

Unnamed: 0,Close,High,Low,Open,Volume
2014-09-17,457.334015,468.174011,452.421997,465.864014,21056800
2014-09-18,424.440002,456.859985,413.104004,456.859985,34483200
2014-09-19,394.79599,427.834991,384.532013,424.102997,37919700
2014-09-20,408.903992,423.29599,389.882996,394.673004,36863600
2014-09-21,398.821014,412.425995,393.181,408.084991,26580100


In [18]:
data.isnull().sum()

Unnamed: 0,0
Close,0
High,0
Low,0
Open,0
Volume,0


In [19]:
# Data preprocessing
features = data[['Open', 'High', 'Low', 'Close', 'Volume']]
target = data['Close']

In [20]:
target.head()

Unnamed: 0,Close
2014-09-17,457.334015
2014-09-18,424.440002
2014-09-19,394.79599
2014-09-20,408.903992
2014-09-21,398.821014


In [21]:
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.2, random_state=42)

In [22]:
# Random Forest Regression
rf_model = RandomForestRegressor(n_estimators=500, max_depth=10, random_state=42)
rf_model.fit(X_train, y_train)
rf_predictions = rf_model.predict(X_test)

In [24]:
# Evaluate model
mape_rf = mean_absolute_percentage_error(y_test, rf_predictions)
# Calculate Mean Squared Error (MSE)
mse_rf = mean_squared_error(y_test, rf_predictions)
# Calculate Root Mean Squared Error (RMSE) by taking the square root of MSE
rmse_rf = np.sqrt(mse_rf)

print(f"Random Forest - MAPE: {mape_rf}, RMSE: {rmse_rf}")

Random Forest - MAPE: 0.002460418039211802, RMSE: 65.59271531129232


| Metric | Meaning                          | Your Value       | Interpretation                         |
| ------ | -------------------------------- | ---------------- | ---------------------------------------- |
| MAPE   | Average percentage error         | 0.00246 (0.246%) | Very accurate predictions                |
| RMSE   | Average prediction error (in \$) | 65.59            | Depends on price scale, likely low error |
