In [16]:
import numpy as np
import pandas as pd
import hvplot.pandas
from pathlib import Path
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report
import yfinance as yf

In [27]:
# Pull in data using Yahoo finance API
data = yf.download(tickers=['BTC-USD','ETH-USD'], period = '90d', interval = '1d')
data['Close']

[*********************100%***********************]  2 of 2 completed


Unnamed: 0_level_0,BTC-USD,ETH-USD
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2021-09-13,44963.074219,3285.511719
2021-09-14,47092.492188,3429.169678
2021-09-15,48176.347656,3615.282715
2021-09-16,47783.359375,3571.294922
2021-09-17,47267.519531,3398.538818
...,...,...
2021-12-07,50700.085938,4315.061523
2021-12-08,50504.796875,4439.357910
2021-12-09,47672.121094,4119.815918
2021-12-10,47243.304688,3908.496094


In [33]:
# Create df with yahoo finance close prices
crypto_price_df = data['Close'].pct_change()
crypto_price_df.dropna()

Unnamed: 0_level_0,BTC-USD,ETH-USD
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2021-09-14,0.047359,0.043725
2021-09-15,0.023015,0.054273
2021-09-16,-0.008157,-0.012167
2021-09-17,-0.010795,-0.048374
2021-09-18,0.021386,0.009851
...,...,...
2021-12-07,0.002322,-0.010020
2021-12-08,-0.003852,0.028805
2021-12-09,-0.056087,-0.071979
2021-12-10,-0.008995,-0.051294


In [34]:
# Graph Correlation between BTC & ETH
crypto_corr = crypto_price_df.corr()
crypto_corr_plot = crypto_corr.hvplot.heatmap()
crypto_corr_plot

In [25]:
# Read in .csv file using bloomberg static data
df = pd.read_csv('data.csv', index_col = 'Date')

#DROP S&P500 AND TSY COLUMNS FOR CUPCAKE. FEED DATA IN WHEN WE GET
df.drop(columns = ['ETH','SPY','TSY'],inplace=True)

#DROP NULLS
df.dropna()

Unnamed: 0_level_0,BTC,GOLD,OIL
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
12/9/2021,49234.88,1775.76,71.61
12/8/2021,50634.60,1782.99,72.36
12/7/2021,50527.71,1784.13,72.05
12/6/2021,50119.66,1778.67,69.49
12/3/2021,53655.26,1783.29,66.26
...,...,...,...
2/14/2018,9287.96,1350.73,50.56
2/13/2018,8557.91,1329.55,50.55
2/12/2018,8831.25,1322.70,49.90
2/9/2018,8552.65,1316.65,49.73


In [3]:
df.head()

Unnamed: 0_level_0,BTC,GOLD,OIL
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
12/9/2021,49234.88,1775.76,71.61
12/8/2021,50634.6,1782.99,72.36
12/7/2021,50527.71,1784.13,72.05
12/6/2021,50119.66,1778.67,69.49
12/3/2021,53655.26,1783.29,66.26


In [26]:
# Convert df to % change for analysis
pct_chg_df = df.pct_change()
pct_chg_df = pct_chg_df.dropna()

In [5]:
# Define features set
X = pct_chg_df.copy()
X.drop("BTC", axis=1, inplace=True)
X.head()

Unnamed: 0_level_0,GOLD,OIL
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
12/8/2021,0.004071,0.010473
12/7/2021,0.000639,-0.004284
12/6/2021,-0.00306,-0.035531
12/3/2021,0.002597,-0.046482
12/2/2021,-0.008159,0.003622


In [6]:
# Define target vector
y = pct_chg_df["BTC"].values.reshape(-1, 1)
y[:5]

array([[ 0.02842944],
       [-0.00211101],
       [-0.00807577],
       [ 0.07054318],
       [ 0.06053796]])

In [7]:
# Splitting into Train and Test sets

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=78)


In [8]:
rf_model = RandomForestRegressor(n_estimators=500, random_state=42)

In [9]:
# Fit the model
rf_model = rf_model.fit(X_train, y_train)

  


In [10]:
# Making predictions using the testing data
predictions = rf_model.predict(X_test)

In [11]:
# #Compare predictions vs actual
# y_test_df = pd.DataFrame(y_test)
# y_test_df['predict']=predictions
# y_test_df.hvplot()

In [12]:
# Get the feature importance array
importances = rf_model.feature_importances_

In [13]:
# List the top 10 most important features
importances_sorted = sorted(zip(rf_model.feature_importances_, X.columns), reverse=True)
importances_sorted[:10]

[(0.5299608400786076, 'GOLD'), (0.47003915992139256, 'OIL')]

In [14]:
rf_model.score(X_test, y_test)

-0.11087545425032386