# Random Forest Regressor Model

In [1]:
# Import the dependencies
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error,mean_squared_error,r2_score
import warnings
warnings.filterwarnings('ignore')

In [2]:
# Read the training data from CSV and preview its first ten rows
crypto_df = pd.read_csv(filepath_or_buffer="resources/train.csv")
crypto_df.head(n=10)

FileNotFoundError: [Errno 2] No such file or directory: 'resources/train.csv'

In [None]:
# Read the asset metadata and display it ordered by ascending Asset_ID
asset_df = pd.read_csv('resources/asset_details.csv')
asset_df.sort_values('Asset_ID')

## Cleaning the data

In [None]:
# Summarise every column's dtype together with its non-null count
crypto_df.info(show_counts=True)

In [None]:
# Count the missing values column by column
crypto_df.isnull().sum(axis=0)

In [None]:
# Missing Target values become 0; every other column is left untouched
crypto_df["Target"] = crypto_df["Target"].fillna(value=0)
crypto_df.head(n=10)

In [None]:
# Display the rows whose VWAP is missing
crypto_df.loc[crypto_df["VWAP"].isnull()]

In [None]:
# Binance Coin (Asset_ID 0): index its rows by timestamp, then rebuild the
# index on a regular 60-second grid; timestamps absent from the data take the
# values of the previous row (method='pad').
BNB = crypto_df.loc[crypto_df["Asset_ID"] == 0].set_index("timestamp")
BNB = BNB.reindex(
    range(BNB.index[0], BNB.index[-1] + 60, 60),
    method='pad',
)
# Tally the differences between consecutive timestamps to verify the grid
(BNB.index[1:] - BNB.index[:-1]).value_counts()

In [None]:
# Bitcoin (Asset_ID 1): index by timestamp and forward-fill onto a regular
# 60-second grid spanning its first to last timestamp.
BTC = crypto_df.loc[crypto_df["Asset_ID"] == 1].set_index("timestamp")
BTC = BTC.reindex(
    range(BTC.index[0], BTC.index[-1] + 60, 60),
    method='pad',
)
# Tally the differences between consecutive timestamps to verify the grid
(BTC.index[1:] - BTC.index[:-1]).value_counts()

In [None]:
# EOS.io (Asset_ID 5): index by timestamp and forward-fill onto a regular
# 60-second grid spanning its first to last timestamp.
EOS = crypto_df.loc[crypto_df["Asset_ID"] == 5].set_index("timestamp")
EOS = EOS.reindex(
    range(EOS.index[0], EOS.index[-1] + 60, 60),
    method='pad',
)
# Tally the differences between consecutive timestamps to verify the grid
(EOS.index[1:] - EOS.index[:-1]).value_counts()

In [None]:
# Ethereum (Asset_ID 6): index by timestamp and forward-fill onto a regular
# 60-second grid spanning its first to last timestamp.
ETH = crypto_df.loc[crypto_df["Asset_ID"] == 6].set_index("timestamp")
ETH = ETH.reindex(
    range(ETH.index[0], ETH.index[-1] + 60, 60),
    method='pad',
)
# Tally the differences between consecutive timestamps to verify the grid
(ETH.index[1:] - ETH.index[:-1]).value_counts()

In [None]:
# Litecoin (Asset_ID 9): index by timestamp and forward-fill onto a regular
# 60-second grid spanning its first to last timestamp.
LTC = crypto_df.loc[crypto_df["Asset_ID"] == 9].set_index("timestamp")
LTC = LTC.reindex(
    range(LTC.index[0], LTC.index[-1] + 60, 60),
    method='pad',
)
# Tally the differences between consecutive timestamps to verify the grid
(LTC.index[1:] - LTC.index[:-1]).value_counts()

In [None]:
# Monero (Asset_ID 11): index by timestamp and forward-fill onto a regular
# 60-second grid spanning its first to last timestamp.
XMR = crypto_df.loc[crypto_df["Asset_ID"] == 11].set_index("timestamp")
XMR = XMR.reindex(
    range(XMR.index[0], XMR.index[-1] + 60, 60),
    method='pad',
)
# Tally the differences between consecutive timestamps to verify the grid
(XMR.index[1:] - XMR.index[:-1]).value_counts()

In [None]:
# Check whether any missing values remain in the chosen assets (Binance Coin)
BNB.isnull().sum(axis=0)

In [None]:
# Per-column count of missing values left in the Bitcoin frame
BTC.isnull().sum(axis=0)

In [None]:
# Per-column count of missing values left in the EOS.io frame
EOS.isnull().sum(axis=0)

In [None]:
# Per-column count of missing values left in the Ethereum frame
ETH.isnull().sum(axis=0)

In [None]:
# Per-column count of missing values left in the Litecoin frame
LTC.isnull().sum(axis=0)

In [None]:
# Per-column count of missing values left in the Monero frame
XMR.isnull().sum(axis=0)

In [None]:
# Stack the six per-asset frames row-wise into a single frame
frames = [
    BNB,
    BTC,
    EOS,
    ETH,
    LTC,
    XMR,
]
crypto_con_df = pd.concat(objs=frames)
crypto_con_df.head(n=10)

In [None]:
# Order the combined rows by their index (the timestamp), oldest first
crypto_con_df = crypto_con_df.sort_index(axis=0, ascending=True)
crypto_con_df.head(n=10)

In [None]:
# Show the index value at row position 7793280 as a second-resolution datetime64.
# NOTE(review): assumes the index lookup returns a NumPy scalar (it needs .astype) — confirm.
crypto_con_df.index[7793280].astype('datetime64[s]')

In [None]:
# Discard the first 7793280 rows (by position) and keep everything after them
crypto_con_df = crypto_con_df.iloc[7793280:, :]
crypto_con_df.head(n=10)

In [None]:
# Decode the Unix timestamp 1592697660 as a naive UTC datetime and print it
cutoff_time = datetime.utcfromtimestamp(1592697660)
print(cutoff_time)

In [None]:
# Move the timestamp index back into a regular column and renumber the rows
crypto_con_df.reset_index(drop=False, inplace=True)
crypto_con_df.head(n=10)

In [None]:
# Add a 'date' column holding the timestamp (in seconds) as a datetime
unix_seconds = crypto_con_df['timestamp']
crypto_con_df['date'] = pd.to_datetime(unix_seconds, unit='s')
crypto_con_df.head(n=10)

In [None]:
# Inspect the column dtypes and non-null counts of the combined frame
crypto_con_df.info(
    show_counts=True,
)

In [None]:
# Left-join the asset details onto every price row, matching on Asset_ID
crypto_details_df = crypto_con_df.merge(
    right=asset_df,
    on="Asset_ID",
    how='left',
)
crypto_details_df.head(n=10)

In [None]:
# Arrange the columns: time fields, asset identity, then prices, volume and target
crypto_details_df = crypto_details_df.reindex(
    columns=[
        'timestamp',
        'date',
        'Asset_ID',
        'Asset_Name',
        'Weight',
        'Count',
        'Open',
        'High',
        'Low',
        'Close',
        'Volume',
        'VWAP',
        'Target',
    ]
)
crypto_details_df.head(n=10)

In [None]:
# Write the merged frame to CSV, leaving out the row index
output_data_file = "resources/crypto_details_close.csv"
crypto_details_df.to_csv(path_or_buf=output_data_file, index=False)

## Overview of the Data

In [None]:
# Report the (rows, columns) shape of the merged frame
frame_shape = crypto_details_df.shape
print(f'Shape of Dataframe {frame_shape}')

In [None]:
# Draw the Close price of every cryptocurrency on one shared figure
plt.figure(figsize=(15, 6))

# One (frame, legend label, line colour) entry per asset, in drawing order
close_lines = (
    (BNB, 'BNB', "red"),
    (BTC, 'BTC', "blue"),
    (EOS, 'EOS', "green"),
    (ETH, 'ETH', "yellow"),
    (LTC, 'LTC', "black"),
    (XMR, 'XMR', "purple"),
)
for asset_frame, asset_label, line_color in close_lines:
    plt.plot(asset_frame['Close'], label=asset_label, color=line_color)

plt.legend()
plt.xlabel('Time')
plt.ylabel('Close Price')

plt.tight_layout()
plt.show()

In [None]:
# Draw the Target column of every cryptocurrency on one shared figure
plt.figure(figsize=(15, 6))

# One (frame, legend label, line colour) entry per asset, in drawing order
target_lines = (
    (BNB, 'BNB', "red"),
    (BTC, 'BTC', "blue"),
    (EOS, 'EOS', "green"),
    (ETH, 'ETH', "yellow"),
    (LTC, 'LTC', "black"),
    (XMR, 'XMR', "purple"),
)
for asset_frame, asset_label, line_color in target_lines:
    plt.plot(asset_frame['Target'], label=asset_label, color=line_color)

plt.legend()
plt.xlabel('Time')
plt.ylabel('Target - 15 minute residual return')

plt.tight_layout()
plt.show()

In [None]:
# Candlestick chart of the last 200 Binance Coin rows
import plotly.graph_objects as go

BNB_mini = BNB.iloc[-200:]
fig = go.Figure(
    data=[
        go.Candlestick(
            x=BNB_mini.index,
            open=BNB_mini['Open'],
            high=BNB_mini['High'],
            low=BNB_mini['Low'],
            close=BNB_mini['Close'],
        )
    ]
)
fig.show()

In [None]:
# Candlestick chart of the last 200 Bitcoin rows
BTC_mini = BTC.iloc[-200:]
fig = go.Figure(
    data=[
        go.Candlestick(
            x=BTC_mini.index,
            open=BTC_mini['Open'],
            high=BTC_mini['High'],
            low=BTC_mini['Low'],
            close=BTC_mini['Close'],
        )
    ]
)
fig.show()

In [None]:
# Candlestick chart of the last 200 EOS.io rows
EOS_mini = EOS.iloc[-200:]
fig = go.Figure(
    data=[
        go.Candlestick(
            x=EOS_mini.index,
            open=EOS_mini['Open'],
            high=EOS_mini['High'],
            low=EOS_mini['Low'],
            close=EOS_mini['Close'],
        )
    ]
)
fig.show()

In [None]:
# Candlestick chart of the last 200 Ethereum rows
ETH_mini = ETH.iloc[-200:]
fig = go.Figure(
    data=[
        go.Candlestick(
            x=ETH_mini.index,
            open=ETH_mini['Open'],
            high=ETH_mini['High'],
            low=ETH_mini['Low'],
            close=ETH_mini['Close'],
        )
    ]
)
fig.show()

In [None]:
# Candlestick chart of the last 200 Litecoin rows
LTC_mini = LTC.iloc[-200:]
fig = go.Figure(
    data=[
        go.Candlestick(
            x=LTC_mini.index,
            open=LTC_mini['Open'],
            high=LTC_mini['High'],
            low=LTC_mini['Low'],
            close=LTC_mini['Close'],
        )
    ]
)
fig.show()

In [None]:
# Candlestick chart of the last 200 Monero rows
XMR_mini = XMR.iloc[-200:]
fig = go.Figure(
    data=[
        go.Candlestick(
            x=XMR_mini.index,
            open=XMR_mini['Open'],
            high=XMR_mini['High'],
            low=XMR_mini['Low'],
            close=XMR_mini['Close'],
        )
    ]
)
fig.show()

## Preprocessing the Data

In [None]:
# Binance Coin: positional train/test split.
# Training rows are positions 1298880..1824479 (per the original notes:
# 2020-06-21 00:01:00 to 2021-06-21 00:00:00); everything from position
# 1824480 onward is the test set (2021-06-21 00:01:00 to 2021-09-21 00:00:00).
BNB_train = BNB.iloc[1298880:1824480]
BNB_test = BNB.iloc[1824480:]
# Features are Volume and VWAP; the value to predict is Close
X_BNB_train, X_BNB_test = BNB_train[["Volume", "VWAP"]], BNB_test[["Volume", "VWAP"]]
y_BNB_train, y_BNB_test = BNB_train["Close"], BNB_test["Close"]

In [None]:
# Bitcoin: positional train/test split.
# Training rows are positions 1298880..1824479 (per the original notes:
# 2020-06-21 00:01:00 to 2021-06-21 00:00:00); everything from position
# 1824480 onward is the test set (2021-06-21 00:01:00 to 2021-09-21 00:00:00).
BTC_train = BTC.iloc[1298880:1824480]
BTC_test = BTC.iloc[1824480:]
# Features are Volume and VWAP; the value to predict is Close
X_BTC_train, X_BTC_test = BTC_train[["Volume", "VWAP"]], BTC_test[["Volume", "VWAP"]]
y_BTC_train, y_BTC_test = BTC_train["Close"], BTC_test["Close"]

In [None]:
# EOS.io: positional train/test split.
# Training rows are positions 1298880..1824479 (per the original notes:
# 2020-06-21 00:01:00 to 2021-06-21 00:00:00); everything from position
# 1824480 onward is the test set (2021-06-21 00:01:00 to 2021-09-21 00:00:00).
EOS_train = EOS.iloc[1298880:1824480]
EOS_test = EOS.iloc[1824480:]
# Features are Volume and VWAP; the value to predict is Close
X_EOS_train, X_EOS_test = EOS_train[["Volume", "VWAP"]], EOS_test[["Volume", "VWAP"]]
y_EOS_train, y_EOS_test = EOS_train["Close"], EOS_test["Close"]

In [None]:
# Ethereum: positional train/test split.
# Training rows are positions 1298880..1824479 (per the original notes:
# 2020-06-21 00:01:00 to 2021-06-21 00:00:00); everything from position
# 1824480 onward is the test set (2021-06-21 00:01:00 to 2021-09-21 00:00:00).
ETH_train = ETH.iloc[1298880:1824480]
ETH_test = ETH.iloc[1824480:]
# Features are Volume and VWAP; the value to predict is Close
X_ETH_train, X_ETH_test = ETH_train[["Volume", "VWAP"]], ETH_test[["Volume", "VWAP"]]
y_ETH_train, y_ETH_test = ETH_train["Close"], ETH_test["Close"]

In [None]:
# Litecoin: positional train/test split.
# Training rows are positions 1298880..1824479 (per the original notes:
# 2020-06-21 00:01:00 to 2021-06-21 00:00:00); everything from position
# 1824480 onward is the test set (2021-06-21 00:01:00 to 2021-09-21 00:00:00).
LTC_train = LTC.iloc[1298880:1824480]
LTC_test = LTC.iloc[1824480:]
# Features are Volume and VWAP; the value to predict is Close
X_LTC_train, X_LTC_test = LTC_train[["Volume", "VWAP"]], LTC_test[["Volume", "VWAP"]]
y_LTC_train, y_LTC_test = LTC_train["Close"], LTC_test["Close"]

In [None]:
# Monero: positional train/test split.
# Training rows are positions 1298880..1824479 (per the original notes:
# 2020-06-21 00:01:00 to 2021-06-21 00:00:00); everything from position
# 1824480 onward is the test set (2021-06-21 00:01:00 to 2021-09-21 00:00:00).
XMR_train = XMR.iloc[1298880:1824480]
XMR_test = XMR.iloc[1824480:]
# Features are Volume and VWAP; the value to predict is Close
X_XMR_train, X_XMR_test = XMR_train[["Volume", "VWAP"]], XMR_test[["Volume", "VWAP"]]
y_XMR_train, y_XMR_test = XMR_train["Close"], XMR_test["Close"]

## Check shape of data

In [None]:
# Report the shapes of the Monero train/test features and targets.
# The last label now names y_XMR_test, the value actually printed
# (it previously read "y_LTC_test").
print(f'shape of X_XMR_train : {X_XMR_train.shape}\nshape of X_XMR_test : {X_XMR_test.shape}\nshape of y_XMR_train : {y_XMR_train.shape}\nshape of y_XMR_test : {y_XMR_test.shape}')

In [None]:
# Decode the timestamp sitting at the train/test boundary position (1824480)
# of the Monero frame as a naive UTC datetime
boundary_ts = XMR.index[1824480]
print(datetime.utcfromtimestamp(boundary_ts))

## Scale the training and testing data

In [None]:
# Create a StandardScaler instance (reused, and refitted, for each asset in turn)
scaler = StandardScaler()

# Scale the features of the six cryptocurrencies.
# For every asset the scaler is fitted on the TRAINING split only and those
# fitted statistics are then applied to the test split with transform().
# Calling fit_transform() on the test split would refit the scaler on test
# data, putting train and test on different scales and leaking test-set
# statistics into preprocessing.
X_BNB_train_scaled = scaler.fit_transform(X_BNB_train)
X_BNB_test_scaled = scaler.transform(X_BNB_test)

X_BTC_train_scaled = scaler.fit_transform(X_BTC_train)
X_BTC_test_scaled = scaler.transform(X_BTC_test)

X_EOS_train_scaled = scaler.fit_transform(X_EOS_train)
X_EOS_test_scaled = scaler.transform(X_EOS_test)

X_ETH_train_scaled = scaler.fit_transform(X_ETH_train)
X_ETH_test_scaled = scaler.transform(X_ETH_test)

X_LTC_train_scaled = scaler.fit_transform(X_LTC_train)
X_LTC_test_scaled = scaler.transform(X_LTC_test)

# NOTE(review): the Monero lines were commented out, yet the model cell reads
# X_XMR_train_scaled / X_XMR_test_scaled and would raise NameError without
# them. Re-enabled here — confirm the XMR features are clean enough to model.
X_XMR_train_scaled = scaler.fit_transform(X_XMR_train)
X_XMR_test_scaled = scaler.transform(X_XMR_test)

## Random Forest Regression

In [None]:
# One RandomForestRegressor with default settings is refitted for each asset
# in turn; the predictions for an asset are taken right after its own fit,
# before the next fit replaces the trained trees.
rf_model = RandomForestRegressor()

# Binance Coin
y_pred_BNB = rf_model.fit(X_BNB_train_scaled, y_BNB_train).predict(X_BNB_test_scaled)

# Bitcoin
y_pred_BTC = rf_model.fit(X_BTC_train_scaled, y_BTC_train).predict(X_BTC_test_scaled)

# EOS.io
y_pred_EOS = rf_model.fit(X_EOS_train_scaled, y_EOS_train).predict(X_EOS_test_scaled)

# Ethereum
y_pred_ETH = rf_model.fit(X_ETH_train_scaled, y_ETH_train).predict(X_ETH_test_scaled)

# Litecoin
y_pred_LTC = rf_model.fit(X_LTC_train_scaled, y_LTC_train).predict(X_LTC_test_scaled)

# Monero
y_pred_XMR = rf_model.fit(X_XMR_train_scaled, y_XMR_train).predict(X_XMR_test_scaled)

In [None]:
# Binance Coin test-set metrics.
# Each label is paired with the metric it names (the MSE and MAE calls were
# swapped), and values are formatted to 4 decimals with f-strings because
# print("... %.4f", value) never applies the format.
print("Binance Coin Prediction")
print(f"MSE : {mean_squared_error(y_BNB_test, y_pred_BNB):.4f}")
print(f"MAE : {mean_absolute_error(y_BNB_test, y_pred_BNB):.4f}")
print(f"Test Score: {r2_score(y_BNB_test, y_pred_BNB):.4f}")

In [None]:
# Bitcoin test-set metrics.
# Each label is paired with the metric it names (the MSE and MAE calls were
# swapped), and values are formatted to 4 decimals with f-strings because
# print("... %.4f", value) never applies the format.
print("Bitcoin Prediction")
print(f"MSE : {mean_squared_error(y_BTC_test, y_pred_BTC):.4f}")
print(f"MAE : {mean_absolute_error(y_BTC_test, y_pred_BTC):.4f}")
print(f"Test Score: {r2_score(y_BTC_test, y_pred_BTC):.4f}")

In [None]:
# EOS.io test-set metrics.
# Each label is paired with the metric it names (the MSE and MAE calls were
# swapped), and values are formatted to 4 decimals with f-strings because
# print("... %.4f", value) never applies the format.
print("EOS.io Prediction")
print(f"MSE : {mean_squared_error(y_EOS_test, y_pred_EOS):.4f}")
print(f"MAE : {mean_absolute_error(y_EOS_test, y_pred_EOS):.4f}")
print(f"Test Score: {r2_score(y_EOS_test, y_pred_EOS):.4f}")

In [None]:
# Ethereum test-set metrics.
# Each label is paired with the metric it names (the MSE and MAE calls were
# swapped), and values are formatted to 4 decimals with f-strings because
# print("... %.4f", value) never applies the format.
print("Ethereum Prediction")
print(f"MSE : {mean_squared_error(y_ETH_test, y_pred_ETH):.4f}")
print(f"MAE : {mean_absolute_error(y_ETH_test, y_pred_ETH):.4f}")
print(f"Test Score: {r2_score(y_ETH_test, y_pred_ETH):.4f}")

In [None]:
# Litecoin test-set metrics.
# Each label is paired with the metric it names (the MSE and MAE calls were
# swapped), and values are formatted to 4 decimals with f-strings because
# print("... %.4f", value) never applies the format.
print("Litecoin Prediction")
print(f"MSE : {mean_squared_error(y_LTC_test, y_pred_LTC):.4f}")
print(f"MAE : {mean_absolute_error(y_LTC_test, y_pred_LTC):.4f}")
print(f"Test Score: {r2_score(y_LTC_test, y_pred_LTC):.4f}")

In [None]:
# Monero test-set metrics.
# Each label is paired with the metric it names (the MSE and MAE calls were
# swapped), and values are formatted to 4 decimals with f-strings because
# print("... %.4f", value) never applies the format.
print("Monero Prediction")
print(f"MSE : {mean_squared_error(y_XMR_test, y_pred_XMR):.4f}")
print(f"MAE : {mean_absolute_error(y_XMR_test, y_pred_XMR):.4f}")
print(f"Test Score: {r2_score(y_XMR_test, y_pred_XMR):.4f}")

In [None]:
# Put the Binance Coin test-set Close values next to the model output and plot both
pred_BNB = pd.DataFrame(
    data={
        "Real (BNB) Prediction": y_BNB_test,
        "Model Prediction": y_pred_BNB,
    }
)
pred_BNB.plot(figsize=(12, 8), grid=True)

In [None]:
# Put the Bitcoin test-set Close values next to the model output and plot both
pred_BTC = pd.DataFrame(
    data={
        "Real (BTC) Prediction": y_BTC_test,
        "Model Prediction": y_pred_BTC,
    }
)
pred_BTC.plot(figsize=(12, 8), grid=True)

In [None]:
# Put the EOS.io test-set Close values next to the model output and plot both
pred_EOS = pd.DataFrame(
    data={
        "Real (EOS) Prediction": y_EOS_test,
        "Model Prediction": y_pred_EOS,
    }
)
pred_EOS.plot(figsize=(12, 8), grid=True)

In [None]:
# Put the Ethereum test-set Close values next to the model output and plot both
pred_ETH = pd.DataFrame(
    data={
        "Real (ETH) Prediction": y_ETH_test,
        "Model Prediction": y_pred_ETH,
    }
)
pred_ETH.plot(figsize=(12, 8), grid=True)

In [None]:
# Put the Litecoin test-set Close values next to the model output and plot both
pred_LTC = pd.DataFrame(
    data={
        "Real (LTC) Prediction": y_LTC_test,
        "Model Prediction": y_pred_LTC,
    }
)
pred_LTC.plot(figsize=(12, 8), grid=True)

In [None]:
#pred_XMR = pd.DataFrame({"Real (XMR) Prediction":y_XMR_test,"Model Prediction":y_pred_XMR})
#pred_XMR.plot(grid=True, figsize=(12,8))

## Connect to SQL Database