In [7]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error

In [8]:
# Price-history CSV exports, three files per asset.
# Ethereum
e_files = [
    "/content/Ethereum Historical Data.csv",
    "/content/Ethereum Historical Data (1).csv",
    "/content/Ethereum Historical Data (2).csv",
]
# Bitcoin
b_files = [
    "/content/Bitcoin Historical Data.csv",
    "/content/Bitcoin Historical Data (1).csv",
    "/content/Bitcoin Historical Data (2).csv",
]
# TRON
t_files = [
    "/content/TRON Inc Stock Price History.csv",
    "/content/TRON Inc Stock Price History (1).csv",
    "/content/TRON Inc Stock Price History (2).csv",
]

In [9]:
def load_concat(files):
    """Read several price-history CSV exports and merge them into one frame sorted by date.

    Parameters
    ----------
    files : iterable of str
        Paths of the CSV files to read. Every file must provide the columns
        ``Date``, ``Price``, ``Open``, ``High``, ``Low``, ``Vol.`` and ``Change %``.

    Returns
    -------
    pandas.DataFrame
        The rows of all files, ordered by ``Date`` and re-indexed from 0.

        * ``Price``, ``Open``, ``High``, ``Low``, ``Vol.`` are numeric: thousands
          separators are removed and the magnitude suffixes ``K``, ``M`` and ``B``
          are expanded (``1.5K`` -> 1500). Cells that still cannot be parsed
          become NaN.
        * ``Change %`` is numeric with the ``%`` sign removed; a blank cell counts
          as 0.
        * ``Date`` is parsed as ``%m/%d/%Y``; values that do not match become NaT.

    Raises
    ------
    KeyError
        If one of the expected columns is missing.
    ValueError
        If a ``Change %`` cell is neither blank nor numeric.
    """
    # A byte-order mark can appear in a pasted path as well as at the start of the
    # file content: strip it from the path, and let 'utf-8-sig' drop it from the
    # content so it never ends up inside the first column name.
    frames = [
        pd.read_csv(path.replace('\ufeff', ''), encoding='utf-8-sig')
        for path in files
    ]
    df = pd.concat(frames, axis=0, ignore_index=True)
    # NOTE(review): rows are not de-duplicated; if the exports overlap in time the
    # same date will appear more than once.

    # Magnitude suffixes are rewritten as an exponent ('1.5K' -> '1.5e3') and parsed
    # as ordinary numbers. Cell text is never handed to eval(): it comes from files.
    suffix_exponents = {'K': 'e3', 'M': 'e6', 'B': 'e9'}
    for col in ['Price', 'Open', 'High', 'Low', 'Vol.']:
        text = df[col].astype(str).str.replace(',', '', regex=False).str.strip()
        for suffix, exponent in suffix_exponents.items():
            text = text.str.replace(suffix, exponent, regex=False)
        # Placeholders such as '-' or any other non-numeric text become NaN.
        df[col] = pd.to_numeric(text, errors='coerce')

    change = (
        df['Change %']
        .astype(str)
        .str.replace('%', '', regex=False)
        .str.replace(',', '', regex=False)
        .str.strip()
    )
    # Blank cells count as "no change"; '0' keeps the column purely textual
    # until to_numeric converts it.
    df['Change %'] = pd.to_numeric(change.replace('', '0'))

    df['Date'] = pd.to_datetime(df['Date'], format='%m/%d/%Y', errors='coerce')
    df = df.sort_values('Date')
    return df.reset_index(drop=True)

In [10]:
# Build one cleaned, date-sorted frame per asset (Ethereum, Bitcoin, TRON — in that order).
df_eth, df_btc, df_tron = map(load_concat, (e_files, b_files, t_files))

In [11]:
# Columns fed to the model as inputs; 'Price' is left untouched and serves as the target.
features = ['Open', 'High', 'Low', 'Vol.', 'Change %']

# One independent scaler per asset, so each asset's columns are rescaled on their own range.
scaler_eth, scaler_btc, scaler_tron = MinMaxScaler(), MinMaxScaler(), MinMaxScaler()

# The scaled values overwrite the original feature columns in each frame.
# NOTE(review): every scaler is fit on the complete frame, i.e. before train_rf
# splits the rows into train and test parts.
df_eth[features] = scaler_eth.fit_transform(df_eth[features])
df_btc[features] = scaler_btc.fit_transform(df_btc[features])
df_tron[features] = scaler_tron.fit_transform(df_tron[features])

In [12]:
# Random Forest baseline: fit on the leading 80% of rows, score on the remainder
def train_rf(df, features, target='Price'):
    """Fit a random forest regressor and report its hold-out errors.

    The first 80% of the rows of ``df`` (in the frame's existing order) are used
    for fitting; the remaining rows are used for evaluation. Rows are not shuffled.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding both the feature columns and the target column.
    features : list of str
        Names of the input columns.
    target : str, default 'Price'
        Name of the column to predict.

    Returns
    -------
    tuple
        ``(mae, rmse)`` computed on the hold-out rows.
    """
    inputs, labels = df[features], df[target]
    cut = int(len(df) * 0.8)

    # Fixed random_state keeps the fitted forest reproducible between runs.
    forest = RandomForestRegressor(n_estimators=100, random_state=42)
    forest.fit(inputs[:cut], labels[:cut])

    actual = labels[cut:]
    predicted = forest.predict(inputs[cut:])

    abs_err = mean_absolute_error(actual, predicted)
    root_sq_err = np.sqrt(mean_squared_error(actual, predicted))
    return abs_err, root_sq_err

In [13]:
# Evaluate the random forest on each asset; every call returns (MAE, RMSE).
eth_rf_mae, eth_rf_rmse = train_rf(df_eth, features)
btc_rf_mae, btc_rf_rmse = train_rf(df_btc, features)
tron_rf_mae, tron_rf_rmse = train_rf(df_tron, features)

# One row per asset, errors side by side.
results = pd.DataFrame(
    [
        ('Ethereum', eth_rf_mae, eth_rf_rmse),
        ('Bitcoin', btc_rf_mae, btc_rf_rmse),
        ('TRON', tron_rf_mae, tron_rf_rmse),
    ],
    columns=['Asset', 'RF MAE', 'RF RMSE'],
)

# Persist the table next to the notebook, then show it.
results.to_csv('blockchain_latency_prediction_rf_results.csv', index=False)
print(results)


      Asset      RF MAE     RF RMSE
0  Ethereum  126.908700  156.875963
1   Bitcoin  829.473091  994.466779
2      TRON    0.748435    0.979000
