In [1]:
import pandas as p
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
import numpy as n

# Single seed shared by the split and every stochastic estimator so that
# Restart & Run All reproduces the same metrics and the same "best model".
SEED = 42

# Load the feature table and discard rows containing missing values.
df = p.read_csv('crypto_final.csv')
df = df.dropna()

target_cols = ['BTC-USD', 'ETH-USD', 'DOGE-USD', 'SOL-USD', 'BNB-USD']

# Features are every column except the targets and the 'Date' column.
X = df.drop(columns=target_cols + ['Date'])
y = df[target_cols]

# Treat +/-inf as missing, then impute with the column mean.
# Reassignment (instead of inplace=True) keeps the cell idempotent on re-run.
X = X.replace([n.inf, -n.inf], n.nan)
X = X.fillna(X.mean())

# NOTE(review): the feature names (*_Prev_1, *_MA_5, ...) suggest time-ordered
# rows with lagged features. A shuffled split then mixes future and past rows,
# which can inflate the scores below. Consider a chronological split
# (shuffle=False on date-sorted rows) -- TODO confirm row ordering first.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=SEED
)

# Candidate regressors. The tree ensembles are seeded; without random_state
# their scores differ from run to run and can change which model ranks first.
models = {
    'Linear Regression': LinearRegression(),
    'Random Forest': RandomForestRegressor(random_state=SEED),
    'Gradient Boosting': GradientBoostingRegressor(random_state=SEED),
}

# results[target][model name] -> {'RMSE': ..., 'MAE': ..., 'R2': ...}
results = {}

for target_col in target_cols:
    print(f'\nTraining models for {target_col}')
    results[target_col] = {}
    for name, model in models.items():
        # fit() discards any previous fit, so reusing the instance per target is safe here.
        model.fit(X_train, y_train[target_col])
        preds = model.predict(X_test)

        rmse = n.sqrt(mean_squared_error(y_test[target_col], preds))
        mae = mean_absolute_error(y_test[target_col], preds)
        r2 = r2_score(y_test[target_col], preds)

        results[target_col][name] = {'RMSE': rmse, 'MAE': mae, 'R2': r2}

# Report one comparison table per target, lowest RMSE first.
for target_col in target_cols:
    results_df = p.DataFrame(results[target_col]).T.sort_values(by='RMSE')
    print(f'\nModel Comparison for {target_col}')
    print(results_df)
    best_model_name = results_df.index[0]
    print(f'Best model for {target_col} is {best_model_name}')


Training models for BTC-USD

Training models for ETH-USD

Training models for DOGE-USD

Training models for SOL-USD

Training models for BNB-USD

Model Comparison for BTC-USD
                       RMSE       MAE        R2
Gradient Boosting  0.009999  0.006205  0.998235
Linear Regression  0.010309  0.006586  0.998124
Random Forest      0.012955  0.008446  0.997037
Best model for BTC-USD is Gradient Boosting

Model Comparison for ETH-USD
                       RMSE       MAE        R2
Gradient Boosting  0.010457  0.007339  0.998251
Linear Regression  0.015515  0.010704  0.996151
Random Forest      0.018489  0.012413  0.994533
Best model for ETH-USD is Gradient Boosting

Model Comparison for DOGE-USD
                       RMSE       MAE        R2
Linear Regression  0.013383  0.007494  0.995183
Random Forest      0.017466  0.008590  0.991796
Gradient Boosting  0.017901  0.007989  0.991382
Best model for DOGE-USD is Linear Regression

Model Comparison for SOL-USD
                       R

In [2]:
import joblib
from sklearn.base import clone

# For every target, refit the lowest-RMSE model type on the full dataset
# and persist it to '<target>_model.pkl'.
for target_col in target_cols:
    results_df = p.DataFrame(results[target_col]).T
    # idxmin gives the label of the smallest RMSE without sorting the frame.
    best_model_name = results_df['RMSE'].idxmin()
    print(f'Best model for {target_col}: {best_model_name}')

    # Fit a fresh copy: refitting the shared instance in `models` would leave
    # that dict holding an estimator trained on whichever target ran last.
    best_model = clone(models[best_model_name])
    best_model.fit(X, y[target_col])

    joblib.dump(best_model, f'{target_col}_model.pkl')
    print(f'Saved as {target_col}_model.pkl')

Best model for BTC-USD: Gradient Boosting
Saved as BTC-USD_model.pkl
Best model for ETH-USD: Gradient Boosting
Saved as ETH-USD_model.pkl
Best model for DOGE-USD: Linear Regression
Saved as DOGE-USD_model.pkl
Best model for SOL-USD: Gradient Boosting
Saved as SOL-USD_model.pkl
Best model for BNB-USD: Gradient Boosting
Saved as BNB-USD_model.pkl


In [3]:
# Last feature row, sliced so it stays a one-row DataFrame
# (predict() expects two-dimensional input).
sample = X.iloc[-1:]

# Reload the BTC model written to disk by the previous cell.
# joblib.load unpickles the file, so only load model files you trust.
btc_model = joblib.load('BTC-USD_model.pkl')
print(sample)

# predict() returns an array with one value per input row.
pred = btc_model.predict(sample)
print(pred[0])

     BNB-USD_Prev_1  BNB-USD_Prev_2  BNB-USD_Prev_3  BNB-USD_MA_5  \
721        0.915371        0.896305        0.946218      0.911535   

     BNB-USD_MA_10  BNB-USD_Change  BNB-USD_Volatility  BTC-USD_Prev_1  \
721       0.896647       -0.006333            0.021843        0.849217   

     BTC-USD_Prev_2  BTC-USD_Prev_3  ...  ETH-USD_MA_10  ETH-USD_Change  \
721        0.859126        0.877377  ...       0.759574        -0.01107   

     ETH-USD_Volatility  SOL-USD_Prev_1  SOL-USD_Prev_2  SOL-USD_Prev_3  \
721            0.009619        0.732731        0.727535        0.748861   

     SOL-USD_MA_5  SOL-USD_MA_10  SOL-USD_Change  SOL-USD_Volatility  
721      0.727663       0.729172       -0.009741            0.016269  

[1 rows x 35 columns]
0.8572581657825954


In [4]:
from sklearn.preprocessing import MinMaxScaler

# Attempt to map the model's prediction back through a MinMaxScaler.
#
# NOTE(review): the scaler is fitted here on df['BTC-USD'] as loaded from
# 'crypto_final.csv'. The values printed for the feature row and the
# prediction (~0.85) suggest that column is already normalized. If so, this
# scaler learns a min/max close to 0/1 and inverse_transform is almost the
# identity -- the printed "real price" (~0.8586) is still on the normalized
# scale, not in USD. Recovering a USD price presumably needs the scaler (or
# the raw min/max) used when the CSV was produced -- TODO confirm and
# persist/load that scaler instead of fitting a new one here.
btc_scaler = MinMaxScaler()
# The transformed array returned by fit_transform is discarded; only the fit matters.
btc_scaler.fit_transform(df[['BTC-USD']])
# inverse_transform needs 2-D input, hence the reshape to a single column.
real_price = btc_scaler.inverse_transform(n.array(pred).reshape(-1, 1))
print(real_price[0][0])

0.8585676753776577
