In [1]:
# 📦 Import Libraries
import pandas as pd
import numpy as np
import os
import joblib

from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score

# 📂 Load Engineered Data
data_path = os.path.join("..", "data", "cleaned", "feature_engineered_data.csv")
df = pd.read_csv(data_path)

# 🎯 Target and Features
# Features are every numeric column except the identifier columns and the target itself.
# `errors='ignore'` lets the drop succeed when 'date' / 'coin' are absent from the CSV.
target_col = 'liquidity_ratio_7d'
features = df.drop(columns=['date', 'coin', target_col], errors='ignore').select_dtypes(include=[np.number])
y = df[target_col]

# 🔍 Check for Inf and NaN
# Treat +/-inf as missing, then keep only rows that are complete in BOTH the
# features and the target. Filtering the features alone would let a NaN/inf
# target value through and make the estimator's fit() fail later on.
X = features.replace([np.inf, -np.inf], np.nan)
y = y.replace([np.inf, -np.inf], np.nan)
valid_rows = X.notna().all(axis=1) & y.notna()
X = X.loc[valid_rows]
y = y.loc[valid_rows]  # Same mask as X, so target and features stay row-aligned
features = X  # Keep `features` referring to the cleaned frame, as before

# 🧪 Split Data
# Split BEFORE scaling: fitting the scaler on the full dataset would leak the
# test rows' mean/variance into preprocessing and bias the test metrics.
# The same random_state on the same number of rows selects the same partition
# as splitting the pre-scaled array did.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 🧼 Scaling
# Learn the standardization parameters from the training rows only, then apply
# that same fitted transform to the held-out rows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Full feature matrix under the train-fitted scaler, kept for any later code
# that refers to `X_scaled`.
X_scaled = scaler.transform(X)

# 🔧 Define Model & Grid
# Base estimator; the fixed seed keeps the forest reproducible across runs.
rf = RandomForestRegressor(random_state=42)

# Candidate values for each hyperparameter; the search tries every combination.
param_grid = dict(
    n_estimators=[100, 150, 200],
    max_depth=[None, 10, 20],
    min_samples_split=[2, 5],
    min_samples_leaf=[1, 2],
    max_features=[None, 'sqrt'],
)

# 🧵 Grid Search
# 5-fold cross-validated search scored by R², using all available cores.
grid = GridSearchCV(
    estimator=rf,
    param_grid=param_grid,
    scoring='r2',
    cv=5,
    n_jobs=-1,
)
grid.fit(X_train, y_train)

# ✅ Best Model
# Estimator with the winning hyperparameter combination from the grid search.
best_model = grid.best_estimator_
print("Best hyperparameters:", grid.best_params_)

# 📊 Evaluate
# Score the selected model on the held-out test split.
y_pred = best_model.predict(X_test)
r2 = r2_score(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)

print(f"Test MSE: {mse:.4f}")
print(f"Test R²: {r2:.4f}")

# 💾 Save Model
# Create the output directory if needed, then serialize the tuned forest.
model_output_path = os.path.join("..", "model", "best_random_forest_model.pkl")
os.makedirs(os.path.dirname(model_output_path), exist_ok=True)
joblib.dump(best_model, model_output_path)
print(f"Model saved at: {model_output_path}")

# 💾 Save Scaler
# The model was trained on standardized features, so new data has to pass
# through the same fitted scaler before prediction. Persist it next to the
# model; without it the saved model cannot be applied to raw inputs.
scaler_output_path = os.path.join(os.path.dirname(model_output_path), "feature_scaler.pkl")
joblib.dump(scaler, scaler_output_path)
print(f"Scaler saved at: {scaler_output_path}")


Best hyperparameters: {'max_depth': 10, 'max_features': None, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 150}
Test MSE: 4.7741
Test R²: 0.7621
Model saved at: ..\model\best_random_forest_model.pkl
