In [1]:
# Step 1: Import libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import numpy as np

In [2]:
# Step 2: Load the dataset
df = pd.read_csv("crypto_data_with_features.csv")

In [13]:
# New temporal/technical features
df['price_to_volume'] = df['price'] / df['24h_volume']
df['mcap_to_volume'] = df['mkt_cap'] / df['24h_volume']
df['vol_change'] = (df['7d'] - df['24h']) + df['1h']  # total swing
df['volatility_squared'] = df['volatility_score'] ** 2

In [14]:
from scipy import stats
df = df[(np.abs(stats.zscore(df['log_liquidity'])) < 3)]

In [15]:
# Step 3: Define features and target
features = ['price', '1h', '24h', '7d', '24h_volume', 'mkt_cap', 'volatility_score']
X = df[features]

# Apply log transform to reduce skew
df['log_liquidity'] = np.log1p(df['liquidity_ratio'])

# Redefine target
y = df['log_liquidity']


In [16]:
# Step 4: Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [17]:
# Step 5: Initialize and train model
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

In [18]:
# Step 6: Make predictions
y_pred = model.predict(X_test)

In [19]:
# Step 7: Evaluate model
mae = mean_absolute_error(y_test, y_pred)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
r2 = r2_score(y_test, y_pred)

print(f"MAE: {mae:.5f}")
print(f"RMSE: {rmse:.5f}")
print(f"R² Score: {r2:.5f}")

MAE: 0.01232
RMSE: 0.03152
R² Score: 0.87276


In [20]:
from xgboost import XGBRegressor

model = XGBRegressor(n_estimators=300, max_depth=6, learning_rate=0.05)
model.fit(X_train, y_train)


In [21]:
from sklearn.model_selection import GridSearchCV

param_grid = {
    'n_estimators': [100, 200, 300],
    'max_depth': [4, 6, 8],
    'min_samples_split': [2, 5],
}

grid = GridSearchCV(RandomForestRegressor(), param_grid, cv=3, scoring='r2')
grid.fit(X_train, y_train)
print(grid.best_params_)


{'max_depth': 8, 'min_samples_split': 5, 'n_estimators': 200}


In [22]:
from sklearn.preprocessing import PolynomialFeatures

poly = PolynomialFeatures(degree=2, interaction_only=True, include_bias=False)
X_poly = poly.fit_transform(X)

In [23]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [25]:
from sklearn.svm import SVR
# Base models
base_models = [
    ('rf', RandomForestRegressor(n_estimators=100, random_state=42)),
    ('xgb', XGBRegressor(n_estimators=200, max_depth=4, learning_rate=0.05, random_state=42)),
    ('svr', SVR(kernel='rbf', C=1.0, epsilon=0.1))
]

In [26]:

# Meta model
meta_model = LinearRegression()


<IPython.core.display.Javascript object>

In [28]:
from sklearn.ensemble import StackingRegressor
# Stacking Regressor
stack_model = StackingRegressor(estimators=base_models, final_estimator=meta_model, passthrough=True)
stack_model.fit(X_train, y_train)


In [29]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
# Predict & Evaluate
y_pred = stack_model.predict(X_test)

mae = mean_absolute_error(y_test, y_pred)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
r2 = r2_score(y_test, y_pred)

print(f"Stacking MAE: {mae:.5f}")
print(f"Stacking RMSE: {rmse:.5f}")
print(f"Stacking R² Score: {r2:.5f}")

Stacking MAE: 0.00836
Stacking RMSE: 0.01932
Stacking R² Score: 0.95221


In [30]:
import joblib
joblib.dump(stack_model, 'final_liquidity_stack_model.pkl')

['final_liquidity_stack_model.pkl']