# Regression

In [56]:
from pathlib import Path
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.linear_model import Ridge, Lasso
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from xgboost import XGBRegressor
import joblib

In [57]:
# --- Safe path for GitHub or Streamlit deployment ---
BASE_DIR = Path().resolve().parent  # adjust `.parent` depending on notebook location
DATA_PATH = BASE_DIR / "data"
MODEL_PATH = BASE_DIR / "models"
DATA_PATH.mkdir(exist_ok=True)
MODEL_PATH.mkdir(exist_ok=True)

In [58]:
# Step 1: Load Features

df = pd.read_csv(DATA_PATH/"btc_feature.csv", parse_dates=['timestamp'])

# Define target: Future Price
df["target"] = df["future_price"]

df.tail()

Unnamed: 0,timestamp,price,return_1h,rolling_mean_3h,rolling_mean_6h,rolling_std_3h,future_price,target
156,2025-06-14 10:00:00,104657.304688,0.001559,104672.854167,104853.514323,186.701119,104915.703125,104915.703125
157,2025-06-14 11:00:00,104915.703125,0.002469,104689.140625,104828.365885,212.441201,104736.453125,104736.453125
158,2025-06-14 12:00:00,104736.453125,-0.001709,104769.820312,104772.953125,132.391333,104908.320312,104908.320312
159,2025-06-14 13:00:00,104908.320312,0.001641,104853.492188,104763.173177,101.425998,105445.15625,105445.15625
160,2025-06-14 14:00:00,105445.15625,0.005117,105029.976562,104859.558594,369.682583,105513.789062,105513.789062


In [59]:
# Step 2: Define target: Predict next hour's price
df['target'] = df['price'].shift(-1)
df.dropna(inplace=True)

# Step 3: Feature and Target split
X = df.drop(columns=['timestamp', 'target', 'future_price']) # Drop timestamp and target-related
y = df['target']

# Step 4: Train-Test split
X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.2, shuffle=False)

## Ridge

In [60]:
# Ridge Regression
ridge = Ridge(alpha=1.0)
ridge.fit(X_train, y_train)
ridge_preds = ridge.predict(X_test)

# Evaluate
rmse = mean_squared_error(y_test, ridge_preds)
mae = mean_absolute_error(y_test, ridge_preds)
r2 = r2_score(y_test, ridge_preds)

print(f"✅ Ridge Regression Model Trained.")
print(f"📉 RMSE: {rmse:.2f}")
print(f"📈 MAE: {mae:.2f}")
print(f"🔁 R² Score: {r2:.2f}")

✅ Ridge Regression Model Trained.
📉 RMSE: 58541.08
📈 MAE: 204.32
🔁 R² Score: 0.55


In [61]:
# Save output for dashboard
df_result = df.iloc[len(df) - len(y_test):].copy()
df_result['predicted_price'] = ridge_preds
df_result.to_csv(MODEL_PATH/"btc_ridge_model.csv", index=False)

### Ridge - Tuned

In [62]:
# Define model and hyperparameter grid
ridge = Ridge()
params = {"alpha": [0.001, 0.01, 0.1, 1, 10, 100]}
ridge_grid = GridSearchCV(ridge, params, cv=5)
ridge_grid.fit(X, y)

# Best model
best_ridge = ridge_grid.best_estimator_
y_pred = best_ridge.predict(X)

# Evaluation
rmse = mean_squared_error(y, y_pred)
mae = mean_absolute_error(y, y_pred)
r2 = r2_score(y, y_pred)

print("✅ Tuned Ridge Regression")
print("Best Params:", ridge_grid.best_params_)
print(f"📉 RMSE: {rmse:.2f}")
print(f"📈 MAE: {mae:.2f}")
print(f"🔁 R² Score: {r2:.2f}")

✅ Tuned Ridge Regression
Best Params: {'alpha': 10}
📉 RMSE: 121510.76
📈 MAE: 240.79
🔁 R² Score: 0.97


In [63]:
# Save output for dashboard
df_result = df.iloc[len(df) - len(y_test):].copy()
df_result['predicted_price'] = y_pred[-len(y_test):]
df_result.to_csv(MODEL_PATH/"btc_ridge_tuned_model.csv", index=False)

# Save best ridge model
joblib.dump(best_ridge, MODEL_PATH / "btc_ridge_tuned.pkl")

['/Users/badboihy/btc_ml_dashboard/models/btc_ridge_tuned.pkl']

## Lasso Regressor

In [64]:
# Train Lasso Regressor
lasso = Lasso(alpha=1.0)
lasso.fit(X_train, y_train)
lasso_preds = lasso.predict(X_test)


In [65]:
# Evaluate
mse = mean_squared_error(y_test, lasso_preds)
mae = mean_absolute_error(y_test, lasso_preds)
r2 = r2_score(y_test, lasso_preds)

print("✅ Lasso Regression Model Trained.")
print(f"📉 RMSE: {mse**0.5:.2f}")
print(f"📈 MAE: {mae:.2f}")
print(f"🔁 R² Score: {r2:.2f}")


✅ Lasso Regression Model Trained.
📉 RMSE: 241.95
📈 MAE: 204.32
🔁 R² Score: 0.55


In [66]:
df_result = df.iloc[-len(y_test):].copy()
df_result['predicted_price'] = lasso_preds
df_result.to_csv(MODEL_PATH/"btc_lasso_model.csv", index=False)


In [67]:
# Save lasso model
joblib.dump(lasso, MODEL_PATH / "btc_lasso_model.pkl")

['/Users/badboihy/btc_ml_dashboard/models/btc_lasso_model.pkl']