In [462]:
# === Step 0: Import Libraries ===
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import joblib
import json

print("✅ Libraries imported successfully.")

✅ Libraries imported successfully.


In [463]:
# === Step 1: Load Cleaned Dataset ===
CSV_PATH = "./Dataset/Bitcoin-Cleaned-Dataset.csv"
df = pd.read_csv(CSV_PATH)
print("✅ Cleaned dataset loaded successfully.\n")
print("=== HEAD ===")
print(df.head())

✅ Cleaned dataset loaded successfully.

=== HEAD ===
         Date      Open      High       Low     Close   Adj Close    Volume  \
0  2014-09-17 -0.885645 -0.884096 -0.886190 -0.886821  457.334015 -0.806132   
1  2014-09-18 -0.886700 -0.885381 -0.890919 -0.890669  424.440002 -0.805367   
2  2014-09-19 -0.890539 -0.888680 -0.894355 -0.894138  394.795990 -0.805171   
3  2014-09-20 -0.893987 -0.889196 -0.893711 -0.892487  408.903992 -0.805232   
4  2014-09-21 -0.892416 -0.890431 -0.893315 -0.893667  398.821014 -0.805817   

   Year  Month_2  Month_3  ...  Month_12  DayOfWeek_1  DayOfWeek_2  \
0  2014    False    False  ...     False        False         True   
1  2014    False    False  ...     False        False        False   
2  2014    False    False  ...     False        False        False   
3  2014    False    False  ...     False        False        False   
4  2014    False    False  ...     False        False        False   

   DayOfWeek_3  DayOfWeek_4  DayOfWeek_5  DayOfWeek

In [464]:
# === Step 2: Prepare Features (X) and Target (y) ===
target = "Close"

# Load saved training columns (features used during preprocessing)
TRAIN_COLUMNS = json.load(open("models/train_columns.json"))

# Select features and target
X = df[TRAIN_COLUMNS]
y = df[target]

print(f"\n✅ Features (X) and target (y) prepared. Number of features: {X.shape[1]}")


✅ Features (X) and target (y) prepared. Number of features: 26


In [465]:
# === Step 3: Split Dataset into Train & Test Sets ===
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

print(f"\n✅ Dataset split into training and testing sets:")
print(f"Training samples: {X_train.shape[0]}, Testing samples: {X_test.shape[0]}")


✅ Dataset split into training and testing sets:
Training samples: 2170, Testing samples: 543


In [466]:
# === Step 4: Train Linear Regression Model ===
lr_model = LinearRegression()
lr_model.fit(X_train, y_train)

# Predict on test set
y_pred_lr = lr_model.predict(X_test)

print("\n✅ Linear Regression model trained.")


✅ Linear Regression model trained.


In [467]:
# === Step 5: Train Random Forest Regressor ===
rf_model = RandomForestRegressor(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)

# Predict on test set
y_pred_rf = rf_model.predict(X_test)

print("\n✅ Random Forest Regressor model trained.")


✅ Random Forest Regressor model trained.


In [468]:
def evaluate_model(y_true, y_pred, model_name="Model"):
    mse = mean_squared_error(y_true, y_pred)
    mae = mean_absolute_error(y_true, y_pred)
    rmse = np.sqrt(mse)
    r2 = r2_score(y_true, y_pred)
    
    print(f"\n{model_name} Performance:")
    print(f"  MSE  : {mse:.4f}")
    print(f"  MAE  : {mae:.4f}")
    print(f"  RMSE : {rmse:.4f}")
    print(f"  R2   : {r2:.4f}")

# Evaluate Linear Regression
evaluate_model(y_test, y_pred_lr, "Linear Regression")

# Evaluate Random Forest
evaluate_model(y_test, y_pred_rf, "Random Forest")


Linear Regression Performance:
  MSE  : 0.0000
  MAE  : 0.0000
  RMSE : 0.0000
  R2   : 1.0000

Random Forest Performance:
  MSE  : 0.0000
  MAE  : 0.0013
  RMSE : 0.0052
  R2   : 1.0000


In [469]:
# 7) Single-row sanity check
# Pick a row from the test set to compare actual vs predicted values
i = 3  # choose any index from X_test
x_one_df = X_test.iloc[[i]]  # keep as DataFrame
y_true = y_test.iloc[i]      # actual price

# Make predictions with both models
p_lr_one = float(lr_model.predict(x_one_df)[0])
p_rf_one = float(rf_model.predict(x_one_df)[0])

# Display results
print("\nSingle-row sanity check:")
print(f"  Actual Price: ${y_true:,.0f}")
print(f"  LR Predicted: ${p_lr_one:,.0f}")
print(f"  RF Predicted: ${p_rf_one:,.0f}")



Single-row sanity check:
  Actual Price: $-1
  LR Predicted: $-1
  RF Predicted: $-1


In [470]:
# === Step 7: Sanity Checks (Sample Predictions) ===
print("\n=== Sample Predictions Comparison ===")
sample = X_test.head(5)
sample_true = y_test.head(5)

print("\nInput Features (first 5 rows):")
print(sample)

print("\nActual Close Prices:")
print(sample_true.values)

print("\nLinear Regression Predictions:")
print(y_pred_lr[:5])

print("\nRandom Forest Predictions:")
print(y_pred_rf[:5])



=== Sample Predictions Comparison ===

Input Features (first 5 rows):
          Open      High       Low     Adj Close    Volume  Year  Month_2  \
506  -0.894664 -0.892856 -0.894230    386.549011 -0.804835  2016     True   
1535 -0.437638 -0.445996 -0.466408   4017.268555 -0.462753  2018    False   
1905 -0.090297 -0.057260 -0.070746   7448.307617  0.264694  2019    False   
930  -0.806003 -0.805874 -0.805839   1133.250000 -0.782473  2017    False   
2676  2.015219  2.016779  2.017954  43099.699219  0.535964  2022    False   

      Month_3  Month_4  Month_5  ...  Month_12  DayOfWeek_1  DayOfWeek_2  \
506     False    False    False  ...     False        False        False   
1535    False    False    False  ...     False        False        False   
1905    False    False    False  ...      True        False        False   
930     False     True    False  ...     False         True        False   
2676    False    False    False  ...     False        False        False   

      Day

In [471]:
# === Step 8: Save Trained Models ===
import os


os.makedirs("models", exist_ok=True)
joblib.dump(lr_model, "models/linear_regression_model.pkl")
joblib.dump(rf_model, "models/random_forest_model.pkl")

print("\n✅ Models saved successfully in 'models/' folder.")


✅ Models saved successfully in 'models/' folder.
