In [697]:
# model.py

# House Price Prediction – Model Training Script
# Author: Ali Omar Abdi
# Course: Data Science & Machine Learning Bootcamp
# Task: Regression (Supervised Learning)

# === Step 0: Import Libraries ===
import pandas as pd
import numpy as np

# Machine Learning
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Models & Pickle for saving
import joblib
import json
import os

print("✅ Libraries imported successfully.")

✅ Libraries imported successfully.


In [698]:
# === Step 1: Load Dataset ===
CSV_PATH = "./Dataset/Bitcoin - Dataset.csv"  # Change path if needed
df = pd.read_csv(CSV_PATH)
print(f"\n✅ Dataset loaded successfully from {CSV_PATH}")
print("\n=== DATASET HEAD ===")
print(df.head())


✅ Dataset loaded successfully from ./Dataset/Bitcoin - Dataset.csv

=== DATASET HEAD ===
         Date        Open        High         Low       Close   Adj Close  \
0  2014-09-17  465.864014  468.174011  452.421997  457.334015  457.334015   
1  2014-09-18  456.859985  456.859985  413.104004  424.440002  424.440002   
2  2014-09-19  424.102997  427.834991  384.532013  394.795990  394.795990   
3  2014-09-20  394.673004  423.295990  389.882996  408.903992  408.903992   
4  2014-09-21  408.084991  412.425995  393.181000  398.821014  398.821014   

     Volume  
0  21056800  
1  34483200  
2  37919700  
3  36863600  
4  26580100  


In [699]:
#  === Step 2: Drop the Date column (not useful for numeric models)
df = df.drop(columns=["Date"], errors="ignore")
print("✅ Date column dropped successfully.")

✅ Date column dropped successfully.


In [700]:
#  === Step 3: Define features and target variable
X = df.drop(columns=["Close"])  # Features: everything except Close price
y = df["Close"]                 # Target: the closing price
print("✅ Features and target variable defined.")

✅ Features and target variable defined.


In [701]:
#  === Step 4: Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Display dataset split information
print("✅ Data successfully split into training and testing sets.")
print(f"Training set size: {X_train.shape[0]} samples")
print(f"Testing set size: {X_test.shape[0]} samples")

✅ Data successfully split into training and testing sets.
Training set size: 2170 samples
Testing set size: 543 samples


In [702]:
# === Step 5: Train Models ===
models = {
    "Linear Regression": LinearRegression(),
    "Random Forest Regressor": RandomForestRegressor(n_estimators=100, random_state=42),
}
for name, model in models.items():
    model.fit(X_train, y_train)
    print(f"✅ {name} trained successfully.")

✅ Linear Regression trained successfully.


✅ Random Forest Regressor trained successfully.


In [703]:
# Step 6: Predict using the trained models
# Use Linear Regression and Random Forest to predict the target values for the test set
y_pred_lr = models["Linear Regression"].predict(X_test)
y_pred_rf = models["Random Forest Regressor"].predict(X_test)

# Show results
print("✅ Predictions are ready!")
print("\nLinear Regression predictions:", y_pred_lr)
print("\nRandom Forest predictions:", y_pred_rf)

✅ Predictions are ready!

Linear Regression predictions: [  386.54901101  4017.268555    7448.307617    1133.25000001
 43099.69921899   243.59399401   386.35400401 17706.900391
 55907.19921897 41821.26171899   777.94397001  2548.29003901
  4073.26001001   281.65399201 11657.200195    9771.489258
  1205.01001001 50429.859375   57229.82812499   447.61099201
  8745.89453099  8820.52246098 11600.099609   57401.09765598
   447.97601301 10948.99023399   455.67099001  5903.43994101
  1929.81994601  8550.760742    7068.47998001 65466.83984399
  9525.75097699  6423.75976601   456.07800301   416.43798801
 41500.87499999   973.49700901   580.18200701   254.32000701
   374.78500401   758.70001201  6371.29980501  7037.58007801
   217.11099201  8206.145508   61888.83203099 10400.91503898
  9800.63671899   422.74499501   657.07098401  3894.13085901
  9264.813477    9412.61230499  1100.22998001   228.57299801
   375.49099701  1187.81005901 35615.87109399  9344.36523399
   376.52200301  1211.67004401  

In [704]:
# === Step 7: Metrics Function ===
def evaluate_model(model_name, y_pred, y_true):
    r2 = r2_score(y_true, y_pred)
    mae = mean_absolute_error(y_true, y_pred)
    mse = mean_squared_error(y_true, y_pred)
    rmse = np.sqrt(mse)

    print(f"\n{model_name} Performance:")
    print(f"  R²   : {r2:.3f}")
    print(f"  MAE  : {mae:.0f}")
    print(f"  MSE  : {mse:.0f}")
    print(f"  RMSE : {rmse:.0f}")

evaluate_model("Linear Regression", y_pred_lr, y_test)
evaluate_model("Random Forest Regressor", y_pred_rf, y_test)





Linear Regression Performance:
  R²   : 1.000
  MAE  : 0
  MSE  : 0
  RMSE : 0

Random Forest Regressor Performance:
  R²   : 1.000
  MAE  : 30
  MSE  : 9153
  RMSE : 96


In [705]:
# === Step 8: Single-row Sanity Check ===
i = 3
x_one_df = X_test.iloc[[i]]
y_true = y_test.iloc[i]
p_lr_one = float(models["Linear Regression"].predict(x_one_df)[0])
p_rf_one = float(models["Random Forest Regressor"].predict(x_one_df)[0])
print("\n=== Single-row Sanity Check (Index 3) ===")
print(f"  Actual Close Price : {y_true:.0f}")
print(f"  LR Predicted       : {p_lr_one:.0f}")
print(f"  RF Predicted       : {p_rf_one:.0f}")


=== Single-row Sanity Check (Index 3) ===
  Actual Close Price : 1133
  LR Predicted       : 1133
  RF Predicted       : 1140


In [706]:
# === Step 9: Save  Models  === (New)
joblib.dump(models["Linear Regression"], "./Models/bitcoin_lr_model.joblib")
joblib.dump(models["Random Forest Regressor"], "./Models/bitcoin_rf_model.joblib")

print("\n✅ Models saved successfully to the Models/ directory.")


✅ Models saved successfully to the Models/ directory.
