In [1]:
# training_notebook.ipynb

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error

# Models
from sklearn.linear_model import LogisticRegression
from sklearn.svm import LinearSVR
from sklearn.ensemble import RandomForestRegressor, AdaBoostRegressor, GradientBoostingRegressor

# 1. Load the data
df = pd.read_csv('../Data/processed_data.csv')

# 2. Split features and target
X = df.drop('Price', axis=1)
y = df['Price']

# 3. Train/Test Split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 4. Model Definitions
models = {
    "Logistic Regression": LogisticRegression(max_iter=1000),
    "SVM (SVR)": LinearSVR(),
    "Random Forest": RandomForestRegressor(n_estimators=100, random_state=42),
    "AdaBoost": AdaBoostRegressor(n_estimators=100, random_state=42),
    "Gradient Boosting": GradientBoostingRegressor(n_estimators=100, random_state=42)
}

# 5. Train & Evaluate
results = []

for name, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    r2 = r2_score(y_test, y_pred)
    mae = mean_absolute_error(y_test, y_pred)
    mse = mean_squared_error(y_test, y_pred)
    rmse = np.sqrt(mse)

    results.append({
        "Model": name,
        "R² Score": r2,
        "MAE": mae,
        "MSE": mse,
        "RMSE": rmse
    })

# 6. Results DataFrame
results_df = pd.DataFrame(results)
results_df = results_df.sort_values(by="R² Score", ascending=False)
print("📊 Model Comparison:")
print(results_df)

# 7. Save top model (optional)
best_model_name = results_df.iloc[0]["Model"]
print(f"\n✅ Best Performing Model: {best_model_name}")


STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT

Increase the number of iterations to improve the convergence (max_iter=1000).
You might also want to scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


📊 Model Comparison:
                 Model  R² Score          MAE           MSE         RMSE
2        Random Forest  0.827558  1136.205301  3.322330e+06  1822.726101
4    Gradient Boosting  0.778250  1491.292082  4.272321e+06  2066.959414
3             AdaBoost  0.545462  2421.259311  8.757303e+06  2959.273998
1            SVM (SVR)  0.241157  2623.824230  1.462014e+07  3823.629442
0  Logistic Regression -0.091510  2791.696667  2.102943e+07  4585.786181

✅ Best Performing Model: Random Forest
