In [2]:
import pickle
import warnings
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import xgboost as xgb
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.svm import SVR
# Silence every warning whose message matches "does not have valid feature names".
# NOTE(review): presumably this targets scikit-learn's predict-time warning about
# feature names differing from those seen at fit time; hiding it can mask a genuine
# train/predict column mismatch, so confirm the suppression is intentional.
warnings.filterwarnings("ignore", message=".*does not have valid feature names.*")

In [3]:
# Load the three CSV inputs. `dataset` is read here but is not referenced by the
# other cells visible in this section.
dataset = pd.read_csv('data/jayanagar_march_2023.csv')
train_df = pd.read_csv('data/train.csv')
test_df = pd.read_csv('data/test.csv')

# 'Value' is the regression target; every remaining column is used as a feature.
# `drop` returns a new frame, so train_df / test_df keep their 'Value' column.
X_train = train_df.drop(columns='Value')
y_train = train_df['Value']
X_test = test_df.drop(columns='Value')
y_test = test_df['Value']

In [None]:
# Hyper-parameters shared by the three tree-ensemble regressors: 100 estimators
# each, with a fixed seed so repeated runs build the same ensembles.
ensemble_params = dict(n_estimators=100, random_state=42)

# Display name -> unfitted estimator. Insertion order is the order in which the
# models are later iterated, so keep it stable.
# NOTE(review): SVR is configured with an RBF kernel and no scaling step is
# visible in this section; RBF kernels are scale-sensitive — confirm the
# features in train.csv / test.csv are already standardised upstream.
models = {
    "Random Forest": RandomForestRegressor(**ensemble_params),
    "Gradient Boosting": GradientBoostingRegressor(**ensemble_params),
    "XGBoost": xgb.XGBRegressor(**ensemble_params),
    "SVR": SVR(kernel='rbf'),
}

In [32]:
# Fit every candidate model, persist the fitted estimator, and score it on the
# test split. `pickle` and `Path` are imported in the notebook's import cell.

# Create the output folder up front: open(..., "wb") does not create missing
# parent directories, so a fresh checkout would otherwise fail on the first save.
ARTIFACTS_DIR = Path('artifacts')
ARTIFACTS_DIR.mkdir(parents=True, exist_ok=True)

# model name -> {'mae': float, 'rmse': float, 'y_pred': predictions on X_test}
results = {}

for model_name, model in models.items():
    model.fit(X_train, y_train)

    # The file name is the display name verbatim (spaces included), e.g.
    # "artifacts/Random Forest.pkl"; kept unchanged so existing loaders still work.
    with open(ARTIFACTS_DIR / f'{model_name}.pkl', "wb") as file:
        pickle.dump(model, file)

    y_pred = model.predict(X_test)
    mae = mean_absolute_error(y_test, y_pred)
    # RMSE is taken as the square root of the MSE returned by scikit-learn.
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    results[model_name] = {'mae': mae, 'rmse': rmse, 'y_pred': y_pred}

    print(f"{model_name} - MAE: {mae:.2f}, RMSE: {rmse:.2f}")

Random Forest - MAE: 1.24, RMSE: 2.07
Gradient Boosting - MAE: 1.31, RMSE: 1.91
XGBoost - MAE: 1.00, RMSE: 1.74
SVR - MAE: 3.14, RMSE: 4.44
