In [1]:
pip install pymongo

Note: you may need to restart the kernel to use updated packages.




In [5]:
import os

import pandas as pd
import numpy as np
import joblib
import certifi
from pymongo import MongoClient
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import Ridge
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Training script: load the feature table from MongoDB, fit four regressors on
# an 80/20 split, print R2 / MAE / RMSE for each, and persist every fitted
# model plus the shared scaler with joblib.

# 1. Connect to MongoDB
# The connection string embeds a username and password, so it is read from the
# environment rather than stored in the notebook. The password that used to be
# hardcoded here should be treated as compromised and rotated.
MONGO_URI = os.environ.get("MONGO_URI")
if not MONGO_URI:
    raise RuntimeError(
        "Set the MONGO_URI environment variable to the MongoDB connection "
        "string before running this cell."
    )
ca = certifi.where()  # path to certifi's CA bundle, passed to the TLS layer below
client = MongoClient(MONGO_URI, tlsCAFile=ca)
db = client["AQIPredictionSystem"]
collection = db["karachi_features"]

# 2. Load Data
print("📥 Fetching data from MongoDB...")
# The projection {'_id': 0} drops Mongo's document id so it never becomes a feature.
df = pd.DataFrame(list(collection.find({}, {'_id': 0})))
if df.empty or 'aqi' not in df.columns:
    # Fail with a clear message instead of an opaque KeyError from df.drop below.
    raise ValueError(
        "No usable training data: the 'karachi_features' collection returned "
        "no documents or the documents have no 'aqi' field."
    )
# Every remaining column is used as a feature.
# NOTE(review): StandardScaler needs numeric input — presumably all stored
# columns are numeric; confirm against the feature pipeline.
# NOTE(review): a near-perfect R2 from a linear model usually means a feature
# encodes the target; verify no column derived from `aqi` is left in X.
X = df.drop(columns=['aqi'])
y = df['aqi']

# 3. Split Data (80% Train, 20% Test)
# NOTE(review): this is a shuffled random split; if rows are time-ordered
# observations, a chronological split may be more appropriate — confirm.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 4. Feature Scaling (Essential for Neural Networks)
# The scaler is fitted on the training rows only and then applied to the test
# rows, so test statistics do not influence the transform.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# 5. Define the 4 Models
models = {
    "Ridge_Regression": Ridge(alpha=1.0),
    "Random_Forest": RandomForestRegressor(n_estimators=100, random_state=42),
    "Gradient_Boosting": GradientBoostingRegressor(n_estimators=100, random_state=42),
    "Neural_Network_MLP": MLPRegressor(hidden_layer_sizes=(100, 50), max_iter=1000, random_state=42)
}

# 6. Train and Evaluate
print("📊 Training and Comparing Models...")
results = []  # one metrics record per model, in training order

for name, model in models.items():
    # Training (all models, including the tree ensembles, see the scaled features,
    # so the saved scaler must be applied before any of them is used for inference)
    model.fit(X_train_scaled, y_train)
    preds = model.predict(X_test_scaled)

    # Metrics
    mae = mean_absolute_error(y_test, preds)
    mse = mean_squared_error(y_test, preds)
    rmse = np.sqrt(mse)
    r2 = r2_score(y_test, preds)
    results.append({"model": name, "r2": r2, "mae": mae, "rmse": rmse})

    print(f"\n--- {name} ---")
    print(f"Accuracy (R2 Score): {r2:.4f}")
    print(f"Avg Error (MAE): {mae:.2f} AQI points")
    print(f"Root Mean Sq Error (RMSE): {rmse:.2f} AQI points")

    # Save each model
    joblib.dump(model, f"{name}_model.pkl")

# Save the scaler (Must be used during inference)
joblib.dump(scaler, "scaler.pkl")

print("\n✅ All 4 models and the scaler have been saved as .pkl files!")





📥 Fetching data from MongoDB...
📊 Training and Comparing Models...

--- Ridge_Regression ---
Accuracy (R2 Score): 1.0000
Avg Error (MAE): 0.02 AQI points
Root Mean Sq Error (RMSE): 0.03 AQI points

--- Random_Forest ---
Accuracy (R2 Score): 0.9970
Avg Error (MAE): 0.60 AQI points
Root Mean Sq Error (RMSE): 1.30 AQI points

--- Gradient_Boosting ---
Accuracy (R2 Score): 0.9995
Avg Error (MAE): 0.33 AQI points
Root Mean Sq Error (RMSE): 0.52 AQI points

--- Neural_Network_MLP ---
Accuracy (R2 Score): 0.9994
Avg Error (MAE): 0.35 AQI points
Root Mean Sq Error (RMSE): 0.56 AQI points

✅ All 4 models and the scaler have been saved as .pkl files!
