In [1]:
import pandas as pd
import numpy as np
import joblib
import certifi
import os
from pymongo import MongoClient
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# 1. Connect to MongoDB using environment variables for security
MONGO_URI = os.getenv("MONGO_URI", "mongodb+srv://ali321hasnain_db_user:etRWe1e6ASFlpwEO@cluster0.1eklm6h.mongodb.net/?appName=Cluster0")
ca = certifi.where()
client = MongoClient(MONGO_URI, tlsCAFile=ca)
db = client["AQIPredictionSystem"]
collection = db["karachi_features"]

# 2. Load Data
print("ðŸ“¥ Fetching data from MongoDB...")
df = pd.DataFrame(list(collection.find({}, {'_id': 0})))

if 'timestamp' in df.columns:
    df = df.sort_values('timestamp')

# 3. Create 72-hour targets
target_cols = []
for i in range(1, 73):
    col_name = f'aqi_{i}h'
    df[col_name] = df['aqi'].shift(-i)
    target_cols.append(col_name)

df = df.dropna()

# 4. Define Features and Targets
X = df.drop(columns=target_cols + ['aqi'] + (['timestamp'] if 'timestamp' in df.columns else []))
y = df[target_cols]

# 5. Split and Scale
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# 6. Train ONLY Random Forest
print("ðŸ“Š Training Random Forest Model...")
rf_model = RandomForestRegressor(n_estimators=100, random_state=42)
rf_model.fit(X_train_scaled, y_train)

# 7. Evaluate
preds = rf_model.predict(X_test_scaled)
print(f"âœ… Random Forest Trained. R2 Score: {r2_score(y_test, preds):.4f}")

# 8. Save local artifacts for registration
joblib.dump(rf_model, "Random_Forest_model.pkl")
joblib.dump(scaler, "scaler.pkl")
print("ðŸ’¾ Model and scaler saved locally.")

ðŸ“¥ Fetching data from MongoDB...
ðŸ“Š Training Random Forest Model...
âœ… Random Forest Trained. R2 Score: 0.8672
ðŸ’¾ Model and scaler saved locally.
