# Environmental Monitoring — Training Notebook
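This notebook loads the synthetic environmental dataset from `environment.csv`, trains an AQI regressor, an AQI classifier, and an Isolation Forest anomaly detector, and saves the fitted models and scalers to `model/`.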


In [11]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier, IsolationForest
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score, accuracy_score, classification_report
import joblib
import os


In [12]:
# Load the dataset from a path relative to the notebook's working directory
# rather than a machine-specific absolute path, so the notebook can be re-run
# on any machine that has `environment.csv` next to it.
df = pd.read_csv("environment.csv")

# Bare last expression -> rich preview of the first five rows.
df.head()

Unnamed: 0,pm2_5,pm10,no2,co,o3,so2,temp,humidity,wind_speed,ph,turbidity,dissolved_oxygen,aqi,aqi_category
0,69.141512,99.004669,33.804249,1.245385,21.550009,1.908579,28.169621,65.696437,3.623985,8.028005,19.6188,4.660653,153.257673,3
1,28.800477,87.866282,41.204723,1.905716,49.390986,9.62082,32.108505,43.860973,4.386224,6.389898,4.239545,5.584775,89.556492,1
2,82.513536,124.561745,15.184881,0.331846,21.346997,1.0,28.113701,42.044932,7.981991,7.508251,0.904978,3.213752,200.890615,4
3,88.216941,114.269327,12.244929,0.418183,37.099399,4.277153,26.723821,43.776393,4.119822,7.595102,3.825659,6.360764,231.224789,4
4,1.468944,1.0,43.645956,0.653793,27.491911,8.046469,28.191506,53.781252,1.925214,7.116703,4.291615,6.140923,8.769349,0


In [13]:
# Columns selected as model inputs.
features = [
    'pm2_5', 'pm10', 'no2', 'co', 'o3', 'so2',
    'temp', 'humidity', 'wind_speed',
]

# Input frame plus the two targets: `aqi` for regression and
# `aqi_category` for classification.
X = df[features]
y_reg = df['aqi']
y_cls = df['aqi_category']

# Hold out 20% of the rows for evaluating the regressor (fixed seed).
X_train_r, X_test_r, y_train_r, y_test_r = train_test_split(
    X,
    y_reg,
    test_size=0.2,
    random_state=42,
)

# Learn the scaling statistics from the training rows only, then apply
# the same transform to both splits.
scaler_r = StandardScaler().fit(X_train_r)
X_train_r_s = scaler_r.transform(X_train_r)
X_test_r_s = scaler_r.transform(X_test_r)


In [14]:
import pandas as pd

# Read the CSV from the working directory into `df`.
df = pd.read_csv("environment.csv")

# Sanity check: first rows, then the column index, one after the other.
print(df.head(), df.columns, sep="\n")


       pm2_5        pm10        no2        co         o3       so2       temp  \
0  69.141512   99.004669  33.804249  1.245385  21.550009  1.908579  28.169621   
1  28.800477   87.866282  41.204723  1.905716  49.390986  9.620820  32.108505   
2  82.513536  124.561745  15.184881  0.331846  21.346997  1.000000  28.113701   
3  88.216941  114.269327  12.244929  0.418183  37.099399  4.277153  26.723821   
4   1.468944    1.000000  43.645956  0.653793  27.491911  8.046469  28.191506   

    humidity  wind_speed        ph  turbidity  dissolved_oxygen         aqi  \
0  65.696437    3.623985  8.028005  19.618800          4.660653  153.257673   
1  43.860973    4.386224  6.389898   4.239545          5.584775   89.556492   
2  42.044932    7.981991  7.508251   0.904978          3.213752  200.890615   
3  43.776393    4.119822  7.595102   3.825659          6.360764  231.224789   
4  53.781252    1.925214  7.116703   4.291615          6.140923    8.769349   

   aqi_category  
0             3  
1 

In [15]:
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

# Regression target is the `aqi` column; every column other than the two
# targets (`aqi`, `aqi_category`) is kept as an input feature.
y = df["aqi"]
X = df.drop(columns=["aqi", "aqi_category"])

# 80/20 train/test split with a fixed seed.
X_train_r, X_test_r, y_train_r, y_test_r = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Standardise using statistics learned from the training rows only.
scaler_r = StandardScaler().fit(X_train_r)
X_train_r_s = scaler_r.transform(X_train_r)
X_test_r_s = scaler_r.transform(X_test_r)


In [16]:
# Fit the random-forest regressor on the scaled training split.
reg = RandomForestRegressor(
    n_estimators=150,
    max_depth=18,
    random_state=42,
    n_jobs=-1,
).fit(X_train_r_s, y_train_r)

# Score on the held-out split: R^2 first, then RMSE derived from the MSE.
preds_r = reg.predict(X_test_r_s)
print("R2:", r2_score(y_test_r, preds_r))

mse = mean_squared_error(y_test_r, preds_r)
rmse = np.sqrt(mse)
print("RMSE:", rmse)

# Persist the model together with the scaler it was trained with.
os.makedirs("model", exist_ok=True)
for _artifact, _target in (
    (reg, "model/aqi_regressor.pkl"),
    (scaler_r, "model/scaler_reg.pkl"),
):
    joblib.dump(_artifact, _target)
print("✅ Saved regressor and scaler")


R2: 0.9711539973898412
RMSE: 10.573879385583478
✅ Saved regressor and scaler


In [17]:
# Derive the labels from the current `df` (the frame X is built from) instead
# of relying on a `y_cls` left over from an earlier cell, so features and
# labels are guaranteed to come from the same load of the data.
y_cls = df["aqi_category"]

# Stratified 80/20 split: `stratify=y_cls` keeps the class proportions
# the same in the train and test splits.
X_train_c, X_test_c, y_train_c, y_test_c = train_test_split(
    X, y_cls, test_size=0.2, random_state=42, stratify=y_cls
)

# Scale with statistics learned from the training rows only.
scaler_c = StandardScaler()
X_train_c_s = scaler_c.fit_transform(X_train_c)
X_test_c_s = scaler_c.transform(X_test_c)

# Fit the classifier and report accuracy plus per-class metrics on the test split.
clf = RandomForestClassifier(n_estimators=150, max_depth=18, random_state=42, n_jobs=-1)
clf.fit(X_train_c_s, y_train_c)
preds_c = clf.predict(X_test_c_s)
print('Accuracy:', accuracy_score(y_test_c, preds_c))
print(classification_report(y_test_c, preds_c))

# Make sure the output folder exists so saving does not depend on another
# cell having created it first.
os.makedirs('model', exist_ok=True)
joblib.dump(clf, 'model/aqi_classifier.pkl')
joblib.dump(scaler_c, 'model/scaler_clf.pkl')
print('Saved classifier and scaler')


Accuracy: 0.8433333333333334
              precision    recall  f1-score   support

           0       0.92      0.81      0.86        72
           1       0.81      0.83      0.82       188
           2       0.82      0.85      0.84       341
           3       0.85      0.83      0.84       350
           4       0.87      0.90      0.89       237
           5       0.86      0.50      0.63        12

    accuracy                           0.84      1200
   macro avg       0.86      0.79      0.81      1200
weighted avg       0.84      0.84      0.84      1200

Saved classifier and scaler


In [18]:
# Fit an Isolation Forest on the feature frame X (passed as-is, not
# standardised), with the expected share of outliers set to 2%.
iso = IsolationForest(
    n_estimators=200,
    contamination=0.02,
    random_state=42,
).fit(X)

# Write the fitted detector into the model folder.
joblib.dump(iso, 'model/anomaly_detector.pkl')
print('Saved anomaly detector')


Saved anomaly detector


In [19]:
import os
import joblib

# Make sure the output folder exists.
os.makedirs("model", exist_ok=True)

# Write both trained estimators to disk.
for _estimator, _destination in (
    (reg, "model/aqi_regressor.pkl"),
    (clf, "model/aqi_classifier.pkl"),
):
    joblib.dump(_estimator, _destination)

print("✅ Models saved in 'model/' folder")


✅ Models saved in 'model/' folder


In [20]:
import os
import joblib

# Ensure the target folder is present before writing.
os.makedirs("model", exist_ok=True)

# Persist the regressor along with its fitted scaler.
_outputs = {
    "model/aqi_regressor.pkl": reg,
    "model/scaler_reg.pkl": scaler_r,
}
for _path, _obj in _outputs.items():
    joblib.dump(_obj, _path)

print("✅ Model and scaler saved successfully!")


✅ Model and scaler saved successfully!
