In [1]:
# Stage A - Train AQI ML Model with correct column names
import os

import joblib
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

# Load dataset
# The CSV location defaults to the original local path, but can be overridden
# with the AQI_DATASET_PATH environment variable so the notebook can run on
# another machine without editing the code.
DATA_PATH = os.environ.get(
    "AQI_DATASET_PATH",
    "C:/Users/Purv Patel/Desktop/decimal_AQI_dataset.csv",
)
df = pd.read_csv(DATA_PATH)

# Print the first rows as a quick sanity check of the columns and values.
print("Dataset preview:")
print(df.head())

# Feature and target columns
# Six input columns are selected by name; the "Date" column present in the
# CSV is not passed to the model.
FEATURE_COLUMNS = [
    "CO2(ppm)",
    "C6H6(ppm)",
    "Alcohol(ppm)",
    "NH3(ppm)",
    "Temperature(C)",
    "Relative_Humidity(%)",
]
TARGET_COLUMN = "AQI_NH3"

# NOTE(review): the target is named AQI_NH3 while NH3(ppm) is also a feature;
# if the target is computed from that column this is a leak - confirm intent.
X = df[FEATURE_COLUMNS]
y = df[TARGET_COLUMN]

# Train-test split
# Hold out 20% of the rows for evaluation. The same fixed seed is used for the
# split and for the forest so repeated runs give the same result.
SEED = 42
TEST_FRACTION = 0.2

split_parts = train_test_split(X, y, test_size=TEST_FRACTION, random_state=SEED)
X_train, X_test, y_train, y_test = split_parts

# Model training
# fit() returns the fitted estimator itself, so it can be chained here.
model = RandomForestRegressor(n_estimators=100, random_state=SEED).fit(
    X_train, y_train
)

# Evaluation
# Predict on the held-out rows; the MSE below uses the unrounded predictions.
y_pred = model.predict(X_test)

# Round on the array and convert with tolist() so the elements are built-in
# floats. Built-in round() on NumPy scalars keeps them as np.float64, which
# prints as "np.float64(9.26)" instead of "9.26" in the sample list.
y_pred_rounded = y_pred.round(2).tolist()  # keep 2 decimal precision

print("MSE:", mean_squared_error(y_test, y_pred))
print("Sample predictions (rounded):", y_pred_rounded[:10])

# Save model
# Serialize the fitted estimator to the current working directory.
joblib.dump(value=model, filename="aqi_model.pkl")
print("Model saved as aqi_model.pkl")


Dataset preview:
         Date   CO2(ppm)  C6H6(ppm)  Alcohol(ppm)   NH3(ppm)  Temperature(C)  \
0  01-01-2000  32.661288   7.022631     14.279397  34.018359       26.313346   
1  02-01-2000  39.218357   8.353962     20.375159  27.379990       29.458356   
2  03-01-2000  29.087690   9.121642     16.990197  40.427167       26.270195   
3  04-01-2000  33.227194   6.864017     20.074983  38.843177       28.677198   
4  05-01-2000  32.074707  10.344094     23.023945  34.779399       29.786500   

   Relative_Humidity(%)  AQI_NH3  
0             89.543598     8.26  
1             85.115738     6.04  
2             73.959660    10.64  
3             93.564520     9.60  
4             59.528167     8.51  
MSE: 0.0865298686
Sample predictions (rounded): [np.float64(9.26), np.float64(7.46), np.float64(10.42), np.float64(9.42), np.float64(7.59), np.float64(6.6), np.float64(6.39), np.float64(6.52), np.float64(7.48), np.float64(9.53)]
Model saved as aqi_model.pkl
