In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, ExtraTreesRegressor, StackingRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error


In [2]:
# Upload the dataset from the local machine via Colab's upload helper;
# `uploaded` keeps whatever files.upload() returns.
from google.colab import files
uploaded = files.upload()


Saving AQI-and-Lat-Long-of-Countries.csv to AQI-and-Lat-Long-of-Countries.csv


In [3]:
import pandas as pd

# Read the uploaded CSV into a DataFrame and preview its first rows.
DATA_FILE = "AQI-and-Lat-Long-of-Countries.csv"
data = pd.read_csv(DATA_FILE)
data.head()


Unnamed: 0,AQI Value,CO AQI Value,Ozone AQI Value,NO2 AQI Value,PM2.5 AQI Value,lat,lng
0,51,1,36,0,51,44.7444,44.2031
1,41,1,5,1,41,-5.29,-44.49
2,41,1,5,1,41,-11.2958,-41.9869
3,66,1,39,2,66,37.1667,15.1833
4,34,1,34,0,20,53.0167,20.8833


In [4]:
# Separate the prediction target from the remaining feature columns.
TARGET_COLUMN = "AQI Value"
X = data.drop(columns=[TARGET_COLUMN])
y = data[TARGET_COLUMN]

# Preview both pieces side by side.
X.head(), y.head()


(   CO AQI Value  Ozone AQI Value  NO2 AQI Value  PM2.5 AQI Value      lat  \
 0             1               36              0               51  44.7444   
 1             1                5              1               41  -5.2900   
 2             1                5              1               41 -11.2958   
 3             1               39              2               66  37.1667   
 4             1               34              0               20  53.0167   
 
        lng  
 0  44.2031  
 1 -44.4900  
 2 -41.9869  
 3  15.1833  
 4  20.8833  ,
 0    51
 1    41
 2    41
 3    66
 4    34
 Name: AQI Value, dtype: int64)

In [5]:
from sklearn.model_selection import train_test_split

# Step 4: Train-test split
# Hold out 20% of the rows for testing; the fixed random_state keeps the
# split identical between runs.
split_parts = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split_parts

X_train.shape, X_test.shape




((13356, 6), (3339, 6))

In [6]:
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.linear_model import LinearRegression

# Step 5: Train base models
# Build the three base regressors (the two tree ensembles are seeded), then
# fit each of them on the training split in turn.
model_rf = RandomForestRegressor(random_state=42)
model_gb = GradientBoostingRegressor(random_state=42)
model_lr = LinearRegression()

for base_estimator in (model_rf, model_gb, model_lr):
    base_estimator.fit(X_train, y_train)

print("Base models trained successfully!")


Base models trained successfully!


In [7]:
import numpy as np

# Step 6: Ensemble predictions (simple averaging)
# Predict the test split with every base model, in the order rf, gb, lr.
pred_rf, pred_gb, pred_lr = (
    fitted.predict(X_test) for fitted in (model_rf, model_gb, model_lr)
)

# Unweighted mean of the three prediction arrays.
ensemble_pred = (pred_rf + pred_gb + pred_lr) / 3

print("Ensemble prediction completed!")



Ensemble prediction completed!


In [8]:
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from xgboost import XGBRegressor

# Define models
# These unfitted estimators become the members of the voting ensemble built
# in the next cell. Each is seeded with the same random_state as the
# train/test split and the base models so the fitted ensemble (and the model
# file saved from it) is reproducible across runs.
rf = RandomForestRegressor(random_state=42)
gb = GradientBoostingRegressor(random_state=42)
xgb = XGBRegressor(random_state=42)



In [9]:
from sklearn.ensemble import VotingRegressor

# Combine the three regressors into one voting ensemble and fit it on the
# training split.
voting_members = [("rf", rf), ("gb", gb), ("xgb", xgb)]
ensemble_model = VotingRegressor(estimators=voting_members)

ensemble_model.fit(X_train, y_train)


In [10]:
import pickle

# Serialize the voting ensemble to disk so a later cell can reload it.
with open("ensemble_model.pkl", mode="wb") as pickle_file:
    pickle.dump(ensemble_model, pickle_file)

print("Model saved successfully!")


Model saved successfully!


In [11]:
import pickle

# Load the saved model.
# NOTE: unpickling can execute arbitrary code, so only load pickle files that
# this notebook wrote itself.
with open("ensemble_model.pkl", "rb") as f:
    loaded_model = pickle.load(f)

print("Model loaded successfully!")

# Test prediction with one sample (replace the X_test row with your data).
# iloc[[0]] keeps the row as a one-row DataFrame, so the column names the
# model was fitted with are preserved. Wrapping the row Series in a list
# would drop them and make the prediction depend on positional order only.
sample_pred = loaded_model.predict(X_test.iloc[[0]])
print("Sample prediction:", sample_pred)
print("Actual value:", y_test.iloc[0])



Model loaded successfully!
Sample prediction: [37.65723165]
Actual value: 38




In [12]:
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.svm import SVR
from sklearn.neighbors import KNeighborsRegressor


In [13]:
# Fit the five regressors that are compared in the evaluation cells below.
# The two tree ensembles are seeded with the same random_state as the
# train/test split so the reported metrics are reproducible across runs.

# Linear Regression
linear_model = LinearRegression()
linear_model.fit(X_train, y_train)

# Random Forest
rf_model = RandomForestRegressor(random_state=42)
rf_model.fit(X_train, y_train)

# Gradient Boosting
gb_model = GradientBoostingRegressor(random_state=42)
gb_model.fit(X_train, y_train)

# NOTE(review): SVR and KNN are distance-based and are fitted here on the
# unscaled features; confirm whether a scaling step was intended.

# Support Vector Regression
svr_model = SVR()
svr_model.fit(X_train, y_train)

# KNN
knn_model = KNeighborsRegressor()
knn_model.fit(X_train, y_train)


In [14]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import numpy as np

def evaluate_model(model, X_test, y_test):
    """Score a fitted regressor on a held-out set.

    Args:
        model: Object exposing ``predict(X)``.
        X_test: Features passed to ``model.predict``.
        y_test: True target values compared against the predictions.

    Returns:
        Tuple ``(mae, mse, rmse, r2)`` where ``rmse`` is the square root of
        ``mse``.
    """
    predictions = model.predict(X_test)
    squared_error = mean_squared_error(y_test, predictions)
    return (
        mean_absolute_error(y_test, predictions),
        squared_error,
        np.sqrt(squared_error),
        r2_score(y_test, predictions),
    )


In [15]:
# Fitted estimators to compare, keyed by the name shown in the report.
models = {
    "Linear Regression": linear_model,
    "Random Forest": rf_model,
    "Gradient Boosting": gb_model,
    "SVR": svr_model,
    "KNN": knn_model
}

# Labels for the values returned by evaluate_model, in the order it returns them.
metric_names = ("MAE", "MSE", "RMSE", "R2 Score")

# Score every model on the test split and collect one metrics dict per model.
results = {}

for name, model in models.items():
    results[name] = dict(zip(metric_names, evaluate_model(model, X_test, y_test)))


In [16]:
# Print the four core metrics of every evaluated model, one block per model.
for model_name, metrics in results.items():
    print(f"\nModel: {model_name}")
    for metric_key in ("MAE", "MSE", "RMSE", "R2 Score"):
        print(f"{metric_key}: {metrics[metric_key]}")



Model: Linear Regression
MAE: 4.126360949132347
MSE: 41.797808105382074
RMSE: 6.465122435451789
R2 Score: 0.9747980672314599

Model: Random Forest
MAE: 0.08995807127882603
MSE: 2.2813552860137762
RMSE: 1.5104156004271725
R2 Score: 0.9986244598665481

Model: Gradient Boosting
MAE: 0.8292023134435788
MSE: 3.1556967138259404
RMSE: 1.7764280773017354
R2 Score: 0.9980972768663077

Model: SVR
MAE: 2.920636773787887
MSE: 278.5382318508223
RMSE: 16.689464696353273
R2 Score: 0.8320557438113946

Model: KNN
MAE: 1.3165019466906263
MSE: 6.64303084755915
RMSE: 2.5774077767321084
R2 Score: 0.9959945933916576


In [17]:
# Re-score every model, this time adding an "Accuracy (%)" column, which is
# simply the R2 score multiplied by 100.
result_columns = ("MAE", "MSE", "RMSE", "R2 Score", "Accuracy (%)")
results = {}

for name, model in models.items():
    mae, mse, rmse, r2 = evaluate_model(model, X_test, y_test)
    results[name] = dict(zip(result_columns, (mae, mse, rmse, r2, r2 * 100)))


In [18]:
import pandas as pd

# Tabulate the metrics: build the frame with one column per model, then
# transpose so each model becomes a row and each metric a column.
metrics_by_model = pd.DataFrame(results)
df_results = metrics_by_model.transpose()
df_results



Unnamed: 0,MAE,MSE,RMSE,R2 Score,Accuracy (%)
Linear Regression,4.126361,41.797808,6.465122,0.974798,97.479807
Random Forest,0.089958,2.281355,1.510416,0.998624,99.862446
Gradient Boosting,0.829202,3.155697,1.776428,0.998097,99.809728
SVR,2.920637,278.538232,16.689465,0.832056,83.205574
KNN,1.316502,6.643031,2.577408,0.995995,99.599459


In [19]:
import joblib

# Save the best-scoring estimator explicitly. The bare name `model` is only
# the leftover variable of the evaluation loop (the last entry of `models`),
# not a deliberately chosen model, so it must not be what gets persisted.
best_model_name = min(results, key=lambda candidate: results[candidate]["RMSE"])
print(f"Saving best model by RMSE: {best_model_name}")
joblib.dump(models[best_model_name], "aqi_model.pkl")


['aqi_model.pkl']

In [20]:
!pip install gradio




In [21]:
import gradio as gr
import joblib
import numpy as np


In [22]:
# Input widgets, listed in the same order as predict_aqi's parameters:
# the city dropdown first, then one numeric field per pollutant.
city_choices = ["Hyderabad", "Delhi", "Mumbai", "Chennai", "Bangalore", "Kolkata"]
pollutant_labels = ("PM2.5", "PM10", "NO2", "SO2", "CO", "O3")

inputs = [
    gr.Dropdown(city_choices, label="City", info="Select the city"),
    *(gr.Number(label=pollutant) for pollutant in pollutant_labels),
]


In [23]:
# Column names, in order, of the training frame X. The saved estimator was
# fitted on exactly these features, so predictions must supply them by name.
FEATURE_COLUMNS = [
    "CO AQI Value",
    "Ozone AQI Value",
    "NO2 AQI Value",
    "PM2.5 AQI Value",
    "lat",
    "lng",
]

# Approximate (lat, lng) of every city offered by the "City" dropdown. The
# training data uses coordinates as features, so the selected city is
# translated into them before predicting.
CITY_COORDINATES = {
    "Hyderabad": (17.3850, 78.4867),
    "Delhi": (28.6139, 77.2090),
    "Mumbai": (19.0760, 72.8777),
    "Chennai": (13.0827, 80.2707),
    "Bangalore": (12.9716, 77.5946),
    "Kolkata": (22.5726, 88.3639),
}

# Inclusive upper bound of each AQI band, in ascending order; anything above
# the last bound falls into SEVERE_CATEGORY.
AQI_CATEGORIES = (
    (50, "Good 😊"),
    (100, "Satisfactory 🙂"),
    (200, "Moderate 😐"),
    (300, "Poor 😕"),
    (400, "Very Poor 😣"),
)
SEVERE_CATEGORY = "Severe 😱"

# File written by the joblib.dump cell of this notebook.
MODEL_PATH = "aqi_model.pkl"

# Loaded estimators keyed by path, so the file is read once per kernel
# session. Re-run this cell after re-saving the model to drop the cache.
_MODEL_CACHE = {}


def _load_aqi_model(path=MODEL_PATH):
    """Return the estimator stored at ``path``, loading it on first use.

    joblib files are pickles: only load files this notebook produced.
    """
    if path not in _MODEL_CACHE:
        _MODEL_CACHE[path] = joblib.load(path)
    return _MODEL_CACHE[path]


def _aqi_category(value):
    """Map an AQI value to the label of the first band whose bound it does not exceed."""
    for upper_bound, label in AQI_CATEGORIES:
        if value <= upper_bound:
            return label
    return SEVERE_CATEGORY


def predict_aqi(city, PM2_5, PM10, NO2, SO2, CO, O3):
    """Predict the AQI for a city from pollutant readings.

    The feature row is assembled by column name in the order used for
    training (CO, Ozone, NO2, PM2.5, lat, lng) instead of passing the raw
    inputs positionally, and the model is read from the saved file rather
    than from a leftover notebook variable.

    Args:
        city: One of the keys of CITY_COORDINATES; supplies lat/lng.
        PM2_5: Value for the "PM2.5 AQI Value" feature.
        PM10: Accepted for interface compatibility; the training data has
            no matching column, so it is not used.
        NO2: Value for the "NO2 AQI Value" feature.
        SO2: Accepted for interface compatibility; not used (no matching
            training column).
        CO: Value for the "CO AQI Value" feature.
        O3: Value for the "Ozone AQI Value" feature.

    Returns:
        Tuple ``(city, prediction, category)`` with the prediction rounded
        to two decimals.

    Raises:
        ValueError: If the city is unknown or a used pollutant value is
            missing (None).
    """
    if city not in CITY_COORDINATES:
        raise ValueError(f"Unknown city: {city!r}")

    used_values = {"PM2.5": PM2_5, "NO2": NO2, "CO": CO, "O3": O3}
    missing = [label for label, value in used_values.items() if value is None]
    if missing:
        raise ValueError(f"Missing value(s) for: {', '.join(missing)}")

    lat, lng = CITY_COORDINATES[city]
    feature_row = [float(CO), float(O3), float(NO2), float(PM2_5), lat, lng]
    input_data = pd.DataFrame([feature_row], columns=FEATURE_COLUMNS)

    # float() turns the numpy scalar into a plain number for the UI.
    prediction = round(float(_load_aqi_model().predict(input_data)[0]), 2)

    # AQI Category
    category = _aqi_category(prediction)

    return city, prediction, category


In [24]:
# Output widgets, one per value returned by predict_aqi
# (city, predicted AQI, AQI category), in that order.
selected_city_box = gr.Textbox(label="Selected City")
predicted_aqi_box = gr.Number(label="Predicted AQI")
aqi_category_box = gr.Textbox(label="AQI Category")

outputs = [selected_city_box, predicted_aqi_box, aqi_category_box]


In [25]:
# Wire predict_aqi to the input/output widgets and start the demo;
# share=True requests a public link in addition to the local server.
interface_options = {
    "fn": predict_aqi,
    "inputs": inputs,
    "outputs": outputs,
    "title": "Air Quality Index Prediction",
    "description": "Select your city, enter pollutant values, and get the AQI prediction + category.",
}
app = gr.Interface(**interface_options)
app.launch(share=True)

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://7fb74745de471141ae.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


