# 🌬️ Air Quality Predictor: PM2.5 Forecast Using Machine Learning
This Colab notebook helps you train an ML model to predict PM2.5 air pollution levels based on historical data and weather conditions.

**Author**: [Your Name]  
**Date**: 2025-07-03  
**Tools**: Python, Pandas, NumPy, Matplotlib, Scikit-learn, XGBoost, Gradio  
**Data Source**: U.S. EPA AQS + Weather API  


In [None]:
!pip install xgboost pandas matplotlib scikit-learn --quiet


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
import xgboost as xgb


In [None]:
# Point both read_csv calls at your own files or URLs before running,
# e.g. "https://example.com/pm25_data.csv".
pm25_df = pd.read_csv('YOUR_PM25_DATA.csv')
weather_df = pd.read_csv('YOUR_WEATHER_DATA.csv')

# Join the pollution and weather tables on their shared 'date' column;
# only dates present in both tables are kept (inner join).
df = pm25_df.merge(weather_df, how='inner', on='date')
df.head()


In [None]:
# Parse the timestamps and put the rows in chronological order so that the
# forward-fill, rolling and diff operations below all follow time.
df['date'] = pd.to_datetime(df['date'])
df = df.sort_values('date')

# Carry the last known observation forward over gaps. DataFrame.ffill() is
# the supported spelling; fillna(method='ffill') is deprecated in pandas 2.x.
df = df.ffill()

# Engineered features.
# pm25_3day_avg is the mean of the three rows *before* the current one:
# shift(1) keeps the current row's pm25 (the prediction target) out of its own
# feature, which would otherwise leak the answer into the model.
# NOTE(review): "3 day" holds only if the data has one row per day - confirm.
df['pm25_3day_avg'] = df['pm25'].shift(1).rolling(window=3).mean()
df['temp_change'] = df['temperature'].diff()

# The earliest rows have no complete window / previous row, so their
# engineered columns are NaN; drop them (and any other incomplete rows).
df = df.dropna()

features = ['temperature', 'humidity', 'wind_speed', 'pm25_3day_avg', 'temp_change']
target = 'pm25'

X = df[features]
y = df[target]


In [None]:
# The rows are sorted by date and the features are built from earlier rows,
# so the hold-out set must come after the training set in time.
# shuffle=False keeps the first 80% of rows for training and the last 20% for
# testing; a random split would let the model train on rows that follow the
# ones it is evaluated on and report an over-optimistic score.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

# Fixed seed so repeated runs of the notebook give the same model.
model = xgb.XGBRegressor(objective='reg:squarederror', random_state=42)
model.fit(X_train, y_train)

# Evaluate on the held-out (most recent) rows.
y_pred = model.predict(X_test)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
r2 = r2_score(y_test, y_pred)

print("RMSE:", rmse)
print("R^2 Score:", r2)


In [None]:
# Overlay the held-out targets and the model's predictions on one axis,
# indexed by position within the test set.
fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(y_test.values, label='Actual PM2.5')
ax.plot(y_pred, label='Predicted PM2.5')
ax.set_title('Actual vs Predicted PM2.5')
ax.set_xlabel('Sample')
ax.set_ylabel('PM2.5')
ax.legend()
plt.show()


## 🖥️ Gradio Web App Interface

In [None]:
# Gradio interface for real-time predictions
import gradio as gr

def predict_pm25(temp, humidity, wind_speed, pm25_yesterday, temp_change):
    """Predict PM2.5 for a single set of conditions with the trained ``model``.

    The five arguments are passed to the model in this order, which is the
    order of the notebook's training feature list: temperature, humidity,
    wind_speed, pm25_3day_avg, temp_change.

    Args:
        temp: Temperature value.
        humidity: Humidity value.
        wind_speed: Wind speed value.
        pm25_yesterday: Recent PM2.5 level; this fills the model's
            ``pm25_3day_avg`` feature position.
        temp_change: Change in temperature relative to the previous row.

    Returns:
        The prediction as a plain Python float rounded to two decimals.

    Raises:
        ValueError: If any input is ``None`` (e.g. an empty form field).
    """
    values = [temp, humidity, wind_speed, pm25_yesterday, temp_change]
    if any(value is None for value in values):
        raise ValueError("All five inputs are required to make a prediction.")

    # Build a one-row frame labelled with the feature names stored in the
    # fitted booster, so the input matches what the model was trained on
    # instead of being an anonymous nested list.
    row = pd.DataFrame([values], columns=model.get_booster().feature_names)
    prediction = model.predict(row)[0]

    # Convert the NumPy scalar to a built-in float before rounding so the
    # result is a clean two-decimal value rather than a float32 approximation.
    return round(float(prediction), 2)

# Web form around predict_pm25. The inputs are listed in the same order as the
# function's parameters, because Gradio passes them positionally.
# The fourth field feeds the model's pm25_3day_avg feature, so it is labelled
# as a 3-day average rather than a single day's reading, and the description
# states what the model estimates: PM2.5 for the conditions entered.
interface = gr.Interface(
    fn=predict_pm25,
    inputs=[
        gr.Number(label="Temperature (°C)"),
        gr.Number(label="Humidity (%)"),
        gr.Number(label="Wind Speed (m/s)"),
        gr.Number(label="PM2.5 (3-day rolling average)"),
        gr.Number(label="Temperature Change")
    ],
    outputs=gr.Number(label="Predicted PM2.5"),
    title="🌬️ PM2.5 Forecast Predictor",
    description="Enter the weather conditions and the recent 3-day average PM2.5 to estimate the PM2.5 level."
)

# Start the app server and display the interface.
interface.launch()
