<a href="https://colab.research.google.com/github/Cosmasrono/API/blob/main/Untitled9.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.neighbors import KNeighborsRegressor
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Read the pollution dataset and discard every row that has a missing value
data = pd.read_csv('/content/global air pollution dataset.csv').dropna()

# The per-pollutant AQI columns are the model inputs; the overall AQI is the target
feature_columns = ['CO AQI Value', 'Ozone AQI Value', 'NO2 AQI Value', 'PM2.5 AQI Value']
X = data[feature_columns]
y = data['AQI Value']

# Hold out 20% of the rows for evaluation (fixed seed keeps the split repeatable)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Learn the scaling statistics from the training rows only, then apply them
# to both splits so no test information leaks into the scaler
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Fit a 5-nearest-neighbour regressor on the standardized training features
knn = KNeighborsRegressor(n_neighbors=5).fit(X_train_scaled, y_train)

# Predict an AQI for every row of the dataset (training and test rows alike)
data['Predicted AQI'] = knn.predict(scaler.transform(X))

# Classify cities based on AQI
def classify_aqi(aqi):
    """Map a numeric AQI value to a coarse air-quality label.

    Returns 'Danger' for values above 150, 'Moderate' for values from 51
    up to and including 150, and 'Good' for everything else (which
    includes values below 51 and NaN, since NaN fails both comparisons).
    """
    if aqi > 150:
        return 'Danger'
    # Anything that reaches this point is not above 150, so only the
    # lower bound of the moderate band still needs checking.
    if aqi >= 51:
        return 'Moderate'
    return 'Good'

# Label every row with the category of its predicted AQI
data['AQI Category'] = data['Predicted AQI'].map(classify_aqi)

# Boolean masks, one per category
is_danger = data['AQI Category'] == 'Danger'
is_moderate = data['AQI Category'] == 'Moderate'
is_good = data['AQI Category'] == 'Good'

# Ten highest predictions among 'Danger' and 'Moderate' rows,
# ten lowest predictions among 'Good' rows
top_10_danger = data.loc[is_danger].nlargest(10, 'Predicted AQI')
top_10_moderate = data.loc[is_moderate].nlargest(10, 'Predicted AQI')
top_10_good = data.loc[is_good].nsmallest(10, 'Predicted AQI')

# Plotting function
def plot_cities(cities, category, color):
    """Show a horizontal bar chart of predicted AQI per city.

    Parameters
    ----------
    cities : table-like with 'City' and 'Predicted AQI' columns
        The rows to draw, one bar per row.
    category : str
        Category name interpolated into the chart title.
    color : str
        Matplotlib colour used for every bar.
    """
    _, ax = plt.subplots(figsize=(12, 8))
    ax.barh(cities['City'], cities['Predicted AQI'], color=color)
    ax.set_xlabel('Predicted AQI Value')
    ax.set_ylabel('City')
    ax.set_title(f'Top 10 Cities with {category} Air Quality (Predicted)')
    # barh places the first row at the bottom; flip the axis so the first
    # row of `cities` ends up at the top of the chart
    ax.invert_yaxis()
    plt.show()

# Draw one chart per category, in the order Danger, Moderate, Good
for subset, label, bar_color in (
    (top_10_danger, 'Danger', 'red'),
    (top_10_moderate, 'Moderate', 'orange'),
    (top_10_good, 'Good', 'green'),
):
    plot_cities(subset, label, bar_color)


In [None]:
# The metric functions are not imported anywhere else in the notebook, so
# bring them in here; without this the cell fails with NameError.
from sklearn.metrics import mean_squared_error, r2_score

# Fit a 5-nearest-neighbour regressor on the standardized training features
knn = KNeighborsRegressor(n_neighbors=5)
knn.fit(X_train_scaled, y_train)

# Predict AQI values for the test set
y_pred = knn.predict(X_test_scaled)

# Calculate metrics on the held-out rows
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f'Mean Squared Error (MSE): {mse:.2f}')
print(f'R-squared (R2) Score: {r2:.2f}')

# End points of the reference diagonal, computed once with the Series'
# own reductions rather than iterating it with the builtins min()/max()
actual_min, actual_max = y_test.min(), y_test.max()

# Plot predicted vs actual values
plt.figure(figsize=(10, 6))
plt.scatter(y_test, y_pred, color='blue', alpha=0.7)
# Diagonal line representing perfect predictions (predicted == actual)
plt.plot([actual_min, actual_max], [actual_min, actual_max], color='red')
plt.title('Predicted vs Actual AQI Values (KNN)')
plt.xlabel('Actual AQI Values')
plt.ylabel('Predicted AQI Values')
plt.grid(True)
plt.tight_layout()
plt.show()

In [None]:
# Example cut-offs: predictions at or above danger_threshold are
# "Most Dangerous", those at or above moderate_threshold are "Moderate",
# and everything below moderate_threshold is "Good"
danger_threshold = 150
moderate_threshold = 100

# Label each test-set prediction with its category
y_pred_categories = [
    "Most Dangerous" if pred >= danger_threshold
    else "Moderate" if pred >= moderate_threshold
    else "Good"
    for pred in y_pred
]

# Number of predictions per category
category_counts = pd.Series(y_pred_categories).value_counts()

# Pie chart of the category shares
_, ax = plt.subplots(figsize=(8, 8))
ax.pie(
    category_counts,
    labels=category_counts.index,
    autopct='%1.1f%%',
    startangle=140,
)
ax.set_title('Distribution of AQI Categories (Predicted)')
ax.axis('equal')  # Equal aspect ratio so the pie is drawn as a circle
plt.show()