In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import numpy as np
import pandas as pd

# Load the data (replace with your actual data)
df = pd.read_csv('global-air-pollution-dataset.csv')

# Encode the AQI categories as integer class codes.
# The four codes that were already here are unchanged (later cells hard-code them
# as `target_class`). 'Very Unhealthy' and 'Hazardous' -- the remaining level names
# used by get_aqi_info at the end of this notebook -- are added because `.map`
# turns every label missing from the dictionary into NaN, and a classifier
# cannot be fitted on NaN labels.
# NOTE: the codes are plain class ids, not a severity ranking
# ('Unhealthy' = 2 comes before 'Unhealthy for Sensitive Groups' = 3).
aqi_category_codes = {
    'Good': 0,
    'Moderate': 1,
    'Unhealthy': 2,
    'Unhealthy for Sensitive Groups': 3,
    'Very Unhealthy': 4,
    'Hazardous': 5,
}
df['AQI Category'] = df['AQI Category'].map(aqi_category_codes)

# Train only on rows that ended up with a label; `df` itself is left untouched so
# the missing-value checks in the following cells still describe the full frame.
labelled_rows = df.dropna(subset=['AQI Category'])

# Select relevant features (SO2, NO2, PM10, PM2.5)
X = labelled_rows[['SO2 AQI Value', 'NO2 AQI Value', 'PM10 AQI Value', 'PM2.5 AQI Value']].values
y = labelled_rows['AQI Category'].values

# Split data into training and testing sets (fixed seed keeps the split repeatable)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize features: fit on the training split only, then reuse for the test split
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Train Random Forest model on the standardised features
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)


In [None]:
# Count the missing (NaN) entries in every column of the dataset
print(df.isna().sum(axis=0))


In [None]:
# Remove, in place, every row whose 'AQI Category' is missing; no other column is checked.
# NOTE(review): presumably these gaps are labels the category mapping in the first
# cell could not translate -- confirm against the raw CSV.
df.dropna(subset=['AQI Category'], inplace=True)


In [None]:
# Re-count missing values per column: 'AQI Category' should now report 0.
# Only that column was filtered, so the other columns may still contain gaps.
print(df.isna().sum(axis=0))


In [None]:
# Rebuild the feature matrix and the label vector from the cleaned frame
X = df[['SO2 AQI Value', 'NO2 AQI Value', 'PM10 AQI Value', 'PM2.5 AQI Value']].to_numpy()
y = df['AQI Category'].to_numpy()

# Hold out 20% of the rows for testing (fixed seed keeps the split repeatable)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Refit the existing scaler on the new training split, then apply it to the test split
X_train = scaler.fit(X_train).transform(X_train)
X_test = scaler.transform(X_test)

# Refit the existing random forest on the standardised training data
model.fit(X_train, y_train)


In [None]:
from scipy.optimize import minimize

def counterfactual_explanation(model, scaler, original_data, target_class, feature_names):
    """
    Search for feature values close to a data point that the model assigns to
    a desired target class.

    The search runs in the original (unscaled) feature units, limited to the
    0-500 AQI range. Every candidate is standardised with ``scaler`` before it
    is shown to ``model``, because the model was trained on standardised data.

    Args:
    - model: trained classifier exposing ``predict``; it must have been fitted
      on features transformed by ``scaler``
    - scaler: fitted scaler exposing ``transform`` and ``inverse_transform``
    - original_data: 1-D data point in the scaler's transformed space, i.e. a
      row of an already standardised matrix (this is what the notebook passes)
    - target_class: the desired class code
    - feature_names: list of feature names, one per entry of ``original_data``

    Returns:
    - dict mapping each feature name to the counterfactual value in original
      units. If the point is already predicted as ``target_class`` the
      (range-clipped) original values are returned unchanged. If the search
      fails to reach ``target_class`` a RuntimeWarning is emitted and the best
      point found is returned anyway.
    """
    import warnings  # local import: the notebook has no top-level `import warnings`

    lower, upper = 0.0, 500.0   # valid AQI range in original units
    span = upper - lower
    mismatch_penalty = 10.0     # larger than the distance term can ever be (<= 1)

    def predicted_class(raw_point):
        # The model only understands standardised input, so scale before predicting
        return model.predict(scaler.transform([raw_point]))[0]

    # Bring the standardised sample back to original units; that is the space
    # in which the bounds and the returned values are expressed
    scaled_point = np.asarray(original_data, dtype=float).reshape(1, -1)
    start = np.clip(scaler.inverse_transform(scaled_point)[0], lower, upper)

    # Nothing to change when the sample already has the requested class
    if predicted_class(start) == target_class:
        return dict(zip(feature_names, start))

    def objective_function(x):
        # Class codes are nominal, so use a hit/miss penalty instead of |pred - target|;
        # the normalised squared distance (in [0, 1]) prefers the closest hit
        miss = 0.0 if predicted_class(x) == target_class else mismatch_penalty
        return miss + float(np.mean(((x - start) / span) ** 2))

    # A tree ensemble is piecewise constant, so finite-difference gradients are
    # zero almost everywhere and a gradient-based method would stop at the
    # initial guess. Powell's method is derivative-free and honours the bounds.
    result = minimize(
        objective_function,
        start,
        method="Powell",
        bounds=[(lower, upper)] * len(feature_names),
    )
    candidate = np.clip(np.atleast_1d(result.x), lower, upper)

    if predicted_class(candidate) != target_class:
        warnings.warn(
            "counterfactual_explanation: the search did not reach the target class; "
            "the returned point is not a valid counterfactual.",
            RuntimeWarning,
            stacklevel=2,
        )

    # Return the adjusted features in original units
    return dict(zip(feature_names, candidate))

# Example Usage
original_data = X_test[0]  # First test-set sample (X_test was already standardised by `scaler` in an earlier cell)
target_class = 2  # Target class code; the category mapping in the first cell assigns 2 to 'Unhealthy'
feature_names = ['SO2 AQI Value', 'NO2 AQI Value', 'PM10 AQI Value', 'PM2.5 AQI Value']

# Ask for feature values that the model would assign to `target_class`
counterfactual = counterfactual_explanation(model, scaler, original_data, target_class, feature_names)
print("Counterfactual Explanation:", counterfactual)


In [None]:
# Filter the data to include only India
df_india = df[df['Country'] == 'India']

# Prepare the features and target variable again for the India dataset
X_india = df_india[['SO2 AQI Value', 'NO2 AQI Value', 'PM10 AQI Value', 'PM2.5 AQI Value']].values
y_india = df_india['AQI Category'].values

# Split data into training and testing sets for India data
X_train_india, X_test_india, y_train_india, y_test_india = train_test_split(X_india, y_india, test_size=0.2, random_state=42)

# Standardize features with a scaler of its own.
# Refitting the shared `scaler` here would silently change the object the global
# model and the earlier cells rely on, so re-running those cells afterwards would
# mix India statistics with the global model.
scaler_india = StandardScaler()
X_train_india = scaler_india.fit_transform(X_train_india)
X_test_india = scaler_india.transform(X_test_india)

# Train a separate Random Forest on India data (same settings as the global model)
model_india = RandomForestClassifier(n_estimators=100, random_state=42)
model_india.fit(X_train_india, y_train_india)

# Example Usage for Counterfactual Explanation
original_data_india = X_test_india[0]  # First sample of the (standardised) India test set
target_class = 1  # Target class code; the category mapping in the first cell assigns 1 to 'Moderate'
feature_names = ['SO2 AQI Value', 'NO2 AQI Value', 'PM10 AQI Value', 'PM2.5 AQI Value']

# Generate counterfactual explanation for the selected India sample
counterfactual_india = counterfactual_explanation(model_india, scaler_india, original_data_india, target_class, feature_names)
print("Counterfactual Explanation for India:", counterfactual_india)


In [None]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# Read the air-pollution dataset from disk (point this at your own CSV if needed)
df = pd.read_csv('global-air-pollution-dataset.csv')

# Keep only the rows recorded for India
india_df = df.loc[df['Country'] == 'India']

# Pollutant readings are the features; the AQI category name is the label
X = india_df[['SO2 AQI Value', 'PM10 AQI Value', 'NO2 AQI Value', 'PM2.5 AQI Value']].to_numpy()
y = india_df['AQI Category']

# Turn the category names into integer codes; `le` is kept so codes can be decoded later
le = LabelEncoder()
y_encoded = le.fit_transform(y)

# 80/20 train-test split with a fixed seed so the split is repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y_encoded,
    test_size=0.2,
    random_state=42,
)

# Fit a 100-tree random forest on the unscaled India training data
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Function to generate counterfactual explanations with a model-driven search
def generate_counterfactual(X_original, target_class, model, lambda_reg=0.1, max_iter=1000, learning_rate=0.01,
                            bounds=(0.0, 500.0), n_candidates=64, random_state=0):
    """
    Find a point close to ``X_original`` that ``model`` assigns to ``target_class``.

    The previous update rule only followed the gradient of a distance/L1 term
    that never involved the model, so the point merely drifted by about
    ``lambda_reg / 2`` and could not reach the target class. A tree ensemble has
    no useful gradient, so this version queries the model directly:

    1. sample candidates in a box around the original that grows each
       iteration until at least one candidate is predicted as the target;
    2. keep the cheapest hit, cost = ||X' - X||^2 + lambda_reg * ||X' - X||_1;
    3. put each feature back to its original value when the prediction stays
       on target (keeps the explanation sparse);
    4. bisect along the segment original -> hit to move as close to the
       original as the model allows.

    Args:
    - X_original: 1-D array-like of feature values (not modified); assumed to
      lie inside ``bounds``
    - target_class: class code the counterfactual should be predicted as
    - model: fitted classifier exposing ``predict`` on a 2-D array
    - lambda_reg: weight of the L1 term in the cost used to rank hits
    - max_iter: maximum number of sampling rounds
    - learning_rate: growth of the search radius per round, as a fraction of
      the feature range (0.01 with the default bounds = 5 AQI units per round)
    - bounds: (low, high) valid range applied to every feature
    - n_candidates: number of random candidates drawn per round
    - random_state: seed for the sampler, so results are repeatable

    Returns:
    - float64 array with the same shape as ``X_original``. A copy of the
      original is returned when it already has the target class, and also
      (with a RuntimeWarning) when no counterfactual was found.
    """
    import warnings  # local import: the notebook has no top-level `import warnings`

    lower, upper = bounds
    span = upper - lower

    # Work on a flat float copy so the caller's array is never modified
    X_original = np.asarray(X_original).astype(np.float64)
    out_shape = X_original.shape
    origin = X_original.ravel()

    def hits_target(points):
        # Boolean mask telling which rows the model assigns to the target class
        return np.asarray(model.predict(np.atleast_2d(points))) == target_class

    def change_cost(points):
        # Squared Euclidean distance plus an L1 penalty on the change
        delta = np.atleast_2d(points) - origin
        return np.sum(delta ** 2, axis=1) + lambda_reg * np.sum(np.abs(delta), axis=1)

    # If the sample already has the target class there is nothing to change
    if hits_target(origin)[0]:
        return origin.copy().reshape(out_shape)

    # Step 1 + 2: expanding random search, keep the cheapest hit of the first
    # round that produces any hit
    rng = np.random.default_rng(random_state)
    best = None
    for step in range(1, max_iter + 1):
        radius = min(learning_rate * step * span, span)
        offsets = rng.uniform(-radius, radius, size=(n_candidates, origin.size))
        samples = np.clip(origin + offsets, lower, upper)
        found = samples[hits_target(samples)]
        if len(found):
            best = found[np.argmin(change_cost(found))].copy()
            break

    if best is None:
        warnings.warn(
            "generate_counterfactual: no point of the target class was found; "
            "returning the original sample unchanged.",
            RuntimeWarning,
            stacklevel=2,
        )
        return origin.copy().reshape(out_shape)

    # Step 3: undo changes that are not needed, smallest change first
    for j in np.argsort(np.abs(best - origin)):
        if best[j] == origin[j]:
            continue
        trial = best.copy()
        trial[j] = origin[j]
        if hits_target(trial)[0]:
            best = trial

    # Step 4: bisect between the original (off target) and the hit (on target);
    # `best` is only ever replaced by a point the model confirmed as on target
    direction = best - origin
    near, far = 0.0, 1.0
    for _ in range(20):
        mid = 0.5 * (near + far)
        point = origin + mid * direction
        if hits_target(point)[0]:
            far, best = mid, point
        else:
            near = mid

    return best.reshape(out_shape)

# Example usage:
original_sample = X_test[0]  # Sample input from the test set
# Target class as an integer code produced by `le`.
# NOTE(review): LabelEncoder numbers the labels in sorted order, so code 1 is
# whatever `le.classes_[1]` is -- confirm it is the intended category.
target_class = 1
counterfactual = generate_counterfactual(original_sample, target_class, model)

print("Original Sample:", original_sample)
print("Counterfactual Sample:", counterfactual)

# Predict the counterfactual's AQI category and decode it back to its name
predicted_class = model.predict(counterfactual.reshape(1, -1))[0]
print(f"Predicted Class for Counterfactual: {le.inverse_transform([predicted_class])[0]}")


In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, roc_auc_score
import numpy as np

# Define hyperparameters for Grid Search
param_grid = {
    'n_estimators': [50, 100, 200],  # Number of trees
    'max_depth': [None, 10, 20, 30],  # Maximum depth of the tree
    'min_samples_split': [2, 5, 10],  # Minimum samples to split a node
    'min_samples_leaf': [1, 2, 4],  # Minimum samples required to be at a leaf node
    # Number of features to consider at each split.
    # 'auto' is deliberately not listed: scikit-learn deprecated it in 1.1 and
    # removed it in 1.3 (every fit using it then fails), and before that it was
    # just another name for 'sqrt' in classifiers, so it only duplicated work.
    'max_features': ['sqrt', 'log2'],
}

# Initialize the Random Forest model
rf = RandomForestClassifier(random_state=42)

# Initialize GridSearchCV with 5-fold cross-validation, using all CPU cores
grid_search = GridSearchCV(estimator=rf, param_grid=param_grid, cv=5, n_jobs=-1, verbose=2)

# Fit the grid search to the training data
grid_search.fit(X_train, y_train)

# Best parameters found by GridSearchCV
print("Best Parameters from Grid Search:", grid_search.best_params_)

# Use the best estimator found by grid search (refitted on the whole training set)
best_rf_model = grid_search.best_estimator_


In [None]:
# Predict the labels for the test set using the fine-tuned model
y_pred = best_rf_model.predict(X_test)

# Accuracy
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

# Classification Report (Precision, Recall, F1-Score)
print("Classification Report:\n", classification_report(y_test, y_pred))

# Confusion Matrix
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))

# ROC-AUC Score
# The binary/multi-class decision is based on the classes the model was trained
# on (the columns of predict_proba), not on the labels that happen to appear in
# the test split -- otherwise column 1 could belong to the wrong class.
class_probabilities = best_rf_model.predict_proba(X_test)
try:
    if class_probabilities.shape[1] == 2:
        # Binary case: score with the probability of the second class
        roc_auc = roc_auc_score(y_test, class_probabilities[:, 1])
    else:
        # Multi-class case: one-vs-rest AUC; `labels` tells the metric which
        # class each probability column belongs to
        roc_auc = roc_auc_score(
            y_test,
            class_probabilities,
            multi_class='ovr',
            labels=best_rf_model.classes_,
        )
    print("ROC-AUC Score:", roc_auc)
except ValueError as err:
    # Raised e.g. when a class has no samples in the test split
    print("ROC-AUC could not be computed:", err)



In [None]:
# Function to generate counterfactual explanations with a model-driven search
# NOTE: this redefines (shadows) the function of the same name from an earlier cell.
def generate_counterfactual(X_original, target_class, best_rf_model, lambda_reg=0.1, max_iter=1000, learning_rate=0.01,
                            bounds=(0.0, 500.0), n_candidates=64, random_state=0):
    """
    Find a point close to ``X_original`` that ``best_rf_model`` assigns to
    ``target_class``.

    The previous update rule only followed the gradient of a distance/L1 term
    that never involved the model, so the point merely drifted by about
    ``lambda_reg / 2`` and could not reach the target class. A tree ensemble has
    no useful gradient, so this version queries the model directly:

    1. sample candidates in a box around the original that grows each
       iteration until at least one candidate is predicted as the target;
    2. keep the cheapest hit, cost = ||X' - X||^2 + lambda_reg * ||X' - X||_1;
    3. put each feature back to its original value when the prediction stays
       on target (keeps the explanation sparse);
    4. bisect along the segment original -> hit to move as close to the
       original as the model allows.

    Args:
    - X_original: 1-D array-like of feature values (not modified); assumed to
      lie inside ``bounds``
    - target_class: class code the counterfactual should be predicted as
    - best_rf_model: fitted classifier exposing ``predict`` on a 2-D array
    - lambda_reg: weight of the L1 term in the cost used to rank hits
    - max_iter: maximum number of sampling rounds
    - learning_rate: growth of the search radius per round, as a fraction of
      the feature range (0.01 with the default bounds = 5 AQI units per round)
    - bounds: (low, high) valid range applied to every feature
    - n_candidates: number of random candidates drawn per round
    - random_state: seed for the sampler, so results are repeatable

    Returns:
    - float64 array with the same shape as ``X_original``. A copy of the
      original is returned when it already has the target class, and also
      (with a RuntimeWarning) when no counterfactual was found.
    """
    import warnings  # local import: the notebook has no top-level `import warnings`

    lower, upper = bounds
    span = upper - lower

    # Work on a flat float copy so the caller's array is never modified
    X_original = np.asarray(X_original).astype(np.float64)
    out_shape = X_original.shape
    origin = X_original.ravel()

    def hits_target(points):
        # Boolean mask telling which rows the model assigns to the target class
        return np.asarray(best_rf_model.predict(np.atleast_2d(points))) == target_class

    def change_cost(points):
        # Squared Euclidean distance plus an L1 penalty on the change
        delta = np.atleast_2d(points) - origin
        return np.sum(delta ** 2, axis=1) + lambda_reg * np.sum(np.abs(delta), axis=1)

    # If the sample already has the target class there is nothing to change
    if hits_target(origin)[0]:
        return origin.copy().reshape(out_shape)

    # Step 1 + 2: expanding random search, keep the cheapest hit of the first
    # round that produces any hit
    rng = np.random.default_rng(random_state)
    best = None
    for step in range(1, max_iter + 1):
        radius = min(learning_rate * step * span, span)
        offsets = rng.uniform(-radius, radius, size=(n_candidates, origin.size))
        samples = np.clip(origin + offsets, lower, upper)
        found = samples[hits_target(samples)]
        if len(found):
            best = found[np.argmin(change_cost(found))].copy()
            break

    if best is None:
        warnings.warn(
            "generate_counterfactual: no point of the target class was found; "
            "returning the original sample unchanged.",
            RuntimeWarning,
            stacklevel=2,
        )
        return origin.copy().reshape(out_shape)

    # Step 3: undo changes that are not needed, smallest change first
    for j in np.argsort(np.abs(best - origin)):
        if best[j] == origin[j]:
            continue
        trial = best.copy()
        trial[j] = origin[j]
        if hits_target(trial)[0]:
            best = trial

    # Step 4: bisect between the original (off target) and the hit (on target);
    # `best` is only ever replaced by a point the model confirmed as on target
    direction = best - origin
    near, far = 0.0, 1.0
    for _ in range(20):
        mid = 0.5 * (near + far)
        point = origin + mid * direction
        if hits_target(point)[0]:
            far, best = mid, point
        else:
            near = mid

    return best.reshape(out_shape)

# Example usage:
original_sample = X_test[21]  # Sample input from the test set (row 21)
# Target class as an integer code produced by `le`.
# NOTE(review): LabelEncoder numbers the labels in sorted order, so code 2 is
# whatever `le.classes_[2]` is -- confirm it is the intended category.
target_class = 2
counterfactual = generate_counterfactual(original_sample, target_class, best_rf_model)

print("Original Sample:", original_sample)
print("Counterfactual Sample:", counterfactual)

# Predict the counterfactual's AQI category with the tuned model and decode it back to its name
predicted_class = best_rf_model.predict(counterfactual.reshape(1, -1))[0]
print(f"Predicted Class for Counterfactual: {le.inverse_transform([predicted_class])[0]}")

In [None]:
def get_aqi_info(aqi):
    """Classify an AQI reading and list measures suited to that level.

    Args:
        aqi: numeric AQI reading.

    Returns:
        A ``(level, measures)`` tuple: the level name as a string and a newly
        built list of recommendation strings for that level.
    """
    # (inclusive upper limit, level name, measures), ordered from cleanest to most polluted
    bands = (
        (50, "Good", [
            "Air quality is satisfactory.",
            "Maintain current practices to keep air clean.",
            "Promote green spaces and plant more trees.",
            "Encourage use of renewable energy sources.",
            "Continue community awareness programs to sustain good air quality.",
            "Regularly monitor air quality and take preventive measures as needed.",
            "Encourage local governments to maintain clean public spaces.",
        ]),
        (100, "Moderate", [
            "Air quality is acceptable but could be improved.",
            "Reduce outdoor burning and vehicle emissions.",
            "Encourage carpooling and use of public transport.",
            "Limit the use of high-emission vehicles.",
            "Expand green spaces and promote energy-efficient appliances.",
            "Use fuel-efficient vehicles and conduct regular vehicle maintenance.",
            "Promote the use of low-VOC (Volatile Organic Compound) products.",
        ]),
        (150, "Unhealthy for Sensitive Groups", [
            "Sensitive groups should reduce outdoor activities.",
            "Minimize vehicle usage and promote cycling or walking.",
            "Adopt cleaner fuels and energy-efficient appliances.",
            "Implement stricter industrial emission controls.",
            "Promote electric and hybrid vehicles and use cleaner cooking fuels.",
            "Increase funding for air quality research and monitoring technologies.",
            "Support local initiatives to improve waste management and recycling.",
        ]),
        (200, "Unhealthy", [
            "Everyone should limit prolonged outdoor exertion.",
            "Reduce energy consumption at home and workplaces.",
            "Avoid burning waste and encourage proper disposal.",
            "Enhance green cover in urban areas to absorb pollutants.",
            "Encourage the use of public transportation and renewable energy sources.",
            "Implement pollution control measures during construction activities.",
            "Collaborate with industries to adopt advanced pollution-reduction technologies.",
        ]),
        (300, "Very Unhealthy", [
            "Avoid outdoor activities and stay indoors as much as possible.",
            "Use air purifiers indoors to maintain clean air.",
            "Control industrial emissions and encourage renewable energy sources.",
            "Implement stricter regulations on construction dust.",
            "Deploy air-cleaning towers in highly polluted areas and monitor hotspots using drones.",
            "Provide subsidies for solar panels and wind turbines to reduce reliance on fossil fuels.",
            "Encourage the development of urban forests and green belts around cities.",
        ]),
    )

    # The first band whose upper limit is not exceeded wins
    for ceiling, level, measures in bands:
        if aqi <= ceiling:
            return level, measures

    # A reading that fits no band (i.e. above 300) is reported as hazardous
    return "Hazardous", [
        "Health alert: Everyone should stay indoors and limit exposure.",
        "Use high-quality air purifiers indoors.",
        "Emergency reduction of emissions from vehicles and industries.",
        "Encourage government actions for immediate pollution control.",
        "Promote sustainable farming practices and reduce stubble burning.",
        "Provide masks and health support to vulnerable populations.",
        "Establish emergency air pollution response teams for critical areas.",
    ]

# Main program
def main():
    """Ask for an AQI value on standard input and print its level and measures.

    Any finite number is accepted, including decimal readings such as "42.5".
    Input that is not a finite number produces a short error message instead
    of a traceback.
    """
    import math  # local import: the notebook has no top-level `import math`

    try:
        # float() instead of int(): the prompt asks for a numeric value, and a
        # decimal reading is numeric; whole numbers classify exactly as before
        aqi = float(input("Enter the AQI value: "))
        if not math.isfinite(aqi):
            # "nan" / "inf" parse as floats but are not usable readings
            raise ValueError("AQI value must be finite")
        level, measures = get_aqi_info(aqi)
        print(f"Air Quality Level: {level}")
        print("Measures:")
        for measure in measures:
            print(f"- {measure}")
    except ValueError:
        print("Please enter a valid numeric AQI value.")

# Run the interactive prompt only when this code is executed as the main module
# (not when it is imported from elsewhere).
# NOTE(review): in a notebook kernel this condition presumably holds, so the cell
# will wait for keyboard input when it is run -- confirm this is intended.
if __name__ == "__main__":
    main()
