<a href="https://colab.research.google.com/github/Arvind-55555/Eco-City-Optimization-An-ML-Driven-Approach-to-Sustainable-Urban-Planning-and-Management/blob/main/Eco_City_Optimization_An_ML_Driven_Approach_to_Sustainable_Urban_Planning_and_Management.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error
import requests
from datetime import datetime, timedelta
import time

In [2]:
# API Key (Replace with your own from https://aqicn.org/api/)
API_KEY = "YOUR_AQICN_API_KEY"

# City for which you want to get air quality data
CITY_NAME = "Hyderabad"


def fetch_aqi_data(city, api_key):
    """Fetches air quality data from Aqicn API."""
    url = f"https://api.waqi.info/feed/{city}/?token={api_key}"
    try:
        response = requests.get(url)
        response.raise_for_status()  # Raise HTTPError for bad responses (4xx or 5xx)
        data = response.json()
        if data['status'] == 'ok':
            return data['data']
        else:
            print(f"API Error: {data['status']}")
            return None
    except requests.exceptions.RequestException as e:
        print(f"Error fetching data: {e}")
        return None

def preprocess_aqi_data(data):
    """Preprocesses the raw API response into a usable DataFrame."""
    if data is None:
        return None

    aqi = data.get('aqi')
    time_data = data.get('time', {})

    if not aqi or not time_data:
        print("Missing essential keys in API response.")
        return None

    df_data = {
        'aqi': [aqi],
        'time': [time_data['s']]
    }

    for pol in data.get('iaqi',{}):
      if pol != 'h' and pol != 't':
        df_data[pol] = [data['iaqi'][pol]['v']]

    df = pd.DataFrame(df_data)

    df['time'] = pd.to_datetime(df['time'])

    return df

# Creating Features

In [3]:
def create_features(df):
    """Creates features from the dataframe like Hour and minute, also handling missing data"""

    df['hour'] = df['time'].dt.hour
    df['minute'] = df['time'].dt.minute


    for pol in df.columns:
      if pol not in ['aqi', 'time','hour','minute']:
        df[pol] = df[pol].replace(r'^\s*$', np.nan, regex=True)  # Replace empty spaces with NaN
        df[pol] = pd.to_numeric(df[pol]) # Ensure numeric type
        df[pol] = df[pol].fillna(df[pol].mean())  # Fill NaN values with mean

    return df

# Training a Random Forest Regression Model

In [4]:
def train_model(df):
    """Trains a Random Forest Regression model."""
    if df is None:
      return None, None

    features = [col for col in df.columns if col not in ['aqi', 'time']]  # Select features
    X = df[features]
    y = df['aqi']
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    model = RandomForestRegressor(n_estimators=100, random_state=42)
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    mse = mean_squared_error(y_test, y_pred)
    mae = mean_absolute_error(y_test, y_pred)
    print(f"Model Performance: MSE={mse:.2f}, MAE={mae:.2f}")
    return model, features

def make_prediction(model, features, data_df):
    """Makes prediction based on live data"""
    if model is None:
      return None


    X = data_df[features]
    prediction = model.predict(X)
    return prediction

In [None]:
if __name__ == "__main__":

    # Fetch Real-Time Data (Repeat this to get a time series)
    all_data = []
    for i in range (50):
        time.sleep(60)  # Wait before fetching new data

        raw_data = fetch_aqi_data(CITY_NAME, API_KEY)
        if raw_data:
            processed_df = preprocess_aqi_data(raw_data)
            if processed_df is not None:
                all_data.append(processed_df)
        else:
             print(f"Failed to retrieve data at {datetime.now()}")


    # Concat all the data points into one dataframe
    if len(all_data) > 0:
      df = pd.concat(all_data, ignore_index=True)
      df = create_features(df)
      model, features = train_model(df)

      # Make Prediction on New data point
      new_raw_data = fetch_aqi_data(CITY_NAME, API_KEY)
      new_processed_df = preprocess_aqi_data(new_raw_data)
      if new_processed_df is not None:
        new_processed_df = create_features(new_processed_df)
        if model:
           prediction = make_prediction(model, features,new_processed_df)
           print(f"Predicted AQI: {prediction[0]:.2f}")
        else:
           print ("No model was generated.")

    else:
      print("No data was retrieved.")

API Error: error
Failed to retrieve data at 2024-12-31 10:47:31.600595
API Error: error
Failed to retrieve data at 2024-12-31 10:48:32.339166
API Error: error
Failed to retrieve data at 2024-12-31 10:49:33.107852
API Error: error
Failed to retrieve data at 2024-12-31 10:50:33.846808
API Error: error
Failed to retrieve data at 2024-12-31 10:51:34.586101
API Error: error
Failed to retrieve data at 2024-12-31 10:52:35.312858
API Error: error
Failed to retrieve data at 2024-12-31 10:53:36.015360
