In [None]:
import pandas as pd
import numpy as np
from prophet import Prophet
from sklearn.metrics import mean_absolute_error, mean_squared_error
import math

# Load dataset
def load_data(file_path):
    """Read the AQI CSV and return it with normalized column names.

    Headers are stripped of surrounding whitespace and lower-cased *before*
    the ``date`` column is parsed, so a header such as ``" Date "`` is
    accepted instead of making the read fail on a missing column.

    Parameters
    ----------
    file_path : str, path-like or file-like
        Anything ``pandas.read_csv`` accepts.

    Returns
    -------
    pandas.DataFrame
        The file contents with normalized column names and a parsed
        ``date`` column.

    Raises
    ------
    ValueError
        If the file has no ``date`` column after normalization.
    """
    df = pd.read_csv(file_path)
    df.columns = df.columns.str.strip().str.lower()  # Standardizing column names

    if 'date' not in df.columns:
        raise ValueError("Missing column 'date' in dataset")

    try:
        df['date'] = pd.to_datetime(df['date'])
    except (ValueError, TypeError):
        # Keep the raw values when they cannot be parsed, which is what
        # read_csv(parse_dates=...) does as well.
        pass

    print("Columns in dataset:", df.columns)  # Debugging step
    return df

# Preprocess data for a specific city
def preprocess_data(df, city_name):
    """Select one city's rows and reshape them to ``ds``/``y`` columns.

    The city comparison is case-insensitive. Only the ``date`` and
    ``index value`` columns are kept, renamed to ``ds`` and ``y``.
    An unknown city yields an empty frame that still has those two columns.
    """
    wanted = city_name.lower()
    is_match = df['city'].str.lower().eq(wanted)
    selected = df.loc[is_match, ['date', 'index value']]
    return selected.rename(columns={'date': 'ds', 'index value': 'y'})

# Train Prophet Model
def train_prophet_model(df):
    """Fit a Prophet model with default settings on ``df`` and return it.

    ``df`` is handed to ``Prophet.fit`` as-is; within this file it is the
    ``ds``/``y`` frame produced by ``preprocess_data``.
    """
    fitted = Prophet()
    fitted.fit(df)
    return fitted

# Predict Future AQI
def predict_future_aqi(model, future_date):
    """Ask ``model`` for a forecast on a single date.

    A one-row frame with a ``ds`` column holding ``future_date`` is passed
    to ``model.predict``. Only the date, the point estimate and the
    lower/upper bounds of the result are returned.
    """
    output_columns = ['ds', 'yhat', 'yhat_lower', 'yhat_upper']
    single_day = pd.DataFrame({'ds': [future_date]})
    prediction = model.predict(single_day)
    return prediction[output_columns]

# Evaluate Model
def evaluate_model(model, df):
    """Return ``(mae, rmse)`` of the model's predictions against ``df['y']``.

    Predictions are generated for the frame returned by
    ``model.make_future_dataframe(periods=30)`` and inner-joined to ``df``
    on ``ds``. Only dates present in ``df`` therefore contribute to the
    metrics; predicted rows without a matching actual are dropped.
    """
    horizon = model.make_future_dataframe(periods=30)  # Predict next 30 days for testing
    predicted = model.predict(horizon).set_index('ds')[['yhat']]

    # Align actuals and predictions on the date index.
    observed = df.set_index('ds')
    paired = observed.join(predicted, how='inner')

    y_true, y_pred = paired['y'], paired['yhat']
    mae = mean_absolute_error(y_true, y_pred)
    rmse = math.sqrt(mean_squared_error(y_true, y_pred))
    return mae, rmse

# Main function to run the prediction
def main():
    """Run the interactive AQI workflow: load, prompt, train, predict, evaluate.

    Reads the dataset from a hard-coded path, asks for a city and a future
    date, fits a model on that city's history, then prints the forecast for
    the requested date followed by the MAE/RMSE from ``evaluate_model``.
    Returns early, after printing a message, when the date cannot be parsed
    or the city is not in the dataset.
    """
    file_path = r'C:\Users\ASUS\Desktop\Climate1\AQI.csv'  # Change this to your actual file path
    df = load_data(file_path)

    # Strip stray whitespace so input like "Hyderabad " still matches.
    city_name = input("Enter city name: ").strip()
    future_date = input("Enter future date (YYYY-MM-DD): ").strip()

    # Reject an unusable date before training rather than failing afterwards.
    try:
        parsed_date = pd.to_datetime(future_date)
    except (ValueError, TypeError):
        parsed_date = pd.NaT
    if pd.isna(parsed_date):
        print("Invalid date. Please enter the date as YYYY-MM-DD.")
        return

    # Preprocess
    city_data = preprocess_data(df, city_name)

    if city_data.empty:
        print("City not found in dataset. Please check the input.")
        return

    # Train model
    model = train_prophet_model(city_data)

    # Predict
    forecast = predict_future_aqi(model, future_date)

    # Show Prediction
    print("\nPredicted AQI (Index Value) for", city_name, "on", future_date)
    print(forecast)

    # Evaluate model
    mae, rmse = evaluate_model(model, city_data)
    print("\nModel Evaluation:")
    print(f"Mean Absolute Error (MAE): {mae:.2f}")
    print(f"Root Mean Squared Error (RMSE): {rmse:.2f}")

# Run the workflow only when this code executes as the top-level module
# (which includes running it as a notebook cell); it is skipped on import.
if __name__ == "__main__":
    main()


Columns in dataset: Index(['date', 'city', 'no. stations', 'air quality', 'index value',
       'prominent pollutant', 'region'],
      dtype='object')


20:27:50 - cmdstanpy - INFO - Chain [1] start processing
20:27:51 - cmdstanpy - INFO - Chain [1] done processing



Predicted AQI (Index Value) for Hyderabad on 2025-02-28
          ds      yhat  yhat_lower  yhat_upper
0 2025-02-28  93.88255   52.916739  130.675626

Model Evaluation:
Mean Absolute Error (MAE): 22.19
Root Mean Squared Error (RMSE): 30.67


In [None]:
import pandas as pd
import numpy as np
from prophet import Prophet
from sklearn.metrics import mean_absolute_error, mean_squared_error
import math

# Load dataset
def load_data(file_path):
    """Read the AQI CSV and return it with normalized column names.

    Headers are stripped of surrounding whitespace and lower-cased *before*
    the ``date`` column is parsed, so a header such as ``" Date "`` is
    accepted instead of making the read fail on a missing column.

    Parameters
    ----------
    file_path : str, path-like or file-like
        Anything ``pandas.read_csv`` accepts.

    Returns
    -------
    pandas.DataFrame
        The file contents with normalized column names and a parsed
        ``date`` column.

    Raises
    ------
    ValueError
        If the file has no ``date`` column after normalization.
    """
    df = pd.read_csv(file_path)
    df.columns = df.columns.str.strip().str.lower()  # Standardizing column names

    if 'date' not in df.columns:
        raise ValueError("Missing column 'date' in dataset")

    try:
        df['date'] = pd.to_datetime(df['date'])
    except (ValueError, TypeError):
        # Keep the raw values when they cannot be parsed, which is what
        # read_csv(parse_dates=...) does as well.
        pass

    print("Columns in dataset:", df.columns)  # Debugging step
    return df

# Preprocess data for a specific city
def preprocess_data(df, city_name):
    """Select one city's rows and reshape them to ``ds``/``y`` columns.

    The city comparison is case-insensitive. When at least one row matches,
    the ``date`` and ``index value`` columns are returned renamed to ``ds``
    and ``y``. Otherwise an error message is printed and a completely empty
    DataFrame is returned so the caller can detect the failure.
    """
    matches = df[df['city'].str.lower().eq(city_name.lower())]

    if not matches.empty:
        series_columns = matches[['date', 'index value']]
        return series_columns.rename(columns={'date': 'ds', 'index value': 'y'})  # Standardized column names

    print("Error: City not found in dataset. Please check the input.")
    return pd.DataFrame()  # Return empty DataFrame to handle errors

# Train Prophet Model with Log Scaling and Seasonality
def train_prophet_model(df):
    """Fit a Prophet model on ``log1p``-transformed ``y`` values.

    The transform is applied to a copy, so the caller's frame keeps its
    original-scale ``y``. This matters because the same frame is later
    compared against back-transformed predictions; mutating it in place
    would mix log-scale actuals with original-scale forecasts.

    Parameters
    ----------
    df : pandas.DataFrame
        Training data with ``ds`` and ``y`` columns. Not modified.

    Returns
    -------
    Prophet
        Model fitted on the log scale, with yearly, weekly and a custom
        30.5-day "monthly" seasonality. Its predictions need ``np.expm1``
        to return to the original scale.
    """
    training_frame = df.copy()
    training_frame['y'] = np.log1p(training_frame['y'])  # Apply log transformation to handle large values

    model = Prophet(yearly_seasonality=True, weekly_seasonality=True, daily_seasonality=False)
    model.add_seasonality(name='monthly', period=30.5, fourier_order=5)  # Adding monthly seasonality

    model.fit(training_frame)
    return model

# Predict Future AQI with Adjustments
def predict_future_aqi(model, future_date):
    """Forecast a single date and return it on the original scale.

    ``model.predict`` is called on a one-row frame for ``future_date``.
    The point estimate and both bounds are passed through ``np.expm1`` to
    undo a ``log1p`` transform, then floored at zero. The result holds the
    date plus those three columns.
    """
    value_columns = ['yhat', 'yhat_lower', 'yhat_upper']

    forecast = model.predict(pd.DataFrame({'ds': [future_date]}))

    # Back-transform from log scale and forbid negative values in one pass.
    restored = np.expm1(forecast[value_columns]).clip(lower=0)
    forecast[value_columns] = restored

    return forecast[['ds'] + value_columns]

# Evaluate Model
def evaluate_model(model, df):
    """Return ``(mae, rmse)`` of back-transformed predictions against ``df['y']``.

    Predictions are generated for the frame returned by
    ``model.make_future_dataframe(periods=30)``, passed through ``np.expm1``
    and inner-joined to ``df`` on ``ds``. Only dates present in ``df``
    therefore contribute to the metrics.

    NOTE(review): ``df['y']`` is used as-is, so it must be on the original
    (non-log) scale for the comparison to be meaningful. Confirm that the
    caller has not log-transformed this frame in place.
    """
    horizon = model.make_future_dataframe(periods=30)  # Predict next 30 days for testing
    log_predictions = model.predict(horizon).set_index('ds')[['yhat']]

    # Undo the log scale before comparing with the actual values.
    predicted = np.expm1(log_predictions)

    # Align actuals and predictions on the date index.
    observed = df.set_index('ds')
    paired = observed.join(predicted, how='inner')

    y_true, y_pred = paired['y'], paired['yhat']
    mae = mean_absolute_error(y_true, y_pred)
    rmse = math.sqrt(mean_squared_error(y_true, y_pred))
    return mae, rmse

# Main function to run the prediction
def main():
    """Run the interactive AQI workflow: load, prompt, train, predict, evaluate.

    Reads the dataset from a hard-coded path, asks for a city and a future
    date, fits a model on that city's history, then prints the forecast for
    the requested date followed by the MAE/RMSE from ``evaluate_model``.
    Returns early when the date cannot be parsed (after printing a message)
    or when ``preprocess_data`` finds no rows for the city.
    """
    file_path = r'C:\Users\ASUS\Desktop\Climate1\AQI.csv'  # Change this to your actual file path
    df = load_data(file_path)

    # Strip stray whitespace so input like "Hyderabad " still matches.
    city_name = input("Enter city name: ").strip()
    future_date = input("Enter future date (YYYY-MM-DD): ").strip()

    # Reject an unusable date before training rather than failing afterwards.
    try:
        parsed_date = pd.to_datetime(future_date)
    except (ValueError, TypeError):
        parsed_date = pd.NaT
    if pd.isna(parsed_date):
        print("Invalid date. Please enter the date as YYYY-MM-DD.")
        return

    # Preprocess
    city_data = preprocess_data(df, city_name)

    if city_data.empty:
        return  # Exit if no data is found for the city

    # Train model on a copy: training may rescale ``y``, and the evaluation
    # below must compare predictions against the original-scale values.
    model = train_prophet_model(city_data.copy())

    # Predict
    forecast = predict_future_aqi(model, future_date)

    # Show Prediction
    print("\nPredicted AQI (Index Value) for", city_name, "on", future_date)
    print(forecast)

    # Evaluate model
    mae, rmse = evaluate_model(model, city_data)
    print("\nModel Evaluation:")
    print(f"Mean Absolute Error (MAE): {mae:.2f}")
    print(f"Root Mean Squared Error (RMSE): {rmse:.2f}")

# Run the workflow only when this code executes as the top-level module
# (which includes running it as a notebook cell); it is skipped on import.
if __name__ == "__main__":
    main()


Columns in dataset: Index(['date', 'city', 'no. stations', 'air quality', 'index value',
       'prominent pollutant', 'region'],
      dtype='object')


: 