# Time-Series Forecasting of Carbon Monoxide (CO) and Nitrogen Dioxide (NO₂)
This project performs data cleaning, visualization, and time-series forecasting for air quality levels of CO and NO₂ using the Prophet model.

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
from prophet import Prophet
from sklearn.metrics import mean_squared_error, mean_absolute_error
import seaborn as sns
import numpy as np

# For warnings
import warnings
warnings.filterwarnings('ignore')


## Load and Inspect Data

In [2]:
df = pd.read_csv("https://raw.githubusercontent.com/rashakil-ds/Public-Datasets/refs/heads/main/airquality.csv")
df = df.dropna(axis=1, how='all')  # drop empty columns
df = df.iloc[:, :15]  # remove unnamed columns

# Combine Date and Time
df['Datetime'] = pd.to_datetime(df['Date'] + ' ' + df['Time'], dayfirst=True)
df.set_index('Datetime', inplace=True)
df = df[['CO(GT)', 'NO2(GT)']]
df = df.replace(-200, np.nan).dropna()

df.head()


ValueError: time data "2004-03-13 0:00:00" doesn't match format "%Y-%d-%m %H:%M:%S", at position 54. You might want to try:
    - passing `format` if your strings have a consistent format;
    - passing `format='ISO8601'` if your strings are all ISO8601 but not necessarily in exactly the same format;
    - passing `format='mixed'`, and the format will be inferred for each element individually. You might want to use `dayfirst` alongside this.

## Visualize CO and NO₂ Levels

In [None]:
plt.figure(figsize=(15, 6))
sns.set(style="whitegrid")
plt.plot(df['CO(GT)'], label='CO (GT)', color='steelblue', linewidth=2)
plt.plot(df['NO2(GT)'], label='NO2 (GT)', color='firebrick', linewidth=2)
plt.title('Daily CO and NO₂ Concentrations Over Time', fontsize=16)
plt.xlabel('Date', fontsize=12)
plt.ylabel('Concentration (mg/m³)', fontsize=12)
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()


## Prepare Data for Forecasting

In [None]:
# Create two separate DataFrames for Prophet
df_co = df['CO(GT)'].resample('D').mean().reset_index()
df_no2 = df['NO2(GT)'].resample('D').mean().reset_index()

df_co.columns = ['ds', 'y']
df_no2.columns = ['ds', 'y']

# Drop NaNs
df_co.dropna(inplace=True)
df_no2.dropna(inplace=True)


## Forecast CO and NO₂ using Prophet

In [None]:
def forecast_prophet(df_input, pollutant='CO'):
    model = Prophet()
    model.fit(df_input)

    future = model.make_future_dataframe(periods=30)
    forecast = model.predict(future)

    # Plot forecast
    fig = model.plot(forecast)
plt.title(f"{pollutant} Forecast with Prophet", fontsize=16)
plt.xlabel("Date")
plt.ylabel("Concentration")
plt.grid(True)
plt.tight_layout()

    plt.title(f"{pollutant} Forecast")
    plt.show()

    return forecast

forecast_co = forecast_prophet(df_co, pollutant='CO')
forecast_no2 = forecast_prophet(df_no2, pollutant='NO2')


## Evaluate Forecast Performance

In [None]:
def evaluate_forecast(df_orig, forecast_df):
    merged = pd.merge(df_orig, forecast_df[['ds', 'yhat']], on='ds')
    y_true = merged['y']
    y_pred = merged['yhat']
    rmse = np.sqrt(mean_squared_error(y_true, y_pred))
    mae = mean_absolute_error(y_true, y_pred)
    print(f"RMSE: {rmse:.2f}")
    print(f"MAE: {mae:.2f}")

print("CO Forecast Evaluation:")
evaluate_forecast(df_co, forecast_co)

print("\nNO2 Forecast Evaluation:")
evaluate_forecast(df_no2, forecast_no2)


## ✅ Conclusion
- CO and NO₂ data were cleaned and visualized.
- Time-series forecasting was performed using Prophet.
- Forecast accuracy was evaluated using RMSE and MAE.
You can extend this by using ARIMA, LSTM, or multi-variate forecasting models.