In [None]:
%matplotlib inline

In [None]:
import os
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# For better visuals
sns.set(style="whitegrid")
plt.rcParams["figure.figsize"] = (12, 6)

def load_epa_file(filepath):
    """
    Load an EPA CSV or Excel file into a DataFrame with every column read as text.

    No date or numeric parsing happens here: ``dtype=str`` keeps all values as
    raw strings so that later steps can convert them explicitly.

    Parameters
    ----------
    filepath : str or os.PathLike
        Path of the file to read. A name ending in ``.csv`` (any letter case)
        is read with ``pd.read_csv``; anything else is handed to
        ``pd.read_excel``.

    Returns
    -------
    pandas.DataFrame
        The loaded data, or an empty DataFrame when reading fails. Failures
        are reported with ``print`` and never raised to the caller.
    """
    try:
        # Normalise to a plain string so pathlib.Path arguments work too.
        path_str = os.fspath(filepath)
        # Compare case-insensitively: "DATA.CSV" is still a CSV file.
        if path_str.lower().endswith('.csv'):
            df = pd.read_csv(path_str, dtype=str, low_memory=False)
        else:
            df = pd.read_excel(path_str, dtype=str)
        print(f"Loaded file: {filepath}")
        return df
    except Exception as e:
        # Deliberately best-effort: a single unreadable file must not abort a
        # batch run, so report the problem and hand back an empty frame.
        print(f"Error loading {filepath}: {e}")
        return pd.DataFrame()

def clean_and_prepare(df):
    """
    Reduce a raw EPA frame to parsed ``Date`` and ``Concentration`` columns.

    ``date_local`` is converted with ``pd.to_datetime`` into ``Date`` and
    ``arithmetic_mean`` with ``pd.to_numeric`` into ``Concentration``. Values
    that cannot be parsed become NaT/NaN and their rows are dropped. The input
    frame is not modified.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw data as loaded from file.

    Returns
    -------
    pandas.DataFrame
        Two columns, ``Date`` and ``Concentration``, without missing values.
        When either column cannot be produced from the input, an empty frame
        with those two columns (datetime64 / float64) is returned instead of
        raising ``KeyError``.
    """
    df = df.copy()

    # Print column names for debugging
    print("\n--- Column Names ---")
    print(df.columns)

    # Derive consistently named, typed columns from the known source columns
    if 'date_local' in df.columns:
        df['Date'] = pd.to_datetime(df['date_local'], errors='coerce')
    if 'arithmetic_mean' in df.columns:
        df['Concentration'] = pd.to_numeric(df['arithmetic_mean'], errors='coerce')

    required = ['Date', 'Concentration']
    missing = [col for col in required if col not in df.columns]
    if missing:
        # Without both columns dropna(subset=...) would raise KeyError; return
        # a typed, empty result so callers can simply test `.empty`.
        print(f"Cannot prepare data: missing required column(s) {missing}")
        return pd.DataFrame({
            'Date': pd.Series(dtype='datetime64[ns]'),
            'Concentration': pd.Series(dtype='float64'),
        })

    # Drop rows with missing critical values
    df = df.dropna(subset=required)

    return df[required]

def explore_data(df, title="Pollutant Trend"):
    """
    Simple EDA: print descriptive statistics and plot the monthly mean.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a datetime ``Date`` column and a numeric ``Concentration``
        column.
    title : str, optional
        Title placed on the line plot.

    Returns
    -------
    None
        Output is printed and a matplotlib figure is shown. An empty ``df``
        is reported and skipped without plotting.
    """
    if df.empty:
        print("\nNo rows to explore; skipping statistics and plot.")
        return

    print("\n--- Descriptive Statistics ---")
    print(df.describe())

    print("\n--- Data Info ---")
    # DataFrame.info() writes its report itself and returns None, so wrapping
    # it in print() would emit a stray "None" line.
    df.info()

    # Resample to month-end means to reduce the number of plotted points.
    indexed = df.set_index('Date')
    try:
        # 'ME' is the month-end alias from pandas 2.2 on ('M' is deprecated).
        resampler = indexed.resample('ME')
    except ValueError:
        # Older pandas versions only understand the legacy 'M' alias.
        resampler = indexed.resample('M')
    df_resampled = resampler.mean().reset_index()

    print("\n--- Plotting Resampled Time Series ---")
    # Explicit figure/axes so the figure can be closed after display; this
    # function is called once per file and figures would otherwise pile up.
    fig, ax = plt.subplots()
    sns.lineplot(data=df_resampled, x='Date', y='Concentration',
                 label='Pollutant Level', ax=ax)
    ax.set_title(title)
    ax.set_xlabel("Date")
    ax.set_ylabel("Concentration (µg/m³ or ppm)")
    ax.tick_params(axis='x', rotation=45)
    fig.tight_layout()
    plt.show()
    plt.close(fig)

def run_eda_on_all_files(data_dir):
    """
    Run the load -> clean -> explore pipeline on every file in ``data_dir``.

    Entries are processed in sorted name order so repeated runs produce the
    same sequence of output. Subdirectories are skipped, as are files that
    fail to load or that contain no usable rows after cleaning.

    Parameters
    ----------
    data_dir : str or os.PathLike
        Directory containing the EPA data files.

    Raises
    ------
    OSError
        Propagated from ``os.listdir`` when ``data_dir`` cannot be listed
        (for example, when it does not exist).
    """
    # os.listdir returns entries in arbitrary order; sort for reproducibility.
    for file in sorted(os.listdir(data_dir)):
        filepath = os.path.join(data_dir, file)
        if not os.path.isfile(filepath):
            # Nested folders are not data files; do not hand them to the loader.
            continue
        df_raw = load_epa_file(filepath)
        if df_raw.empty:
            continue
        df_cleaned = clean_and_prepare(df_raw)
        if df_cleaned.empty:
            print(f"No usable rows in {file}; skipping EDA.")
            continue
        explore_data(df_cleaned, title=f"{file} Trend Over Time")

# Example usage
if __name__ == "__main__":
    # The original machine-specific location stays as the default; set the
    # EPA_DATA_DIR environment variable to run against a different directory
    # without editing this cell.
    default_data_folder = r'D:\env_monitoring_platform\backend\epa_data_by_state'
    data_folder = os.environ.get("EPA_DATA_DIR", default_data_folder)
    run_eda_on_all_files(data_folder)


In [None]:
# Run data_exploration.py in a separate Python process via the IPython `!` shell escape.
# NOTE(review): this exact command is repeated verbatim in the next four cells;
# the reruns look like leftover iterations — TODO confirm and keep a single cell.
!python D:/env_monitoring_platform/backend/ml/data_exploration.py

In [None]:
!python D:/env_monitoring_platform/backend/ml/data_exploration.py

In [None]:
!python D:/env_monitoring_platform/backend/ml/data_exploration.py

In [None]:
!python D:/env_monitoring_platform/backend/ml/data_exploration.py

In [None]:
!python D:/env_monitoring_platform/backend/ml/data_exploration.py

In [None]:
!python D:/env_monitoring_platform/backend/ml/prophet_pm25_forecast.py

In [None]:
!python D:/env_monitoring_platform/backend/ml/run_all_univariate.py

In [None]:
!python D:/env_monitoring_platform/backend/ml/xgboost_multivariate.py

In [None]:
!python D:/env_monitoring_platform/backend/utils/consolidate_metrics.py

In [None]:
!python D:/env_monitoring_platform/backend/weather_data_files/prophet_weather_univariate_all.py

In [None]:
!python D:/env_monitoring_platform/backend/weather_data_files/prophet_weather_univariate_all.py

In [None]:
!python D:/env_monitoring_platform/backend/weather_data_files/prophet_weather_univariate_all.py

In [None]:
!python D:/env_monitoring_platform/backend/weather_data_files/sarima_weather_univariate.py

In [None]:
# NOTE(review): this is the only invocation that uses a relative script path, so it
# resolves against the kernel's current working directory; the following cells run
# the same script through its absolute D:/ path — TODO confirm this cell is still needed.
!python backend/utils/generate_all_metrics.py

In [None]:
!python D:/env_monitoring_platform/backend/utils/generate_all_metrics.py

In [None]:
!python D:/env_monitoring_platform/backend/utils/generate_all_metrics.py

In [None]:
!python D:/env_monitoring_platform/backend/utils/generate_all_metrics.py

In [None]:
!python D:/env_monitoring_platform/backend/utils/generate_all_metrics.py

In [None]:
!python D:/env_monitoring_platform/backend/utils/generate_all_metrics.py

In [None]:
!python D:\env_monitoring_platform\backend\weather_data_files\prophet_weather_univariate_all.py

In [None]:
!python D:\env_monitoring_platform\backend\weather_data_files\create_weather_metrics_summary.py

In [None]:
!python D:\env_monitoring_platform\backend\weather_data_files\sarima_weather_univariate.py

In [None]:
!python D:\env_monitoring_platform\backend\weather_data_files\create_weather_metrics_summary.py

In [None]:
# Debug check: show the column labels of the two metrics frames.
# NOTE(review): `prophet_metrics` and `sarima_metrics` are not defined in any
# visible cell of this notebook, so this cell depends on hidden kernel state and
# would raise NameError after Restart & Run All — TODO load them explicitly first.
print("Prophet columns:", prophet_metrics.columns)
print("SARIMA columns:", sarima_metrics.columns)


In [None]:
!python D:\env_monitoring_platform\backend\weather_data_files\xgboost_univariate_weather.py

In [None]:
!python D:\env_monitoring_platform\backend\weather_data_files\create_weather_metrics_summary.py

In [None]:
!python D:\env_monitoring_platform\backend\utils\generate_all_metrics.py

In [None]:
!python D:\env_monitoring_platform\backend\utils\metrics_summary_generator.py

In [None]:
!python D:\env_monitoring_platform\backend\utils\consolidate_metrics.py

In [None]:
!python D:\env_monitoring_platform\backend\utils\generate_air_quality_bar_charts.py

In [None]:
!python D:\env_monitoring_platform\backend\utils\generate_air_quality_bar_charts.py

In [None]:
!python D:\env_monitoring_platform\backend\utils\generate_air_quality_bar_charts_multivariate.py

In [None]:
!python D:\env_monitoring_platform\backend\utils\merge_air_quality_metrics.py

In [None]:
!python D:\env_monitoring_platform\backend\utils\generate_air_quality_bar_charts_combined.py

In [None]:
!python D:\env_monitoring_platform\backend\weather_data_files\prophet_weather_univariate_all.py

In [None]:
!python D:\env_monitoring_platform\backend\weather_data_files\sarima_weather_univariate.py

In [None]:
!python D:\env_monitoring_platform\backend\weather_data_files\xgboost_univariate_weather.py

In [None]:
!python D:\env_monitoring_platform\backend\weather_data_files\xgboost_multivariate_weather.py

In [None]:
!python D:\env_monitoring_platform\backend\weather_data_files\create_weather_metrics_summary.py

In [None]:
!python D:\env_monitoring_platform\backend\weather_data_files\plot_weather_model_metrics.py

In [None]:
!python D:\env_monitoring_platform\backend\weather_data_files\prophet_weather_univariate_all.py

In [None]:
!python D:\env_monitoring_platform\backend\weather_data_files\sarima_weather_univariate.py

In [None]:
!python D:\env_monitoring_platform\backend\weather_data_files\xgboost_univariate_weather.py

In [None]:
!python D:\env_monitoring_platform\backend\weather_data_files\xgboost_multivariate_weather.py

In [None]:
!python D:\env_monitoring_platform\backend\utils\consolidate_metrics.py

In [None]:
!python D:\env_monitoring_platform\backend\weather_data_files\create_weather_metrics_summary.py

In [None]:
!python D:\env_monitoring_platform\backend\weather_data_files\plot_weather_model_metrics.py

In [1]:
!python D:\env_monitoring_platform\backend\ml\prophet_univariate.py

 Running Prophet forecast for: PM2.5
 Saved forecast plot: D:/env_monitoring_platform/backend/plots/air/prophet\prophet_PM2.5_forecast.png
 Running Prophet forecast for: PM10
 Saved forecast plot: D:/env_monitoring_platform/backend/plots/air/prophet\prophet_PM10_forecast.png
 Running Prophet forecast for: CO
 Saved forecast plot: D:/env_monitoring_platform/backend/plots/air/prophet\prophet_CO_forecast.png
 Running Prophet forecast for: NO2
 Saved forecast plot: D:/env_monitoring_platform/backend/plots/air/prophet\prophet_NO2_forecast.png
 Running Prophet forecast for: SO2
 Saved forecast plot: D:/env_monitoring_platform/backend/plots/air/prophet\prophet_SO2_forecast.png
 Running Prophet forecast for: O3
 Saved forecast plot: D:/env_monitoring_platform/backend/plots/air/prophet\prophet_O3_forecast.png
 Metrics saved to: D:/env_monitoring_platform/backend/ml/prophet_air_metrics.csv


Importing plotly failed. Interactive plots will not work.
04:25:19 - cmdstanpy - INFO - Chain [1] start processing
04:25:20 - cmdstanpy - INFO - Chain [1] done processing
04:25:31 - cmdstanpy - INFO - Chain [1] start processing
04:25:33 - cmdstanpy - INFO - Chain [1] done processing
04:25:37 - cmdstanpy - INFO - Chain [1] start processing
04:25:41 - cmdstanpy - INFO - Chain [1] done processing
04:25:50 - cmdstanpy - INFO - Chain [1] start processing
04:25:53 - cmdstanpy - INFO - Chain [1] done processing
04:25:57 - cmdstanpy - INFO - Chain [1] start processing
04:25:59 - cmdstanpy - INFO - Chain [1] done processing
04:26:09 - cmdstanpy - INFO - Chain [1] start processing
04:26:11 - cmdstanpy - INFO - Chain [1] done processing


In [2]:
!python D:\env_monitoring_platform\backend\ml\sarima_univariate.py

Running SARIMA for PM2.5...
Saved plot: D:/env_monitoring_platform/backend/plots/air/sarima/sarima_pm25.png
Running SARIMA for PM10...
Saved plot: D:/env_monitoring_platform/backend/plots/air/sarima/sarima_pm10.png
Running SARIMA for CO...
Saved plot: D:/env_monitoring_platform/backend/plots/air/sarima/sarima_co.png
Running SARIMA for NO2...
Saved plot: D:/env_monitoring_platform/backend/plots/air/sarima/sarima_no2.png
Running SARIMA for SO2...
Saved plot: D:/env_monitoring_platform/backend/plots/air/sarima/sarima_so2.png
Running SARIMA for O3...
Saved plot: D:/env_monitoring_platform/backend/plots/air/sarima/sarima_o3.png
Metrics saved to: D:/env_monitoring_platform/backend/ml/sarima_air_metrics.csv


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(


In [9]:
!python D:\env_monitoring_platform\backend\ml\xgboost_univariate.py

Loaded 81102_all.csv: 267 monthly records

Running XGBoost for PM2.5...
Saved plot: D:/env_monitoring_platform/backend/plots/air/xgboost/xgboost_pm25.png
Loaded 88101_all.csv: 306 monthly records

Running XGBoost for PM10...
Saved plot: D:/env_monitoring_platform/backend/plots/air/xgboost/xgboost_pm10.png
Loaded 42101_all.csv: 360 monthly records

Running XGBoost for CO...
Saved plot: D:/env_monitoring_platform/backend/plots/air/xgboost/xgboost_co.png
Loaded 42401_all.csv: 360 monthly records

Running XGBoost for NO2...
Saved plot: D:/env_monitoring_platform/backend/plots/air/xgboost/xgboost_no2.png
Loaded 42602_all.csv: 360 monthly records

Running XGBoost for SO2...
Saved plot: D:/env_monitoring_platform/backend/plots/air/xgboost/xgboost_so2.png
Loaded 44201_all.csv: 360 monthly records

Running XGBoost for O3...
Saved plot: D:/env_monitoring_platform/backend/plots/air/xgboost/xgboost_o3.png

Saved metrics to: D:/env_monitoring_platform/backend/ml/xgboost_air_metrics.csv


In [10]:
!python D:\env_monitoring_platform\backend\ml\xgboost_multivariate.py

Merged data shape: (10958, 7)

--- Forecasting PM2.5 ---

--- Forecasting PM10 ---

--- Forecasting CO ---

--- Forecasting SO2 ---

--- Forecasting NO2 ---

--- Forecasting O3 ---

Saved metrics to: xgb_multivariate_metrics.csv
  Pollutant    RMSE     MAE    MAPE
0     PM2.5  4.4218  2.0655   33.77
1      PM10  3.8773  2.2565   23.96
2        CO  0.0471  0.0340   14.68
3       SO2  0.6191  0.3670  108.02
4       NO2  3.9213  3.0117   31.92
5        O3  0.0045  0.0036   13.54


In [14]:
!python D:\env_monitoring_platform\backend\utils\metrics_summary_generator.py

 Consolidated evaluation metrics saved to D:\env_monitoring_platform\backend\ml\air_forecasting_metrics.csv


In [15]:
!python D:\env_monitoring_platform\backend\utils\merge_air_quality_metrics.py

Saved combined metrics to 'air_quality_combined_metrics.csv'


In [17]:
!python D:\env_monitoring_platform\backend\utils\generate_air_quality_bar_charts_combined.py

Saved: ../plots/air/bar_charts\air_quality_comparison_rmse.png
Saved: ../plots/air/bar_charts\air_quality_comparison_mae.png
Saved: ../plots/air/bar_charts\air_quality_comparison_mape.png


In [22]:
!python D:\env_monitoring_platform\backend\ml\combine_air_pollutants.py

Combined air data saved to: D:\env_monitoring_platform\backend\ml\combined_air_data.csv


In [23]:
!python D:\env_monitoring_platform\backend\utils\air_corr_heatmap.py

Air quality heatmap saved to: D:/env_monitoring_platform/backend/plots/air/air_corr_heatmap.png
