In [21]:
import pandas as pd
from prophet import Prophet
import matplotlib.pyplot as plt
import numpy as np
import warnings
from sklearn.metrics import mean_squared_error

In [7]:
df=pd.read_excel("AirQualityUCI.xlsx")

In [8]:
df['ds'] = pd.to_datetime(df['Date'].astype(str) + ' ' + df['Time'].astype(str), format='%Y-%m-%d %H:%M:%S')
df.set_index('ds', inplace=True)
df.drop(columns=['Date', 'Time'], inplace=True)

df.replace(-200, np.nan, inplace=True)
col_to_decompose=['CO(GT)','PT08.S1(CO)','NMHC(GT)','C6H6(GT)','PT08.S2(NMHC)','NOx(GT)','PT08.S3(NOx)','NO2(GT)','PT08.S4(NO2)','PT08.S5(O3)','T','RH','AH']

In [14]:
warnings.filterwarnings("ignore", category=FutureWarning)

In [19]:
all_preds = pd.DataFrame()

for col in col_to_decompose:
  print(f'\n Forecasting: {col}')

  temp_df=df[[col]].dropna().reset_index().rename(columns={col:'y'})
  model=Prophet()
  model.fit(temp_df)

  future=model.make_future_dataframe(periods=48,freq='H')
  forecast=model.predict(future)

  model.plot(forecast)
  plt.title(f"Forecast of {col}")
  plt.xlabel("Datetime")
  plt.ylabel(col)
  plt.tight_layout()
  plt.show()

    # Store predictions
  pred_df = forecast.tail(48)[['ds', 'yhat']].rename(columns={'yhat': col})

    # Merge predictions
  if all_preds.empty:
    all_preds = pred_df
  else:
    all_preds = pd.merge(all_preds, pred_df, on='ds', how='outer')


Output hidden; open in https://colab.research.google.com to view.

In [20]:
all_preds.to_excel('submission_prophet.xlsx', index=False)

In [22]:
rmse_results = {}

for col in col_to_decompose:
    print(f"\nCalculating RMSE for: {col}")

    # Prepare data
    temp_df = df[[col]].dropna().reset_index().rename(columns={col: 'y'})
    total_len = len(temp_df)
    if total_len < 20:  # Skip if not enough data
        print(f"Skipping {col} due to insufficient data.")
        continue

    split_idx = int(total_len * 0.9)
    train = temp_df.iloc[:split_idx]
    test = temp_df.iloc[split_idx:]

    # Fit model
    model = Prophet()
    model.fit(train)

    # Predict next len(test) steps
    future = model.make_future_dataframe(periods=len(test), freq='H')
    forecast = model.predict(future)

    # Get only the predicted values corresponding to the test period
    forecast_test = forecast.tail(len(test)).reset_index(drop=True)
    test = test.reset_index(drop=True)

    # Compute RMSE
    rmse = np.sqrt(mean_squared_error(test['y'], forecast_test['yhat']))
    rmse_results[col] = rmse
    print(f"RMSE for last 10% ({col}): {rmse:.4f}")

INFO:prophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
DEBUG:cmdstanpy:input tempfile: /tmp/tmpffz6n__y/pd_7stxg.json



Calculating RMSE for: CO(GT)


DEBUG:cmdstanpy:input tempfile: /tmp/tmpffz6n__y/ongcqf7b.json
DEBUG:cmdstanpy:idx 0
DEBUG:cmdstanpy:running CmdStan, num_threads: None
DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=1371', 'data', 'file=/tmp/tmpffz6n__y/pd_7stxg.json', 'init=/tmp/tmpffz6n__y/ongcqf7b.json', 'output', 'file=/tmp/tmpffz6n__y/prophet_modelk2gk1dvt/prophet_model-20250503150025.csv', 'method=optimize', 'algorithm=lbfgs', 'iter=10000']
15:00:25 - cmdstanpy - INFO - Chain [1] start processing
INFO:cmdstanpy:Chain [1] start processing
15:00:25 - cmdstanpy - INFO - Chain [1] done processing
INFO:cmdstanpy:Chain [1] done processing
INFO:prophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.


RMSE for last 10% (CO(GT)): 1.6821

Calculating RMSE for: PT08.S1(CO)


DEBUG:cmdstanpy:input tempfile: /tmp/tmpffz6n__y/r72ss70c.json
DEBUG:cmdstanpy:input tempfile: /tmp/tmpffz6n__y/uoy_b5uu.json
DEBUG:cmdstanpy:idx 0
DEBUG:cmdstanpy:running CmdStan, num_threads: None
DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=2071', 'data', 'file=/tmp/tmpffz6n__y/r72ss70c.json', 'init=/tmp/tmpffz6n__y/uoy_b5uu.json', 'output', 'file=/tmp/tmpffz6n__y/prophet_modelexihbtpc/prophet_model-20250503150028.csv', 'method=optimize', 'algorithm=lbfgs', 'iter=10000']
15:00:28 - cmdstanpy - INFO - Chain [1] start processing
INFO:cmdstanpy:Chain [1] start processing
15:00:30 - cmdstanpy - INFO - Chain [1] done processing
INFO:cmdstanpy:Chain [1] done processing
INFO:prophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
DEBUG:cmdstanpy:input tempfile: /tmp/tmpffz6n__y/xhxhy6s_.json
DEBUG:cmdstanpy:input tempfile: /tmp/tmpffz6n__y/c2ezr3vh.json
DEBUG:cmdstanpy

RMSE for last 10% (PT08.S1(CO)): 172.1949

Calculating RMSE for: NMHC(GT)


INFO:prophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
DEBUG:cmdstanpy:input tempfile: /tmp/tmpffz6n__y/bx4j_k9_.json


RMSE for last 10% (NMHC(GT)): 175.8636

Calculating RMSE for: C6H6(GT)


DEBUG:cmdstanpy:input tempfile: /tmp/tmpffz6n__y/afv_a6ef.json
DEBUG:cmdstanpy:idx 0
DEBUG:cmdstanpy:running CmdStan, num_threads: None
DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=68503', 'data', 'file=/tmp/tmpffz6n__y/bx4j_k9_.json', 'init=/tmp/tmpffz6n__y/afv_a6ef.json', 'output', 'file=/tmp/tmpffz6n__y/prophet_model10o_d6bx/prophet_model-20250503150032.csv', 'method=optimize', 'algorithm=lbfgs', 'iter=10000']
15:00:32 - cmdstanpy - INFO - Chain [1] start processing
INFO:cmdstanpy:Chain [1] start processing
15:00:32 - cmdstanpy - INFO - Chain [1] done processing
INFO:cmdstanpy:Chain [1] done processing
INFO:prophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
DEBUG:cmdstanpy:input tempfile: /tmp/tmpffz6n__y/p7k49mqy.json


RMSE for last 10% (C6H6(GT)): 5.3936

Calculating RMSE for: PT08.S2(NMHC)


DEBUG:cmdstanpy:input tempfile: /tmp/tmpffz6n__y/i_b2g6ae.json
DEBUG:cmdstanpy:idx 0
DEBUG:cmdstanpy:running CmdStan, num_threads: None
DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=80189', 'data', 'file=/tmp/tmpffz6n__y/p7k49mqy.json', 'init=/tmp/tmpffz6n__y/i_b2g6ae.json', 'output', 'file=/tmp/tmpffz6n__y/prophet_modelqlmvwepm/prophet_model-20250503150034.csv', 'method=optimize', 'algorithm=lbfgs', 'iter=10000']
15:00:34 - cmdstanpy - INFO - Chain [1] start processing
INFO:cmdstanpy:Chain [1] start processing
15:00:35 - cmdstanpy - INFO - Chain [1] done processing
INFO:cmdstanpy:Chain [1] done processing
INFO:prophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.


RMSE for last 10% (PT08.S2(NMHC)): 200.3686

Calculating RMSE for: NOx(GT)


DEBUG:cmdstanpy:input tempfile: /tmp/tmpffz6n__y/cyzq5vtc.json
DEBUG:cmdstanpy:input tempfile: /tmp/tmpffz6n__y/e5sxcd7k.json
DEBUG:cmdstanpy:idx 0
DEBUG:cmdstanpy:running CmdStan, num_threads: None
DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=24312', 'data', 'file=/tmp/tmpffz6n__y/cyzq5vtc.json', 'init=/tmp/tmpffz6n__y/e5sxcd7k.json', 'output', 'file=/tmp/tmpffz6n__y/prophet_modell8anx1kj/prophet_model-20250503150037.csv', 'method=optimize', 'algorithm=lbfgs', 'iter=10000']
15:00:37 - cmdstanpy - INFO - Chain [1] start processing
INFO:cmdstanpy:Chain [1] start processing
15:00:38 - cmdstanpy - INFO - Chain [1] done processing
INFO:cmdstanpy:Chain [1] done processing
INFO:prophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
DEBUG:cmdstanpy:input tempfile: /tmp/tmpffz6n__y/g04v2zk5.json


RMSE for last 10% (NOx(GT)): 218.7955

Calculating RMSE for: PT08.S3(NOx)


DEBUG:cmdstanpy:input tempfile: /tmp/tmpffz6n__y/zz3b18rv.json
DEBUG:cmdstanpy:idx 0
DEBUG:cmdstanpy:running CmdStan, num_threads: None
DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=91346', 'data', 'file=/tmp/tmpffz6n__y/g04v2zk5.json', 'init=/tmp/tmpffz6n__y/zz3b18rv.json', 'output', 'file=/tmp/tmpffz6n__y/prophet_modelq_y_624s/prophet_model-20250503150040.csv', 'method=optimize', 'algorithm=lbfgs', 'iter=10000']
15:00:40 - cmdstanpy - INFO - Chain [1] start processing
INFO:cmdstanpy:Chain [1] start processing
15:00:42 - cmdstanpy - INFO - Chain [1] done processing
INFO:cmdstanpy:Chain [1] done processing
INFO:prophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
DEBUG:cmdstanpy:input tempfile: /tmp/tmpffz6n__y/az8lrxlb.json


RMSE for last 10% (PT08.S3(NOx)): 185.9698

Calculating RMSE for: NO2(GT)


DEBUG:cmdstanpy:input tempfile: /tmp/tmpffz6n__y/842dibtx.json
DEBUG:cmdstanpy:idx 0
DEBUG:cmdstanpy:running CmdStan, num_threads: None
DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=59346', 'data', 'file=/tmp/tmpffz6n__y/az8lrxlb.json', 'init=/tmp/tmpffz6n__y/842dibtx.json', 'output', 'file=/tmp/tmpffz6n__y/prophet_modeliti3h04o/prophet_model-20250503150044.csv', 'method=optimize', 'algorithm=lbfgs', 'iter=10000']
15:00:44 - cmdstanpy - INFO - Chain [1] start processing
INFO:cmdstanpy:Chain [1] start processing
15:00:45 - cmdstanpy - INFO - Chain [1] done processing
INFO:cmdstanpy:Chain [1] done processing
INFO:prophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
DEBUG:cmdstanpy:input tempfile: /tmp/tmpffz6n__y/e95uertw.json


RMSE for last 10% (NO2(GT)): 61.4364

Calculating RMSE for: PT08.S4(NO2)


DEBUG:cmdstanpy:input tempfile: /tmp/tmpffz6n__y/3w_dgxge.json
DEBUG:cmdstanpy:idx 0
DEBUG:cmdstanpy:running CmdStan, num_threads: None
DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=7469', 'data', 'file=/tmp/tmpffz6n__y/e95uertw.json', 'init=/tmp/tmpffz6n__y/3w_dgxge.json', 'output', 'file=/tmp/tmpffz6n__y/prophet_modelqiypv5vn/prophet_model-20250503150047.csv', 'method=optimize', 'algorithm=lbfgs', 'iter=10000']
15:00:47 - cmdstanpy - INFO - Chain [1] start processing
INFO:cmdstanpy:Chain [1] start processing
15:00:49 - cmdstanpy - INFO - Chain [1] done processing
INFO:cmdstanpy:Chain [1] done processing
INFO:prophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
DEBUG:cmdstanpy:input tempfile: /tmp/tmpffz6n__y/0147u7f0.json


RMSE for last 10% (PT08.S4(NO2)): 337.8313

Calculating RMSE for: PT08.S5(O3)


DEBUG:cmdstanpy:input tempfile: /tmp/tmpffz6n__y/en24uleg.json
DEBUG:cmdstanpy:idx 0
DEBUG:cmdstanpy:running CmdStan, num_threads: None
DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=30407', 'data', 'file=/tmp/tmpffz6n__y/0147u7f0.json', 'init=/tmp/tmpffz6n__y/en24uleg.json', 'output', 'file=/tmp/tmpffz6n__y/prophet_modelnq9yhz6t/prophet_model-20250503150051.csv', 'method=optimize', 'algorithm=lbfgs', 'iter=10000']
15:00:51 - cmdstanpy - INFO - Chain [1] start processing
INFO:cmdstanpy:Chain [1] start processing
15:00:52 - cmdstanpy - INFO - Chain [1] done processing
INFO:cmdstanpy:Chain [1] done processing
INFO:prophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
DEBUG:cmdstanpy:input tempfile: /tmp/tmpffz6n__y/gteb708a.json


RMSE for last 10% (PT08.S5(O3)): 351.0649

Calculating RMSE for: T


DEBUG:cmdstanpy:input tempfile: /tmp/tmpffz6n__y/tdj5nm7e.json
DEBUG:cmdstanpy:idx 0
DEBUG:cmdstanpy:running CmdStan, num_threads: None
DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=26825', 'data', 'file=/tmp/tmpffz6n__y/gteb708a.json', 'init=/tmp/tmpffz6n__y/tdj5nm7e.json', 'output', 'file=/tmp/tmpffz6n__y/prophet_modelk3w60o55/prophet_model-20250503150054.csv', 'method=optimize', 'algorithm=lbfgs', 'iter=10000']
15:00:54 - cmdstanpy - INFO - Chain [1] start processing
INFO:cmdstanpy:Chain [1] start processing
15:00:55 - cmdstanpy - INFO - Chain [1] done processing
INFO:cmdstanpy:Chain [1] done processing
INFO:prophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
DEBUG:cmdstanpy:input tempfile: /tmp/tmpffz6n__y/42fskxxg.json


RMSE for last 10% (T): 10.1778

Calculating RMSE for: RH


DEBUG:cmdstanpy:input tempfile: /tmp/tmpffz6n__y/mga29sn8.json
DEBUG:cmdstanpy:idx 0
DEBUG:cmdstanpy:running CmdStan, num_threads: None
DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=16122', 'data', 'file=/tmp/tmpffz6n__y/42fskxxg.json', 'init=/tmp/tmpffz6n__y/mga29sn8.json', 'output', 'file=/tmp/tmpffz6n__y/prophet_modeltevi6o7b/prophet_model-20250503150057.csv', 'method=optimize', 'algorithm=lbfgs', 'iter=10000']
15:00:57 - cmdstanpy - INFO - Chain [1] start processing
INFO:cmdstanpy:Chain [1] start processing
15:00:58 - cmdstanpy - INFO - Chain [1] done processing
INFO:cmdstanpy:Chain [1] done processing
INFO:prophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
DEBUG:cmdstanpy:input tempfile: /tmp/tmpffz6n__y/vliglmda.json


RMSE for last 10% (RH): 12.9323

Calculating RMSE for: AH


DEBUG:cmdstanpy:input tempfile: /tmp/tmpffz6n__y/47zj45di.json
DEBUG:cmdstanpy:idx 0
DEBUG:cmdstanpy:running CmdStan, num_threads: None
DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=36755', 'data', 'file=/tmp/tmpffz6n__y/vliglmda.json', 'init=/tmp/tmpffz6n__y/47zj45di.json', 'output', 'file=/tmp/tmpffz6n__y/prophet_modeljvyw7cp2/prophet_model-20250503150100.csv', 'method=optimize', 'algorithm=lbfgs', 'iter=10000']
15:01:00 - cmdstanpy - INFO - Chain [1] start processing
INFO:cmdstanpy:Chain [1] start processing
15:01:02 - cmdstanpy - INFO - Chain [1] done processing
INFO:cmdstanpy:Chain [1] done processing


RMSE for last 10% (AH): 0.4753


In [23]:
print(rmse_results)

{'CO(GT)': np.float64(1.6820754650446197), 'PT08.S1(CO)': np.float64(172.19493462871077), 'NMHC(GT)': np.float64(175.86356513022574), 'C6H6(GT)': np.float64(5.39359266952336), 'PT08.S2(NMHC)': np.float64(200.3686436833178), 'NOx(GT)': np.float64(218.79551093936524), 'PT08.S3(NOx)': np.float64(185.96976841221144), 'NO2(GT)': np.float64(61.4364071145695), 'PT08.S4(NO2)': np.float64(337.83127192359444), 'PT08.S5(O3)': np.float64(351.0649145441186), 'T': np.float64(10.177837261161487), 'RH': np.float64(12.932284822862012), 'AH': np.float64(0.47529840349167124)}
