The dataset was taken from the UCI Machine Learning Repository: https://archive.ics.uci.edu/dataset/360/air+quality


Loading and visualization


In [None]:
import pandas as pd
# Load the raw UCI Air Quality export.
# The file is semicolon-delimited and writes numbers with a decimal comma
# ("2,6"); without decimal="," the measurement columns load as strings.
data = pd.read_csv("AirQualityUCI.csv", sep=";", decimal=",")
# Drop columns and rows that are entirely empty (the export is padded with
# trailing separators and blank records); real observations are unaffected.
data = data.dropna(axis="columns", how="all").dropna(axis="index", how="all")
print(data.head())
# DataFrame.info() prints its report itself and returns None, so wrapping it
# in print() would emit a stray "None" line.
data.info()



In [None]:
# Reduce the raw frame to a single CO(GT) series indexed by timestamp.
# NOTE: this cell rebinds `data`, so it must be re-run together with the
# loading cell (the Date/Time columns are gone afterwards).

# Rows without a Date/Time are empty padding records, not observations.
timed = data.dropna(subset=['Date', 'Time'])

# Timestamps are day-first with dot-separated times ("10/03/2004 18.00.00").
# An explicit format avoids day/month swaps and parse failures from inference.
timestamps = pd.to_datetime(
    timed['Date'] + " " + timed['Time'],
    format="%d/%m/%Y %H.%M.%S",
)

# Accept the column whether it was read as floats or as decimal-comma strings.
co_values = pd.to_numeric(
    timed['CO(GT)'].astype(str).str.replace(',', '.', regex=False),
    errors='coerce',
)
# The dataset tags missing measurements with -200; turn those into NaN so
# they are dropped instead of being modelled as real CO levels.
co_values = co_values.where(co_values != -200)

data = (
    pd.DataFrame({'DateTime': timestamps, 'CO(GT)': co_values})
    .set_index('DateTime')
    .dropna()
    .sort_index()  # partial-string date slicing needs a sorted index
)
print(data.head())

In [None]:
import matplotlib.pyplot as plt
# --- CO(GT) over time -------------------------------------------------------
co_levels = data['CO(GT)']

plt.figure(figsize=(10, 6))
plt.plot(data.index, co_levels, label='CO(GT)')
plt.xlabel("Time")
plt.ylabel("CO Levels")
plt.title("Air quality CO Levels/Time")
plt.legend()
plt.show()

# --- Distribution of the CO(GT) readings (30 equal-width bins) --------------
plt.hist(co_levels, bins=30)
plt.title("CO Levels")
plt.show()


Splitting

In [None]:
# Chronological split using partial-string indexing on the DatetimeIndex:
# everything up to and including March 2004 is training data, everything
# from April 2004 onwards is test data.
# .copy() makes each split an independent frame, so adding feature columns
# to it later neither raises SettingWithCopyWarning nor touches `data`.
# NOTE(review): the cut-off is early in the recording period - check the
# sizes printed below are the intended train/test proportions.
train = data.loc[:'2004-03'].copy()
test = data.loc['2004-04':].copy()
print(f"Train: {len(train)}, Test: {len(test)}")


Decomposing

In [None]:
from statsmodels.tsa.seasonal import seasonal_decompose
# Additive trend/seasonal/residual decomposition of the training CO series.
# The target column is passed explicitly so the cell still works if `train`
# has gained extra feature columns.
# period=24: the UCI description states readings are hourly averages, so one
# seasonal cycle is one day; the previous value of 30 matched no natural cycle.
# NOTE(review): rows with missing CO were dropped earlier, so the spacing is
# only approximately hourly.
result = seasonal_decompose(train['CO(GT)'], model='additive', period=24)
result.plot()
plt.show()

Forecasts

In [None]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error

# Random-forest baseline: predict CO(GT) from the previous observation (lag 1).
#
# The lagged design matrices are built in NEW frames. Overwriting `train` and
# `test` would add a `lag1` column to them (and drop their first row), which
# changes what every other cell using those names receives and makes this
# cell non-idempotent on re-run.
rf_train = train[['CO(GT)']].assign(lag1=train['CO(GT)'].shift(1)).dropna()
rf_test = test[['CO(GT)']].assign(lag1=test['CO(GT)'].shift(1)).dropna()

X_train, y_train = rf_train[['lag1']], rf_train['CO(GT)']
X_test, y_test = rf_test[['lag1']], rf_test['CO(GT)']

# Fixed random_state: the forest is stochastic, so without a seed the MSE
# changes on every run and cannot be compared across runs.
rf = RandomForestRegressor(random_state=42)
rf.fit(X_train, y_train)
predictions = rf.predict(X_test)
mse = mean_squared_error(y_test, predictions)
print(f"RF MSE: {mse}")

plt.figure(figsize=(10, 6))
plt.plot(y_test.index, y_test, label='True')
plt.plot(y_test.index, predictions, label='Predict')
plt.title("Random forest: predicted vs. true CO levels")
plt.xlabel("Time")
plt.ylabel("CO Levels")
plt.legend()
plt.show()


In [None]:
from statsmodels.tsa.holtwinters import ExponentialSmoothing

# Holt-Winters (additive trend + additive seasonality) forecast of CO(GT).
#
# Select the target column explicitly: the model and the metric need a single
# 1-D series, and `train`/`test` may carry extra feature columns (e.g. `lag1`).
train_co = train['CO(GT)']
test_co = test['CO(GT)']

# seasonal_periods=24: readings are hourly averages per the UCI description,
# so one seasonal cycle is one day (30 matched no natural cycle).
ets_model = ExponentialSmoothing(train_co, seasonal='add', trend='add', seasonal_periods=24)
ets_fit = ets_model.fit()
# One forecast step per test observation, starting right after the training data.
ets_predictions = ets_fit.forecast(len(test_co))
mse_ets = mean_squared_error(test_co, ets_predictions)
print(f"ETS MSE: {mse_ets}")

plt.figure(figsize=(10, 6))
plt.plot(test_co.index, test_co, label='True')
plt.plot(test_co.index, ets_predictions, label='ETS Forecast')
plt.title("ETS: forecast vs. true CO levels")
plt.xlabel("Time")
plt.ylabel("CO Levels")
plt.legend()
plt.show()


Comparing

In [None]:
# Test-set MSE of both models (lower is better).
# The first label previously read "Random  MSE", which does not name the
# model; it now matches the "RF MSE" label printed by the forecasting cell.
# NOTE(review): the two numbers are not like-for-like - the random forest is
# fed the true previous observation at every step, while ETS forecasts the
# whole test horizon from the end of the training data.
print(f"RF MSE: {mse}")
print(f"ETS MSE: {mse_ets}")