# Baseline Models - PM10

## Import Data

In [1]:
import pandas as pd

In [2]:
import numpy as np

In [6]:
df = pd.read_csv("../data/data_final.csv")

In [7]:
df

Unnamed: 0,temp,timestamp_local,city_name,country_code,aqi,co,no2,o3,pm10,pm25,so2
0,12.6,2022-02-01T00:00:00,Gujrāt,PK,385.0,1339.8,76.0,10.7,491.7,347.67,238.0
1,11.5,2022-02-01T01:00:00,Gujrāt,PK,404.0,1437.6,76.0,9.3,508.3,359.33,268.0
2,11.9,2022-02-01T02:00:00,Gujrāt,PK,421.0,1535.5,76.0,8.0,525.0,371.00,298.0
3,12.2,2022-02-01T03:00:00,Gujrāt,PK,425.0,1659.0,68.3,5.3,529.3,374.00,275.7
4,11.9,2022-02-01T04:00:00,Gujrāt,PK,430.0,1782.5,60.7,2.7,533.7,377.00,253.3
...,...,...,...,...,...,...,...,...,...,...,...
18213,20.7,2024-02-29T19:00:00,Gujrāt,PK,122.0,246.9,23.3,64.7,67.0,43.67,32.3
18214,18.1,2024-02-29T20:00:00,Gujrāt,PK,145.0,301.1,30.0,48.0,79.0,52.00,37.0
18215,18.5,2024-02-29T21:00:00,Gujrāt,PK,152.0,304.6,26.7,46.7,86.0,56.67,36.7
18216,17.4,2024-02-29T22:00:00,Gujrāt,PK,158.0,308.1,23.3,45.3,93.0,61.33,36.3


In [8]:
data = df[["pm10"]].copy()

In [9]:
data.head()

Unnamed: 0,pm10
0,491.7
1,508.3
2,525.0
3,529.3
4,533.7


## Arithmetic Mean

In [10]:
train = data[:-168]

In [11]:
test = data[-168:]

In [12]:
historical_mean = np.mean(train)

In [13]:
historical_mean

120.95415554072096

In [14]:
test['pred_mean'] = historical_mean

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test['pred_mean'] = historical_mean


In [15]:
test

Unnamed: 0,pm10,pred_mean
18050,50.7,120.954156
18051,47.3,120.954156
18052,44.0,120.954156
18053,43.7,120.954156
18054,43.3,120.954156
...,...,...
18213,67.0,120.954156
18214,79.0,120.954156
18215,86.0,120.954156
18216,93.0,120.954156


In [16]:
def mape(y_true, y_pred):
 return np.mean(np.abs((y_true - y_pred) / y_true)) * 100

In [17]:
def rmse(y_true, y_pred):
    return np.sqrt(np.mean((y_true - y_pred) ** 2))

In [18]:
mape_hist_mean = mape(test['pm10'], test['pred_mean'])
print(mape_hist_mean)

116.28587122557781


In [19]:
rmse_hist_mean = rmse(test['pm10'], test['pred_mean'])
print(rmse_hist_mean)

62.1915668774277


## Last Week Mean

In [21]:
last_week_mean = np.mean(train.pm10[-168:])

In [22]:
last_week_mean

94.66071428571429

In [23]:
test['pred_last_week'] = last_week_mean

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test['pred_last_week'] = last_week_mean


In [24]:
mape_last_week_mean = mape(test['pm10'], test['pred_last_week'])
print(mape_last_week_mean)

69.34905626014849


In [25]:
rmse_last_week_mean = rmse(test['pm10'], test['pred_last_week'])
print(rmse_last_week_mean)

37.37151827797957


## Last Known Value

In [26]:
last = train.pm10.iloc[-1]

In [27]:
last

54.0

In [28]:
test.loc[:, 'pred_last'] = last

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test.loc[:, 'pred_last'] = last


In [29]:
mape_last = mape(test['pm10'], test['pred_last'])
print(mape_last)

25.285032651313887


In [30]:
rmse_last = rmse(test['pm10'], test['pred_last'])
print(rmse_last)

17.25053484333029


## Naive Seasonal Forecast

In [31]:
test.loc[:, 'pred_last_season'] = train['pm10'][-168:].values

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test.loc[:, 'pred_last_season'] = train['pm10'][-168:].values


In [32]:
mape_naive_seasonal = mape(test['pm10'], test['pred_last_season'])
mape_naive_seasonal

85.94455490623777

In [33]:
rmse_naive_seasonal = rmse(test['pm10'], test['pred_last_season'])
print(rmse_naive_seasonal)

55.98969930774256
