In [1]:
import datetime
import pytz
import numpy as np
import matplotlib.pyplot as plt
from functools import reduce
%matplotlib inline
import seaborn as sns
import pandas as pd
pd.set_option('display.max_columns', None)
# print(pd.__version__)

In [2]:
pol_codes = [1, 5, 7, 8, 10, 6001]
pol_dict = {7: "O3", 6001: "PM2.5", 5: "PM10", 10: "CO", 1: "SO2", 8: "NO2"}
pol_dict_rev ={'SO2': 1,'PM10': 5,'O3': 7,'NO2': 8,'CO': 10,'PM2.5': 6001}
pol_units = {1:'µg/m3',5:'µg/m3',7:'µg/m3',8:'µg/m3',10:'mg/m3',6001:'µg/m3'}

In [3]:
### Import final version of Denmark dataset

df_denmark_v5 = pd.read_csv('denmark_v5.csv', low_memory=False)
df_denmark_v5 = df_denmark_v5.drop_duplicates(['AirQualityStation','AirPollutant','DatetimeEnd'], keep='last')

In [4]:
df_denmark_v5.groupby('AirQualityStation')['Concentration'].count().reset_index().sort_values(by='Concentration', ascending=False).head(3)

Unnamed: 0,AirQualityStation,Concentration
6,STA-DK0034A,468964
1,STA-DK0012R,300024
7,STA-DK0045A,234902


In [5]:
df_v5 = df_denmark_v5[df_denmark_v5['AirQualityStation']=='STA-DK0034A'].copy()
df_v5['DatetimeEnd'] = pd.to_datetime(df_v5['DatetimeEnd'], format="%Y-%m-%d %H:%M:%S")

In [6]:
df_v5.groupby('AirPollutant')['DatetimeEnd'].count()

AirPollutant
1       79115
5       79067
7       76232
8       79330
10      78594
6001    76626
Name: DatetimeEnd, dtype: int64

In [7]:
df_v5.info(verbose=True, show_counts=True)

<class 'pandas.core.frame.DataFrame'>
Int64Index: 468964 entries, 8147 to 2407479
Data columns (total 7 columns):
 #   Column                 Non-Null Count   Dtype                               
---  ------                 --------------   -----                               
 0   AirQualityStation      468964 non-null  object                              
 1   AirPollutant           468964 non-null  int64                               
 2   Concentration          468964 non-null  float64                             
 3   DatetimeEnd            468964 non-null  datetime64[ns, pytz.FixedOffset(60)]
 4   AirQualityStationType  468964 non-null  object                              
 5   AirQualityStationArea  468964 non-null  object                              
 6   StationCity            468964 non-null  object                              
dtypes: datetime64[ns, pytz.FixedOffset(60)](1), float64(1), int64(1), object(4)
memory usage: 28.6+ MB


In [8]:
### Written by Alexandra Nasonova https://github.com/AlexandraNasonova
### Modified and adapted by Adele Kim https://github.com/Adele-Kim

def mean_timeseries(df):
    df_mean_list = list()
    for i in range(len(pol_codes)):
        df_mean_list.append(df[df['AirPollutant']==pol_codes[i]].groupby('DatetimeEnd', as_index=False)['Concentration'].mean())
        df_mean_list[i].rename(columns={'Concentration': pol_dict[pol_codes[i]]}, inplace=True)
        timeseries = reduce(lambda df1, df2: df1.merge(df2, how='outer', left_on=['DatetimeEnd'], right_on=['DatetimeEnd']), df_mean_list)
    return timeseries

In [9]:
ts_dk = mean_timeseries(df_v5)
ts_dk2 = ts_dk.copy()
ts_dk2 = ts_dk2.set_index('DatetimeEnd')
ts_dk2 = ts_dk2.asfreq('H')

In [10]:
print(ts_dk2['SO2'].max())
print(ts_dk2['PM10'].max())
print(ts_dk2['O3'].max())
print(ts_dk2['NO2'].max())
print(ts_dk2['CO'].max())
print(ts_dk2['PM2.5'].max())

23.348
685.7
160.9
217.043
5.362
472.8


In [11]:
def aqi_easy_1(df, column_index): # SO2
    conc = [j for i in df.iloc[:, column_index:(column_index+1)].values.tolist() for j in i]
    c_list = []
    aqi = 0
    c = 0
    for i in range(len(conc)):
        try:
            if conc[i]<=35:
                aqi = int(round((50 * conc[i]) / 35, 0))
            elif 35<conc[i]<=75:
                aqi = int(round((100 - 51) * (conc[i] - 36) / (75 - 36) + 51, 0))
            elif 100<conc[i]<=185:
                aqi = int(round((150 - 101) * (conc[i] - 76) / (185 - 76) + 101, 0))
            elif 185<conc[i]<=304:
                aqi = int(round((200 - 151) * (conc[i] - 186) / (304 - 186) + 151, 0))
            elif 304<conc[i]<=604:
                aqi = int(round((300 - 201) * (conc[i] - 305) / (604 - 305) + 201, 0))
            elif 604<conc[i]<=804:
                aqi = int(round((400 - 301) * (conc[i] - 605) / (804 - 605) + 301, 0))
            else:
                aqi = int(round((500 - 401) * (conc[i] - 805) / (1004 - 805) + 401, 0))
        except:
            aqi = np.nan
        c_list.append(aqi)
    return c_list

In [12]:
def aqi_easy_5(df, column_index): # PM10
    conc = [j for i in df.iloc[:, column_index:(column_index+1)].values.tolist() for j in i]
    c_list = []
    aqi = 0
    c = 0
    for i in range(len(conc)):
        try:
            if conc[i]<=54:
                aqi = int(round((50 * conc[i]) / 54, 0))
            elif 54<conc[i]<=154:
                aqi = int(round((100 - 51) * (conc[i] - 55) / (154 - 55) + 51, 0))
            elif 154<conc[i]<=254:
                aqi = int(round((150 - 101) * (conc[i] - 155) / (254 - 155) + 101, 0))
            elif 254<conc[i]<=354:
                aqi = int(round((200 - 151) * (conc[i] - 255) / (354 - 255) + 151, 0))
            elif 354<conc[i]<=424:
                aqi = int(round((300 - 201) * (conc[i] - 355) / (424 - 355) + 201, 0))
            elif 424<conc[i]<=504:
                aqi = int(round((400 - 301) * (conc[i] - 425) / (504 - 425) + 301, 0))
            else:
                aqi = int(round((500 - 401) * (conc[i] - 505) / (604 - 505) + 401, 0))
        except:
            aqi = np.nan
        c_list.append(aqi)
    return c_list

In [13]:
def aqi_easy_7_8H(df, column_index): # O3 8Hour
    conc = [j for i in df.iloc[:, column_index:(column_index+1)].values.tolist() for j in i]
    c_list = []
    aqi = 0
    c = 0
    for i in range(len(conc)):
        try:
            if conc[i]<=0.054:
                aqi = int(round((50 * conc[i]) / 0.054, 0))
            elif 0.054<conc[i]<=0.070:
                aqi = int(round((100 - 51) * (conc[i] - 0.055) / (0.070 - 0.055) + 51, 0))
            elif 0.070<conc[i]<=0.085:
                aqi = int(round((150 - 101) * (conc[i] - 0.071) / (0.085 - 0.071) + 101, 0))
            elif 0.085<conc[i]<=0.105:
                aqi = int(round((200 - 151) * (conc[i] - 0.086) / (0.105 - 0.086) + 151, 0))
            elif 0.105<conc[i]<=0.200:
                aqi = int(round((300 - 201) * (conc[i] - 0.106) / (0.200 - 0.106) + 201, 0))
            else:
                aqi = np.nan
        except:
            aqi = np.nan
        c_list.append(aqi)
    return c_list


def aqi_easy_7_1H(df, column_index): # O3 1Hour
    conc = [j for i in df.iloc[:, column_index:(column_index+1)].values.tolist() for j in i]
    c_list = []
    aqi = 0
    c = 0
    for i in range(len(conc)):
        try:
            if 0.124<conc[i]<=0.164:
                aqi = int(round((150 - 101) * (conc[i] - 0.125) / (0.164 - 0.125) + 101, 0))
            elif 0.164<conc[i]<=0.204:
                aqi = int(round((200 - 151) * (conc[i] - 0.165) / (0.204 - 0.165) + 151, 0))
            elif 0.204<conc[i]<=0.404:
                aqi = int(round((300 - 201) * (conc[i] - 0.205) / (0.404 - 0.205) + 201, 0))
            elif 0.404<conc[i]<=0.504:
                aqi = int(round((400 - 301) * (conc[i] - 0.405) / (0.504 - 0.405) + 301, 0))
            elif 0.504<conc[i]<=0.604:
                aqi = int(round((500 - 401) * (conc[i] - 0.505) / (0.604 - 0.505) + 401, 0))
            else:
                aqi = np.nan
        except:
            aqi = np.nan
        c_list.append(aqi)
    return c_list

In [14]:
def aqi_easy_8(df, column_index): # NO2
    conc = [j for i in df.iloc[:, column_index:(column_index+1)].values.tolist() for j in i]
    c_list = []
    aqi = 0
    c = 0
    for i in range(len(conc)):
        try:
            if conc[i]<=53.0:
                aqi = int(round((50 * conc[i]) / 53, 0))
            elif 53<conc[i]<=100:
                aqi = int(round((100 - 51) * (conc[i] - 54) / (100 - 54) + 51, 0))
            elif 100<conc[i]<=360:
                aqi = int(round((150 - 101) * (conc[i] - 101) / (360 - 101) + 101, 0))
            elif 360<conc[i]<=649:
                aqi = int(round((200 - 151) * (conc[i] - 361) / (649 - 361) + 151, 0))
            elif 649<conc[i]<=1249:
                aqi = int(round((300 - 201) * (conc[i] - 650) / (1249 - 650) + 201, 0))
            elif 1249<conc[i]<=1649:
                aqi = int(round((400 - 301) * (conc[i] - 1250) / (1649 - 1250) + 301, 0))
            else:
                aqi = int(round((500 - 401) * (conc[i] - 1650) / (2049 - 1650) + 401, 0))
        except:
            aqi = np.nan
        c_list.append(aqi)
    return c_list

In [15]:
def aqi_easy_10(df, column_index): # CO
    conc = [j for i in df.iloc[:, column_index:(column_index+1)].values.tolist() for j in i]
    c_list = []
    aqi = 0
    c = 0
    for i in range(len(conc)):
        try:
            if conc[i]<=4.4:
                aqi = int(round((50 * conc[i]) / 4.4, 0))
            elif 4.4<conc[i]<=9.4:
                aqi = int(round((100 - 51) * (conc[i] - 4.5) / (9.4 - 4.5) + 51, 0))
            elif 9.4<conc[i]<=12.4:
                aqi = int(round((150 - 101) * (conc[i] - 9.5) / (12.4 - 9.5) + 101, 0))
            elif 12.4<conc[i]<=15.4:
                aqi = int(round((200 - 151) * (conc[i] - 12.5) / (15.4 - 12.5) + 151, 0))
            elif 15.4<conc[i]<=30.4:
                aqi = int(round((300 - 201) * (conc[i] - 15.5) / (30.4 - 15.5) + 201, 0))
            elif 30.4<conc[i]<=40.4:
                aqi = int(round((400 - 301) * (conc[i] - 30.5) / (40.4 - 30.5) + 301, 0))
            else:
                aqi = int(round((500 - 401) * (conc[i] - 40.5) / (50.4 - 40.5) + 401, 0))
        except:
            aqi = np.nan
        c_list.append(aqi)
    return c_list

In [16]:
def aqi_easy_6001(df, column_index): # PM2.5
    conc = [j for i in df.iloc[:, column_index:(column_index+1)].values.tolist() for j in i]
    c_list = []
    aqi = 0
    c = 0
    for i in range(len(conc)):
        try:
            if conc[i]<=12.0:
                aqi = int(round((50 * conc[i]) / 12.0, 0))
            elif 12.0<conc[i]<=35.4:
                aqi = int(round((100 - 51) * (conc[i] - 12.1) / (35.4 - 12.1) + 51, 0))
            elif 35.4<conc[i]<=55.4:
                aqi = int(round((150 - 101) * (conc[i] - 35.5) / (55.4 - 35.5) + 101, 0))
            elif 55.4<conc[i]<=150.4:
                aqi = int(round((200 - 151) * (conc[i] - 55.5) / (150.4 - 55.5) + 151, 0))
            elif 150.4<conc[i]<=250.4:
                aqi = int(round((300 - 201) * (conc[i] - 150.5) / (250.4 - 150.5) + 201, 0))
            elif 250.4<conc[i]<=350.4:
                aqi = int(round((400 - 301) * (conc[i] - 250.5) / (350.4 - 250.5) + 301, 0))
            else:
                aqi = int(round((500 - 401) * (conc[i] - 350.5) / (500.4 - 350.5) + 401, 0))
        except:
            aqi = np.nan
        c_list.append(aqi)
    return c_list

In [17]:
# SO2 = 1000*0.001*(ug/m3)/2.664 = (ug/m3)/2.664
# NO2 = 1000*0.001*(ug/m3)/1.913 = (ug/m3)/1.913
# O3 = (ug/m3)/(1.996*1000) Ozon
# CO = (mg/m3)/1.165 CO

def converter(df_temp):
    df = df_temp
    df.iloc[:, 0:1] = df.iloc[:, 0:1]/2.664      # SO2
    df.iloc[:, 2:3] = df.iloc[:, 2:3]/1996       # O3
    df.iloc[:, 3:4] = df.iloc[:, 3:4]/1.913      # NO2
    df.iloc[:, 4:5] = df.iloc[:, 4:5]/1.165      # CO
    return df

In [18]:
def roller(df_temp):
    df = df_temp
    df['1_1H'] = df.iloc[:, 0:1]                                              # SO2    1H 
    df['5_24H'] = df.iloc[:, 1:2].rolling(window=24, min_periods=1).mean()    # PM10  24H
    df['7_8H'] = df.iloc[:, 2:3].rolling(window=8, min_periods=1).mean()      # O3     8H
    df['7_1H'] = df.iloc[:, 2:3]                                              # O3     1H
    df['8_1H'] = df.iloc[:, 3:4]                                              # NO2    1H
    df['10_8H'] = df.iloc[:, 4:5].rolling(window=24, min_periods=1).mean()    # CO     8H
    df['6001_24H'] = df.iloc[:, 5:6].rolling(window=24, min_periods=1).mean() # PM2.5 24H
    return df

In [19]:
def calculator(df_temp):
    df = df_temp
    df['aqi_1'] = aqi_easy_1(df, 0)
    df['aqi_5'] = aqi_easy_5(df, 1)
    df['aqi_7_8H'] = aqi_easy_7_8H(df, 2)
    df['aqi_7_1H'] = aqi_easy_7_1H(df, 2)
    df['aqi_8'] = aqi_easy_8(df, 3)
    df['aqi_10'] = aqi_easy_10(df, 4)
    df['aqi_6001'] = aqi_easy_6001(df, 5)
    return df

In [20]:
def combiner(df_temp):
    df = df_temp
    df = df.iloc[:,13:20]
    df['aqi'] = df.iloc[:,:7].max(axis=1)
    df = df.iloc[:,7:8]
    return df

In [21]:
def grouper(df_temp):
    df = df_temp
    df_index = pd.DataFrame(index=df.groupby(pd.Grouper(freq="24H")).count().index)
    df = df_index.merge(df.groupby(pd.Grouper(freq="24H")).mean(), left_index=True, right_index=True)
    return df

In [22]:
def filler(df_temp):
    df = df_temp
    df = df.fillna(df.median())
    return df

In [23]:
def rounder(df_temp):
    df = df_temp
    df['aqi'] = round(df['aqi'], 0)
    return df

In [24]:
def aqi(df):
    df = rounder(filler(grouper(combiner(calculator(roller(converter(df)))))))
    return df

In [25]:
ts_dk8 = aqi(ts_dk2)
ts_dk8.head()

Unnamed: 0_level_0,aqi
DatetimeEnd,Unnamed: 1_level_1
2013-01-01 00:00:00+01:00,21.0
2013-01-02 00:00:00+01:00,41.0
2013-01-03 00:00:00+01:00,39.0
2013-01-04 00:00:00+01:00,48.0
2013-01-05 00:00:00+01:00,33.0


In [26]:
### Check how many NaNs were there before we applied filler function

display(len((grouper(combiner(calculator(roller(converter(ts_dk2)))))).query('aqi!=aqi')))

43

In [27]:
ts_final = ts_dk8.copy()

In [28]:
def feature_creator_v2(df_temp):
    df = df_temp
    df.columns = ['aqi_d5']
    df['aqi_d4'] = df['aqi_d5'].shift(1)
    df['aqi_d3'] = df['aqi_d5'].shift(2)
    df['aqi_d2'] = df['aqi_d5'].shift(3)
    df['aqi_d1'] = df['aqi_d5'].shift(4)
    df['aqi'] = df['aqi_d5'].shift(5)
    df['aqi_dd1'] = df['aqi_d1'].shift(1)
    df['aqi_dd2'] = df['aqi_d1'].shift(2)
    df['aqi_dd1'] = df['aqi_d1'] - df['aqi_dd1']
    df['aqi_dd2'] = df['aqi_d2'] - df['aqi_dd2']
#     df['month'] = df.index.month
#     df['monthday'] = df.index.strftime("%d")
# #     df['week'] = df.index.isocalendar().week
#     df['weekend'] = df.index.weekday
#     weekdays_list = list(df["weekend"])
#     weekend_list = []
#     for i in range(len(weekdays_list)):
#         weekend_flag = False
#         if (weekdays_list[i] == 6) | (weekdays_list[i] == 0):
#             weekend_flag = True
#         weekend_list.append(weekend_flag)
#     df['weekend'] = weekend_list
    df = df.iloc[7:,:]
    return df

In [29]:
tsf2 = feature_creator_v2(ts_final)

In [30]:
def data_splitter(df_temp):
    df = df_temp
    df_train = df[df.index < "2022-01-01 00:00:00+01:00"].copy()
    df_test = df[df.index >= "2022-01-01 00:00:00+01:00"].copy()
    return df_train, df_test

In [31]:
df_train, df_test = data_splitter(tsf2)

In [32]:
display(len(df_train))
display(len(df_test))

3280

303

In [33]:
# columns_to_drop_old = ['aqi_d5','aqi_d4','aqi_d3','aqi','month','monthday','weekday','day']
columns_to_drop = ['aqi']

y_train = df_train['aqi']
X_train = df_train.drop(columns_to_drop, axis=1)

y_test = df_test['aqi']
X_test = df_test.drop(columns_to_drop, axis=1)

In [34]:
display(X_train.head(5))
display(X_test.head(5))

Unnamed: 0_level_0,aqi_d5,aqi_d4,aqi_d3,aqi_d2,aqi_d1,aqi_dd1,aqi_dd2
DatetimeEnd,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2013-01-08 00:00:00+01:00,32.0,29.0,24.0,33.0,48.0,-2.0,18.0
2013-01-09 00:00:00+01:00,33.0,32.0,29.0,24.0,33.0,9.0,7.0
2013-01-10 00:00:00+01:00,24.0,33.0,32.0,29.0,24.0,-15.0,-6.0
2013-01-11 00:00:00+01:00,30.0,24.0,33.0,32.0,29.0,-9.0,-24.0
2013-01-12 00:00:00+01:00,30.0,30.0,24.0,33.0,32.0,5.0,-4.0


Unnamed: 0_level_0,aqi_d5,aqi_d4,aqi_d3,aqi_d2,aqi_d1,aqi_dd1,aqi_dd2
DatetimeEnd,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2022-01-01 00:00:00+01:00,60.0,18.0,32.0,89.0,79.0,10.0,-1.0
2022-01-02 00:00:00+01:00,33.0,60.0,18.0,32.0,89.0,46.0,56.0
2022-01-03 00:00:00+01:00,31.0,33.0,60.0,18.0,32.0,10.0,56.0
2022-01-04 00:00:00+01:00,28.0,31.0,33.0,60.0,18.0,-57.0,-47.0
2022-01-05 00:00:00+01:00,28.0,28.0,31.0,33.0,60.0,-14.0,-71.0


In [35]:
# X_train_cat_numeric = df_train[['aqi_d5', 'aqi_d4', 'aqi_d3', 'aqi_d2', 'aqi_d1', 'aqi_dd1', 'aqi_dd2']] 
# X_train_cat_categorical =  df_train[['month','monthday','weekend']]
# X_test_cat_numeric = df_test[['aqi_d5', 'aqi_d4', 'aqi_d3', 'aqi_d2', 'aqi_d1', 'aqi_dd1', 'aqi_dd2']] 
# X_test_cat_categorical =  df_test[['month','monthday','weekend']]
# X_train_cat = pd.DataFrame(np.hstack([X_train_cat_numeric,X_train_cat_categorical]))
# X_test_cat = pd.DataFrame(np.hstack([X_test_cat_numeric,X_test_cat_categorical]))

In [36]:
# from sklearn.preprocessing import OneHotEncoder
# from sklearn.model_selection import GridSearchCV
# from sklearn.linear_model import Ridge

from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score, mean_squared_error as MSE

In [37]:
# scaler = StandardScaler()
# X_tr = scaler.fit_transform(X_train)
# X_te = scaler.transform(X_test)

# one = OneHotEncoder(sparse=False, drop='first')

# X_train_cat_transformed = pd.DataFrame(one.fit_transform(X_train_cat_categorical))
# X_train_cat_transformed.reindex 
# X_train_cat_numeric = scaler.fit_transform(X_train_cat_numeric)
# X_train_cat = pd.DataFrame(np.hstack([X_train_cat_numeric,X_train_cat_transformed]))
# X_train_cat.reindex

# X_test_cat_transformed = pd.DataFrame(one.fit_transform(X_test_cat_categorical))
# X_test_cat_transformed.reindex 
# X_test_cat_numeric = scaler.fit_transform(X_test_cat_numeric)
# X_test_cat = pd.DataFrame(np.hstack([X_test_cat_numeric,X_test_cat_transformed]))
# X_test_cat.reindex

# print("X_train_cat ", X_train_cat.shape)
# X_train_cat.head()

In [38]:
# print("X_test_cat ", X_test_cat.shape)
# X_test_cat.head()

In [39]:
# assert X_train_cat.shape == (X_train_cat.shape[0], X_train_cat.shape[1])
# assert X_test_cat.shape == (X_test_cat.shape[0], X_test_cat.shape[1])

In [40]:
# ### Ridge regression

# parameters = {'alpha': range(1, 200, 1)} 
# model = Ridge()
# ridge_grid = GridSearchCV(model, parameters, cv = 10, scoring='r2')
# ridge_grid.fit(X_train_cat, y_train)

# pred_train = ridge_grid.predict(X_train_cat)
# pred_test = ridge_grid.predict(X_test_cat)

# # X_test_cat_transformed= one.transform(X_test_cat_categorical)
# X_test_cat_transformed = pd.DataFrame(one.transform(X_test_cat_categorical))
# X_test_cat_numeric = scaler.transform(X_test_cat_numeric)
# X_test_cat = pd.DataFrame(np.hstack([X_test_cat_numeric,X_test_cat_transformed]))
# X_test_cat.reindex

# print(ridge_grid.best_estimator_)
# print('for TRAIN:')
# print('MSE:', MSE(y_train, pred_train))
# print('R2:', r2_score(y_train, pred_train))
# print('for TEST:')
# print('MSE:', MSE(y_test, pred_test))
# print('R2:', r2_score(y_test, pred_test))

In [41]:
### Standard linear regression

scaler = StandardScaler()
scaler.fit(X_train)

X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

lr_scaled = LinearRegression()
lr_scaled.fit(X_train_scaled, y_train)
pred_train = lr_scaled.predict(X_train_scaled)
pred_test = lr_scaled.predict(X_test_scaled)

print('for TRAIN:')
print('MSE:', MSE(y_train, pred_train))
print('R2:', r2_score(y_train, pred_train))
print('for TEST:')
print('MSE:', MSE(y_test, pred_test))
print('R2:', r2_score(y_test, pred_test))

for TRAIN:
MSE: 71.90179589194068
R2: 0.5461169695460464
for TEST:
MSE: 48.111707198603234
R2: 0.5108993287377129
