In [54]:
from statsmodels.tsa.stattools import adfuller, grangercausalitytests, add_constant, coint
from statsmodels.tsa.vector_ar.vecm import coint_johansen, select_coint_rank
from statsmodels.tsa.api import AutoReg, VAR
from geopy.distance import great_circle
import matplotlib.pyplot as plt
import sklearn.metrics as skm
from tqdm import tqdm
import pandas as pd
import numpy as np
import math

In [9]:
# Load the combined PM2.5 / weather export and strip the columns the analysis does not use.
# NOTE(review): the path is absolute and machine-specific; consider a configurable data directory.
data = (
    pd.read_csv(r"C:\Users\VY72PC\OneDrive - ING\Documents\STARIMA Project\Data\pm25_weer.csv")
    # The first seven columns are discarded by position ...
    .pipe(lambda frame: frame.drop(columns=frame.columns[0:7]))
    # ... and the remaining unused ones by name.
    .drop(
        columns=[
            "jaar", "maand", "weeknummer", "#STN", "timestamp", "components",
            "dag", "tijd", "uur", "datum", "sensortype", "weekdag",
            "U", "H", "T", "FH",
        ]
    )
)

In [None]:
# Average every measurement per (date, station tag) pair.
# The column selection must be a list (double brackets): selecting several
# columns from a GroupBy with a bare tuple of keys is rejected by pandas >= 2.0.
grouped_df = (
    data.groupby(["YYYYMMDD", "tag"])[["pm25", "longitude", "latitude", "DD"]]
    .mean()
    .reset_index()
    # "DD" is exposed as "Angle" for the rest of the notebook. The former
    # "U" -> "Wind" mapping was dropped: "U" is not among the selected columns,
    # so that rename could never match anything.
    .rename(columns={"DD": "Angle"})
)
# grouped_df.head(5)

In [None]:
# Station tags in order of first appearance in grouped_df.
Locations = grouped_df["tag"].unique()

# Map each station tag to its (mean latitude, mean longitude) pair.
LocDict = {}
for station in Locations:
    is_station = grouped_df["tag"] == station
    LocDict[station] = (
        grouped_df.loc[is_station, "latitude"].mean(),
        grouped_df.loc[is_station, "longitude"].mean(),
    )

# LocDict

In [12]:
def get_bearing(coor1, coor2):
    """Return the initial great-circle bearing from ``coor1`` to ``coor2``.

    Parameters
    ----------
    coor1, coor2 : sequence of two floats
        ``(latitude, longitude)`` pairs expressed in decimal degrees.

    Returns
    -------
    float
        Bearing in degrees in the range (-180, 180], measured clockwise from
        north (0 = north, 90 = east, -90 = west).
    """
    # The trigonometric functions work in radians, so the degree inputs have
    # to be converted first; feeding degrees straight in yields wrong bearings.
    lat1, lon1 = math.radians(coor1[0]), math.radians(coor1[1])
    lat2, lon2 = math.radians(coor2[0]), math.radians(coor2[1])

    dLon = lon2 - lon1
    y = math.sin(dLon) * math.cos(lat2)
    x = math.cos(lat1) * math.sin(lat2) - math.sin(lat1) * math.cos(lat2) * math.cos(dLon)
    return math.degrees(math.atan2(y, x))

# Spatial weight matrix W (inverse great-circle distance in km) and matrix of
# bearings (degrees, as returned by get_bearing) between every ordered pair of
# stations. Rows/columns follow the order of `Locations`; the diagonal stays 0.
# The size is derived from the data instead of being hard-coded to 11.
n_locations = len(Locations)
W = np.zeros((n_locations, n_locations))
AngleMatrix = np.zeros((n_locations, n_locations))

for i, origin in enumerate(Locations):
    for j, target in enumerate(Locations):
        if i == j:
            continue
        # NOTE(review): two stations with identical coordinates would make the
        # distance zero and this division fail.
        W[i, j] = 1 / great_circle(LocDict[origin], LocDict[target]).km
        AngleMatrix[i, j] = get_bearing(LocDict[origin], LocDict[target])

In [None]:
# Index the frame by the date (as a string) and drop the columns that are no
# longer needed once the station coordinates have been extracted.
grouped_df = (
    grouped_df
    .assign(Date=grouped_df["YYYYMMDD"].astype(str))
    .set_index("Date")
    .drop(columns=["YYYYMMDD", "latitude", "longitude"])
)
# grouped_df.head(5)

In [None]:
UniqueNames = grouped_df.tag.unique()

# One single-column frame per station, with the column named after the station:
#   PolDict[station]   -> that station's "pm25" series
#   AngleDict[station] -> that station's "Angle" series
# Each frame is built as a fresh object from a column selection. The previous
# version renamed/dropped columns in place on boolean-filtered slices of
# grouped_df, which triggers SettingWithCopyWarning.
PolDict = {}
AngleDict = {}

for key in UniqueNames:
    station_rows = grouped_df.loc[grouped_df.tag == key]
    PolDict[key] = station_rows[["pm25"]].rename(columns={"pm25": key})
    AngleDict[key] = station_rows[["Angle"]].rename(columns={"Angle": key})


# AngleDict["Amsterdam"]

In [None]:
# Merge the per-station frames into one wide frame per variable
# (rows: dates, columns: stations).
# The seed frame is the first station in the dictionary rather than a
# hard-coded "Amsterdam", so the cell works for any set of stations.
station_order = list(PolDict)

df_pol = PolDict[station_order[0]].copy()
df_angle = AngleDict[station_order[0]].copy()

for key in station_order[1:]:
    df_pol = df_pol.combine_first(PolDict[key])
    df_angle = df_angle.combine_first(AngleDict[key])

# combine_first builds the union of the column labels and may reorder them.
# Later cells multiply these frames positionally with the station-by-station
# matrices, so the columns are put back into the station order explicitly.
df_pol = df_pol[station_order]
df_angle = df_angle[station_order]

# df_angle

In [None]:
# Replace missing values with the per-station (per-column) median.
# This is done on the whole frame at once: `df[col].fillna(..., inplace=True)`
# operates on a temporary column object, which pandas warns about and which no
# longer updates the parent frame once Copy-on-Write is enabled.
# NOTE(review): a plain median is used for the angle columns as well, which
# ignores the circular nature of directions — confirm this is intended.
df_pol = df_pol.fillna(df_pol.median())
df_angle = df_angle.fillna(df_angle.median())
    
# df_angle

In [110]:
# Select the cointegration rank of the station series with the
# maximum-eigenvalue variant of the test at the 5% level.
cont = select_coint_rank(
    endog=df_pol,
    det_order=1,
    k_ar_diff=5,
    method='maxeig',
    signif=0.05,
)
cont.summary()

r_0,r_1,test statistic,critical value
0,1,173.1,73.94
1,2,153.7,67.9
2,3,139.0,61.81
3,4,85.48,55.73
4,5,66.72,49.59
5,6,43.87,43.42
6,7,38.15,37.16
7,8,28.02,30.82


In [None]:
# Baseline VAR on the PM2.5 series, fitted with trend="ctt".
# No lag order is passed, so the library default is used.
VARModel = VAR(df_pol).fit(trend="ctt")
VARModel.summary()

In [None]:
# Spatially weighted series: each row of df_pol multiplied by the weight
# matrix W, with the columns labelled by the station names of df_pol.
WY = pd.DataFrame(
    df_pol.to_numpy() @ W,
    columns=[f'{station}' for station in df_pol.columns],
)

# VAR on the spatially weighted series.
SVAR = VAR(WY).fit(trend="ctt")
SVAR.summary()

In [None]:
# Build one wind-adjusted weight matrix per time step:
#   WW[t][j, k] = cos(bearing(j -> k) - angle at station j, time t) * W[j, k]
# AngleMatrix holds bearings in degrees, so the difference is converted to
# radians before taking the cosine (np.cos expects radians; applying it to
# degree values gives meaningless weights).
# NOTE(review): assumes the values in df_angle are in degrees as well — confirm.
angle_values = df_angle.to_numpy()
pol_values = df_pol.to_numpy()

WW = []
for wind_angles in angle_values:
    # Subtract station j's angle from every entry of row j (column broadcast).
    relative_deg = AngleMatrix - wind_angles[:, np.newaxis]
    wind_direction = np.cos(np.deg2rad(relative_deg)) * W
    # Non-finite entries are zeroed, as before.
    WW.append(np.nan_to_num(wind_direction, nan=0, posinf=0, neginf=0))

# Apply each time step's matrix to that time step's row of PM2.5 values.
WWY = np.zeros((len(df_pol), len(df_pol.columns)))
for i in range(len(df_pol)):
    WWY[i, :] = np.matmul(pol_values[i], WW[i])

WWY = pd.DataFrame(WWY, columns=[f'{station}' for station in df_pol.columns])

# VAR on the wind- and distance-weighted series.
SWVAR = VAR(WWY).fit(trend="ctt")
SWVAR.summary()

In [None]:
# Report the in-sample R-squared of the SWVAR fit for every station.
for key in PolDict:
    fitted = SWVAR.fittedvalues[key]
    # Fitted values plus residuals reconstruct the series the model was fitted to.
    observed = fitted + SWVAR.resid[key]
    R2 = skm.r2_score(observed, fitted)
    print(F'The R-Squared of {key} is: {R2*100:.2f}%')