# Model Results

## Imports

In [1]:
import pandas as pd
import numpy as np
from sklearn.metrics import r2_score
from sklearn.metrics import mean_squared_error
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score

## Load Data Files

In [25]:
# Read the four per-model evaluation exports; the names on the left are bound
# in the same order as the paths are listed.
rfg, rfc, xgbr, xgbc = map(
    pd.read_csv,
    (
        "../../../Data_thesis/Full_Datasets/rfg_eval.csv",
        "../../../Data_thesis/Full_Datasets/rfc_eval.csv",
        "../../../Data_thesis/Full_Datasets/xgbr_eval.csv",
        "../../../Data_thesis/Full_Datasets/xgbc_eval.csv",
    ),
)

In [5]:
# Load the full dataset, keep rows dated 2019-01-01 or later, store Date as a
# string again and expose the observed counts under the name "TrueCounts".
# NOTE(review): rename(index=str) also turns the row labels into strings
# ("0", "1", ...); kept as-is here — confirm whether that is intended.
full_df = (
    pd.read_csv("../../../Data_thesis/Full_Datasets/Full.csv")
    .assign(Date=lambda frame: pd.to_datetime(frame["Date"], format="%Y-%m-%d"))
    .loc[lambda frame: frame["Date"] >= pd.Timestamp(2019, 1, 1)]
    .reset_index(drop=True)
    .assign(Date=lambda frame: frame["Date"].astype(str))
    .rename(index=str, columns={"CrowdednessCount": "TrueCounts"})
)

In [17]:
# Display the column labels of full_df.
full_df.columns

Index(['Date', 'Hour', 'Nieuwmarkt Lat', 'Nieuwmarkt Lon',
       'Nieuwezijds Kolk Lat', 'Nieuwezijds Kolk Lon', 'Dam Lat', 'Dam Lon',
       'Spui Lat', 'Spui Lon', 'Centraal Station Lat', 'Centraal Station Lon',
       'weekday', 'is_weekend', 'Sensor', 'SensorLongitude', 'SensorLatitude',
       'TrueCounts', 'LonScaled', 'LatScaled', 'is_event', 'Year', 'month_sin',
       'month_cos', 'day_sin', 'day_cos', 'hour_sin', 'hour_cos',
       'Nieuwmarkt score', 'Nieuwmarkt weight', 'Nieuwezijds Kolk score',
       'Nieuwezijds Kolk weight', 'Dam score', 'Dam weight', 'Spui score',
       'Spui weight', 'Centraal Station score', 'Centraal Station weight'],
      dtype='object')

In [19]:
def classCrowdednessCounts(df):
    """Replace the values in the "True" column with quartile classes 1-4.

    The 25%, 50% and 75% quantiles of ``df["True"]`` are used as split points:

    * value < 25% quantile                  -> 1
    * 25% quantile <= value < 50% quantile  -> 2
    * 50% quantile <= value < 75% quantile  -> 3
    * value >= 75% quantile                 -> 4

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a numeric "True" column.

    Returns
    -------
    pandas.DataFrame
        A copy of ``df`` (same index and columns) whose "True" column holds
        the class labels. The input frame is not modified.

    Notes
    -----
    Rows whose value matches no rule (e.g. NaN) keep NaN in "True" and are
    reported with a printed message, one line per row label.
    """
    counts = df["True"]
    low_split = counts.quantile(.25)
    mid_split = counts.quantile(.5)
    high_split = counts.quantile(.75)

    # The four conditions are mutually exclusive, so assignment order is irrelevant.
    # Masks are converted to arrays so they are applied positionally.
    classes = pd.Series(float("nan"), index=df.index, dtype="float64")
    classes.loc[(counts < low_split).to_numpy()] = 1
    classes.loc[((counts >= low_split) & (counts < mid_split)).to_numpy()] = 2
    classes.loc[((counts >= mid_split) & (counts < high_split)).to_numpy()] = 3
    classes.loc[(counts >= high_split).to_numpy()] = 4

    unclassified = classes.isna().to_numpy()
    for k in df.index[unclassified]:
        print(k, " has class error as it fits in none")

    # Integer labels whenever every row was classified; NaN forces float otherwise.
    if not unclassified.any():
        classes = classes.astype("int64")

    out = df.copy()
    out["True"] = classes

    return out

In [14]:
def getSensor(df, sensor_df=None):
    """Return a copy of ``df`` with a "Sensor" column looked up via "SensLon".

    Each value of ``df["SensLon"]`` is matched exactly against the "LonScaled"
    column of the reference frame, and the "Sensor" value of the first
    matching reference row is used.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a "SensLon" column.
    sensor_df : pandas.DataFrame, optional
        Reference frame with "LonScaled" and "Sensor" columns. Defaults to the
        notebook-level ``full_df``.

    Returns
    -------
    pandas.DataFrame
        New frame with the columns of ``df`` plus "Sensor". The input frame is
        left untouched.

    Raises
    ------
    KeyError
        If a "SensLon" value has no matching "LonScaled" entry.
    """
    if sensor_df is None:
        sensor_df = full_df

    # Build the LonScaled -> Sensor table once instead of filtering the
    # reference frame for every row; keep="first" keeps the first match.
    lookup = (
        sensor_df.dropna(subset=["LonScaled"])
        .drop_duplicates(subset="LonScaled", keep="first")
        .set_index("LonScaled")["Sensor"]
    )

    unmatched = ~df["SensLon"].isin(lookup.index)
    if unmatched.any():
        missing_values = df.loc[unmatched, "SensLon"].unique().tolist()
        raise KeyError(f"No sensor found for SensLon value(s): {missing_values}")

    return df.assign(Sensor=df["SensLon"].map(lookup))

In [7]:
# classCrowdednessCounts reads the counts from a column named "True", while
# full_df exposes them as "TrueCounts"; rename for the call so the cell runs on
# a fresh kernel, then restore the original column name.
# NOTE(review): restoring the name assumes clas_df should keep full_df's schema — confirm.
clas_df = classCrowdednessCounts(
    full_df.rename(columns={"TrueCounts": "True"})
).rename(columns={"True": "TrueCounts"})

In [26]:
# Preview the first rows of rfc.
rfc.head()

Unnamed: 0,SensLat,SensLon,True,Pred
0,-0.925578,-1.170458,1,1
1,1.219034,1.700665,3,3
2,1.430863,1.152297,4,4
3,0.506281,0.279566,1,4
4,-0.925578,-1.170458,1,3


In [23]:
# Add the "Sensor" column to the rfc rows via getSensor and preview the result.
df = getSensor(rfc)
df.head()

Unnamed: 0,SensLat,SensLon,True,Pred,Sensor
0,-0.925578,-1.170458,1,1,GAWW-03
1,1.219034,1.700665,3,3,GAWW-07
2,1.430863,1.152297,4,4,GAWW-01
3,0.506281,0.279566,1,4,GAWW-02
4,-0.925578,-1.170458,1,3,GAWW-03


In [8]:
# Overall R2 / RMSE of the rfg predictions against the observed counts.
# NOTE(review): this merge needs rfg to carry "Date", "Hour", "Sensor" and
# "CrowdednessCount" columns; the per-sensor cells read "SensLon"/"True"/"Pred"
# from rfg instead — confirm which schema rfg_eval.csv currently has.
full = full_df[["Date", "Hour", "Sensor", "TrueCounts"]].copy()

# Inner join: an outer join leaves NaN in the metric columns for any key present
# in only one frame, and the sklearn metrics reject NaN input. When every key
# matches, the result is identical to the outer join.
df = rfg.merge(full, how="inner", on=["Date", "Hour", "Sensor"], sort=True)

r2 = round(r2_score(df["TrueCounts"], df["CrowdednessCount"]), 3)
rmse = round(np.sqrt(mean_squared_error(df["TrueCounts"], df["CrowdednessCount"])), 2)

print(r2, rmse)

0.869 376.76


## Results

### RFG

In [17]:
# R2 and RMSE of the rfg predictions, computed separately for every sensor.
rfg_dict = {}
sensors = full_df["Sensor"].unique()

# Labelling rows with their sensor does not depend on the loop variable,
# so do it once rather than once per sensor.
rfg_labelled = getSensor(rfg)

for sensor in sensors:
    df = rfg_labelled[rfg_labelled["Sensor"] == sensor].copy()

    r2 = round(r2_score(df["True"], df["Pred"]), 3)
    rmse = round(np.sqrt(mean_squared_error(df["True"], df["Pred"])), 2)

    rfg_dict[sensor] = {"R2": r2, "RMSE": rmse}

rfg_dict

{'GAWW-03': {'R2': 0.907, 'RMSE': 235.94},
 'GAWW-02': {'R2': 0.839, 'RMSE': 557.01},
 'GAWW-01': {'R2': 0.814, 'RMSE': 487.46},
 'GAWW-07': {'R2': 0.927, 'RMSE': 193.18},
 'GAWW-06': {'R2': 0.759, 'RMSE': 501.13},
 'GAWW-05': {'R2': 0.909, 'RMSE': 294.48},
 'GAWW-04': {'R2': 0.852, 'RMSE': 124.83}}

### XGBR

In [18]:
# R2 and RMSE of the xgbr predictions, computed separately for every sensor.
xgbr_dict = {}
sensors = full_df["Sensor"].unique()

# Labelling rows with their sensor does not depend on the loop variable,
# so do it once rather than once per sensor.
xgbr_labelled = getSensor(xgbr)

for sensor in sensors:
    df = xgbr_labelled[xgbr_labelled["Sensor"] == sensor].copy()

    r2 = round(r2_score(df["True"], df["Pred"]), 3)
    rmse = round(np.sqrt(mean_squared_error(df["True"], df["Pred"])), 2)

    xgbr_dict[sensor] = {"R2": r2, "RMSE": rmse}

xgbr_dict

{'GAWW-03': {'R2': 0.734, 'RMSE': 399.68},
 'GAWW-02': {'R2': 0.805, 'RMSE': 614.08},
 'GAWW-01': {'R2': 0.775, 'RMSE': 535.9},
 'GAWW-07': {'R2': 0.905, 'RMSE': 219.2},
 'GAWW-06': {'R2': 0.698, 'RMSE': 561.78},
 'GAWW-05': {'R2': 0.875, 'RMSE': 344.23},
 'GAWW-04': {'R2': 0.62, 'RMSE': 199.8}}

### RFC

In [27]:
# Accuracy plus per-class precision / recall / F1 of the rfc predictions,
# computed separately for every sensor (average=None returns one value per class).
rfc_dict = {}
sensors = full_df["Sensor"].unique()

# Labelling rows with their sensor does not depend on the loop variable,
# so do it once rather than once per sensor.
rfc_labelled = getSensor(rfc)

for sensor in sensors:
    df = rfc_labelled[rfc_labelled["Sensor"] == sensor].copy()

    acc = round(accuracy_score(df["True"], df["Pred"]), 3)
    prec = precision_score(df["True"], df["Pred"], average=None)
    rec = recall_score(df["True"], df["Pred"], average=None)
    f1 = f1_score(df["True"], df["Pred"], average=None)

    rfc_dict[sensor] = {"Accuracy": acc, "Precision": prec, "Recall": rec, "F1 Score": f1}

rfc_dict

{'GAWW-03': {'Accuracy': 0.89,
  'Precision': array([0.9707149 , 0.8209607 , 0.70080863, 0.85217391]),
  'Recall': array([0.96987952, 0.7148289 , 0.81761006, 0.79032258]),
  'F1 Score': array([0.97029703, 0.76422764, 0.75471698, 0.82008368])},
 'GAWW-02': {'Accuracy': 0.873,
  'Precision': array([0.7877551 , 0.87362637, 0.82621083, 0.90891473]),
  'Recall': array([0.80416667, 0.77750611, 0.7651715 , 0.97302905]),
  'F1 Score': array([0.79587629, 0.82276843, 0.79452055, 0.93987976])},
 'GAWW-01': {'Accuracy': 0.852,
  'Precision': array([0.90939597, 0.85680751, 0.8248337 , 0.84436275]),
  'Recall': array([0.67581047, 0.8902439 , 0.82666667, 0.94383562]),
  'F1 Score': array([0.77539342, 0.87320574, 0.82574917, 0.89133247])},
 'GAWW-07': {'Accuracy': 0.863,
  'Precision': array([0.9375    , 0.8972332 , 0.85290148, 0.77941176]),
  'Recall': array([0.91569767, 0.8647619 , 0.84832215, 0.84350133]),
  'F1 Score': array([0.92647059, 0.88069835, 0.85060565, 0.81019108])},
 'GAWW-06': {'Accurac

In [28]:
# Accuracy plus per-class precision / recall / F1 of the xgbc predictions,
# computed separately for every sensor (average=None returns one value per class).
xgbc_dict = {}
sensors = full_df["Sensor"].unique()

# Labelling rows with their sensor does not depend on the loop variable,
# so do it once rather than once per sensor.
xgbc_labelled = getSensor(xgbc)

for sensor in sensors:
    df = xgbc_labelled[xgbc_labelled["Sensor"] == sensor].copy()

    acc = round(accuracy_score(df["True"], df["Pred"]), 3)
    prec = precision_score(df["True"], df["Pred"], average=None)
    rec = recall_score(df["True"], df["Pred"], average=None)
    f1 = f1_score(df["True"], df["Pred"], average=None)

    xgbc_dict[sensor] = {"Accuracy": acc, "Precision": prec, "Recall": rec, "F1 Score": f1}

xgbc_dict

{'GAWW-03': {'Accuracy': 0.812,
  'Precision': array([0.896875  , 0.72254335, 0.5732899 , 0.72294372]),
  'Recall': array([0.98795181, 0.47528517, 0.55345912, 0.6733871 ]),
  'F1 Score': array([0.94021294, 0.5733945 , 0.5632    , 0.69728601])},
 'GAWW-02': {'Accuracy': 0.783,
  'Precision': array([0.64583333, 0.73816156, 0.66081871, 0.88035892]),
  'Recall': array([0.775     , 0.64792176, 0.59630607, 0.9159751 ]),
  'F1 Score': array([0.70454545, 0.69010417, 0.62690707, 0.89781393])},
 'GAWW-01': {'Accuracy': 0.817,
  'Precision': array([0.85185185, 0.8056872 , 0.78636364, 0.82572115]),
  'Recall': array([0.63092269, 0.82926829, 0.76888889, 0.94109589]),
  'F1 Score': array([0.72492837, 0.81730769, 0.77752809, 0.87964149])},
 'GAWW-07': {'Accuracy': 0.798,
  'Precision': array([0.98601399, 0.84855234, 0.74374255, 0.72422062]),
  'Recall': array([0.81976744, 0.72571429, 0.83758389, 0.80106101]),
  'F1 Score': array([0.8952381 , 0.78234086, 0.78787879, 0.76070529])},
 'GAWW-06': {'Accura