# Model Results

## Imports

In [2]:
import pandas as pd
import numpy as np
from sklearn.metrics import r2_score
from sklearn.metrics import mean_squared_error
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score

## Import Files

In [3]:
# Directory that holds every evaluation/prediction CSV read in this cell.
DATA_DIR = "../../../Data_thesis/Full_Datasets"


def _read_predictions(model_tag):
    """Read ``<model_tag>_Predictions.csv`` from DATA_DIR, ordered by Hour then Sensor.

    The row labels assigned by ``read_csv`` are kept (the index is not reset);
    only the row order changes.
    """
    path = DATA_DIR + "/" + model_tag + "_Predictions.csv"
    return pd.read_csv(path).sort_values(by=["Hour", "Sensor"])


# Per-model evaluation result files, read as-is.
rfg = pd.read_csv(DATA_DIR + "/rfg_evalResults.csv")
rfc = pd.read_csv(DATA_DIR + "/rfc_evalResults.csv")
xgbr = pd.read_csv(DATA_DIR + "/xgbr_evalResults.csv")
xgbc = pd.read_csv(DATA_DIR + "/xgbc_evalResults.csv")

# Per-model prediction files, each ordered by (Hour, Sensor).
pred_rfg = _read_predictions("rfg")
pred_rfc = _read_predictions("rfc")
pred_xgbr = _read_predictions("xgbr")
pred_xgbc = _read_predictions("xgbc")
pred_lr = _read_predictions("lr")
pred_dc = _read_predictions("dc")

In [4]:
# Only rows recorded on this date are kept.
EVAL_DATE = pd.Timestamp(2019, 3, 25)

full_df = (
    pd.read_csv("../../../Data_thesis/Full_Datasets/Full.csv")
    # Parse the dates so the filter below compares timestamps, not text.
    .assign(Date=lambda frame: pd.to_datetime(frame["Date"], format="%Y-%m-%d"))
    .loc[lambda frame: frame["Date"] == EVAL_DATE]
    # Renumber the surviving rows 0..n-1; the labels stay integers.
    .reset_index(drop=True)
    # Store the date as text again once filtering is done.
    .assign(Date=lambda frame: frame["Date"].astype(str))
    .rename(columns={"CrowdednessCount": "TrueCounts"})
    .sort_values(by=["Hour", "Sensor"])
)
full_df = full_df[["Date", "Hour", "Sensor", "LonScaled", "LatScaled", "TrueCounts"]]

In [11]:
# Preview the first five rows of the prepared frame.
full_df.head(n=5)

Unnamed: 0,Date,Hour,Sensor,LonScaled,LatScaled,TrueCounts
111,2019-03-25,100,GAWW-01,1.152297,1.430863,1324
112,2019-03-25,100,GAWW-02,0.279566,0.506281,1359
113,2019-03-25,100,GAWW-03,-1.170458,-0.925578,399
114,2019-03-25,100,GAWW-04,-0.640018,-0.06408,277
115,2019-03-25,100,GAWW-05,-0.816523,-1.1188,1300


In [20]:
# Preview the first five rows of the sorted XGBR prediction frame.
pred_xgbr.head(n=5)

Unnamed: 0,Date,Hour,Sensor,SensorLongitude,SensorLatitude,CrowdednessCount
48,2019-03-25,100,GAWW-01,4.899847,52.374601,2252.652
72,2019-03-25,100,GAWW-02,4.898903,52.373786,2227.0378
0,2019-03-25,100,GAWW-03,4.897334,52.372524,1887.8135
96,2019-03-25,100,GAWW-04,4.897908,52.373283,2215.1257
120,2019-03-25,100,GAWW-05,4.897717,52.372353,2161.635


In [8]:
def classCrowdednessCounts(df):
    """Replace the "TrueCounts" column with quartile-based class labels 1-4.

    The 25%, 50% and 75% quantiles of ``df["TrueCounts"]`` are the split
    points. A count below the 25% split becomes class 1; from the 25% split up
    to (not including) the median, class 2; from the median up to (not
    including) the 75% split, class 3; at or above the 75% split, class 4.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a numeric "TrueCounts" column. It is not modified.

    Returns
    -------
    pandas.DataFrame
        Copy of ``df`` (same index, same column order) whose "TrueCounts"
        column holds the class labels. Rows that match none of the ranges
        (NaN counts) are reported with ``print`` and hold NaN.
    """
    counts = df["TrueCounts"]
    low_split = counts.quantile(.25)
    mid_split = counts.quantile(.5)
    high_split = counts.quantile(.75)

    # np.select takes the first condition that holds, so each "< split" test
    # only decides rows that every earlier test rejected. 0 marks "no match".
    classes = np.select(
        [
            (counts < low_split).values,
            (counts < mid_split).values,
            (counts < high_split).values,
            (counts >= high_split).values,
        ],
        [1, 2, 3, 4],
        default=0,
    )

    result = df.copy()
    unclassified = classes == 0
    if unclassified.any():
        for label in df.index[unclassified]:
            print(label, " has class error as it fits in none")
        # NaN cannot live in an integer column, so fall back to floats here.
        labels = classes.astype("float64")
        labels[unclassified] = np.nan
        result["TrueCounts"] = labels
    else:
        result["TrueCounts"] = classes.astype("int64")

    return result

In [9]:
def getSensor(df, reference=None):
    """Return a copy of ``df`` with a "Sensor" column looked up from "Lon".

    Each row's "Lon" value is matched exactly against the "LonScaled" column
    of ``reference``; the "Sensor" of the first matching reference row is
    used.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a "Lon" column. It is not modified.
    reference : pandas.DataFrame, optional
        Frame with "LonScaled" and "Sensor" columns. Defaults to the
        notebook-level ``full_df``.

    Returns
    -------
    pandas.DataFrame
        Copy of ``df`` with the "Sensor" column filled in.

    Raises
    ------
    KeyError
        If any "Lon" value has no equal "LonScaled" value in ``reference``.
    """
    if reference is None:
        reference = full_df

    # One entry per distinct longitude, keeping the first reference row for
    # each, so the whole column can be translated in a single pass.
    lon_to_sensor = (
        reference.dropna(subset=["LonScaled"])
        .drop_duplicates(subset="LonScaled")
        .set_index("LonScaled")["Sensor"]
    )

    labeled = df.copy()
    unmatched = ~labeled["Lon"].isin(lon_to_sensor.index)
    if unmatched.any():
        missing = labeled.loc[unmatched, "Lon"].unique()[:5].tolist()
        raise KeyError(
            "no sensor found for Lon value(s) {} in reference['LonScaled']".format(missing)
        )

    labeled["Sensor"] = labeled["Lon"].map(lon_to_sensor)
    return labeled

In [16]:
# Turn the observed counts into class labels, then order rows by hour and sensor.
clas_df = classCrowdednessCounts(full_df).sort_values(by=["Hour", "Sensor"])

In [17]:
# Preview the first five rows of the class-labelled frame.
clas_df.head(n=5)

Unnamed: 0,Date,Hour,Sensor,LonScaled,LatScaled,TrueCounts
111,2019-03-25,100,GAWW-01,1.152297,1.430863,4
112,2019-03-25,100,GAWW-02,0.279566,0.506281,4
113,2019-03-25,100,GAWW-03,-1.170458,-0.925578,3
114,2019-03-25,100,GAWW-04,-0.640018,-0.06408,2
115,2019-03-25,100,GAWW-05,-0.816523,-1.1188,4


In [14]:
# Preview the first five rows of the sorted DC prediction frame.
pred_dc.head(n=5)

Unnamed: 0,Date,Hour,Sensor,SensorLongitude,SensorLatitude,CrowdednessCount
48,2019-03-25,100,GAWW-01,4.899847,52.374601,3
72,2019-03-25,100,GAWW-02,4.898903,52.373786,3
0,2019-03-25,100,GAWW-03,4.897334,52.372524,3
96,2019-03-25,100,GAWW-04,4.897908,52.373283,3
120,2019-03-25,100,GAWW-05,4.897717,52.372353,3


## Results

In [5]:
# The metrics pair the two columns row by row, so both frames must list the
# same (Hour, Sensor) keys in the same order before they are scored.
assert np.array_equal(
    full_df[["Hour", "Sensor"]].values, pred_lr[["Hour", "Sensor"]].values
), "full_df and pred_lr rows are not aligned on (Hour, Sensor)"

y_true = full_df["TrueCounts"]
y_pred = pred_lr["CrowdednessCount"]

r2 = round(r2_score(y_true, y_pred), 3)
rmse = round(np.sqrt(mean_squared_error(y_true, y_pred)), 2)
print("R2: ", r2)
print("RMSE: ", rmse)

R2:  -3208789378017.918
RMSE:  1273871795.59


In [18]:
# The metrics pair the two columns row by row, so both frames must list the
# same (Hour, Sensor) keys in the same order before they are scored.
assert np.array_equal(
    clas_df[["Hour", "Sensor"]].values, pred_dc[["Hour", "Sensor"]].values
), "clas_df and pred_dc rows are not aligned on (Hour, Sensor)"

y_true = clas_df["TrueCounts"]
y_pred = pred_dc["CrowdednessCount"]

acc = round(accuracy_score(y_true, y_pred), 3)
# average=None returns one score per class instead of a single aggregate.
prec = precision_score(y_true, y_pred, average=None)
rec = recall_score(y_true, y_pred, average=None)
f1 = f1_score(y_true, y_pred, average=None)

print("Accuracy: ", acc)
print("Precision: ", prec)
print("Recall: ", rec)
print("F1 Score: ", f1)

Accuracy:  0.25
Precision:  [0.   0.   0.25 0.  ]
Recall:  [0. 0. 1. 0.]
F1 Score:  [0.  0.  0.4 0. ]


### RFG

In [20]:
# Attach sensor names once: the lookup covers the whole frame and does not
# depend on which sensor is being scored.
rfg_labeled = getSensor(rfg)
sensors = full_df["Sensor"].unique()

# R2 and RMSE of the RFG evaluation results, one entry per sensor.
rfg_dict = {}
for sensor in sensors:
    sensor_rows = rfg_labeled[rfg_labeled["Sensor"] == sensor]

    r2 = round(r2_score(sensor_rows["True"], sensor_rows["Pred"]), 3)
    rmse = round(np.sqrt(mean_squared_error(sensor_rows["True"], sensor_rows["Pred"])), 2)

    rfg_dict[sensor] = {"R2": r2, "RMSE": rmse}

rfg_dict

{'GAWW-01': {'R2': 0.814, 'RMSE': 486.26},
 'GAWW-02': {'R2': 0.839, 'RMSE': 557.4},
 'GAWW-03': {'R2': 0.908, 'RMSE': 235.44},
 'GAWW-04': {'R2': 0.852, 'RMSE': 124.56},
 'GAWW-05': {'R2': 0.909, 'RMSE': 294.28},
 'GAWW-06': {'R2': 0.759, 'RMSE': 501.56},
 'GAWW-07': {'R2': 0.927, 'RMSE': 193.06}}

In [8]:
# The metrics pair the two columns row by row, so both frames must list the
# same (Hour, Sensor) keys in the same order before they are scored.
assert np.array_equal(
    full_df[["Hour", "Sensor"]].values, pred_rfg[["Hour", "Sensor"]].values
), "full_df and pred_rfg rows are not aligned on (Hour, Sensor)"

y_true = full_df["TrueCounts"]
y_pred = pred_rfg["CrowdednessCount"]

r2 = round(r2_score(y_true, y_pred), 3)
rmse = round(np.sqrt(mean_squared_error(y_true, y_pred)), 2)
print("R2: ", r2)
print("RMSE: ", rmse)

R2:  0.207
RMSE:  633.22


### XGBR

In [13]:
# Attach sensor names once: the lookup covers the whole frame and does not
# depend on which sensor is being scored.
xgbr_labeled = getSensor(xgbr)
sensors = full_df["Sensor"].unique()

# R2 and RMSE of the XGBR evaluation results, one entry per sensor.
xgbr_dict = {}
for sensor in sensors:
    sensor_rows = xgbr_labeled[xgbr_labeled["Sensor"] == sensor]

    r2 = round(r2_score(sensor_rows["True"], sensor_rows["Pred"]), 3)
    rmse = round(np.sqrt(mean_squared_error(sensor_rows["True"], sensor_rows["Pred"])), 2)

    xgbr_dict[sensor] = {"R2": r2, "RMSE": rmse}

xgbr_dict

{'GAWW-01': {'R2': 0.775, 'RMSE': 535.9},
 'GAWW-02': {'R2': 0.805, 'RMSE': 614.08},
 'GAWW-03': {'R2': 0.734, 'RMSE': 399.68},
 'GAWW-04': {'R2': 0.62, 'RMSE': 199.8},
 'GAWW-05': {'R2': 0.875, 'RMSE': 344.23},
 'GAWW-06': {'R2': 0.698, 'RMSE': 561.78},
 'GAWW-07': {'R2': 0.905, 'RMSE': 219.2}}

In [14]:
# The metrics pair the two columns row by row, so both frames must list the
# same (Hour, Sensor) keys in the same order before they are scored.
assert np.array_equal(
    full_df[["Hour", "Sensor"]].values, pred_xgbr[["Hour", "Sensor"]].values
), "full_df and pred_xgbr rows are not aligned on (Hour, Sensor)"

y_true = full_df["TrueCounts"]
y_pred = pred_xgbr["CrowdednessCount"]

r2 = round(r2_score(y_true, y_pred), 3)
rmse = round(np.sqrt(mean_squared_error(y_true, y_pred)), 2)
print("R2: ", r2)
print("RMSE: ", rmse)

R2:  -0.783
RMSE:  949.69


### RFC

In [21]:
# Attach sensor names once: the lookup covers the whole frame and does not
# depend on which sensor is being scored.
rfc_labeled = getSensor(rfc)
sensors = full_df["Sensor"].unique()

# Accuracy plus per-class precision/recall/F1 of the RFC evaluation results,
# one entry per sensor.
rfc_dict = {}
for sensor in sensors:
    sensor_rows = rfc_labeled[rfc_labeled["Sensor"] == sensor]
    y_true = sensor_rows["True"]
    y_pred = sensor_rows["Pred"]

    acc = round(accuracy_score(y_true, y_pred), 3)
    # average=None returns one score per class instead of a single aggregate.
    prec = precision_score(y_true, y_pred, average=None)
    rec = recall_score(y_true, y_pred, average=None)
    f1 = f1_score(y_true, y_pred, average=None)

    rfc_dict[sensor] = {"Accuracy": acc, "Precision": prec, "Recall": rec, "F1 Score": f1}

rfc_dict

{'GAWW-01': {'Accuracy': 0.855,
  'Precision': array([0.91776316, 0.84813084, 0.81898455, 0.85483871]),
  'Recall': array([0.6957606 , 0.88536585, 0.82444444, 0.94383562]),
  'F1 Score': array([0.79148936, 0.86634845, 0.82170543, 0.89713542])},
 'GAWW-02': {'Accuracy': 0.871,
  'Precision': array([0.7877551 , 0.87603306, 0.81741573, 0.90758755]),
  'Recall': array([0.80416667, 0.77750611, 0.76781003, 0.96784232]),
  'F1 Score': array([0.79587629, 0.8238342 , 0.79183673, 0.93674699])},
 'GAWW-03': {'Accuracy': 0.891,
  'Precision': array([0.9707401 , 0.82608696, 0.70026525, 0.86036036]),
  'Recall': array([0.9707401 , 0.72243346, 0.83018868, 0.77016129]),
  'F1 Score': array([0.9707401 , 0.77079108, 0.75971223, 0.81276596])},
 'GAWW-04': {'Accuracy': 0.869,
  'Precision': array([0.94727592, 0.87839102, 0.71181556, 1.        ]),
  'Recall': array([0.89090909, 0.89173789, 0.76234568, 0.66666667]),
  'F1 Score': array([0.91822828, 0.88501414, 0.73621461, 0.8       ])},
 'GAWW-05': {'Accura

In [19]:
# The metrics pair the two columns row by row, so both frames must list the
# same (Hour, Sensor) keys in the same order before they are scored.
assert np.array_equal(
    clas_df[["Hour", "Sensor"]].values, pred_rfc[["Hour", "Sensor"]].values
), "clas_df and pred_rfc rows are not aligned on (Hour, Sensor)"

y_true = clas_df["TrueCounts"]
y_pred = pred_rfc["CrowdednessCount"]

acc = round(accuracy_score(y_true, y_pred), 3)
# average=None returns one score per class instead of a single aggregate.
prec = precision_score(y_true, y_pred, average=None)
rec = recall_score(y_true, y_pred, average=None)
f1 = f1_score(y_true, y_pred, average=None)

print("Accuracy: ", acc)
print("Precision: ", prec)
print("Recall: ", rec)
print("F1 Score: ", f1)

Accuracy:  0.333
Precision:  [0.46774194 0.15189873 0.36363636 0.6875    ]
Recall:  [0.69047619 0.28571429 0.0952381  0.26190476]
F1 Score:  [0.55769231 0.19834711 0.1509434  0.37931034]


### XGBC

In [22]:
# Attach sensor names once: the lookup covers the whole frame and does not
# depend on which sensor is being scored.
xgbc_labeled = getSensor(xgbc)
sensors = full_df["Sensor"].unique()

# Accuracy plus per-class precision/recall/F1 of the XGBC evaluation results,
# one entry per sensor.
xgbc_dict = {}
for sensor in sensors:
    sensor_rows = xgbc_labeled[xgbc_labeled["Sensor"] == sensor]
    y_true = sensor_rows["True"]
    y_pred = sensor_rows["Pred"]

    acc = round(accuracy_score(y_true, y_pred), 3)
    # average=None returns one score per class instead of a single aggregate.
    prec = precision_score(y_true, y_pred, average=None)
    rec = recall_score(y_true, y_pred, average=None)
    f1 = f1_score(y_true, y_pred, average=None)

    xgbc_dict[sensor] = {"Accuracy": acc, "Precision": prec, "Recall": rec, "F1 Score": f1}

xgbc_dict

{'GAWW-01': {'Accuracy': 0.817,
  'Precision': array([0.85185185, 0.8056872 , 0.78636364, 0.82572115]),
  'Recall': array([0.63092269, 0.82926829, 0.76888889, 0.94109589]),
  'F1 Score': array([0.72492837, 0.81730769, 0.77752809, 0.87964149])},
 'GAWW-02': {'Accuracy': 0.783,
  'Precision': array([0.64583333, 0.73816156, 0.66081871, 0.88035892]),
  'Recall': array([0.775     , 0.64792176, 0.59630607, 0.9159751 ]),
  'F1 Score': array([0.70454545, 0.69010417, 0.62690707, 0.89781393])},
 'GAWW-03': {'Accuracy': 0.812,
  'Precision': array([0.896875  , 0.72254335, 0.5732899 , 0.72294372]),
  'Recall': array([0.98795181, 0.47528517, 0.55345912, 0.6733871 ]),
  'F1 Score': array([0.94021294, 0.5733945 , 0.5632    , 0.69728601])},
 'GAWW-04': {'Accuracy': 0.801,
  'Precision': array([0.89734513, 0.78433024, 0.65690377, 0.        ]),
  'Recall': array([0.83801653, 0.88414055, 0.4845679 , 0.        ]),
  'F1 Score': array([0.86666667, 0.83125   , 0.55772647, 0.        ])},
 'GAWW-05': {'Accura

In [20]:
# The metrics pair the two columns row by row, so both frames must list the
# same (Hour, Sensor) keys in the same order before they are scored.
assert np.array_equal(
    clas_df[["Hour", "Sensor"]].values, pred_xgbc[["Hour", "Sensor"]].values
), "clas_df and pred_xgbc rows are not aligned on (Hour, Sensor)"

y_true = clas_df["TrueCounts"]
y_pred = pred_xgbc["CrowdednessCount"]

acc = round(accuracy_score(y_true, y_pred), 3)
# average=None returns one score per class instead of a single aggregate.
prec = precision_score(y_true, y_pred, average=None)
rec = recall_score(y_true, y_pred, average=None)
f1 = f1_score(y_true, y_pred, average=None)

print("Accuracy: ", acc)
print("Precision: ", prec)
print("Recall: ", rec)
print("F1 Score: ", f1)

Accuracy:  0.226
Precision:  [0.39705882 0.11       0.         0.        ]
Recall:  [0.64285714 0.26190476 0.         0.        ]
F1 Score:  [0.49090909 0.15492958 0.         0.        ]
