# Crowdedness Data

## Imports <a name="imports"></a>

In [212]:
import pandas as pd
from datetime import datetime

## Import Data <a name="importData"></a>

### Crowdedness Data <a name="crowdData"></a>
The crowdedness count per camera, per day, per hour

In [3]:
#Load in the data
crowd_df = pd.read_excel("../../../Data_thesis/CMSA/cmsa_data.xlsx")

In [4]:
#Group the counts per hour
crowd_df = crowd_df.groupby(["richting", "datum", "uur"])["SampleCount"].sum().reset_index()

#Change names columns
crowd_df = crowd_df.rename(index=str, columns={"richting": "Sensor", "datum": "Date", "uur": "Hour", 
                                               "SampleCount": "CrowdednessCount"})

#Insert columns
crowd_df.insert(3, "SensorLongitude", 0)
crowd_df.insert(4, "SensorLatitude", 0)

In [5]:
#Example Contents
crowd_df.head()

Unnamed: 0,Sensor,Date,Hour,SensorLongitude,SensorLatitude,CrowdednessCount
0,2,2018-03-11,0,0,0,0
1,2,2018-03-11,1,0,0,0
2,2,2018-03-11,2,0,0,0
3,2,2018-03-11,3,0,0,0
4,2,2018-03-11,4,0,0,0


### Crowdedness Data (Without Sensor column)
All the data resides in multiple files, where the filename equals the sensor number

#### Import Data and concate to a single DF

In [220]:
#GAWW-01
blip01_df = pd.read_csv("../../../Data_thesis/CMSA/BlipTrack-01.csv", sep=",")
blip01_df = blip01_df[["Timestamp", "SampleCount"]]
blip01_df["Sensor"] = "GAWW-01"

blip01OCC_df = pd.read_csv("../../../Data_thesis/CMSA/BlipTrack-01OCC.csv", sep=",")
blip01OCC_df = blip01OCC_df[["Timestamp", "SampleCount"]]
blip01OCC_df["Sensor"] = "GAWW-01"

blip01R_df = pd.read_csv("../../../Data_thesis/CMSA/BlipTrack-01R.csv", sep=",")
blip01R_df = blip01R_df[["Timestamp", "SampleCount"]]
blip01R_df["Sensor"] = "GAWW-01"

gaww01_df = pd.merge(blip01_df, blip01OCC_df,on=["Timestamp", "Sensor"], how="inner")
gaww01_df = pd.merge(gaww01_df, blip01R_df,on=["Timestamp", "Sensor"], how="inner")

gaww01_df["CrowdednessCount"] = gaww01_df[["SampleCount_x", "SampleCount_y", "SampleCount"]].sum(axis=1)

gaww01_df = gaww01_df[["Timestamp", "CrowdednessCount", "Sensor"]]

gaww01_df = gaww01_df.groupby(["Timestamp", "Sensor"])["CrowdednessCount"].sum().reset_index()

In [221]:
#GAWW-03
blip03OCC_df = pd.read_csv("../../../Data_thesis/CMSA/BlipTrack-03OCC.csv", sep=",")
blip03OCC_df = blip03OCC_df[["Timestamp", "SampleCount"]]
blip03OCC_df["Sensor"] = "GAWW-03"

blip03OCC_df = blip03OCC_df.rename(index=str, columns={"SampleCount": "CrowdednessCount"})

gaww03_df = blip03OCC_df.groupby(["Timestamp", "Sensor"])["CrowdednessCount"].sum().reset_index()

In [222]:
#Join the above given dataframes
blip_df = pd.concat([gaww01_df, gaww03_df]).reset_index().drop(columns={"index"})

In [223]:
#GAWW-04
blip04_df = pd.read_csv("../../../Data_thesis/CMSA/BlipTrack-04.csv", sep=",")
blip04_df = blip04_df[["Timestamp", "SampleCount"]]
blip04_df["Sensor"] = "GAWW-04"

blip04OCC_df = pd.read_csv("../../../Data_thesis/CMSA/BlipTrack-04OCC.csv", sep=",")
blip04OCC_df = blip04OCC_df[["Timestamp", "SampleCount"]]
blip04OCC_df["Sensor"] = "GAWW-04"

blip04R_df = pd.read_csv("../../../Data_thesis/CMSA/BlipTrack-04R.csv", sep=",")
blip04R_df = blip04R_df[["Timestamp", "SampleCount"]]
blip04R_df["Sensor"] = "GAWW-04"

gaww04_df = pd.merge(blip04_df, blip04OCC_df,on=["Timestamp", "Sensor"], how="inner")
gaww04_df = pd.merge(gaww04_df, blip04R_df,on=["Timestamp", "Sensor"], how="inner")

gaww04_df["CrowdednessCount"] = gaww04_df[["SampleCount_x", "SampleCount_y", "SampleCount"]].sum(axis=1)

gaww04_df = gaww04_df[["Timestamp", "CrowdednessCount", "Sensor"]]

gaww04_df = gaww04_df.groupby(["Timestamp", "Sensor"])["CrowdednessCount"].sum().reset_index()

In [224]:
#Join the above given dataframes
blip_df = pd.concat([blip_df, gaww04_df]).reset_index().drop(columns={"index"})

In [225]:
#GAWW-05
blip05_df = pd.read_csv("../../../Data_thesis/CMSA/BlipTrack-05.csv", sep=",")
blip05_df = blip05_df[["Timestamp", "SampleCount"]]
blip05_df["Sensor"] = "GAWW-05"

blip05R_df = pd.read_csv("../../../Data_thesis/CMSA/BlipTrack-05R.csv", sep=",")
blip05R_df = blip05R_df[["Timestamp", "SampleCount"]]
blip05R_df["Sensor"] = "GAWW-05"

gaww05_df = pd.merge(blip05_df, blip05R_df,on=["Timestamp", "Sensor"], how="inner")

gaww05_df["CrowdednessCount"] = gaww05_df[["SampleCount_x", "SampleCount_y"]].sum(axis=1)

gaww05_df = gaww05_df[["Timestamp", "CrowdednessCount", "Sensor"]]

gaww05_df = gaww05_df.groupby(["Timestamp", "Sensor"])["CrowdednessCount"].sum().reset_index()

In [226]:
#Join the above given dataframes
blip_df = pd.concat([blip_df, gaww05_df]).reset_index().drop(columns={"index"})

In [227]:
#GAWW-06
blip06_df = pd.read_csv("../../../Data_thesis/CMSA/BlipTrack-06.csv", sep=",")
blip06_df = blip06_df[["Timestamp", "SampleCount"]]
blip06_df["Sensor"] = "GAWW-06"

blip06OCC_df = pd.read_csv("../../../Data_thesis/CMSA/BlipTrack-06OCC.csv", sep=",")
blip06OCC_df = blip06OCC_df[["Timestamp", "SampleCount"]]
blip06OCC_df["Sensor"] = "GAWW-06"

blip06R_df = pd.read_csv("../../../Data_thesis/CMSA/BlipTrack-06R.csv", sep=",")
blip06R_df = blip06R_df[["Timestamp", "SampleCount"]]
blip06R_df["Sensor"] = "GAWW-06"

gaww06_df = pd.merge(blip06_df, blip06OCC_df,on=["Timestamp", "Sensor"], how="inner")
gaww06_df = pd.merge(gaww06_df, blip06R_df,on=["Timestamp", "Sensor"], how="inner")

gaww06_df["CrowdednessCount"] = gaww06_df[["SampleCount_x", "SampleCount_y", "SampleCount"]].sum(axis=1)

gaww06_df = gaww06_df[["Timestamp", "CrowdednessCount", "Sensor"]]

gaww06_df = gaww06_df.groupby(["Timestamp", "Sensor"])["CrowdednessCount"].sum().reset_index()

In [228]:
blip_df = pd.concat([blip_df, gaww06_df]).reset_index().drop(columns={"index"})

In [229]:
#GAWW-07
blip07_df = pd.read_csv("../../../Data_thesis/CMSA/BlipTrack-07.csv", sep=",")
blip07_df = blip07_df[["Timestamp", "SampleCount"]]
blip07_df["Sensor"] = "GAWW-07"

blip07OCC_df = pd.read_csv("../../../Data_thesis/CMSA/BlipTrack-07OCC.csv", sep=",")
blip07OCC_df = blip07OCC_df[["Timestamp", "SampleCount"]]
blip07OCC_df["Sensor"] = "GAWW-07"

blip07R_df = pd.read_csv("../../../Data_thesis/CMSA/BlipTrack-07R.csv", sep=",")
blip07R_df = blip07R_df[["Timestamp", "SampleCount"]]
blip07R_df["Sensor"] = "GAWW-07"

gaww07_df = pd.merge(blip07_df, blip07OCC_df,on=["Timestamp", "Sensor"], how="inner")
gaww07_df = pd.merge(gaww07_df, blip07R_df,on=["Timestamp", "Sensor"], how="inner")

gaww07_df["CrowdednessCount"] = gaww07_df[["SampleCount_x", "SampleCount_y", "SampleCount"]].sum(axis=1)

gaww07_df = gaww07_df[["Timestamp", "CrowdednessCount", "Sensor"]]

gaww07_df = gaww07_df.groupby(["Timestamp", "Sensor"])["CrowdednessCount"].sum().reset_index()

In [230]:
blip_df = pd.concat([blip_df, gaww07_df]).reset_index().drop(columns={"index"})

In [231]:
blip08OCC_df = pd.read_csv("../../../Data_thesis/CMSA/BlipTrack-08OCC.csv", sep=",")
blip08OCC_df = blip08OCC_df[["Timestamp", "SampleCount"]]
blip08OCC_df["Sensor"] = "GAWW-08"

blip08OCC_df = blip08OCC_df.rename(index=str, columns={"SampleCount": "CrowdednessCount"})

gaww08_df = blip08OCC_df.groupby(["Timestamp", "Sensor"])["CrowdednessCount"].sum().reset_index()

In [232]:
blip_df = pd.concat([blip_df, gaww08_df]).reset_index().drop(columns={"index"})

In [233]:
blip09OCC_df = pd.read_csv("../../../Data_thesis/CMSA/BlipTrack-09OCC.csv", sep=",")
blip09OCC_df = blip09OCC_df[["Timestamp", "SampleCount"]]
blip09OCC_df["Sensor"] = "GAWW-09"

blip09OCC_df = blip09OCC_df.rename(index=str, columns={"SampleCount": "CrowdednessCount"})

gaww09_df = blip09OCC_df.groupby(["Timestamp", "Sensor"])["CrowdednessCount"].sum().reset_index()

In [234]:
blip_df = pd.concat([blip_df, gaww09_df]).reset_index().drop(columns={"index"})

In [235]:
blip10OCC_df = pd.read_csv("../../../Data_thesis/CMSA/BlipTrack-10OCC.csv", sep=",")
blip10OCC_df = blip10OCC_df[["Timestamp", "SampleCount"]]
blip10OCC_df["Sensor"] = "GAWW-10"

blip10OCC_df = blip10OCC_df.rename(index=str, columns={"SampleCount": "CrowdednessCount"})

gaww10_df = blip10OCC_df.groupby(["Timestamp", "Sensor"])["CrowdednessCount"].sum().reset_index()

In [236]:
blip_df = pd.concat([blip_df, gaww10_df]).reset_index().drop(columns={"index"})

In [237]:
blip_df["Hour"] = 99
blip_df = blip_df.rename(index=str, columns={"Timestamp": "Date"})

In [238]:
blip_df.head()

Unnamed: 0,Date,Sensor,CrowdednessCount,Hour
0,01-Apr-2018 00:00:00,GAWW-01,4271,99
1,01-Apr-2018 01:00:00,GAWW-01,3414,99
2,01-Apr-2018 02:00:00,GAWW-01,2292,99
3,01-Apr-2018 03:00:00,GAWW-01,1630,99
4,01-Apr-2018 04:00:00,GAWW-01,1148,99


#### Change TimeStamp to Date and Hour

In [245]:
blip_dict = blip_df.to_dict("index")

In [249]:
#Datetime format
date_format_1 = '%d-%b-%Y %H:%M:%S'
date_format_2 = '%d-%B-%Y %H:%M:%S'

#Loop over dict
for k, v in blip_dict.items():    
    try:
        #Transform the date string to datatime.date object
        date = datetime.strptime(v["Date"], date_format_1)
        v["Date"] = date.date()
        v["Hour"] = date.time().hour
        #Transfrom date to weekday number
    except:
        date = datetime.strptime(v["Date"], date_format_2)
        v["Date"] = date.date()
        v["Hour"] = date.time().hour

In [256]:
blip_df = pd.DataFrame.from_dict(blip_dict, orient="index").reset_index().drop(columns={'index'})

In [257]:
blip_df.head()

Unnamed: 0,Date,Sensor,CrowdednessCount,Hour
0,2018-04-01,GAWW-01,4271,0
1,2018-04-01,GAWW-01,3414,1
2,2018-04-01,GAWW-01,246,10
3,2019-01-01,GAWW-01,2788,3
4,2018-07-04,GAWW-01,1513,15


### Sensor Locations <a name="senData"></a>
The location of the cameras

In [258]:
#Load in the data
loc_df = pd.read_csv("../../../Data_thesis/Open_Data/crowdedness_sensoren.csv", sep=";")

In [259]:
#Select columns to use
loc_df = loc_df[["Objectnummer", "LNG", "LAT"]]

In [260]:
#Example contents
loc_df.head()

Unnamed: 0,Objectnummer,LNG,LAT
0,GAWW-03,48973932,523725037
1,GAWW-03,48973336,523725237
2,GAWW-02,48988705,523737982
3,GAWW-02,48989027,523737857
4,GAWW-01,48997667,52374627


## Save coordinates needed cameras <a name="cords"></a>
Save the coordinates of the cameras we will use as target values

### Variables

In [261]:
#List needed cameras
needed_sensors = ["GAWW-01", "GAWW-02", "GAWW-03", "GAWW-04", "GAWW-05", "GAWW-06", "GAWW-07", "GAWW-08", "GAWW-09", 
                  "GAWW-10"]

#change df into dict
loc_dict = loc_df.to_dict("index")

#Dict to saved the needed locations in
sensor_dict = {}

### Saved coordinates to Sensor_dict

In [262]:
for k, v in loc_dict.items():
    
    #Save only the cameras with the object nummer given above
    if v["Objectnummer"] in needed_sensors:
        
        #Replace the "." with "," to make sure the coordinates can be turned into floats
        v["LNG"] = float(v["LNG"].replace(",", "."))
        v["LAT"] = float(v["LAT"].replace(",", "."))
        
        #Save all contents in dict
        sensor_dict[v["Objectnummer"]] = {"Longitude": v["LNG"], "Latitude": v["LAT"]}

## Merge Crowdedness and Blip

In [273]:
crowd_df = pd.concat([crowd_df, blip_df], sort=True).reset_index().drop(columns={"index"})

In [274]:
crowd_df.head()

Unnamed: 0,CrowdednessCount,Date,Hour,Sensor,SensorLatitude,SensorLongitude
0,0,2018-03-11 00:00:00,0,GAWW-02,52.373786,4.898903
1,0,2018-03-11 00:00:00,10000,GAWW-02,52.373786,4.898903
2,0,2018-03-11 00:00:00,100000,GAWW-02,52.373786,4.898903
3,39,2018-03-15 00:00:00,40000,GAWW-02,52.373786,4.898903
4,1618,2018-04-21 00:00:00,170000,GAWW-02,52.373786,4.898903


## Data to Dict <a name="dict"></a>

### Variables

In [275]:
#Change df into dict
crowd_dict = crowd_df.to_dict("index")

#Different camera names for given camera name
gaww_02 = [2, "02R", "2R", "Oude Kennissteeg Occ wifi"]
gaww_03 = [3, "03R"]

### Combines coordinates with given camera

In [276]:
#Loop over dict
for k, v in crowd_dict.items():
    
    #Change camera name
    if v["Sensor"] in gaww_02:
        v["Sensor"] = "GAWW-02"
        
        #Change the ccordinates of the given camera to the correct ones
        v["SensorLongitude"] = sensor_dict["GAWW-02"]["Longitude"]
        v["SensorLatitude"] = sensor_dict["GAWW-02"]["Latitude"]
        
    elif v["Sensor"] in gaww_03:
        v["Sensor"] = "GAWW-03"
        v["SensorLongitude"] = sensor_dict["GAWW-03"]["Longitude"]
        v["SensorLatitude"] = sensor_dict["GAWW-03"]["Latitude"]
        
    elif v["Sensor"] == "GAWW-01":
        v["SensorLongitude"] = sensor_dict["GAWW-01"]["Longitude"]
        v["SensorLatitude"] = sensor_dict["GAWW-01"]["Latitude"]
    
    elif v["Sensor"] == "GAWW-04":
        v["SensorLongitude"] = sensor_dict["GAWW-04"]["Longitude"]
        v["SensorLatitude"] = sensor_dict["GAWW-04"]["Latitude"]
   
    elif v["Sensor"] == "GAWW-05":
        v["SensorLongitude"] = sensor_dict["GAWW-05"]["Longitude"]
        v["SensorLatitude"] = sensor_dict["GAWW-05"]["Latitude"]
        
    elif v["Sensor"] == "GAWW-06":
        v["SensorLongitude"] = sensor_dict["GAWW-06"]["Longitude"]
        v["SensorLatitude"] = sensor_dict["GAWW-06"]["Latitude"]
        
    elif v["Sensor"] == "GAWW-07":
        v["SensorLongitude"] = sensor_dict["GAWW-07"]["Longitude"]
        v["SensorLatitude"] = sensor_dict["GAWW-07"]["Latitude"]
        
    elif v["Sensor"] == "GAWW-08":
        v["SensorLongitude"] = sensor_dict["GAWW-08"]["Longitude"]
        v["SensorLatitude"] = sensor_dict["GAWW-08"]["Latitude"]
    
    elif v["Sensor"] == "GAWW-09":
        v["SensorLongitude"] = sensor_dict["GAWW-09"]["Longitude"]
        v["SensorLatitude"] = sensor_dict["GAWW-09"]["Latitude"]
        
    elif v["Sensor"] == "GAWW-10":
        v["SensorLongitude"] = sensor_dict["GAWW-10"]["Longitude"]
        v["SensorLatitude"] = sensor_dict["GAWW-10"]["Latitude"]       
        
    v["Hour"] *= 100

In [280]:
#Return from Dict
crowd_df = pd.DataFrame.from_dict(crowd_dict, orient="index")

#Onlt save the sensors for which the coordinates are known
crowd_df = crowd_df[crowd_df["Sensor"].isin(needed_sensors)]

crowd_df = crowd_df.groupby(["Sensor", "Date", "Hour", "SensorLongitude", 
                             "SensorLatitude"])["CrowdednessCount"].sum().reset_index()

In [281]:
#Example contents
crowd_df.head()

Unnamed: 0,Sensor,Date,Hour,SensorLongitude,SensorLatitude,CrowdednessCount
0,GAWW-01,2018-03-11,100,4.899847,52.374601,6266
1,GAWW-01,2018-03-11,200,4.899847,52.374601,4240
2,GAWW-01,2018-03-11,300,4.899847,52.374601,2838
3,GAWW-01,2018-03-11,400,4.899847,52.374601,2170
4,GAWW-01,2018-03-11,500,4.899847,52.374601,996


In [282]:
crowd_df.Sensor.unique()

array(['GAWW-01', 'GAWW-02', 'GAWW-03', 'GAWW-04', 'GAWW-05', 'GAWW-06',
       'GAWW-07', 'GAWW-08', 'GAWW-09', 'GAWW-10'], dtype=object)

## DF to File <a name="CSV"></a>

In [284]:
crowd_df.to_csv("../../../Data_thesis/Full_Datasets/Crowdedness.csv", index=False)