# Crowdedness Data

## Imports <a name="imports"></a>

In [3]:
import pandas as pd
from datetime import datetime

## Import Data <a name="importData"></a>

### Crowdedness Data <a name="crowdData"></a>
The crowdedness count per camera, per day, per hour

In [4]:
# Read the raw CMSA crowdedness export from the Excel workbook
crowd_df = pd.read_excel(io="../../../Data_thesis/CMSA/cmsa_data.xlsx")

In [5]:
# Total the sample counts so each (sensor, date, hour) combination has one row,
# then switch to the English column names used in the rest of the notebook
crowd_df = (
    crowd_df
    .groupby(["richting", "datum", "uur"])["SampleCount"]
    .sum()
    .reset_index()
    .rename(
        index=str,
        columns={
            "richting": "Sensor",
            "datum": "Date",
            "uur": "Hour",
            "SampleCount": "CrowdednessCount",
        },
    )
)

# Add zero-filled placeholder columns for the sensor coordinates at positions 3 and 4
crowd_df.insert(loc=3, column="SensorLongitude", value=0)
crowd_df.insert(loc=4, column="SensorLatitude", value=0)

In [6]:
# Preview the first five rows of the aggregated counts
crowd_df.head(n=5)

Unnamed: 0,Sensor,Date,Hour,SensorLongitude,SensorLatitude,CrowdednessCount
0,2,2018-03-11,0,0,0,0
1,2,2018-03-11,1,0,0,0
2,2,2018-03-11,2,0,0,0
3,2,2018-03-11,3,0,0,0
4,2,2018-03-11,4,0,0,0


### Crowdedness Data (Without Sensor column)
All the data resides in multiple files, where the filename equals the sensor number

In [7]:
# Read the crowdedness counts stored in BlipData.csv
blip_df = pd.read_csv(filepath_or_buffer="../../../Data_thesis/CMSA/BlipData.csv")

In [8]:
# Preview the first five rows of the Blip data
blip_df.head(n=5)

Unnamed: 0,Date,Sensor,CrowdednessCount,Hour
0,2018-04-01,GAWW-01,4271,0
1,2018-04-01,GAWW-01,3414,1
2,2018-04-01,GAWW-01,246,10
3,2019-01-01,GAWW-01,2788,3
4,2018-07-04,GAWW-01,1513,15


### Sensor Locations <a name="senData"></a>
The location of the cameras

In [9]:
# Read the sensor locations; the file is semicolon-separated
loc_df = pd.read_csv(
    filepath_or_buffer="../../../Data_thesis/Open_Data/crowdedness_sensoren.csv",
    sep=";",
)

In [10]:
# Keep only the sensor identifier and its longitude / latitude columns
loc_df = loc_df.loc[:, ["Objectnummer", "LNG", "LAT"]]

In [11]:
# Preview the first five sensor locations
loc_df.head(n=5)

Unnamed: 0,Objectnummer,LNG,LAT
0,GAWW-03,48973932,523725037
1,GAWW-03,48973336,523725237
2,GAWW-02,48988705,523737982
3,GAWW-02,48989027,523737857
4,GAWW-01,48997667,52374627


## Save coordinates of the needed cameras <a name="cords"></a>
Save the coordinates of the cameras we will use as target values

### Variables

In [12]:
# Sensors to keep: "GAWW-01" up to and including "GAWW-10"
needed_sensors = ["GAWW-{:02d}".format(number) for number in range(1, 11)]

# Row-indexed dict view of the location frame: {row label: {column: value}}
loc_dict = loc_df.to_dict(orient="index")

# Will hold the coordinates of the needed sensors, keyed by sensor name
sensor_dict = dict()

### Save coordinates to sensor_dict

In [13]:
# Collect the coordinates of every needed sensor into sensor_dict
for v in loc_dict.values():

    # Skip locations of sensors that are not in the needed list
    if v["Objectnummer"] not in needed_sensors:
        continue

    # Swap the decimal comma for a point so float() can parse the coordinate.
    # str() keeps this cell re-runnable: the parsed floats are stored back into
    # loc_dict below, and on a second run a float has no .replace() method.
    v["LNG"] = float(str(v["LNG"]).replace(",", "."))
    v["LAT"] = float(str(v["LAT"]).replace(",", "."))

    # A sensor that appears on several rows keeps the coordinates of its last row
    sensor_dict[v["Objectnummer"]] = {"Longitude": v["LNG"], "Latitude": v["LAT"]}

## Merge Crowdedness and Blip

In [14]:
# Stack both crowdedness sources into one frame. sort=True sorts the columns,
# and reset_index(drop=True) renumbers the rows directly instead of creating a
# temporary "index" column that has to be dropped again.
# NOTE(review): the two frames are read from an Excel and a CSV file, so their
# "Date" columns may differ in dtype -- confirm and normalise if needed.
crowd_df = pd.concat([crowd_df, blip_df], sort=True).reset_index(drop=True)

In [15]:
# Preview the first five rows of the merged frame
crowd_df.head(n=5)

Unnamed: 0,CrowdednessCount,Date,Hour,Sensor,SensorLatitude,SensorLongitude
0,0,2018-03-11 00:00:00,0,2,0.0,0.0
1,0,2018-03-11 00:00:00,1,2,0.0,0.0
2,0,2018-03-11 00:00:00,2,2,0.0,0.0
3,0,2018-03-11 00:00:00,3,2,0.0,0.0
4,0,2018-03-11 00:00:00,4,2,0.0,0.0


## Data to Dict <a name="dict"></a>

### Variables

In [16]:
# Row-indexed dict view of the merged frame: {row label: {column: value}}
crowd_dict = crowd_df.to_dict(orient="index")

# Alternative labels under which sensors GAWW-02 and GAWW-03 appear in the data
gaww_02 = [
    2,
    "02R",
    "2R",
    "Oude Kennissteeg Occ wifi",
]
gaww_03 = [
    3,
    "03R",
]

### Combine coordinates with the given camera

In [17]:
#Loop over dict
for k, v in crowd_dict.items():
    
    #Change camera name
    if v["Sensor"] in gaww_02:
        v["Sensor"] = "GAWW-02"
        
        #Change the ccordinates of the given camera to the correct ones
        v["SensorLongitude"] = sensor_dict["GAWW-02"]["Longitude"]
        v["SensorLatitude"] = sensor_dict["GAWW-02"]["Latitude"]
        
    elif v["Sensor"] in gaww_03:
        v["Sensor"] = "GAWW-03"
        v["SensorLongitude"] = sensor_dict["GAWW-03"]["Longitude"]
        v["SensorLatitude"] = sensor_dict["GAWW-03"]["Latitude"]
        
    elif v["Sensor"] == "GAWW-01":
        v["SensorLongitude"] = sensor_dict["GAWW-01"]["Longitude"]
        v["SensorLatitude"] = sensor_dict["GAWW-01"]["Latitude"]
    
    elif v["Sensor"] == "GAWW-04":
        v["SensorLongitude"] = sensor_dict["GAWW-04"]["Longitude"]
        v["SensorLatitude"] = sensor_dict["GAWW-04"]["Latitude"]
   
    elif v["Sensor"] == "GAWW-05":
        v["SensorLongitude"] = sensor_dict["GAWW-05"]["Longitude"]
        v["SensorLatitude"] = sensor_dict["GAWW-05"]["Latitude"]
        
    elif v["Sensor"] == "GAWW-06":
        v["SensorLongitude"] = sensor_dict["GAWW-06"]["Longitude"]
        v["SensorLatitude"] = sensor_dict["GAWW-06"]["Latitude"]
        
    elif v["Sensor"] == "GAWW-07":
        v["SensorLongitude"] = sensor_dict["GAWW-07"]["Longitude"]
        v["SensorLatitude"] = sensor_dict["GAWW-07"]["Latitude"]
        
    elif v["Sensor"] == "GAWW-08":
        v["SensorLongitude"] = sensor_dict["GAWW-08"]["Longitude"]
        v["SensorLatitude"] = sensor_dict["GAWW-08"]["Latitude"]
    
    elif v["Sensor"] == "GAWW-09":
        v["SensorLongitude"] = sensor_dict["GAWW-09"]["Longitude"]
        v["SensorLatitude"] = sensor_dict["GAWW-09"]["Latitude"]
        
    elif v["Sensor"] == "GAWW-10":
        v["SensorLongitude"] = sensor_dict["GAWW-10"]["Longitude"]
        v["SensorLatitude"] = sensor_dict["GAWW-10"]["Latitude"] 
        
    if v["Hour"] == 0:
        v["Hour"] == 2400
        
    v["Hour"] *= 100

In [18]:
# Rebuild the frame from the per-row dict, keep only the rows of the needed
# sensors, and sum the counts per sensor, date, hour and coordinate pair
crowd_df = (
    pd.DataFrame.from_dict(crowd_dict, orient="index")
    .loc[lambda frame: frame["Sensor"].isin(needed_sensors)]
    .groupby(["Sensor", "Date", "Hour", "SensorLongitude", "SensorLatitude"])["CrowdednessCount"]
    .sum()
    .reset_index()
)

In [19]:
# Preview the first five rows of the final dataset
crowd_df.head(n=5)

Unnamed: 0,Sensor,Date,Hour,SensorLongitude,SensorLatitude,CrowdednessCount
0,GAWW-01,2018-03-11,100,4.899847,52.374601,3133
1,GAWW-01,2018-03-11,200,4.899847,52.374601,2120
2,GAWW-01,2018-03-11,300,4.899847,52.374601,1419
3,GAWW-01,2018-03-11,400,4.899847,52.374601,1085
4,GAWW-01,2018-03-11,500,4.899847,52.374601,498


In [20]:
# List the distinct sensor names present in the final dataset
crowd_df["Sensor"].unique()

array(['GAWW-01', 'GAWW-02', 'GAWW-03', 'GAWW-04', 'GAWW-05', 'GAWW-06',
       'GAWW-07', 'GAWW-08', 'GAWW-09', 'GAWW-10'], dtype=object)

## DF to File <a name="CSV"></a>

In [21]:
# Write the final dataset to CSV without the row index
crowd_df.to_csv(
    path_or_buf="../../../Data_thesis/Full_Datasets/Crowdedness.csv",
    index=False,
)