# Construction of the Full Dataset

## Imports

In [28]:
#Imports
import json
import pandas as pd
import re
import numpy as np

import nbimporter

import Crowdedness
import GVBData
import AmsterdamEvent
import CombineData

Importing Jupyter notebook from CombineData.ipynb


## Import Data

### Crowdedness 

In [3]:
# Raw CMSA crowdedness workbook (Excel), loaded unmodified.
crowd_df = pd.read_excel(
    "../../../Data_thesis/CMSA/cmsa_data.xlsx"
)

In [4]:
# Raw Blip data export (comma-separated CSV), loaded unmodified.
blip_df = pd.read_csv(
    "../../../Data_thesis/CMSA/BlipData.csv"
)

In [5]:
# Sensor metadata from the open-data set; this file is semicolon-delimited.
sensor_df = pd.read_csv(
    "../../../Data_thesis/Open_Data/crowdedness_sensoren.csv",
    sep=";",
)

### GVB

In [16]:
# GVB hourly trip exports; both files are semicolon-delimited.
# "Bestemming" is Dutch for destination -> loaded as the arrivals frame.
arr_df = pd.read_csv(
    "../../../Data_thesis/GVB/Datalab_Reis_Bestemming_Uur_20190402.csv",
    sep=";",
)
# "Herkomst" is Dutch for origin -> loaded as the departures frame.
dep_df = pd.read_csv(
    "../../../Data_thesis/GVB/Datalab_Reis_Herkomst_Uur_20190403.csv",
    sep=";",
)

### Event

In [23]:
# Load the raw events JSON into `events`.
# The encoding is set explicitly: without it, open() decodes with the platform's
# default locale encoding (e.g. cp1252 on Windows), which would corrupt or fail on
# non-ASCII characters in the event data. JSON files are UTF-8 by convention.
with open("../../../Data_thesis/Open_Data/Evenementen.json", encoding="utf-8") as event_data:
    events = json.load(event_data)

## Variables

### Crowdedness 

In [9]:
# Sensors to keep from the sensor data: "GAWW-01" through "GAWW-10"
# (zero-padded, two-digit suffix).
needed_sensors = ["GAWW-{:02d}".format(sensor_nr) for sensor_nr in range(1, 11)]

# Alternative labels under which two of the sensors appear in the raw data.
gaww_02 = [
    2,
    "02R",
    "2R",
    "Oude Kennissteeg Occ wifi",
]
gaww_03 = [
    3,
    "03R",
]

### GVB

In [17]:
# GVB stations whose arrivals/departures are included in the dataset.
stations = [
    "Nieuwmarkt",
    "Nieuwezijds Kolk",
    "Dam",
    "Spui",
]

### Event

In [24]:
# Bounding box of the area to search for events in (decimal degrees).
lon_low, lon_high = 4.88, 4.92      # longitude bounds
lat_low, lat_high = 52.36, 52.39    # latitude bounds

# Date window for relevant events: start and end date.
start_date = pd.Timestamp(year=2018, month=3, day=11)
end_date = pd.Timestamp(year=2019, month=4, day=30)

## Functions

### Crowdedness

In [18]:
# Derive the sensor location lookup from the sensor metadata and the list of
# sensors of interest.
# NOTE(review): presumably maps sensor name -> coordinates; confirm in Crowdedness.SensorCoordinates.
locations_dict = Crowdedness.SensorCoordinates(sensor_df, needed_sensors)
# Build the crowdedness frame from the CMSA and Blip data plus the location lookup.
# gaww_02 / gaww_03 hold the alternative labels for those two sensors
# (presumably used to normalise sensor names -- TODO confirm in Crowdedness.CrowdednessData).
full_crowd_df = Crowdedness.CrowdednessData(crowd_df, blip_df, locations_dict, needed_sensors, gaww_02, gaww_03)

# Preview the first rows of the result.
full_crowd_df.head()

Unnamed: 0,Sensor,Date,Hour,SensorLongitude,SensorLatitude,CrowdednessCount
0,GAWW-01,2018-03-11,100,8,8,3133
1,GAWW-01,2018-03-11,200,8,8,2120
2,GAWW-01,2018-03-11,300,8,8,1419
3,GAWW-01,2018-03-11,400,8,8,1085
4,GAWW-01,2018-03-11,500,8,8,498


### GVB

In [20]:
# Two-step GVB pipeline: stationData consumes the arrivals/departures frames and the
# station list; TransformData then post-processes that result with the same stations.
# NOTE(review): full_gvb_df is rebound in the second step, so the intermediate
# stationData output is not kept.
full_gvb_df = GVBData.stationData(arr_df, dep_df, stations)
full_gvb_df = GVBData.TransformData(full_gvb_df, stations)

# Preview the first rows of the result.
full_gvb_df.head()

Unnamed: 0,Date,Hour,Nieuwmarkt Arrivals,Nieuwezijds Kolk Arrivals,Dam Arrivals,Spui Arrivals,Nieuwmarkt Departures,Nieuwezijds Kolk Departures,Dam Departures,Spui Departures,weekday,is_weekend
0,2018-01-01,100,11.0,0.0,0.0,0.0,340.0,27.0,0.0,0.0,0,0
1,2018-01-01,200,48.0,0.0,21.0,0.0,175.0,130.0,39.0,0.0,0,0
2,2018-01-01,300,10.0,16.0,13.0,0.0,137.0,17.0,48.0,0.0,0,0
3,2018-01-01,400,16.0,0.0,0.0,0.0,48.0,46.0,34.0,0.0,0,0
4,2018-01-01,500,17.0,0.0,0.0,0.0,56.0,0.0,20.0,0.0,0,0


### Event

In [26]:
# Build the event frame from the raw events JSON, passing the latitude/longitude
# bounds and the start/end dates defined above.
# NOTE(review): presumably events outside the box or date window are dropped --
# confirm in AmsterdamEvent.transformData.
full_event_df = AmsterdamEvent.transformData(events, lat_low, lat_high, lon_low, lon_high, start_date, end_date)

# Preview the first rows of the result.
full_event_df.head()

Unnamed: 0,Date,is_event
0,2018-04-20,1.0
1,2018-05-20,1.0
2,2018-06-02,1.0
3,2018-06-03,1.0
4,2018-06-04,1.0


### CombineData

In [29]:
# Combine the three sources into one frame. Order matters: each step consumes the
# output of the previous one.
# NOTE(review): the input names are rebound in place, so this cell is not idempotent --
# re-running it without re-running the cells above feeds already-processed frames
# back into importData.
full_crowd_df, full_gvb_df, full_event_df = CombineData.importData(full_crowd_df, full_gvb_df, full_event_df)
# Returns updated GVB and event frames; the crowdedness frame is passed in but not returned
# (presumably as the reference date range -- TODO confirm in CombineData.changeStartEndDate).
full_gvb_df, full_event_df = CombineData.changeStartEndDate(full_crowd_df, full_gvb_df, full_event_df)
# Build the final combined frame from the three prepared frames.
full_df = CombineData.formFullDF(full_crowd_df, full_gvb_df, full_event_df)

# Preview the first rows of the combined dataset.
full_df.head()

Unnamed: 0,index,Date,Hour,Nieuwmarkt Arrivals,Nieuwezijds Kolk Arrivals,Dam Arrivals,Spui Arrivals,Nieuwmarkt Departures,Nieuwezijds Kolk Departures,Dam Departures,...,SensorLatitude,CrowdednessCount,is_event,Year,month_sin,month_cos,day_sin,day_cos,hour_sin,hour_cos
0,0,2018-03-11,2300,488.0,87.0,359.0,35.0,532.0,177.0,892.0,...,5.0,759.0,0.0,2018,1.0,6.123234000000001e-17,0.188227,0.982126,-0.258819,0.965926
1,1,2018-03-11,2000,332.0,194.0,627.0,115.0,600.0,193.0,1080.0,...,7.0,1771.0,0.0,2018,1.0,6.123234000000001e-17,0.188227,0.982126,-0.866025,0.5
2,2,2018-03-11,2000,332.0,194.0,627.0,115.0,600.0,193.0,1080.0,...,0.0,28.0,0.0,2018,1.0,6.123234000000001e-17,0.188227,0.982126,-0.866025,0.5
3,3,2018-03-11,2000,332.0,194.0,627.0,115.0,600.0,193.0,1080.0,...,9.0,80.0,0.0,2018,1.0,6.123234000000001e-17,0.188227,0.982126,-0.866025,0.5
4,4,2018-03-11,2000,332.0,194.0,627.0,115.0,600.0,193.0,1080.0,...,2.0,37.0,0.0,2018,1.0,6.123234000000001e-17,0.188227,0.982126,-0.866025,0.5
