#### Load all packages

In [1]:
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt
import warnings
import datetime
%matplotlib inline
warnings.filterwarnings('ignore')

#### Load data for origin and destination per hour

In [2]:
origin = pd.read_csv('../../Data/20190403/Datalab_Reis_Herkomst_Uur_20190403.csv', sep=';')
origin = origin.dropna()
destination = pd.read_csv('../../Data/20190403/Datalab_Reis_Bestemming_Uur_20190402.csv', sep=';')
destination = destination.dropna()

In [3]:
# List of all stations in the sub-network
stations = ['Amsteldijk','Amstelstation','Amstelveenseweg','Beethovenstraat','Bullewijk','Burg.de Vlugtlaan', 
    'Centraal Station','Cornelis Troostplein','Dam','De Boelelaan/VU','De Pijp','Europaplein',
    'Gaasperplas','Gein','Gerrit v.d. Veenstraat','Heemstedestraat','Henk Sneevlietweg','IJsbaanpad', 
    'Isolatorweg','Jan v.Galenstraat','Keizersgracht','Koningsplein','Kraaienneststation','Leidseplein',
    'Maasstraat','Marie Heinekenplein','Minervaplein','Muntplein','Museumplein','Nieuwezijds Kolk',
    'Nieuwmarkt','Noord','Noorderpark','Olympiaplein','Olympiaweg','Olympisch stadion','Overamstel', 
    'Postjesweg','Prinsengracht','Reigersbos','Rijksmuseum','Roelof Hartplein','Rokin','Scheldestraat',
    'Spaklerweg','Spui','Station Bijlmer ArenA','Station Diemen-Zuid','Station Duivendrecht',
    'Station Ganzenhoef','Station Holendrecht','Station Lelylaan','Station RAI','Station Sloterdijk', 
    'Strandvliet','Van Baerlestraat','Van der Madeweg','Venserpolder','Verrijn Stuartweg','Victorieplein', 
    'Vijzelgracht','VU medisch centrum','Waalstraat','Waterlooplein','Weesperplein','Wibautstraat','Station Zuid']

#### Change dates to datetime objects and add weekday column

In [4]:
origin['Datum'] = origin['Datum'].apply(lambda x: datetime.datetime.strptime(x[:-3], '%m/%d/%Y %H:%M:%S'))
origin['Weekdag'] = origin['Datum'].apply(lambda x: x.isoweekday())

destination['Datum'] = destination['Datum'].apply(lambda x: datetime.datetime.strptime(x[:-3], '%m/%d/%Y %H:%M:%S'))
destination['Weekdag'] = destination['Datum'].apply(lambda x: x.isoweekday())

#### Remove data where the Noord/Zuid line did not run yet
The Noord/Zuid line was opened on the 21st of July, so all data before August is not used.

In [11]:
origin = origin[origin['Datum'] >= datetime.datetime(2018, 8, 1)]
destination = destination[destination['Datum'] >= datetime.datetime(2018, 8, 1)]

#### Apply necessary filters for analysis
The first analysis is only applied on one type of day (1 = Mondays) and for the stations that are included in the sub-network.

In [19]:
origin = origin[(origin['VertrekHalteNaam'].isin(stations)) & (origin['Weekdag'] == 1)]
destination = destination[(destination['AankomstHalteNaam'].isin(stations)) & (destination['Weekdag'] == 1)]

#### Group by hour for further analysis

In [35]:
grouped = origin.groupby(['UurgroepOmschrijving (van vertrek)'])

In [38]:
grouped.describe()

Unnamed: 0_level_0,AantalReizen,AantalReizen,AantalReizen,AantalReizen,AantalReizen,AantalReizen,AantalReizen,AantalReizen,VertrekLat,VertrekLat,...,VertrekLon,VertrekLon,Weekdag,Weekdag,Weekdag,Weekdag,Weekdag,Weekdag,Weekdag,Weekdag
Unnamed: 0_level_1,count,mean,std,min,25%,50%,75%,max,count,mean,...,75%,max,count,mean,std,min,25%,50%,75%,max
UurgroepOmschrijving (van vertrek),Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
00:00 - 00:59,1407.0,51.813788,73.899037,10.0,14.0,28.0,58.0,972.0,1407.0,4.893818,...,52.374929,52.401172,1407.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0
01:00 - 01:59,298.0,32.52349,20.622659,10.0,16.0,27.0,43.0,117.0,298.0,4.895226,...,52.379616,52.388017,298.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0
02:00 - 02:59,232.0,26.090517,16.975549,10.0,15.0,20.5,32.0,123.0,232.0,4.894105,...,52.379677,52.388017,232.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0
03:00 - 03:59,168.0,20.642857,10.671068,10.0,13.0,18.0,25.25,70.0,168.0,4.893738,...,52.379677,52.37975,168.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0
04:00 - 04:59,92.0,14.597826,5.785658,10.0,11.0,13.0,15.25,43.0,92.0,4.894678,...,52.379632,52.37975,92.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0
05:00 - 05:59,1022.0,33.963796,28.588979,10.0,13.0,22.0,44.0,169.0,1022.0,4.912497,...,52.371966,52.401172,1022.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0
06:00 - 06:59,2166.0,82.951524,105.927924,10.0,15.0,41.5,110.75,676.0,2166.0,4.897355,...,52.377494,52.401172,2166.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0
07:00 - 07:59,4041.0,151.574858,294.71415,10.0,18.0,37.0,147.0,2231.0,4041.0,4.893983,...,52.373009,52.401172,4041.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0
08:00 - 08:59,5223.0,226.263259,547.744224,10.0,21.0,44.0,171.5,5538.0,5223.0,4.893567,...,52.36928,52.401172,5223.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0
09:00 - 09:59,4848.0,150.418523,347.203969,10.0,20.0,41.0,133.0,3981.0,4848.0,4.892936,...,52.369331,52.401172,4848.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0


In [57]:
# Mean is too high, add station with a value of zero if it does not appear in the row? sum/34
origin.groupby(['UurgroepOmschrijving (van vertrek)', 'VertrekHalteNaam']).sum()

Unnamed: 0_level_0,Unnamed: 1_level_0,VertrekLat,VertrekLon,AantalReizen,Weekdag
UurgroepOmschrijving (van vertrek),VertrekHalteNaam,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
00:00 - 00:59,Amsteldijk,4.911241,52.347113,25,1
00:00 - 00:59,Amstelstation,186.872385,1989.165562,1561,38
00:00 - 00:59,Amstelveenseweg,19.429914,209.366427,56,4
00:00 - 00:59,Bullewijk,49.523358,523.064220,149,10
00:00 - 00:59,Burg.de Vlugtlaan,19.352364,209.518048,48,4
00:00 - 00:59,Centraal Station,882.137964,9428.131441,23682,180
00:00 - 00:59,Dam,474.560833,5080.247140,7393,97
00:00 - 00:59,De Pijp,220.088704,2355.880319,1645,45
00:00 - 00:59,Europaplein,83.153533,889.811532,308,17
00:00 - 00:59,Gaasperplas,4.984686,52.310806,15,1


In [58]:
origin

Unnamed: 0,Datum,UurgroepOmschrijving (van vertrek),VertrekHalteCode,VertrekHalteNaam,VertrekLat,VertrekLon,AantalReizen,Weekdag
1734117,2018-08-06 12:00:00,00:00 - 00:59,00560,Station Bijlmer ArenA,4.947479,52.311258,13,1
1734118,2018-08-06 12:00:00,00:00 - 00:59,00561,Station Bijlmer ArenA,4.947419,52.311329,14,1
1734120,2018-08-06 12:00:00,00:00 - 00:59,02361,Station Sloterdijk,4.839319,52.388335,11,1
1734121,2018-08-06 12:00:00,00:00 - 00:59,05011,Centraal Station,4.899218,52.378108,141,1
1734122,2018-08-06 12:00:00,00:00 - 00:59,05022,Centraal Station,4.901043,52.377720,194,1
1734123,2018-08-06 12:00:00,00:00 - 00:59,05031,Dam,4.893484,52.373411,112,1
1734124,2018-08-06 12:00:00,00:00 - 00:59,05032,Dam,4.895494,52.374929,20,1
1734125,2018-08-06 12:00:00,00:00 - 00:59,05035,Rokin,4.892104,52.369396,45,1
1734126,2018-08-06 12:00:00,00:00 - 00:59,05036,Rokin,4.892305,52.369855,12,1
1734129,2018-08-06 12:00:00,00:00 - 00:59,05052,Waterlooplein,4.902347,52.367273,15,1
