In [24]:
import pandas as pd
import numpy as np
import plotly.express as px
import matplotlib.pyplot as plt
import seaborn as sns
import folium
import cbsodata
import scipy.stats

In [56]:
# fetch table '84709ENG' (the modes of travel dataset) through cbsodata and load it into a DataFrame
mot_records = cbsodata.get_data('84709ENG')
data_mot_original = pd.DataFrame(mot_records)

In [57]:
# Use the ID column as the index.
# The guard keeps this cell re-runnable: once ID has been moved into the index,
# calling set_index('ID') a second time would raise a KeyError.
if 'ID' in data_mot_original.columns:
    data_mot_original = data_mot_original.set_index('ID')
data_mot_original.head()

Unnamed: 0_level_0,Population,Sex,PersonalCharacteristics,ModesOfTravel,Margins,RegionCharacteristics,Periods,Trips_1,DistanceTravelled_2,TimeTravelled_3,Trips_4,DistanceTravelled_5,TimeTravelled_6
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
0,Population 6 years or older,Total male and female,Total persons,Total,Value,The Netherlands,2018,2.78,36.16,74.6,1015.0,13200.0,453.8
1,Population 6 years or older,Total male and female,Total persons,Total,Value,The Netherlands,2019,2.71,36.0,74.39,989.0,13140.0,452.6
2,Population 6 years or older,Total male and female,Total persons,Total,Value,The Netherlands,2020,2.35,24.88,61.96,861.0,9105.0,378.0
3,Population 6 years or older,Total male and female,Total persons,Total,Value,The Netherlands,2021,2.51,27.24,69.85,915.0,9942.0,424.9
4,Population 6 years or older,Total male and female,Total persons,Total,Value,Noord-Nederland (LD),2018,2.77,40.99,74.28,1011.0,14962.0,451.9


In [58]:
# keep only the columns that are used in the plots further down
needed_columns = [
    'PersonalCharacteristics',
    'ModesOfTravel',
    'Periods',
    'Trips_1',
    'DistanceTravelled_2',
]
data_mot = data_mot_original[needed_columns]
data_mot

Unnamed: 0_level_0,PersonalCharacteristics,ModesOfTravel,Periods,Trips_1,DistanceTravelled_2
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0,Total persons,Total,2018,2.78,36.16
1,Total persons,Total,2019,2.71,36.00
2,Total persons,Total,2020,2.35,24.88
3,Total persons,Total,2021,2.51,27.24
4,Total persons,Total,2018,2.77,40.99
...,...,...,...,...,...
443515,No driver's license; under 17,Other,2021,,
443516,No driver's license; under 17,Other,2018,,
443517,No driver's license; under 17,Other,2019,,
443518,No driver's license; under 17,Other,2020,,


In [59]:
# give the columns readable names ('Periods' keeps its name)
data_mot = data_mot.rename(columns={
    'PersonalCharacteristics': 'Personal characteristics',
    'ModesOfTravel': 'Modes of travel',
    'Trips_1': 'Trips',
    'DistanceTravelled_2': 'Distance travelled',
})

# row selection: the two student travel card groups, for three modes of travel
options_ovchipcard = [
    'Students with week pass publ. transp.',
    'Persons without student publ.transp.card',
]
options_travelmode = ['Passenger car (driver)', 'Train', 'Bus/metro']

# a row is kept only when both its group and its travel mode are in the lists above
in_card_group = data_mot['Personal characteristics'].isin(options_ovchipcard)
in_travel_mode = data_mot['Modes of travel'].isin(options_travelmode)
rslt_data_mot = data_mot[in_card_group & in_travel_mode]

In [60]:
# grouped bar chart: distance travelled per mode of travel, coloured by personal
# characteristic, with one animation frame per period
# NOTE(review): data_mot no longer carries Sex, Margins or RegionCharacteristics, so rows
# that differ only in those dimensions are not told apart in this chart - confirm this is intended.
mode_order = ['Bus/metro', 'Train', 'Passenger car (driver)']
fig = px.bar(
    rslt_data_mot,
    x='Modes of travel',
    y='Distance travelled',
    color='Personal characteristics',
    barmode='group',
    animation_group="Modes of travel",
    animation_frame='Periods',
    range_y=[0, 20],
    title='Distance travelled per car and public transport',
)
# fix the left-to-right order of the travel modes on the x-axis
fig.update_layout(xaxis=dict(categoryorder='array', categoryarray=mode_order))
fig.show()

As the bar plot shows, students with a week pass travel about seven times as far by train as people without a student OV chipcard. This makes sense, because travelling by train costs more money with a normal OV chipcard. Even so, students with a week pass still use the car quite a lot: 7.5 kilometres a day on average. The bus and metro are used for shorter distances than the train and the car.

In [61]:
# scatter plot of trips against distance travelled, one facet per mode of travel
# and one animation frame per period; trendline="ols" adds fitted regression lines
fig = px.scatter(
    rslt_data_mot,
    x="Trips",
    y="Distance travelled",
    color='Personal characteristics',
    facet_col='Modes of travel',
    animation_frame="Periods",
    trendline="ols",
)
fig.show()

The daily distance travelled is plotted against the number of daily trips, together with a regression line per personal characteristic. The number of trips clearly correlates with the distance travelled: in general, more trips in a day means a greater distance travelled. The plots also show that students with a week pass make the most trips by train and by bus and metro. The car is used for both shorter and longer trips, whereas the distances travelled by train are quite long. It seems that students with a week pass travel greater distances than people without a student week pass. Students travel by car, by train, and by bus and metro.

Now students with a week pass will be compared with their age group (18 to 24 years).

In [62]:
# selecting rows based on conditions, this time students with week pass and people in the age of 18 to 24 years
options_pc = ['Students with week pass publ. transp.',
                "Age: 18 to 24 years"] 
options_travelmode = ['Passenger car (driver)','Train','Bus/metro']
# note: this rebinds rslt_data_mot, so the plots from here on use the new selection
rslt_data_mot = data_mot[data_mot['Personal characteristics'].isin(options_pc) &
                    data_mot['Modes of travel'].isin(options_travelmode)] 

# making barplot
# the title names public transport instead of only the train, because Bus/metro is plotted as well
fig = px.bar(rslt_data_mot, x= 'Modes of travel', y= 'Distance travelled', color = 'Personal characteristics', barmode='group', 
            animation_group="Modes of travel", range_y = [0,20], animation_frame="Periods", title= 'Distance travelled per car and public transport')
# fix the left-to-right order of the travel modes on the x-axis
fig.update_layout(xaxis={'categoryorder':'array', 'categoryarray':['Bus/metro','Train','Passenger car (driver)']})
fig.show()

This plot shows that people aged 18 to 24 years travel greater distances by car than by public transport. This is the opposite of students with a student OV chipcard, who are mainly in the same age category, although students with a week pass do travel greater distances by car than by bus and metro. However, a large part of this age category is already working, because they finished their education at a young age. They probably have more money to spend, so they travel by car, and they cannot have a student OV chipcard.

In [63]:
# scatter plot of trips against distance travelled for the current selection,
# one facet per mode of travel; trendline="ols" adds fitted regression lines
scatter_options = dict(
    x="Trips",
    y="Distance travelled",
    color='Personal characteristics',
    facet_col='Modes of travel',
    animation_frame="Periods",
    trendline="ols",
)
fig = px.scatter(rslt_data_mot, **scatter_options)
fig.show()

The daily distance travelled is again plotted against the number of daily trips, together with a regression line per personal characteristic. The number of trips again correlates with the distance travelled. The plots look similar to the previous scatter plots, although there is no clear gap between the two personal characteristics categories. It can be concluded that students with a week pass travel greater distances by public transport than people without a week pass but with a driving license. A considerable part of the students with a week pass also have a driving license, as the plots show that they drive a car as well.