In [1]:
%use dataframe(v=0.13.1)

In [2]:
// reading the csv by dragAndDrop
// from https://www.rijdendetreinen.nl/en/open-data/disruptions#downloads
val disruptions2023 = DataFrame.readCSV("data/disruptions/disruptions-2023.csv", delimiter = ',')
disruptions2023

rdt_id,ns_lines,rdt_lines,rdt_lines_id,rdt_station_names,rdt_station_codes,cause_nl,cause_en,statistical_cause_nl,statistical_cause_en,cause_group,start_time,end_time,duration_minutes
45999,Amsterdam-Rotterdam-Brussel (HSL),Amsterdam Centraal - Schiphol Airport...,2432.0,"Amsterdam Centraal,Amsterdam Lelylaan...","ASD, ASDL, ASS, RTD, SHL",wisselstoring,points failure,wisselstoring,points failure,infrastructure,2023-01-01T08:19:26,2023-01-01T22:43:08,864
46000,Zwolle-Leeuwarden,Leeuwarden - Zwolle,160.0,"Heerenveen,Wolvega,Heerenveen IJsstadion","HR, WV, HRY",dier op het spoor,an animal on the railway track,dier op het spoor,an animal on the railway track,external,2023-01-01T10:31:49,2023-01-01T10:56:17,24
46001,Heerlen-Aachen Hbf,Aachen Hbf - Heerlen,130.0,"Aachen Hbf,Eygelshoven Markt,Heerlen,...","AHBF, EGHM, HRL, HRLK, HZ, LG, AW",beperkingen in de materieelinzet,problems with the rolling stock,beperkingen in de materieelinzet,problems with the rolling stock,rolling stock,2023-01-01T13:19:24,2023-01-02T00:02:39,643
46002,Zutphen-Winterswijk,Winterswijk - Zutphen,83.0,"Vorden,Zutphen","VD, ZP",aanrijding,collision,aanrijding,collision,accidents,2023-01-01T17:15:22,2023-01-01T20:14:23,179
46003,Heerlen-Aachen Hbf,Aachen Hbf - Heerlen,130.0,"Aachen Hbf,Eygelshoven Markt,Heerlen,...","AHBF, EGHM, HRL, HRLK, HZ, LG, AW",beperkingen in de materieelinzet,problems with the rolling stock,beperkingen in de materieelinzet,problems with the rolling stock,rolling stock,2023-01-02T05:57:27,2023-01-03T02:07:13,1210
46004,Amersfoort-Ede-Wageningen,Amersfoort - Ede-Wageningen,47.0,"Amersfoort Centraal,Barneveld Centrum...","AMF, BNC, BNN, ED, EDC, LTN, HVL, BNZ",defecte trein,broken down train,defecte trein,broken down train,rolling stock,2023-01-02T06:36:39,2023-01-02T07:28:16,52
46005,Dordrecht-Breda; Dordrecht-Roosendaal,"Breda - Dordrecht, Dordrecht - Roosen...",170171.0,"Dordrecht,Dordrecht Zuid,Lage Zwaluwe","DDR, DDZD, ZLW",defecte trein,broken down train,defecte trein,broken down train,rolling stock,2023-01-02T07:31:33,2023-01-02T08:09:37,38
46006,'s-Hertogenbosch-Tilburg,'s-Hertogenbosch - Tilburg,69.0,"'s-Hertogenbosch,Tilburg","HT, TB",defecte trein,broken down train,defecte trein,broken down train,rolling stock,2023-01-02T11:33:15,2023-01-02T11:44:27,11
46007,Rotterdam-Breda (HSL),Breda - Rotterdam Centraal (HSL),15.0,"Breda,Rotterdam Centraal","BD, RTD",gestrande trein,stranded train,gestrande trein,stranded train,rolling stock,2023-01-02T11:50:11,2023-01-02T12:25:39,35
46008,Amsterdam-Schiphol-Rotterdam (HSL),Amsterdam Centraal - Schiphol Airport...,2432.0,"Amsterdam Centraal,Amsterdam Lelylaan...","ASD, ASDL, ASS, RTD, SHL",defecte trein,broken down train,defecte trein,broken down train,rolling stock,2023-01-02T12:40:11,2023-01-02T13:08:08,28


In [3]:
disruptions2023.schema()

rdt_id: Int
ns_lines: String
rdt_lines: String?
rdt_lines_id: Double?
rdt_station_names: String?
rdt_station_codes: String?
cause_nl: String
cause_en: String
statistical_cause_nl: String
statistical_cause_en: String
cause_group: String
start_time: kotlinx.datetime.LocalDateTime
end_time: kotlinx.datetime.LocalDateTime?
duration_minutes: Int?

 Looking at the schema, we can see it mostly parsed the data correctly.
 `rdt_lines_id: Double?` is a mistake though.
 
From the website: "These are the IDs of the lines linked to a disruption by Rijden de Treinen, separated by a comma."
Understandably, `"24,32"` is parsed as a `Double` instead of `String`. Let's try to nudge it into the right direction when reading the data
by supplying it with a manual type for this column.

Let's also rename it to camel case while we're at it.

In [4]:
val disruptions2023 = DataFrame.readCSV(
    fileOrUrl = "data/disruptions/disruptions-2023.csv",
    delimiter = ',',
    colTypes = mapOf("rdt_lines_id" to ColType.String),
).renameToCamelCase()

disruptions2023

rdtId,nsLines,rdtLines,rdtLinesId,rdtStationNames,rdtStationCodes,causeNl,causeEn,statisticalCauseNl,statisticalCauseEn,causeGroup,startTime,endTime,durationMinutes
45999,Amsterdam-Rotterdam-Brussel (HSL),Amsterdam Centraal - Schiphol Airport...,2432,"Amsterdam Centraal,Amsterdam Lelylaan...","ASD, ASDL, ASS, RTD, SHL",wisselstoring,points failure,wisselstoring,points failure,infrastructure,2023-01-01T08:19:26,2023-01-01T22:43:08,864
46000,Zwolle-Leeuwarden,Leeuwarden - Zwolle,160,"Heerenveen,Wolvega,Heerenveen IJsstadion","HR, WV, HRY",dier op het spoor,an animal on the railway track,dier op het spoor,an animal on the railway track,external,2023-01-01T10:31:49,2023-01-01T10:56:17,24
46001,Heerlen-Aachen Hbf,Aachen Hbf - Heerlen,130,"Aachen Hbf,Eygelshoven Markt,Heerlen,...","AHBF, EGHM, HRL, HRLK, HZ, LG, AW",beperkingen in de materieelinzet,problems with the rolling stock,beperkingen in de materieelinzet,problems with the rolling stock,rolling stock,2023-01-01T13:19:24,2023-01-02T00:02:39,643
46002,Zutphen-Winterswijk,Winterswijk - Zutphen,83,"Vorden,Zutphen","VD, ZP",aanrijding,collision,aanrijding,collision,accidents,2023-01-01T17:15:22,2023-01-01T20:14:23,179
46003,Heerlen-Aachen Hbf,Aachen Hbf - Heerlen,130,"Aachen Hbf,Eygelshoven Markt,Heerlen,...","AHBF, EGHM, HRL, HRLK, HZ, LG, AW",beperkingen in de materieelinzet,problems with the rolling stock,beperkingen in de materieelinzet,problems with the rolling stock,rolling stock,2023-01-02T05:57:27,2023-01-03T02:07:13,1210
46004,Amersfoort-Ede-Wageningen,Amersfoort - Ede-Wageningen,47,"Amersfoort Centraal,Barneveld Centrum...","AMF, BNC, BNN, ED, EDC, LTN, HVL, BNZ",defecte trein,broken down train,defecte trein,broken down train,rolling stock,2023-01-02T06:36:39,2023-01-02T07:28:16,52
46005,Dordrecht-Breda; Dordrecht-Roosendaal,"Breda - Dordrecht, Dordrecht - Roosen...",170171,"Dordrecht,Dordrecht Zuid,Lage Zwaluwe","DDR, DDZD, ZLW",defecte trein,broken down train,defecte trein,broken down train,rolling stock,2023-01-02T07:31:33,2023-01-02T08:09:37,38
46006,'s-Hertogenbosch-Tilburg,'s-Hertogenbosch - Tilburg,69,"'s-Hertogenbosch,Tilburg","HT, TB",defecte trein,broken down train,defecte trein,broken down train,rolling stock,2023-01-02T11:33:15,2023-01-02T11:44:27,11
46007,Rotterdam-Breda (HSL),Breda - Rotterdam Centraal (HSL),15,"Breda,Rotterdam Centraal","BD, RTD",gestrande trein,stranded train,gestrande trein,stranded train,rolling stock,2023-01-02T11:50:11,2023-01-02T12:25:39,35
46008,Amsterdam-Schiphol-Rotterdam (HSL),Amsterdam Centraal - Schiphol Airport...,2432,"Amsterdam Centraal,Amsterdam Lelylaan...","ASD, ASDL, ASS, RTD, SHL",defecte trein,broken down train,defecte trein,broken down train,rolling stock,2023-01-02T12:40:11,2023-01-02T13:08:08,28


In [5]:
disruptions2023.schema()

rdtId: Int
nsLines: String
rdtLines: String?
rdtLinesId: String?
rdtStationNames: String?
rdtStationCodes: String?
causeNl: String
causeEn: String
statisticalCauseNl: String
statisticalCauseEn: String
causeGroup: String
startTime: kotlinx.datetime.LocalDateTime
endTime: kotlinx.datetime.LocalDateTime?
durationMinutes: Int?

Now the schema looks better! One of the best things about using DataFrame in notebooks
is that in between cell calls type-safe accessors are generated for you!

In [6]:
disruptions2023.rdtLinesId

rdtLinesId
2432
160
130
83
130
47
170171
69
15
2432


We can actually make this hidden process visible by tracking all code that's executed under the hood.

Libraries for the Kotlin Jupyter kernel and notebooks can be very powerful as you can see!

In [7]:
%trackExecution
val dataFrame = dataFrameOf("a", "b")(1, 2)

Executing:

val dataFrame = dataFrameOf("a", "b")(1, 2)

Executing:
@DataSchema
interface _DataFrameType2 { }

val ColumnsContainer<_DataFrameType2>.a: DataColumn<Int> @JvmName("_DataFrameType2_a") get() = this["a"] as DataColumn<Int>
val DataRow<_DataFrameType2>.a: Int @JvmName("_DataFrameType2_a") get() = this["a"] as Int
val ColumnsContainer<_DataFrameType2>.b: DataColumn<Int> @JvmName("_DataFrameType2_b") get() = this["b"] as DataColumn<Int>
val DataRow<_DataFrameType2>.b: Int @JvmName("_DataFrameType2_b") get() = this["b"] as Int
(dataFrame as org.jetbrains.kotlinx.dataframe.DataFrame<*>).cast<_DataFrameType2>()
Executing:
val dataFrame = res15


In [8]:
%trackExecution off

In [9]:
val a = dataFrame.a
val b = dataFrame.b

a

a
1


Anyway, let's get back to our data!

Let's remove the columns we don't need and convert and rename some others.

In [10]:
// before
disruptions2023

rdtId,nsLines,rdtLines,rdtLinesId,rdtStationNames,rdtStationCodes,causeNl,causeEn,statisticalCauseNl,statisticalCauseEn,causeGroup,startTime,endTime,durationMinutes
45999,Amsterdam-Rotterdam-Brussel (HSL),Amsterdam Centraal - Schiphol Airport...,2432,"Amsterdam Centraal,Amsterdam Lelylaan...","ASD, ASDL, ASS, RTD, SHL",wisselstoring,points failure,wisselstoring,points failure,infrastructure,2023-01-01T08:19:26,2023-01-01T22:43:08,864
46000,Zwolle-Leeuwarden,Leeuwarden - Zwolle,160,"Heerenveen,Wolvega,Heerenveen IJsstadion","HR, WV, HRY",dier op het spoor,an animal on the railway track,dier op het spoor,an animal on the railway track,external,2023-01-01T10:31:49,2023-01-01T10:56:17,24
46001,Heerlen-Aachen Hbf,Aachen Hbf - Heerlen,130,"Aachen Hbf,Eygelshoven Markt,Heerlen,...","AHBF, EGHM, HRL, HRLK, HZ, LG, AW",beperkingen in de materieelinzet,problems with the rolling stock,beperkingen in de materieelinzet,problems with the rolling stock,rolling stock,2023-01-01T13:19:24,2023-01-02T00:02:39,643
46002,Zutphen-Winterswijk,Winterswijk - Zutphen,83,"Vorden,Zutphen","VD, ZP",aanrijding,collision,aanrijding,collision,accidents,2023-01-01T17:15:22,2023-01-01T20:14:23,179
46003,Heerlen-Aachen Hbf,Aachen Hbf - Heerlen,130,"Aachen Hbf,Eygelshoven Markt,Heerlen,...","AHBF, EGHM, HRL, HRLK, HZ, LG, AW",beperkingen in de materieelinzet,problems with the rolling stock,beperkingen in de materieelinzet,problems with the rolling stock,rolling stock,2023-01-02T05:57:27,2023-01-03T02:07:13,1210
46004,Amersfoort-Ede-Wageningen,Amersfoort - Ede-Wageningen,47,"Amersfoort Centraal,Barneveld Centrum...","AMF, BNC, BNN, ED, EDC, LTN, HVL, BNZ",defecte trein,broken down train,defecte trein,broken down train,rolling stock,2023-01-02T06:36:39,2023-01-02T07:28:16,52
46005,Dordrecht-Breda; Dordrecht-Roosendaal,"Breda - Dordrecht, Dordrecht - Roosen...",170171,"Dordrecht,Dordrecht Zuid,Lage Zwaluwe","DDR, DDZD, ZLW",defecte trein,broken down train,defecte trein,broken down train,rolling stock,2023-01-02T07:31:33,2023-01-02T08:09:37,38
46006,'s-Hertogenbosch-Tilburg,'s-Hertogenbosch - Tilburg,69,"'s-Hertogenbosch,Tilburg","HT, TB",defecte trein,broken down train,defecte trein,broken down train,rolling stock,2023-01-02T11:33:15,2023-01-02T11:44:27,11
46007,Rotterdam-Breda (HSL),Breda - Rotterdam Centraal (HSL),15,"Breda,Rotterdam Centraal","BD, RTD",gestrande trein,stranded train,gestrande trein,stranded train,rolling stock,2023-01-02T11:50:11,2023-01-02T12:25:39,35
46008,Amsterdam-Schiphol-Rotterdam (HSL),Amsterdam Centraal - Schiphol Airport...,2432,"Amsterdam Centraal,Amsterdam Lelylaan...","ASD, ASDL, ASS, RTD, SHL",defecte trein,broken down train,defecte trein,broken down train,rolling stock,2023-01-02T12:40:11,2023-01-02T13:08:08,28


In [11]:
import kotlin.time.Duration.Companion.minutes

val df1 = disruptions2023

    // we remove nsLines, dutch columns, and causeEn (as statisticalCauseEn is better according to the docs)
    .remove { nsLines and nameEndsWith("Nl") and causeEn }

    // let's also remove some rows where durationMinutes == null
    .dropNulls { durationMinutes }
    
    // Parsing minutes into kotlin.time.Duration and creating an extra date column
    .add {
        "duration" from { durationMinutes!!.minutes }
        "date" from { startTime.date }
    }

    // renaming columns to remove "rdt" and "En" from the beginning and end
    .rename { all() }.into {
        it.name
            .removePrefix("rdt")
            .replaceFirstChar { it.lowercase() }
            .removeSuffix("En")
    }

df1

id,lines,linesId,stationNames,stationCodes,statisticalCause,causeGroup,startTime,endTime,durationMinutes,duration,date
45999,Amsterdam Centraal - Schiphol Airport...,2432,"Amsterdam Centraal,Amsterdam Lelylaan...","ASD, ASDL, ASS, RTD, SHL",points failure,infrastructure,2023-01-01T08:19:26,2023-01-01T22:43:08,864,14h 24m,2023-01-01
46000,Leeuwarden - Zwolle,160,"Heerenveen,Wolvega,Heerenveen IJsstadion","HR, WV, HRY",an animal on the railway track,external,2023-01-01T10:31:49,2023-01-01T10:56:17,24,24m,2023-01-01
46001,Aachen Hbf - Heerlen,130,"Aachen Hbf,Eygelshoven Markt,Heerlen,...","AHBF, EGHM, HRL, HRLK, HZ, LG, AW",problems with the rolling stock,rolling stock,2023-01-01T13:19:24,2023-01-02T00:02:39,643,10h 43m,2023-01-01
46002,Winterswijk - Zutphen,83,"Vorden,Zutphen","VD, ZP",collision,accidents,2023-01-01T17:15:22,2023-01-01T20:14:23,179,2h 59m,2023-01-01
46003,Aachen Hbf - Heerlen,130,"Aachen Hbf,Eygelshoven Markt,Heerlen,...","AHBF, EGHM, HRL, HRLK, HZ, LG, AW",problems with the rolling stock,rolling stock,2023-01-02T05:57:27,2023-01-03T02:07:13,1210,20h 10m,2023-01-02
46004,Amersfoort - Ede-Wageningen,47,"Amersfoort Centraal,Barneveld Centrum...","AMF, BNC, BNN, ED, EDC, LTN, HVL, BNZ",broken down train,rolling stock,2023-01-02T06:36:39,2023-01-02T07:28:16,52,52m,2023-01-02
46005,"Breda - Dordrecht, Dordrecht - Roosen...",170171,"Dordrecht,Dordrecht Zuid,Lage Zwaluwe","DDR, DDZD, ZLW",broken down train,rolling stock,2023-01-02T07:31:33,2023-01-02T08:09:37,38,38m,2023-01-02
46006,'s-Hertogenbosch - Tilburg,69,"'s-Hertogenbosch,Tilburg","HT, TB",broken down train,rolling stock,2023-01-02T11:33:15,2023-01-02T11:44:27,11,11m,2023-01-02
46007,Breda - Rotterdam Centraal (HSL),15,"Breda,Rotterdam Centraal","BD, RTD",stranded train,rolling stock,2023-01-02T11:50:11,2023-01-02T12:25:39,35,35m,2023-01-02
46008,Amsterdam Centraal - Schiphol Airport...,2432,"Amsterdam Centraal,Amsterdam Lelylaan...","ASD, ASDL, ASS, RTD, SHL",broken down train,rolling stock,2023-01-02T12:40:11,2023-01-02T13:08:08,28,28m,2023-01-02


Almost perfect! However, we still have some list-like columns. We can split those into lists to make them more manageable.

In [12]:
val df2 = df1
    .split {
        cols(lines, linesId, stationNames, stationCodes)
    }.by(",").inplace()
    .convert { linesId.cast<List<String>>() }.with { it.map { it.toInt() } }

df2

id,lines,linesId,stationNames,stationCodes,statisticalCause,causeGroup,startTime,endTime,durationMinutes,duration,date
45999,[Amsterdam Centraal - Schiphol Airpor...,"[24, 32]","[Amsterdam Centraal, Amsterdam Lelyla...","[ASD, ASDL, ASS, RTD, SHL]",points failure,infrastructure,2023-01-01T08:19:26,2023-01-01T22:43:08,864,14h 24m,2023-01-01
46000,[Leeuwarden - Zwolle],[160],"[Heerenveen, Wolvega, Heerenveen IJss...","[HR, WV, HRY]",an animal on the railway track,external,2023-01-01T10:31:49,2023-01-01T10:56:17,24,24m,2023-01-01
46001,[Aachen Hbf - Heerlen],[130],"[Aachen Hbf, Eygelshoven Markt, Heerl...","[AHBF, EGHM, HRL, HRLK, HZ, LG, AW]",problems with the rolling stock,rolling stock,2023-01-01T13:19:24,2023-01-02T00:02:39,643,10h 43m,2023-01-01
46002,[Winterswijk - Zutphen],[83],"[Vorden, Zutphen]","[VD, ZP]",collision,accidents,2023-01-01T17:15:22,2023-01-01T20:14:23,179,2h 59m,2023-01-01
46003,[Aachen Hbf - Heerlen],[130],"[Aachen Hbf, Eygelshoven Markt, Heerl...","[AHBF, EGHM, HRL, HRLK, HZ, LG, AW]",problems with the rolling stock,rolling stock,2023-01-02T05:57:27,2023-01-03T02:07:13,1210,20h 10m,2023-01-02
46004,[Amersfoort - Ede-Wageningen],[47],"[Amersfoort Centraal, Barneveld Centr...","[AMF, BNC, BNN, ED, EDC, LTN, HVL, BNZ]",broken down train,rolling stock,2023-01-02T06:36:39,2023-01-02T07:28:16,52,52m,2023-01-02
46005,"[Breda - Dordrecht, Dordrecht - Roose...","[170, 171]","[Dordrecht, Dordrecht Zuid, Lage Zwal...","[DDR, DDZD, ZLW]",broken down train,rolling stock,2023-01-02T07:31:33,2023-01-02T08:09:37,38,38m,2023-01-02
46006,['s-Hertogenbosch - Tilburg],[69],"['s-Hertogenbosch, Tilburg]","[HT, TB]",broken down train,rolling stock,2023-01-02T11:33:15,2023-01-02T11:44:27,11,11m,2023-01-02
46007,[Breda - Rotterdam Centraal (HSL)],[15],"[Breda, Rotterdam Centraal]","[BD, RTD]",stranded train,rolling stock,2023-01-02T11:50:11,2023-01-02T12:25:39,35,35m,2023-01-02
46008,[Amsterdam Centraal - Schiphol Airpor...,"[24, 32]","[Amsterdam Centraal, Amsterdam Lelyla...","[ASD, ASDL, ASS, RTD, SHL]",broken down train,rolling stock,2023-01-02T12:40:11,2023-01-02T13:08:08,28,28m,2023-01-02


In [13]:
df2.schema()

id: Int
lines: List<String>
linesId: List<Int>
stationNames: List<String>
stationCodes: List<String>
statisticalCause: String
causeGroup: String
startTime: kotlinx.datetime.LocalDateTime
endTime: kotlinx.datetime.LocalDateTime
durationMinutes: Int
duration: time.Duration
date: kotlinx.datetime.LocalDate

Done! Now let's get to work! We can find all sorts of interesting stuff:

  - What's the longest delay duration in 2023? (clicking in the table)
  - What track had the most delays in 2023?
  - Do I have the right to complain about Dutch trains in demos?
  - What causes delays?

## Cause groups

I'm actually quite interested in these causes and what makes up a "cause group".
Let's find all groups and see what causes are inside :)

Note the nested DataFrames :)

In [14]:
df2
    .groupBy { causeGroup }.aggregate {
        statisticalCause.valueCounts() into "statisticalCauses"
    }
    .sortByDesc { 
        expr { getFrameColumn("statisticalCauses").count() } 
    }

causeGroup,statisticalCauses
statisticalCause,count
statisticalCause,count
statisticalCause,count
statisticalCause,count
statisticalCause,count
statisticalCause,count
statisticalCause,count
statisticalCause,count
statisticalCause,count
infrastructure,DataFrame [13 x 2]statisticalCausecountsignalling and points failure289points failure229signal failure163damaged overhead wires104defective railway track87... showing only top 5 of 13 rows
statisticalCause,count
signalling and points failure,289
points failure,229
signal failure,163
damaged overhead wires,104
defective railway track,87
external,DataFrame [18 x 2]statisticalCausecountan emergency call151person on the railway track146people on the railway track72police action46fire alarm35... showing only top 5 of 18 rows
statisticalCause,count
an emergency call,151

statisticalCause,count
signalling and points failure,289
points failure,229
signal failure,163
damaged overhead wires,104
defective railway track,87

statisticalCause,count
an emergency call,151
person on the railway track,146
people on the railway track,72
police action,46
fire alarm,35

statisticalCause,count
broken down train,1704
stranded train,134
problems with the rolling stock,81
defective trains,2
the use of alternative train units,1

statisticalCause,count
collision,493
damaged railway bridge,37
damaged level crossing,7

statisticalCause,count
logistical limitations,99
disruption elsewhere,78
railway problems abroad,39
an earlier disruption,14
excessive delays,1

statisticalCause,count
repair works,146
over-running engineering works,47
engineering works,9

statisticalCause,count
staffing problems,160
strike of Arriva staff,76
strike of Keolis staff,36
staff strikes abroad,24
strike of Connexxion staff,11

statisticalCause,count
technical investigation,70
multiple disruptions,7

statisticalCause,count
weather circumstances,25
overhead wires covered with frost,11
lightning strike,9
slippery railway tracks,6
an amended timetable,3


## Which line had the most delays?

In [15]:
val byLines = df2
    .explode { lines }
    .groupBy { lines }

byLines.count().sortByDesc("count")

lines,count
Amsterdam Centraal - Schiphol Airport,258
Rotterdam Centraal - Schiphol Airport...,248
Amersfoort - Schiphol Airport,209
Leiden Centraal - Schiphol Airport,200
Lelystad Centrum - Schiphol Airport,194
Breda - Rotterdam Centraal (HSL),165
Den Haag HS - Rotterdam Centraal,165
Schiphol Airport - Utrecht Centraal,160
Amsterdam Centraal - Utrecht Centraal,149
Dordrecht - Rotterdam Centraal,147


Well, what a surprise that was!

Let's get some more information about the duration of the delay, because just a count doesn't tell the whole story.

In [16]:
byLines.aggregate {
    duration.describe().first() into "duration"
}

lines,duration,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Unnamed: 6_level_0,Unnamed: 7_level_0,Unnamed: 8_level_0,Unnamed: 9_level_0,Unnamed: 10_level_0
Unnamed: 0_level_1,name,type,count,unique,nulls,top,freq,min,median,max
Amsterdam Centraal - Schiphol Airport,duration,Duration,258,166,0,31m,5,0s,1h 5m,2d 4h 33m
Rotterdam Centraal - Schiphol Airport...,duration,Duration,248,154,0,6m,7,0s,50m,13d 7h 1m
Leeuwarden - Zwolle,duration,Duration,112,89,0,8m,3,2m,1h 8m,27d 20h 19m
Aachen Hbf - Heerlen,duration,Duration,87,78,0,6m,3,2m,5h 44m,9d 13h 51m
Winterswijk - Zutphen,duration,Duration,43,41,0,3h 24m,2,1m,2h 35m,3d 20h 28m
Amersfoort - Ede-Wageningen,duration,Duration,65,58,0,1h 54m,2,4m,2h 31m,3d 15h 19m
Breda - Dordrecht,duration,Duration,68,56,0,15m,3,3m,51m,1d 10h 22m
Dordrecht - Roosendaal,duration,Duration,103,81,0,33m,4,1m,50m,1d 10h 22m
's-Hertogenbosch - Tilburg,duration,Duration,47,44,0,3h 12m,3,0s,1h 22m,10h 26m
Breda - Rotterdam Centraal (HSL),duration,Duration,165,90,0,6m,6,0s,35m,20h 2m


Now, this was per line, what about per station? The data also provides the affected stations in each line:

In [17]:
val byStation = df2
    .explode { stationNames }
    .groupBy { stationNames }

byStation.count().sortByDesc("count")

stationNames,count
Rotterdam Centraal,612
Schiphol Airport,493
Amsterdam Centraal,392
Utrecht Centraal,324
Amsterdam Sloterdijk,308
Breda,289
Arnhem Centraal,264
Zwolle,260
Leiden Centraal,248
Amersfoort Centraal,221


Interesting! We have another 'winner'.

I don't know about you, but this requires some visualization, doesn't it?

Let's use Kandy, as it has excellent integration with notebooks and DataFrame.

Let's take a look at the examples: https://kotlin.github.io/kandy/examples.html

In [18]:
%use kandy

In [19]:
val top10 = byStation.sortByGroupDesc {
//    count()
//    durationMinutes.mean()
    count() * durationMinutes.median()
//    count() * durationMinutes.mean()
}.filter { it.index() < 10 }

top10

stationNames,group,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Unnamed: 6_level_0,Unnamed: 7_level_0,Unnamed: 8_level_0,Unnamed: 9_level_0,Unnamed: 10_level_0,Unnamed: 11_level_0
id,lines,linesId,stationNames,stationCodes,statisticalCause,causeGroup,startTime,endTime,durationMinutes,duration,date
id,lines,linesId,stationNames,stationCodes,statisticalCause,causeGroup,startTime,endTime,durationMinutes,duration,date
id,lines,linesId,stationNames,stationCodes,statisticalCause,causeGroup,startTime,endTime,durationMinutes,duration,date
id,lines,linesId,stationNames,stationCodes,statisticalCause,causeGroup,startTime,endTime,durationMinutes,duration,date
id,lines,linesId,stationNames,stationCodes,statisticalCause,causeGroup,startTime,endTime,durationMinutes,duration,date
id,lines,linesId,stationNames,stationCodes,statisticalCause,causeGroup,startTime,endTime,durationMinutes,duration,date
id,lines,linesId,stationNames,stationCodes,statisticalCause,causeGroup,startTime,endTime,durationMinutes,duration,date
id,lines,linesId,stationNames,stationCodes,statisticalCause,causeGroup,startTime,endTime,durationMinutes,duration,date
id,lines,linesId,stationNames,stationCodes,statisticalCause,causeGroup,startTime,endTime,durationMinutes,duration,date
id,lines,linesId,stationNames,stationCodes,statisticalCause,causeGroup,startTime,endTime,durationMinutes,duration,date
Heerlen,"DataFrame [153 x 12]idlineslinesIdstationNamesstationCodesstatisticalCausecauseGroupstartTimeendTimedurationMinutesdurationdate46001[Aachen Hbf - Heerlen][130]Heerlen[AHBF, EGHM, HRL, HRLK, HZ, LG, AW]problems with the rolling stockrolling stock2023-01-01T13:19:242023-01-02T00:02:3964310h 43m2023-01-0146003[Aachen Hbf - Heerlen][130]Heerlen[AHBF, EGHM, HRL, HRLK, HZ, LG, AW]problems with the rolling stockrolling stock2023-01-02T05:57:272023-01-03T02:07:13121020h 10m2023-01-0246019[Aachen Hbf - Heerlen][130]Heerlen[AHBF, EGHM, HRL, HRLK, HZ, LG, AW]problems with the rolling stockrolling stock2023-01-03T07:26:332023-01-04T04:34:14126821h 8m2023-01-0346025[Aachen Hbf - Heerlen][130]Heerlen[AHBF, EGHM, HRL, HRLK, HZ, LG, AW]problems with the rolling stockrolling stock2023-01-04T05:51:172023-01-06T00:00:0325291d 18h 9m2023-01-0446049[Aachen Hbf - Heerlen][130]Heerlen[AHBF, EGHM, HRL, HRLK, HZ, LG, AW]problems with the rolling stockrolling stock2023-01-06T05:32:172023-01-06T05:41:2799m2023-01-06... showing only top 5 of 153 rows",,,,,,,,,,
id,lines,linesId,stationNames,stationCodes,statisticalCause,causeGroup,startTime,endTime,durationMinutes,duration,date
46001,[Aachen Hbf - Heerlen],[130],Heerlen,"[AHBF, EGHM, HRL, HRLK, HZ, LG, AW]",problems with the rolling stock,rolling stock,2023-01-01T13:19:24,2023-01-02T00:02:39,643,10h 43m,2023-01-01
46003,[Aachen Hbf - Heerlen],[130],Heerlen,"[AHBF, EGHM, HRL, HRLK, HZ, LG, AW]",problems with the rolling stock,rolling stock,2023-01-02T05:57:27,2023-01-03T02:07:13,1210,20h 10m,2023-01-02
46019,[Aachen Hbf - Heerlen],[130],Heerlen,"[AHBF, EGHM, HRL, HRLK, HZ, LG, AW]",problems with the rolling stock,rolling stock,2023-01-03T07:26:33,2023-01-04T04:34:14,1268,21h 8m,2023-01-03
46025,[Aachen Hbf - Heerlen],[130],Heerlen,"[AHBF, EGHM, HRL, HRLK, HZ, LG, AW]",problems with the rolling stock,rolling stock,2023-01-04T05:51:17,2023-01-06T00:00:03,2529,1d 18h 9m,2023-01-04
46049,[Aachen Hbf - Heerlen],[130],Heerlen,"[AHBF, EGHM, HRL, HRLK, HZ, LG, AW]",problems with the rolling stock,rolling stock,2023-01-06T05:32:17,2023-01-06T05:41:27,9,9m,2023-01-06
Hengelo,"DataFrame [217 x 12]idlineslinesIdstationNamesstationCodesstatisticalCausecauseGroupstartTimeendTimedurationMinutesdurationdate46080[Almelo - Hengelo][89]Hengelo[AML, AMRI, BN, HGL]points failureinfrastructure2023-01-10T13:05:432023-01-10T16:14:491893h 9m2023-01-1046081[Hengelo - Zutphen][90]Hengelo[DDN, GO, HGL, LC, ZP, HGLG]points failureinfrastructure2023-01-10T13:17:082023-01-10T16:40:482043h 24m2023-01-1046084[Almelo - Hengelo][89]Hengelo[AML, AMRI, BN, HGL]points failureinfrastructure2023-01-10T16:15:352023-01-10T16:55:013939m2023-01-1046130[Hengelo - Zutphen][90]Hengelo[DDN, GO, HGL, LC, ZP, HGLG]strike of Keolis staffstaff2023-01-13T05:38:382023-01-14T01:30:10119219h 52m2023-01-1346160[Almelo - Hengelo, Almelo - Zwolle, E...[89, 92, 95]Hengelo[AML, AMRI, BN, ES, HGL, HNO, NVD, RA...staffing problemsstaff2023-01-15T13:56:072023-01-15T20:19:173836h 23m2023-01-15... showing only top 5 of 217 rows",,,,,,,,,,
id,lines,linesId,stationNames,stationCodes,statisticalCause,causeGroup,startTime,endTime,durationMinutes,duration,date
46080,[Almelo - Hengelo],[89],Hengelo,"[AML, AMRI, BN, HGL]",points failure,infrastructure,2023-01-10T13:05:43,2023-01-10T16:14:49,189,3h 9m,2023-01-10

id,lines,linesId,stationNames,stationCodes,statisticalCause,causeGroup,startTime,endTime,durationMinutes,duration,date
46001,[Aachen Hbf - Heerlen],[130],Heerlen,"[AHBF, EGHM, HRL, HRLK, HZ, LG, AW]",problems with the rolling stock,rolling stock,2023-01-01T13:19:24,2023-01-02T00:02:39,643,10h 43m,2023-01-01
46003,[Aachen Hbf - Heerlen],[130],Heerlen,"[AHBF, EGHM, HRL, HRLK, HZ, LG, AW]",problems with the rolling stock,rolling stock,2023-01-02T05:57:27,2023-01-03T02:07:13,1210,20h 10m,2023-01-02
46019,[Aachen Hbf - Heerlen],[130],Heerlen,"[AHBF, EGHM, HRL, HRLK, HZ, LG, AW]",problems with the rolling stock,rolling stock,2023-01-03T07:26:33,2023-01-04T04:34:14,1268,21h 8m,2023-01-03
46025,[Aachen Hbf - Heerlen],[130],Heerlen,"[AHBF, EGHM, HRL, HRLK, HZ, LG, AW]",problems with the rolling stock,rolling stock,2023-01-04T05:51:17,2023-01-06T00:00:03,2529,1d 18h 9m,2023-01-04
46049,[Aachen Hbf - Heerlen],[130],Heerlen,"[AHBF, EGHM, HRL, HRLK, HZ, LG, AW]",problems with the rolling stock,rolling stock,2023-01-06T05:32:17,2023-01-06T05:41:27,9,9m,2023-01-06

id,lines,linesId,stationNames,stationCodes,statisticalCause,causeGroup,startTime,endTime,durationMinutes,duration,date
46080,[Almelo - Hengelo],[89],Hengelo,"[AML, AMRI, BN, HGL]",points failure,infrastructure,2023-01-10T13:05:43,2023-01-10T16:14:49,189,3h 9m,2023-01-10
46081,[Hengelo - Zutphen],[90],Hengelo,"[DDN, GO, HGL, LC, ZP, HGLG]",points failure,infrastructure,2023-01-10T13:17:08,2023-01-10T16:40:48,204,3h 24m,2023-01-10
46084,[Almelo - Hengelo],[89],Hengelo,"[AML, AMRI, BN, HGL]",points failure,infrastructure,2023-01-10T16:15:35,2023-01-10T16:55:01,39,39m,2023-01-10
46130,[Hengelo - Zutphen],[90],Hengelo,"[DDN, GO, HGL, LC, ZP, HGLG]",strike of Keolis staff,staff,2023-01-13T05:38:38,2023-01-14T01:30:10,1192,19h 52m,2023-01-13
46160,"[Almelo - Hengelo, Almelo - Zwolle, E...","[89, 92, 95]",Hengelo,"[AML, AMRI, BN, ES, HGL, HNO, NVD, RA...",staffing problems,staff,2023-01-15T13:56:07,2023-01-15T20:19:17,383,6h 23m,2023-01-15

id,lines,linesId,stationNames,stationCodes,statisticalCause,causeGroup,startTime,endTime,durationMinutes,duration,date
46218,"[Bielefeld Hbf - Hengelo, Hengelo - O...","[91, 174]",Oldenzaal,"[BH, HGL, HGLO, ODZ, OSNH, RHEINE, BI...",strike of Keolis staff,staff,2023-01-19T03:15:06,2023-01-19T09:31:58,377,6h 17m,2023-01-19
46249,"[Berlin Ostbahnhof - Hengelo, Bielefe...","[90, 91, 124, 174]",Oldenzaal,"[DDN, GO, HGL, HGLO, LC, ODZ, ZP, HGLG]",strike of Keolis staff,staff,2023-01-20T08:07:33,2023-01-21T01:13:56,1026,17h 6m,2023-01-20
46335,"[Berlin Ostbahnhof - Hengelo, Bielefe...","[90, 91, 124, 174]",Oldenzaal,"[DDN, GO, HGL, HGLO, LC, ODZ, ZP, HGLG]",staffing problems,staff,2023-01-27T13:31:31,2023-01-28T00:38:52,667,11h 7m,2023-01-27
46444,"[Berlin Ostbahnhof - Hengelo, Bielefe...","[90, 91, 124, 174]",Oldenzaal,"[DDN, GO, HGL, HGLO, LC, ODZ, ZP, HGLG]",staffing problems,staff,2023-02-04T14:43:15,2023-02-04T23:59:52,557,9h 17m,2023-02-04
46457,"[Berlin Ostbahnhof - Hengelo, Bielefe...","[90, 91, 124, 174]",Oldenzaal,"[DDN, GO, HGL, HGLO, LC, ODZ, ZP, HGLG]",logistical limitations,logistical,2023-02-05T14:32:39,2023-02-05T23:48:01,555,9h 15m,2023-02-05

id,lines,linesId,stationNames,stationCodes,statisticalCause,causeGroup,startTime,endTime,durationMinutes,duration,date
46218,"[Bielefeld Hbf - Hengelo, Hengelo - O...","[91, 174]",Hengelo Oost,"[BH, HGL, HGLO, ODZ, OSNH, RHEINE, BI...",strike of Keolis staff,staff,2023-01-19T03:15:06,2023-01-19T09:31:58,377,6h 17m,2023-01-19
46249,"[Berlin Ostbahnhof - Hengelo, Bielefe...","[90, 91, 124, 174]",Hengelo Oost,"[DDN, GO, HGL, HGLO, LC, ODZ, ZP, HGLG]",strike of Keolis staff,staff,2023-01-20T08:07:33,2023-01-21T01:13:56,1026,17h 6m,2023-01-20
46335,"[Berlin Ostbahnhof - Hengelo, Bielefe...","[90, 91, 124, 174]",Hengelo Oost,"[DDN, GO, HGL, HGLO, LC, ODZ, ZP, HGLG]",staffing problems,staff,2023-01-27T13:31:31,2023-01-28T00:38:52,667,11h 7m,2023-01-27
46444,"[Berlin Ostbahnhof - Hengelo, Bielefe...","[90, 91, 124, 174]",Hengelo Oost,"[DDN, GO, HGL, HGLO, LC, ODZ, ZP, HGLG]",staffing problems,staff,2023-02-04T14:43:15,2023-02-04T23:59:52,557,9h 17m,2023-02-04
46457,"[Berlin Ostbahnhof - Hengelo, Bielefe...","[90, 91, 124, 174]",Hengelo Oost,"[DDN, GO, HGL, HGLO, LC, ODZ, ZP, HGLG]",logistical limitations,logistical,2023-02-05T14:32:39,2023-02-05T23:48:01,555,9h 15m,2023-02-05

id,lines,linesId,stationNames,stationCodes,statisticalCause,causeGroup,startTime,endTime,durationMinutes,duration,date
46060,[Kampen - Zwolle],[49],Zwolle,"[KPN, ZL, ZLSH]",staffing problems,staff,2023-01-07T09:15:50,2023-01-07T10:31:07,75,1h 15m,2023-01-07
46064,"[Groningen - Zwolle, Leeuwarden - Zwo...","[146, 160]",Zwolle,"[MP, ZL]",collision,accidents,2023-01-07T20:35:58,2023-01-08T01:51:56,316,5h 16m,2023-01-07
46069,"[Groningen - Zwolle, Leeuwarden - Zwo...","[146, 160]",Zwolle,"[MP, ZL]",people on the railway track,external,2023-01-09T09:49:42,2023-01-09T10:08:09,18,18m,2023-01-09
46094,[Kampen - Zwolle],[49],Zwolle,"[KPN, ZL, ZLSH]",signal failure,infrastructure,2023-01-11T10:48,2023-01-11T11:32:39,45,45m,2023-01-11
46110,[Deventer - Zwolle],[94],Zwolle,"[DV, OST, WH, ZL]",broken down train,rolling stock,2023-01-12T11:06:03,2023-01-12T11:24:44,19,19m,2023-01-12

id,lines,linesId,stationNames,stationCodes,statisticalCause,causeGroup,startTime,endTime,durationMinutes,duration,date
46220,"[Liège-Guillemins - Maastricht, Maast...","[73, 74, 75, 121]",Maastricht,"[BDE, BK, EC, LUT, MT, MTR, RM, SRN, ...",strike of Arriva staff,staff,2023-01-19T03:26:19,2023-01-20T16:18:44,2212,1d 12h 52m,2023-01-19
46259,"[Liège-Guillemins - Maastricht, Maast...","[73, 74, 75, 121]",Maastricht,"[BDE, BK, EC, LUT, MT, MTR, RM, SRN, ...",strike of Arriva staff,staff,2023-01-20T16:20:21,2023-01-21T01:44:42,564,9h 24m,2023-01-20
46274,"[Heerlen - Kerkrade Centrum, Heerlen ...","[75, 76, 78, 121]",Maastricht,"[CVM, EGH, HRL, HRLW, HRLK, KMR, KRD,...",weather circumstances,weather,2023-01-21T09:59:42,2023-01-21T13:15:13,196,3h 16m,2023-01-21
46368,[Maastricht - Sittard],[74],Maastricht,"[BDE, BK, LUT, MT, STD]",collision,accidents,2023-01-30T13:12:37,2023-01-30T16:54:39,222,3h 42m,2023-01-30
46458,"[Liège-Guillemins - Maastricht, Maast...","[75, 121]",Maastricht,"[BRESSX, EDN, FVS, LUIK, MT, MTR]",staff strikes abroad,staff,2023-02-05T15:39:26,2023-02-06T00:18:05,519,8h 39m,2023-02-05

id,lines,linesId,stationNames,stationCodes,statisticalCause,causeGroup,startTime,endTime,durationMinutes,duration,date
45999,[Amsterdam Centraal - Schiphol Airpor...,"[24, 32]",Amsterdam Centraal,"[ASD, ASDL, ASS, RTD, SHL]",points failure,infrastructure,2023-01-01T08:19:26,2023-01-01T22:43:08,864,14h 24m,2023-01-01
46008,[Amsterdam Centraal - Schiphol Airpor...,"[24, 32]",Amsterdam Centraal,"[ASD, ASDL, ASS, RTD, SHL]",broken down train,rolling stock,2023-01-02T12:40:11,2023-01-02T13:08:08,28,28m,2023-01-02
46011,[Amsterdam Centraal - Utrecht Centraal],[136],Amsterdam Centraal,"[AC, ASA, ASB, ASD, ASDM, ASHD, BKL, ...",an animal on the railway track,external,2023-01-02T16:05:42,2023-01-02T17:29:03,83,1h 23m,2023-01-02
46018,"[Amersfoort - Amsterdam Centraal, Ams...","[135, 145]",Amsterdam Centraal,"[ASD, ASDM, ASSP, DMN, WP]",damaged overhead wires,infrastructure,2023-01-03T05:57:26,2023-01-03T06:15:27,18,18m,2023-01-03
46061,[Amsterdam Centraal - Lelystad Centrum],[145],Amsterdam Centraal,"[ALM, ALMM, ASD, ASDM, ASSP, DMN, WP,...",broken down train,rolling stock,2023-01-07T14:22:30,2023-01-07T15:28:09,66,1h 6m,2023-01-07

id,lines,linesId,stationNames,stationCodes,statisticalCause,causeGroup,startTime,endTime,durationMinutes,duration,date
46218,"[Bielefeld Hbf - Hengelo, Hengelo - O...","[91, 174]",Bad Bentheim,"[BH, HGL, HGLO, ODZ, OSNH, RHEINE, BI...",strike of Keolis staff,staff,2023-01-19T03:15:06,2023-01-19T09:31:58,377,6h 17m,2023-01-19
46467,"[Berlin Ostbahnhof - Hengelo, Bielefe...","[91, 124, 174]",Bad Bentheim,"[BH, HGL, HGLO, ODZ]",strike of Keolis staff,staff,2023-02-06T02:46:22,2023-02-06T07:26:14,280,4h 40m,2023-02-06
46499,"[Bielefeld Hbf - Hengelo, Hengelo - O...","[91, 174]",Bad Bentheim,"[BH, HGL, HGLO, ODZ, OSNH, RHEINE, BI...",strike of Keolis staff,staff,2023-02-07T11:35:06,2023-02-11T01:36:14,5161,3d 14h 1m,2023-02-07
46511,"[Berlin Ostbahnhof - Hengelo, Bielefe...","[91, 124, 174]",Bad Bentheim,"[BH, HGL, HGLO, ODZ]",broken down train,rolling stock,2023-02-08T07:09:15,2023-02-08T07:20:59,12,12m,2023-02-08
46632,"[Bielefeld Hbf - Hengelo, Hengelo - O...","[91, 174]",Bad Bentheim,"[BH, HGL, HGLO, ODZ, OSNH, RHEINE, BI...",problems with the rolling stock,rolling stock,2023-02-18T06:38:21,2023-02-18T07:19:58,42,42m,2023-02-18

id,lines,linesId,stationNames,stationCodes,statisticalCause,causeGroup,startTime,endTime,durationMinutes,duration,date
45999,[Amsterdam Centraal - Schiphol Airpor...,"[24, 32]",Rotterdam Centraal,"[ASD, ASDL, ASS, RTD, SHL]",points failure,infrastructure,2023-01-01T08:19:26,2023-01-01T22:43:08,864,14h 24m,2023-01-01
46007,[Breda - Rotterdam Centraal (HSL)],[15],Rotterdam Centraal,"[BD, RTD]",stranded train,rolling stock,2023-01-02T11:50:11,2023-01-02T12:25:39,35,35m,2023-01-02
46008,[Amsterdam Centraal - Schiphol Airpor...,"[24, 32]",Rotterdam Centraal,"[ASD, ASDL, ASS, RTD, SHL]",broken down train,rolling stock,2023-01-02T12:40:11,2023-01-02T13:08:08,28,28m,2023-01-02
46022,[Rotterdam Centraal - Schiphol Airpor...,[24],Rotterdam Centraal,"[RTD, SHL]",broken down train,rolling stock,2023-01-03T14:54:42,2023-01-03T15:02:54,8,8m,2023-01-03
46030,[Breda - Rotterdam Centraal (HSL)],[15],Rotterdam Centraal,"[BD, RTD]",person on the railway track,external,2023-01-04T09:15:07,2023-01-04T12:37:35,202,3h 22m,2023-01-04

id,lines,linesId,stationNames,stationCodes,statisticalCause,causeGroup,startTime,endTime,durationMinutes,duration,date
46001,[Aachen Hbf - Heerlen],[130],Herzogenrath,"[AHBF, EGHM, HRL, HRLK, HZ, LG, AW]",problems with the rolling stock,rolling stock,2023-01-01T13:19:24,2023-01-02T00:02:39,643,10h 43m,2023-01-01
46003,[Aachen Hbf - Heerlen],[130],Herzogenrath,"[AHBF, EGHM, HRL, HRLK, HZ, LG, AW]",problems with the rolling stock,rolling stock,2023-01-02T05:57:27,2023-01-03T02:07:13,1210,20h 10m,2023-01-02
46019,[Aachen Hbf - Heerlen],[130],Herzogenrath,"[AHBF, EGHM, HRL, HRLK, HZ, LG, AW]",problems with the rolling stock,rolling stock,2023-01-03T07:26:33,2023-01-04T04:34:14,1268,21h 8m,2023-01-03
46025,[Aachen Hbf - Heerlen],[130],Herzogenrath,"[AHBF, EGHM, HRL, HRLK, HZ, LG, AW]",problems with the rolling stock,rolling stock,2023-01-04T05:51:17,2023-01-06T00:00:03,2529,1d 18h 9m,2023-01-04
46049,[Aachen Hbf - Heerlen],[130],Herzogenrath,"[AHBF, EGHM, HRL, HRLK, HZ, LG, AW]",problems with the rolling stock,rolling stock,2023-01-06T05:32:17,2023-01-06T05:41:27,9,9m,2023-01-06


In [20]:
top10.boxplot {
    x(stationNames named "name")
    y(durationMinutes)
}.configure {
    y { scale = continuous(transform = Transformation.LOG10) }

    layout {
        size = 1000 to 500
    }
}

## Do I have the right to complain about Dutch trains in a demo?