Referencing NuGet packages

In [14]:
#r "nuget:Csv"
#r "nuget:XPlot.Plotly"
#r "nuget:Deedle"

Main code

In [15]:
using Csv;
using System.IO;
using System;
using System.Globalization;
using System.Linq;
using System.Collections.Generic;


// Sample-set folder (under Data/Samples) and the line whose departures file is analysed.
var folder = "GoodLines";
var lineName = "837";

// Path.Combine uses the platform's directory separator, so the notebook also runs on
// Linux/macOS kernels where a hard-coded '\' would be treated as part of the file name.
var csvPath = Path.Combine("Data", "Samples", folder, $"Departures-{lineName}.csv");
var csv = File.ReadAllText(csvPath);

/// <summary>
/// A single arrival observation: when a vehicle was scheduled to arrive at a stop
/// and when it actually arrived.
/// </summary>
class ArrivalInfo
{
    /// <summary>Operating day the observation belongs to.</summary>
    public DateTime OperatingDay { get; set; }

    /// <summary>Value of the record's <c>IrsId</c> field.</summary>
    public string IrsId { get; set; }

    /// <summary>Value of the record's <c>Direction</c> field.</summary>
    public int Direction { get; set; }

    /// <summary>Value of the record's <c>IrsStopCode</c> field.</summary>
    public string IrsStopCode { get; set; }

    /// <summary>Actual arrival time.</summary>
    public DateTime ArrivalTime { get; set; }

    /// <summary>Arrival time according to the schedule.</summary>
    public DateTime ScheduledArrivalTime { get; set; }

    /// <summary>
    /// Difference between actual and scheduled arrival in whole seconds
    /// (fraction truncated toward zero). Positive means the arrival was late,
    /// negative means it was early.
    /// </summary>
    public int Deviation
    {
        get
        {
            TimeSpan offsetFromSchedule = ArrivalTime - ScheduledArrivalTime;
            return (int)offsetFromSchedule.TotalSeconds;
        }
    }
}

// Parsed arrival records. Rows missing either the scheduled or the actual arrival
// time are skipped because no deviation can be computed for them.
var data = new List<ArrivalInfo>();

// Header is handled by the reader, each line will contain the actual row data,
// with cells addressed by column name.
foreach (var line in CsvReader.ReadFromText(csv))
{
    var strScheduledArrivalTime = line["ScheduledArrivalTime"];
    var strArrivalTime = line["ArrivalTime"];

    // Without this guard a blank ArrivalTime cell would make DateTime.Parse throw
    // and abort the whole import.
    if (string.IsNullOrEmpty(strScheduledArrivalTime) || string.IsNullOrEmpty(strArrivalTime))
    {
        continue;
    }

    // The file is machine-generated data, so parse every field with the invariant
    // culture (as OperatingDay already is) rather than with whatever culture the
    // notebook kernel happens to run under.
    var scheduledArrivalTime = DateTime.Parse(strScheduledArrivalTime, CultureInfo.InvariantCulture);
    var arrivalTime = DateTime.Parse(strArrivalTime, CultureInfo.InvariantCulture);
    var operatingDay = DateTime.ParseExact(line["OperatingDay"], "yyyy-MM-dd", CultureInfo.InvariantCulture);
    var direction = int.Parse(line["Direction"], CultureInfo.InvariantCulture);

    data.Add(new ArrivalInfo
    {
        OperatingDay = operatingDay,
        IrsId = line["IrsId"],
        Direction = direction,
        IrsStopCode = line["IrsStopCode"],
        ArrivalTime = arrivalTime,
        ScheduledArrivalTime = scheduledArrivalTime,
    });
}

display($"Successfully read data: {data.Count} records");

Successfully read data: 55637 records

In [16]:
using Deedle;
// Build a Deedle frame from the parsed records, then pull out the "Deviation"
// column as an integer series for the statistics below.
Frame<int, string> frame = Frame.FromRecords(data);
Series<int, int> deviationSeries = frame.GetColumn<int>("Deviation");

Calculate some stats

In [17]:
// Summary statistics of the deviation series (deviations are in seconds).
// These values are reused by the bucket and plotting cells further down.
var minD = Stats.min(deviationSeries);
var maxD = Stats.max(deviationSeries);
var meanD = Stats.mean(deviationSeries);
var stdDevD = Stats.stdDev(deviationSeries);
var medianD = Stats.median(deviationSeries);
// Returning the tuple makes it the cell's result, so the notebook renders it as output.
return (minD, maxD, meanD, medianD, stdDevD);

Item1,Item2,Item3,Item4,Item5
-349,1780,70.54125851501699,46,144.65238568710708


In [18]:
// Histogram resolution: the range from minD to maxD is divided into this many
// equal-width buckets.
var bucketsNumber = 100;
var bucketSize = (double)(maxD - minD) / bucketsNumber;

// Maps a deviation to the index of the bucket it falls into (minD maps to 0).
int getBucket(int value)
{
    var offsetFromMin = (double)value - minD;
    return (int)(offsetFromMin / bucketSize);
}

// Number of observations per bucket, and the size of the fullest bucket.
// The latter is used as the height of the marker lines in the chart.
var distribution = deviationSeries
    .GroupBy(observation => getBucket(observation.Value))
    .Select(bucket => bucket.Value.KeyCount);
var maxBucket = distribution.Values.Max();

In [19]:
using XPlot.Plotly;

In [22]:
// Histogram of all deviations, binned with the same start and width that
// getBucket uses so the marker lines below match the bar heights.
var hist = 
    new Graph.Histogram
    {
        x = deviationSeries.Values, 
        xbins = new Graph.Xbins { start = minD, end = maxD, size = bucketSize }, 
        marker = new Graph.Marker { color = "yellow", line = new Graph.Line { color = "gray", width = 1 }},
        opacity = 0.75, 
        name = "Distribution"
    };

// Vertical line at zero deviation (arrival exactly on schedule).
var zero =
    new Graph.Scatter
    {
        x = new[] { 0, 0 },
        y = new[] { 0, maxBucket },
        name = "Zero",
    };

// Vertical line at the mean deviation.
var mean = 
    new Graph.Scatter
    {
        x = new[] { meanD, meanD },
        y = new[] { 0, maxBucket },
        name = "Mean",
    };

// Vertical line at the median deviation.
var median = 
    new Graph.Scatter
    {
        x = new[] { medianD, medianD },
        y = new[] { 0, maxBucket },
        name = "Median",
    };

// U-shaped outline spanning one standard deviation on either side of the mean:
// down the left edge, along the baseline, and up the right edge.
var stdDev = 
    new Graph.Scatter
    {
        x = new[] { meanD-stdDevD, meanD-stdDevD, meanD+stdDevD, meanD+stdDevD },
        y = new[] { maxBucket, 0, 0, maxBucket },
        name = "StdDev",
    };

var traces = new Graph.Trace[] { hist, mean, median, stdDev, zero };

var plot = Chart.Plot(traces);  
plot.WithXTitle("Deviation");
// Axis title previously read "Numner of arrivals" (typo).
plot.WithYTitle("Number of arrivals");
plot.WithTitle($"Distribution of Arrivals Deviation from Schedule (in sec) for Line {lineName} [{folder}]");
// Trailing expression without a semicolon: the notebook renders the chart as the cell result.
plot

In [21]:
// One-line textual recap of the headline statistics.
var statsSummary = $"Min: {minD}, Max: {maxD}, Mean: {meanD}, stDev: {stdDevD}";
display(statsSummary);

Min: -349, Max: 1780, Mean: 70.54125851501699, stDev: 144.65238568710708