Referencing nuget packages

In [3]:
#r "nuget:XPlot.Plotly"
#r "nuget:Deedle"

Main code

In [69]:
using Deedle;
using System.IO;
using System;
using System.Globalization;
using System.Linq;
using System.Collections.Generic;

Frame<int, string> LoadData(string folder, string lineName)
{
    var frame = 
        Frame
            .ReadCsv(string.Format(@"Data\Samples\{1}\Departures-{0}.csv", lineName, folder))
            .Where(kvp => !string.IsNullOrEmpty(kvp.Value.GetAs<string>("ScheduledArrivalTime")));

    display($"Successfully read data: {frame.RowCount} records");
    
    var sa = frame.GetColumn<string>("ScheduledArrivalTime").Values.ToArray();
    var aa = frame.GetColumn<string>("ArrivalTime").Values.ToArray();
    var deviation = sa.Select((s, i) => (int)(DateTime.Parse(aa[i]) - DateTime.Parse(s)).TotalSeconds).ToArray();
    frame.AddColumn("Deviation", deviation);    
    
    return frame;
}

In [82]:
public class StatsValues
{
    public double Min      { get; set; }
    public double Max      { get; set; }
    public double Mean     { get; set; }
    public double Median   { get; set; }
    public double StdDev   { get; set; }
    public string Folder   { get; set; }
    public string LineName { get; set; }
}

StatsValues CalcStats(Series<int, int> series, string folder, string lineName) => new StatsValues 
{
    Min      = Stats.min(series),
    Max      = Stats.max(series),
    Mean     = Stats.mean(series),
    StdDev   = Stats.stdDev(series),
    Median   = Stats.median(series),
    Folder   = folder,
    LineName = lineName,
};

In [70]:
var sources = new[]
{
    ("BadLines",       new[] { 490, 675, 677, 681, 671 } ),
    ("GoodLines",      new[] { 783, 834, 782, 837, 785 } ),
    ("InnerCityLines", new[] { 403, 505, 216, 906, 302 } ),
};

In [72]:
var allFrames =
    sources
        .SelectMany(x => x.Item2.Select(ln => new { folder = x.Item1, lineName = ln.ToString() }))
        .ToArray()
        .Select(x => new { frame = LoadData(x.folder, x.lineName), x.folder, x.lineName })
        .ToDictionary(x => x.lineName);

Successfully read data: 16114 records

Successfully read data: 10756 records

Successfully read data: 27804 records

Successfully read data: 50754 records

Successfully read data: 54641 records

Successfully read data: 17251 records

Successfully read data: 231720 records

Successfully read data: 154610 records

Successfully read data: 55637 records

Successfully read data: 164520 records

Successfully read data: 3104 records

Successfully read data: 45695 records

Successfully read data: 507730 records

Successfully read data: 541946 records

Successfully read data: 304044 records

In [197]:
var allStats = 
    allFrames
        .Values
        .Select(x => CalcStats(x.frame.GetColumn<int>("Deviation"), x.folder, x.lineName))
        .OrderBy(x => x.Mean)
        .ToArray();
display(allStats);
var allStatsFrame = Frame.FromRecords(allStats);

index,Min,Max,Mean,Median,StdDev,Folder,LineName
0,-881,2341,60.06255430060817,89,243.643919776572,BadLines,490
1,-538,3349,66.14143041237114,-34,417.53698831794,InnerCityLines,403
2,-349,1780,70.54125851501699,46,144.65238568710708,GoodLines,837
3,-4448,1887,91.17078456870397,45,224.2982717043588,BadLines,681
4,-2534,7659,103.98105715329572,63,247.38620040520533,InnerCityLines,906
5,-947,5146,106.67408664535397,65,241.4356113285932,InnerCityLines,302
6,-1153,4225,107.02586716270926,57,290.7971000078602,InnerCityLines,505
7,-4722,5029,116.00091921284309,70,199.58899838047952,GoodLines,834
8,-692,1815,137.2573184163237,124,228.1809293259011,GoodLines,783
9,-170,1712,140.37086277426553,105,195.7610331840488,BadLines,675


In [73]:
var folder = "BadLines";
var lineName = "681";

//var frame = LoadData(folder, lineName);
var frame = allFrames[lineName].frame;

In [32]:
(int, double, double) CalcBuckets(Series<int, int> series)
{
    var bucketsNumber = 100;
    var bucketSize = (double)(maxD - minD) / bucketsNumber;

    int getBucket(int value) => (int)(((double)value - minD) / bucketSize);

    var distribution = series.GroupBy(kvp => getBucket(kvp.Value)).Select(kvp => kvp.Value.KeyCount);
    var maxBucket = distribution.Values.Max();    
    
    return (bucketsNumber, bucketSize, maxBucket);
}

Filtering function

In [43]:
static Frame<int, string> Filter(this Frame<int, string> frame, params Func<KeyValuePair<int, ObjectSeries<string>>, bool>[] conditions)
{
    var result = frame;
    foreach (var condition in conditions)
        result = result.Where(condition);

    return result;
}

In [199]:
using XPlot.Plotly;

PlotlyChart MakePlot(Series<int, int> series, string forWhat)
{
    var title = $"Distribution of Arrivals Deviation from Schedule (in sec) for {forWhat}";
    
    var stats = CalcStats(series, "", "");
    var (bucketsNumber, bucketSize, maxBucket) = CalcBuckets(series);
    
    display(stats);
    
    var hist = 
        new Graph.Histogram
        {
            x = series.Values, 
            xbins   = new Graph.Xbins { start = stats.Min, end = stats.Max, size = bucketSize }, 
            marker  = new Graph.Marker { color = "yellow", line = new Graph.Line { color = "gray", width = 1 }},
            opacity = 0.75, 
            name = "Distribution"
        };

    var zero   = new Graph.Scatter { name = "Zero",   x = new[] { 0, 0 }, y = new[] { 0, maxBucket }, };
    var mean   = new Graph.Scatter { name = "Mean",   x = new[] { stats.Mean, stats.Mean }, y = new[] { 0, maxBucket }, };
    var median = new Graph.Scatter { name = "Median", x = new[] { stats.Median, stats.Median }, y = new[] { 0, maxBucket }, };

    var stdDev = 
        new Graph.Scatter
        {
            x = new[] { stats.Mean-stats.StdDev, stats.Mean-stats.StdDev, stats.Mean+stats.StdDev, stats.Mean+stats.StdDev },
            y = new[] { maxBucket, 0, 0, maxBucket },
            name = "StdDev",
        };

    var traces = new Graph.Trace[] { hist, mean, median, stdDev, zero };

    var plot = Chart.Plot(traces);  
    plot.WithXTitle("Deviation");
    plot.WithYTitle("Numner of arrivals");
    plot.WithTitle(title);
    plot.WithWidth(1200);
    plot.WithHeight(900);
    return plot;
}

In [200]:
var direction = 2;
var stopCode = 14189;

var lineTitle = $"Line {lineName} [{folder}]";
var subTitle = $"Direction {direction}, StopCode {stopCode}";

var frame2 = 
    frame.Filter(
        kvp => kvp.Value.GetAs<int>("Direction") == direction,
        kvp => kvp.Value.GetAs<int>("IrsStopCode") == stopCode);

var deviationSeries = frame.GetColumn<int>("Deviation");

var plot = MakePlot(deviationSeries, subTitle);
display(plot);

Min,Max,Mean,Median,StdDev,Folder,LineName
-4448,1887,91.17078456870397,45,224.2982717043588,,


In [11]:
var stops = frame.GetColumn<string>("IrsStopCode").Values.Distinct().ToArray();
display(stops);

index,value
0,15219
1,14191
2,14190
3,14189
4,14188
5,14187
6,14186
7,14184
8,14183
9,14182
