Referencing NuGet packages

In [1]:
#r "nuget:XPlot.Plotly"
#r "nuget:Deedle"
#r "nuget:DotNetZip"
#r "nuget:NodaTime"

**Type in password**

In [2]:
// Password applied to the encrypted sample archives when they are opened in LoadData.
// Replace the placeholder before running the notebook.
var password = "type password here";

In [3]:
using System.Linq;
using System.Collections.Generic;

// Builds a source list that contains a single folder holding a single line number.
(string, int[])[] SingleLineSource(string folder, int lineName)
{
    var lines = new[] { lineName };
    return new[] { (folder, lines) };
}
// Returns every sample folder paired with the line numbers listed for it.
(string, int[])[] AllLinesSource()
{
    var badLines       = new[] { 490, 675, 677, 681, 671 };
    var goodLines      = new[] { 783, 834, 782, 837, 785 };
    var innerCityLines = new[] { 403, 505, 216, 906, 302 };

    return new[]
    {
        ("BadLines", badLines),
        ("GoodLines", goodLines),
        ("InnerCityLines", innerCityLines),
    };
}

**Decide whether you want to work with a single line or with all of them at once, and uncomment the corresponding line of code.**

In [73]:
// Choose the data set to load: keep exactly one of the two assignments active.
// Single line: only line 490 from the "BadLines" folder.
var sources = SingleLineSource("BadLines", 490);
// All lines: every folder and line returned by AllLinesSource().
//var sources = AllLinesSource();

In [74]:
// Stop codes to keep when loading data; LoadData applies no stop filter when the array is empty.
// Example restricting the data to a single stop:
//var stops = new string[] { "40866" };
var stops = new string[] {};

Main code

In [78]:
using Deedle;
using Ionic.Zip;
using System.IO;
using System;
using System.Globalization;
using NodaTime;

// Time zone used by LoadData to localize scheduled arrival times.
// The provider indexer throws DateTimeZoneNotFoundException right here when the id is
// unknown, instead of handing a null zone to InZone(...) later.
var tz = DateTimeZoneProviders.Tzdb["Australia/Melbourne"];

// Loads the departures CSV for one line from its password-protected zip archive.
//
// folder   - sub-folder of Data/Samples that contains the archive.
// lineName - line identifier used in the archive file name (Departures-{lineName}.zip).
// stops    - stop codes to keep; an empty array keeps every stop.
//
// Returns a frame holding the rows that have a non-empty ScheduledArrivalTime (and a
// matching IrsStopCode when stops is non-empty), extended with three computed columns:
//   Deviation          - ArrivalTime minus ScheduledArrivalTime, in whole seconds;
//   OperatingDayNT     - OperatingDay as a NodaTime LocalDate;
//   ScheduledTimeZoned - ScheduledArrivalTime converted to the global time zone tz.
Frame<int, string> LoadData(string folder, string lineName, string[] stops)
{
    // ZipFile is IDisposable: dispose it so the archive's file handle is released as soon
    // as the entry has been read (previously it was left open until finalization).
    using (var zip = ZipFile.Read($"Data/Samples/{folder}/Departures-{lineName}.zip"))
    {
        zip.Password = password;
        // Only the first entry of the archive is read.
        using (var stream = zip.Entries.First().OpenReader())
        {
            var frame = 
                Frame
                    .ReadCsv(stream)
                    .Where(kvp => !string.IsNullOrEmpty(kvp.Value.GetAs<string>("ScheduledArrivalTime")))
                    .Where(kvp => stops.Length == 0 || stops.Contains(kvp.Value.GetAs<string>("IrsStopCode")));

            display($"Successfully read data: {frame.RowCount} records");

            // NOTE(review): the two arrays are matched by position, which assumes every
            // remaining row also has an ArrivalTime value - confirm against the data.
            var sa = frame.GetColumn<string>("ScheduledArrivalTime").Values.ToArray();
            var aa = frame.GetColumn<string>("ArrivalTime").Values.ToArray();
            var deviation = sa.Select((s, i) => (int)(DateTime.Parse(aa[i]) - DateTime.Parse(s)).TotalSeconds).ToArray();
            frame.AddColumn("Deviation", deviation);

            var operatingDay = frame.GetColumn<DateTime>("OperatingDay").Values.Select(x => new LocalDate(x.Year, x.Month, x.Day)).ToArray();
            frame.AddColumn("OperatingDayNT", operatingDay);

            // Parse as UTC first, then project the instant into the configured zone.
            var scheduledTimeZoned = frame.GetColumn<string>("ScheduledArrivalTime").Values.Select(x => DateTime.Parse(x, styles: DateTimeStyles.AdjustToUniversal)).Select(Instant.FromDateTimeUtc).Select(x => x.InZone(tz)).ToArray();
            frame.AddColumn("ScheduledTimeZoned", scheduledTimeZoned);

            return frame;
        }
    }
}

In [79]:
// Summary statistics for one series of values.
// Property order is kept as declared because it determines the column order when
// instances are displayed or converted to a frame.
public class StatsValues
{
    // Smallest value.
    public double Min { get; set; }

    // Largest value.
    public double Max { get; set; }

    // Arithmetic mean.
    public double Mean { get; set; }

    // Median value.
    public double Median { get; set; }

    // Standard deviation.
    public double StdDev { get; set; }

    // Number of values the statistics were computed from.
    public double NumberOfValues { get; set; }
}

// Statistics tagged with the line they belong to and up to four free-form titles
// that describe how the underlying data was selected.
public class NamedStatsValues : StatsValues
{
    // Name of the line the statistics describe.
    public string LineName { get; set; }

    // First descriptive title.
    public string Title1 { get; set; }

    // Second descriptive title.
    public string Title2 { get; set; }

    // Third descriptive title.
    public string Title3 { get; set; }

    // Fourth descriptive title.
    public string Title4 { get; set; }
}

// Computes rounded min, max, mean, standard deviation and median of the series, plus the
// number of values it holds, and labels the result with the line name and optional titles.
NamedStatsValues CalcStats(Series<int, int> series, string lineName, string title1 = null, string title2 = null, string title3 = null, string title4 = null)
{
    var result = new NamedStatsValues();

    // Every statistic is rounded to the nearest whole number.
    result.Min    = Math.Round(Stats.min(series));
    result.Max    = Math.Round(Stats.max(series));
    result.Mean   = Math.Round(Stats.mean(series));
    result.StdDev = Math.Round(Stats.stdDev(series));
    result.Median = Math.Round(Stats.median(series));
    result.NumberOfValues = series.Values.Count();

    result.LineName = lineName;
    result.Title1 = title1;
    result.Title2 = title2;
    result.Title3 = title3;
    result.Title4 = title4;

    return result;
}

In [81]:
// Load one frame per (folder, line) pair from the selected sources and index the
// loaded entries by line name.
var allFrames =
    (from source in sources
     from line in source.Item2
     select new { folder = source.Item1, lineName = line.ToString() })
    .ToArray()
    .Select(x => new { frame = LoadData(x.folder, x.lineName, stops), x.folder, x.lineName })
    .ToDictionary(x => x.lineName);
display("All Loaded!");

Successfully read data: 16114 records

All Loaded!

In [58]:
// Deviation statistics for every loaded line, ordered by ascending standard deviation.
var allStats =
    (from entry in allFrames.Values
     let lineStats = CalcStats(entry.frame.GetColumn<int>("Deviation"), entry.lineName, entry.folder)
     orderby lineStats.StdDev
     select lineStats)
    .ToArray();
display(allStats);

// The same statistics as a frame, one row per line.
var allStatsFrame = Frame.FromRecords(allStats);

index,LineName,Title1,Title2,Title3,Title4,Min,Max,Mean,Median,StdDev,NumberOfValues
0,490,BadLines,<null>,<null>,<null>,-94,2203,138,119,165,328


In [59]:
// Splits the value range of the series into 100 equal-width buckets.
//
// Returns (number of buckets, bucket width, number of values in the fullest bucket).
// Degenerate inputs are handled explicitly:
//   - an empty series yields (100, 1.0, 0) instead of throwing from Max();
//   - a series whose values are all equal uses a bucket width of 1.0, so that no
//     division by zero (and no NaN-to-int conversion) can occur.
(int, double, double) CalcBuckets(Series<int, int> series)
{
    const int bucketsNumber = 100;
    const double fallbackBucketSize = 1.0;

    var values = series.Values.ToArray();
    if (values.Length == 0)
        return (bucketsNumber, fallbackBucketSize, 0.0);

    // Only min and max are needed here, so the full statistics are not computed.
    var min = values.Min();
    var max = values.Max();

    // Widen to double before subtracting so that extreme int values cannot overflow.
    var range = (double)max - min;
    var bucketSize = range > 0 ? range / bucketsNumber : fallbackBucketSize;

    int getBucket(int value) => (int)(((double)value - min) / bucketSize);

    var maxBucket = values.GroupBy(getBucket).Max(g => g.Count());

    return (bucketsNumber, bucketSize, maxBucket);
}

Filtering function

In [60]:
// Applies the given row predicates one after another, in the order supplied, and returns
// the frame that remains. With no predicates the input frame is returned as is.
static Frame<int, string> Filter(this Frame<int, string> frame, params Func<KeyValuePair<int, ObjectSeries<string>>, bool>[] conditions)
{
    var filtered = frame;
    for (var i = 0; i < conditions.Length; i++)
    {
        filtered = filtered.Where(conditions[i]);
    }

    return filtered;
}

In [61]:
using XPlot.Plotly;

// Builds a histogram of the deviation values with vertical marker lines for zero, mean,
// median and the mean +/- one standard deviation band.
//
// series  - deviation values to plot.
// forWhat - text appended to the chart title.
// w, h    - chart width and height.
//
// The computed statistics are also displayed as a side effect.
PlotlyChart MakePlot(Series<int, int> series, string forWhat, int w, int h)
{
    var title = $"Deviation from Schedule (sec) for {forWhat}";
    
    var stats = CalcStats(series, "", "");
    // The bucket count is not needed here; only the width and the tallest bucket are.
    var (_, bucketSize, maxBucket) = CalcBuckets(series);
    
    display(stats);
    
    var hist = 
        new Graph.Histogram
        {
            x = series.Values, 
            xbins   = new Graph.Xbins { start = stats.Min, end = stats.Max, size = bucketSize }, 
            marker  = new Graph.Marker { color = "yellow", line = new Graph.Line { color = "gray", width = 1 }},
            opacity = 0.75, 
            name = "Distribution"
        };

    // Vertical lines reach up to the tallest bucket so they span the whole histogram.
    var zero   = new Graph.Scatter { name = "Zero",   x = new[] { 0, 0 }, y = new[] { 0, maxBucket }, };
    var mean   = new Graph.Scatter { name = "Mean",   x = new[] { stats.Mean, stats.Mean }, y = new[] { 0, maxBucket }, };
    var median = new Graph.Scatter { name = "Median", x = new[] { stats.Median, stats.Median }, y = new[] { 0, maxBucket }, };

    // U-shaped outline marking one standard deviation on each side of the mean.
    var stdDev = 
        new Graph.Scatter
        {
            x = new[] { stats.Mean-stats.StdDev, stats.Mean-stats.StdDev, stats.Mean+stats.StdDev, stats.Mean+stats.StdDev },
            y = new[] { maxBucket, 0, 0, maxBucket },
            name = "StdDev",
        };

    var traces = new Graph.Trace[] { hist, mean, median, stdDev, zero };

    var plot = Chart.Plot(traces);  
    plot.WithXTitle("Deviation");
    // Axis title typo fixed ("Numner" -> "Number").
    plot.WithYTitle("Number of arrivals");
    plot.WithTitle(title);
    plot.WithWidth(w);
    plot.WithHeight(h);
    return plot;
}

In [62]:
// Converts a DateTime to a NodaTime LocalDate, keeping only year, month and day.
static LocalDate toLocalDate(this DateTime dt)
{
    return new LocalDate(dt.Year, dt.Month, dt.Day);
}

// Returns the loaded frame for the given line together with a "<line>, <folder>" title.
// When lineName is null the first loaded entry is used.
(Frame<int, string>, string) GetFrame(string lineName = null)
{
    var entry = lineName == null
        ? allFrames.First().Value
        : allFrames[lineName];

    return (entry.frame, $"{entry.lineName}, {entry.folder}");
}

// Narrows a frame by the optional criteria and extends the title with a description of
// every criterion that was applied.
//
// direction - keep rows whose Direction equals this value.
// stopCode  - keep rows whose IrsStopCode equals this value.
// dayOfWeek - keep rows whose operating day falls on this ISO day of week.
// hrFrom    - keep rows scheduled at this hour or later.
// hrTo      - keep rows scheduled before this hour.
//
// Displays the resulting row count with the elapsed time and returns the filtered frame
// together with the trimmed title.
(Frame<int, string>, string) PrepareFrame((Frame<int, string>, string) frameAndTitle, int? direction = null, int? stopCode = null, int? dayOfWeek = null, int? hrFrom = null, int? hrTo = null)
{
    var source = frameAndTitle.Item1;
    var caption = frameAndTitle.Item2;
    var timer = System.Diagnostics.Stopwatch.StartNew();

    // Collect the active predicates first; they are applied in this order below.
    var conditions = new List<Func<KeyValuePair<int, ObjectSeries<string>>, bool>>();

    if (direction.HasValue)
    {
        caption += $" Direction {direction}";
        conditions.Add(row => row.Value.GetAs<int>("Direction") == direction.Value);
    }

    if (stopCode.HasValue)
    {
        caption += $" StopCode {stopCode}";
        conditions.Add(row => row.Value.GetAs<int>("IrsStopCode") == stopCode.Value);
    }

    if (dayOfWeek.HasValue)
    {
        caption += $" {(IsoDayOfWeek)dayOfWeek.Value}";
        conditions.Add(row => row.Value.GetAs<LocalDate>("OperatingDayNT").DayOfWeek == (IsoDayOfWeek)(dayOfWeek.Value));
    }

    if (hrFrom.HasValue)
    {
        // Together with the hrTo part this produces e.g. "6 -- 10".
        caption += $" {hrFrom} -";
        conditions.Add(row => row.Value.GetAs<ZonedDateTime>("ScheduledTimeZoned").Hour >= hrFrom.Value);
    }

    if (hrTo.HasValue)
    {
        caption += $"- {hrTo}";
        conditions.Add(row => row.Value.GetAs<ZonedDateTime>("ScheduledTimeZoned").Hour < hrTo.Value);
    }

    var filtered = source.Filter(conditions.ToArray());

    display($"Prepared {filtered.RowCount} in {timer.ElapsedMilliseconds} ms");
    return (filtered, caption.Trim());
}

// Plots the Deviation column of the frame under the given title.
// Width and height default to 800 x 600 when not supplied.
PlotlyChart CreatePlot((Frame<int, string>, string) frameAndTitle, int? w = null, int? h = null) =>
    MakePlot(
        frameAndTitle.Item1.GetColumn<int>("Deviation"),
        frameAndTitle.Item2,
        w ?? 800,
        h ?? 600);


**If no parameters are specified, the first loaded line will be used**

**You can specify lineName, direction, stopCode for analysis plots**

**To see the list of available stopCodes use the last cell of the notebook**

In [63]:
// Plot the deviation histogram for the first loaded line without any extra filtering.
display(CreatePlot(PrepareFrame(GetFrame())));


// Other examples: the line name goes to GetFrame, the filters go to PrepareFrame.
//display(CreatePlot(PrepareFrame(GetFrame(), dayOfWeek: 6)));
//display(CreatePlot(PrepareFrame(GetFrame("837"))));
//display(CreatePlot(PrepareFrame(GetFrame(), stopCode: 40866)));

Prepared 328 in 0 ms

LineName,Title1,Title2,Title3,Title4,Min,Max,Mean,Median,StdDev,NumberOfValues
,,<null>,<null>,<null>,-94,2203,138,119,165,328


In [64]:
// Deviation statistics for every loaded line, split by day of week (1..7) and by
// four-hour window starting at 6, 10, 14 and 18. Combinations without data (Min is NaN)
// are dropped; the rest is ordered by ascending standard deviation.
var statsByDayOfWeek =
    (from lineName in allFrames.Keys
     from dow in new[] { 1, 2, 3, 4, 5, 6, 7 }
     let lineFrame = GetFrame(lineName)
     let dayFrame = PrepareFrame((lineFrame.Item1, ""), dayOfWeek: dow)
     from hrFrom in new[] { 6, 10, 14, 18 }
     let windowFrame = PrepareFrame((dayFrame.Item1, ""), hrFrom: hrFrom, hrTo: hrFrom+4)
     let windowStats = CalcStats(windowFrame.Item1.GetColumn<int>("Deviation"), lineName, lineFrame.Item2, dayFrame.Item2, windowFrame.Item2)
     where !double.IsNaN(windowStats.Min)
     orderby windowStats.StdDev
     select windowStats)
    .ToArray();

display(statsByDayOfWeek);

Prepared 75 in 4 ms

Prepared 26 in 6 ms

Prepared 20 in 11 ms

Prepared 26 in 13 ms

Prepared 3 in 4 ms

Prepared 53 in 9 ms

Prepared 26 in 6 ms

Prepared 8 in 11 ms

Prepared 18 in 9 ms

Prepared 1 in 4 ms

Prepared 65 in 5 ms

Prepared 27 in 12 ms

Prepared 10 in 6 ms

Prepared 27 in 7 ms

Prepared 1 in 2 ms

Prepared 63 in 6 ms

Prepared 28 in 13 ms

Prepared 15 in 11 ms

Prepared 20 in 9 ms

Prepared 0 in 6 ms

Prepared 64 in 13 ms

Prepared 28 in 14 ms

Prepared 18 in 7 ms

Prepared 17 in 11 ms

Prepared 1 in 3 ms

Prepared 8 in 1 ms

Prepared 0 in 4 ms

Prepared 6 in 4 ms

Prepared 2 in 2 ms

Prepared 0 in 3 ms

Prepared 0 in 3 ms

Prepared 0 in 16 ms

Prepared 0 in 4 ms

Prepared 0 in 4 ms

Prepared 0 in 5 ms

index,LineName,Title1,Title2,Title3,Title4,Min,Max,Mean,Median,StdDev,NumberOfValues
0,490,"490, BadLines",Tuesday,18 -- 22,<null>,287,287,287,287,,1
1,490,"490, BadLines",Wednesday,18 -- 22,<null>,16,16,16,16,,1
2,490,"490, BadLines",Friday,18 -- 22,<null>,149,149,149,149,,1
3,490,"490, BadLines",Saturday,14 -- 18,<null>,44,74,59,59,21.0,2
4,490,"490, BadLines",Saturday,10 -- 14,<null>,-8,158,63,54,61.0,6
5,490,"490, BadLines",Tuesday,6 -- 10,<null>,-94,213,44,44,66.0,26
6,490,"490, BadLines",Monday,6 -- 10,<null>,-41,218,50,40,73.0,26
7,490,"490, BadLines",Wednesday,6 -- 10,<null>,-50,285,78,64,81.0,27
8,490,"490, BadLines",Thursday,14 -- 18,<null>,17,357,152,131,93.0,20
9,490,"490, BadLines",Wednesday,14 -- 18,<null>,-85,337,132,118,94.0,27


In [65]:
// Frame with one row per (line, day of week, time window) statistics record.
var statsByBucketsFrame = Frame.FromRecords(statsByDayOfWeek);

// Statistics of the statistics: each numeric column is summarized in turn and the result
// is labelled with the column name.
var statsOnStats =
    (from columnName in new[] { "NumberOfValues", "Min", "Max", "Mean", "Median", "StdDev" }
     select CalcStats(statsByBucketsFrame.GetColumn<int>(columnName), columnName))
    .ToArray();

display(statsOnStats);


index,LineName,Title1,Title2,Title3,Title4,Min,Max,Mean,Median,StdDev,NumberOfValues
0,NumberOfValues,<null>,<null>,<null>,<null>,1,28,16,18,10,21
1,Min,<null>,<null>,<null>,<null>,-94,287,24,16,101,21
2,Max,<null>,<null>,<null>,<null>,16,2203,394,318,436,21
3,Mean,<null>,<null>,<null>,<null>,16,287,146,148,89,21
4,Median,<null>,<null>,<null>,<null>,12,287,124,131,81,21
5,StdDev,<null>,<null>,<null>,<null>,21,489,116,101,97,18


In [66]:
// Scatter plot of vertCol against horzCol with one trace per distinct groupCol value.
//
// frame      - rows to plot.
// subTitle   - prefix of the chart title.
// horzCol    - column used for x; vertCol - column used for y.
// groupCol   - column whose value selects the trace (and, via getColor, its color).
// textCol    - column used as the point label.
// w, h       - chart size.
// maxX, maxY, minX, minY - corners of an extra "aux" trace with two marker points at
//              (minX, minY) and (maxX, maxY).
//
// Displays the number of plotted points and returns the chart together with the
// max X, max Y, min X and min Y found in the data (not the values passed in).
(PlotlyChart, double, double, double, double) MakeStatsPlot(Frame<int, string> frame, string subTitle, string horzCol, string vertCol, string groupCol, string textCol, Func<string, string> getColor, int w = 800, int h = 600, double maxX = 0.0, double maxY = 0.0, double minX = 0.0, double minY = 0.0)
{
    // Built from the caller-supplied extents before they are overwritten below.
    var auxTrace = new Graph.Scatter { x = new[] { minX, maxX }, y = new[] { minY, maxY }, mode = "markers", name = "aux" };

    var points =
        frame.Rows.Select(row =>
        {
            var x = row.Value.GetAs<double>(horzCol);
            var y = row.Value.GetAs<double>(vertCol);
            var key = row.Value.GetAs<string>(groupCol);
            var text = row.Value.GetAs<string>(textCol);
            return new { x, y, key, text };
        })
        .Values
        .ToArray();

    // Sort each coordinate once; the ends of the sorted arrays are the data extents
    // (default 0.0 when there are no points).
    var sortedX = points.Select(pt => pt.x).OrderBy(v => v).ToArray();
    var sortedY = points.Select(pt => pt.y).OrderBy(v => v).ToArray();
    maxX = sortedX.LastOrDefault();
    maxY = sortedY.LastOrDefault();
    minX = sortedX.FirstOrDefault();
    minY = sortedY.FirstOrDefault();

    display(points.Length);

    var groupTraces =
        from pt in points
        group pt by pt.key into grp
        select new Graph.Scatter
        {
            name = grp.Key,
            x = grp.Select(pt => pt.x).ToArray(),
            y = grp.Select(pt => pt.y).ToArray(),
            text = grp.Select(pt => pt.text).ToArray(),
            mode = "markers",
            marker = new Graph.Marker { color = getColor(grp.Key) }
        };

    var traces = groupTraces.Concat(new[] { auxTrace }).ToArray();

    var plot = Chart.Plot(traces);
    plot.WithXTitle(horzCol);
    plot.WithYTitle(vertCol);
    plot.WithTitle($"{subTitle} - Stats {vertCol}({horzCol})");
    plot.WithWidth(w);
    plot.WithHeight(h);
    return (plot, maxX, maxY, minX, minY);
}

In [67]:
// Maps an English day-of-week name to its plot color; anything else maps to "black".
string getColor(string dow)
{
    switch (dow)
    {
        case "Monday":    return "red";
        case "Tuesday":   return "orange";
        case "Wednesday": return "yellow";
        case "Thursday":  return "green";
        case "Friday":    return "cyan";
        case "Saturday":  return "blue";
        case "Sunday":    return "violet";
        default:          return "black";
    }
}

// Overall chart: StdDev against Mean for all time windows, colored by day of week.
// Its data extents are reused below for the aux trace of every per-window chart.
var p = MakeStatsPlot(
    statsByBucketsFrame, "All Timeframes",
    horzCol: "Mean", vertCol: "StdDev", groupCol: "Title2", textCol: "Title1",
    getColor: getColor, w: 800, h: 600,
    maxX: 0.0, maxY: 0.0, minX: 0.0, minY: 0.0);
var (plot1, maxx, maxy, minx, miny) = p;
display(plot1);

// One chart per time window; all of them are built before any is displayed.
var subPlots =
    (from tf in new[] { "6 -- 10", "10 -- 14", "14 -- 18", "18 -- 22" }
     let windowFrame = statsByBucketsFrame.Where(kvp => kvp.Value.GetAs<string>("Title3") == tf)
     select MakeStatsPlot(windowFrame, tf, "Mean", "StdDev", "Title2", "Title1", getColor, 800, 600, maxx, maxy, minx, miny).Item1)
    .ToArray();

for (var i = 0; i < subPlots.Length; i++)
{
    display(subPlots[i]);
}
            

In [68]:
// Maps a time-window label to its plot color; anything else maps to "black".
string getColorByPeriod(string period)
{
    switch (period)
    {
        case "6 -- 10":  return "red";
        case "10 -- 14": return "yellow";
        case "14 -- 18": return "blue";
        case "18 -- 22": return "violet";
        default:         return "black";
    }
}

// Overall chart: StdDev against Mean for all days of week, colored by time window.
// Its data extents are reused below for the aux trace of every per-day chart.
var p = MakeStatsPlot(
    statsByBucketsFrame, "All Days Of Week",
    horzCol: "Mean", vertCol: "StdDev", groupCol: "Title3", textCol: "Title1",
    getColor: getColorByPeriod, w: 800, h: 600,
    maxX: 0.0, maxY: 0.0, minX: 0.0, minY: 0.0);
var (plot1, maxx, maxy, minx, miny) = p;
display(plot1);

// One chart per day of week; all of them are built before any is displayed.
var subPlots =
    (from dow in new[] { "Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday" }
     let dayFrame = statsByBucketsFrame.Where(kvp => kvp.Value.GetAs<string>("Title2") == dow)
     select MakeStatsPlot(dayFrame, dow, "Mean", "StdDev", "Title3", "Title1", getColorByPeriod, 800, 600, maxx, maxy, minx, miny).Item1)
    .ToArray();

for (var i = 0; i < subPlots.Length; i++)
{
    display(subPlots[i]);
}
            

In [69]:
// Maps a "<line>, <folder>" title to the plot color of its folder:
// GoodLines -> blue, BadLines -> red, InnerCityLines -> yellow, anything else -> black.
// The suffix check is ordinal (culture-independent) and a null title maps to "black"
// instead of throwing.
string getColorByLineName(string title1)
{
    if (title1 == null)
        return "black";

    return
        title1.EndsWith("GoodLines", StringComparison.Ordinal)      ? "blue" :
        title1.EndsWith("BadLines", StringComparison.Ordinal)       ? "red" :
        title1.EndsWith("InnerCityLines", StringComparison.Ordinal) ? "yellow" :
        "black";
}

// Overall chart: StdDev against Mean for all time windows, colored by line folder.
// Its data extents are reused below for the aux trace of every per-window chart.
var p = MakeStatsPlot(
    statsByBucketsFrame, "All Timeframes",
    horzCol: "Mean", vertCol: "StdDev", groupCol: "Title1", textCol: "Title1",
    getColor: getColorByLineName, w: 800, h: 600,
    maxX: 0.0, maxY: 0.0, minX: 0.0, minY: 0.0);
var (plot1, maxx, maxy, minx, miny) = p;
display(plot1);

// One chart per time window; all of them are built before any is displayed.
var subPlots =
    (from tf in new[] { "6 -- 10", "10 -- 14", "14 -- 18", "18 -- 22" }
     let windowFrame = statsByBucketsFrame.Where(kvp => kvp.Value.GetAs<string>("Title3") == tf)
     select MakeStatsPlot(windowFrame, tf, "Mean", "StdDev", "Title1", "Title1", getColorByLineName, 800, 600, maxx, maxy, minx, miny).Item1)
    .ToArray();

for (var i = 0; i < subPlots.Length; i++)
{
    display(subPlots[i]);
}


In [70]:
// Maps a "<line>, <folder>" title to the plot color of its folder:
// GoodLines -> blue, BadLines -> red, InnerCityLines -> yellow, anything else -> black.
// The suffix check is ordinal (culture-independent) and a null title maps to "black"
// instead of throwing.
string getColorByLineName(string title1)
{
    if (title1 == null)
        return "black";

    return
        title1.EndsWith("GoodLines", StringComparison.Ordinal)      ? "blue" :
        title1.EndsWith("BadLines", StringComparison.Ordinal)       ? "red" :
        title1.EndsWith("InnerCityLines", StringComparison.Ordinal) ? "yellow" :
        "black";
}

// Overall chart: StdDev against Mean for all days of week, colored by line folder.
// Its data extents are reused below for the aux trace of every per-day chart.
var p = MakeStatsPlot(
    statsByBucketsFrame, "All Days Of Week",
    horzCol: "Mean", vertCol: "StdDev", groupCol: "Title1", textCol: "Title1",
    getColor: getColorByLineName, w: 800, h: 600,
    maxX: 0.0, maxY: 0.0, minX: 0.0, minY: 0.0);
var (plot1, maxx, maxy, minx, miny) = p;
display(plot1);

// One chart per day of week; all of them are built before any is displayed.
var subPlots =
    (from dow in new[] { "Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday" }
     let dayFrame = statsByBucketsFrame.Where(kvp => kvp.Value.GetAs<string>("Title2") == dow)
     select MakeStatsPlot(dayFrame, dow, "Mean", "StdDev", "Title1", "Title1", getColorByLineName, 800, 600, maxx, maxy, minx, miny).Item1)
    .ToArray();

for (var i = 0; i < subPlots.Length; i++)
{
    display(subPlots[i]);
}
            

In [71]:
// Displays the distinct stop codes present in the frame of the given line,
// or of the first loaded line when lineName is null.
void DisplayStops(string lineName = null)
{
    var entry = lineName != null
        ? allFrames[lineName]
        : allFrames.First().Value;

    display(entry.frame.GetColumn<string>("IrsStopCode").Values.Distinct().ToArray());
}

In [72]:
// List the stop codes available for line 490; the line must be one of the loaded lines.
DisplayStops("490");
// Example for another line:
//DisplayStops("837");

index,value
0,40870
