https://github.com/dotnet/try/issues/688

In [1]:
#r "nuget:Microsoft.Data.Analysis"
open Microsoft.Data.Analysis

In [2]:
#r "nuget:Accord.MachineLearning"
open Accord.MachineLearning

In [3]:
#r @"TakensTheorem.Core.dll"

In [4]:
open System
open TakensTheorem.Core
open TakensTheorem.Core.ZipHelper
open TakensTheorem.Core.DataFrameColumnOperators

In [5]:
// Relative location of the zipped weather dataset. The path is assembled with
// Path.Combine so the directory separator matches the host OS; a literal
// `..\Data\...` string only resolves on Windows.
let path = System.IO.Path.Combine("..", "Data", "historical-hourly-weather-data.zip")

In [6]:
// Loader constructed over the zip archive at `path`; the following cell calls its
// `ToDataFrame` member with CSV file names.
let loader = new ZippedCsvLoader(path)

In [7]:
// Load the two CSV tables used by this notebook through the zip loader.
let cityTable = loader.ToDataFrame("city_attributes.csv")
let temperatureDF = loader.ToDataFrame("temperature.csv")

// Name of the first column of the temperature table; later cells treat that
// column as the time index.
let indexName =
    let firstColumn = temperatureDF.Columns.[0]
    firstColumn.Name

In [8]:
// Replace the index column (its cells are cast to string here) with parsed DateTime values.
// Parsing uses the invariant culture so the result does not depend on the regional
// settings of the machine that runs the notebook.
temperatureDF.[indexName] <-
    temperatureDF.[indexName]
    |> Seq.cast<string>
    |> Seq.map (fun (text: string) -> DateTime.Parse(text, System.Globalization.CultureInfo.InvariantCulture))
    |> DataFrameColumn.FromValues indexName

In [9]:
// Inclusive date window. Both bounds are midnight values, so of 31 August only
// timestamps at exactly 00:00 pass the `<= stop` test.
let start = DateTime(2015, 6, 22)
let stop = DateTime(2015, 8, 31)

(*
    temperatureDF.["datetime"].ElementwiseGreaterThanOrEqual<DateTime> (start)
    does not work for DateTime columns yet, so the row filter is built by hand below.
*)

// Row filter: true where the timestamp is present and lies within [start, stop].
let dateFilterColumn =
    !> temperatureDF.[indexName]
    |> DataFrameColumn<DateTime>.CreateFilter (fun dt -> dt.HasValue && start <= dt.Value && dt.Value <= stop)

// Montreal column of the rows selected by the date filter.
let weatherDataMontreal =
    let windowed = temperatureDF.Filter(dateFilterColumn)
    windowed.["Montreal"]

In [10]:
// Length of the unfiltered temperature table.
DataFrame.Length temperatureDF
//weatherDataMontreal.Filter

45253

In [11]:
// The index is not available at column level, so only the column itself is plotted.

DataFrameColumn<float>.Plot (!> weatherDataMontreal)

In [12]:
(* Subtracting a null behaves like subtracting zero here
 * (in pandas both would become NaN), so dropping NAs
 * after the subtraction does not trim the column;
 * `Trim 0.0` is applied to the difference instead.
 *)
let lowPassFilterSignalCol =
    // Work on a clone: rows the rolling mean never writes to keep the cloned value.
    let smoothed = weatherDataMontreal.Clone()
    // 24-sample rolling average; the i-th average is stored 12 rows further in.
    weatherDataMontreal
    |> DataFrameColumn.Rolling<float> 24 Seq.average
    |> Seq.iteri (fun offset mean -> smoothed.[int64 (offset + 12)] <- Nullable mean)
    smoothed

// Original series minus its rolling average, passed through `Trim 0.0`.
let weatherDataMontrealFiltered =
    !> (weatherDataMontreal - lowPassFilterSignalCol)
    |> DataFrameColumn.Trim 0.0

DataFrameColumn<float>.Plot weatherDataMontrealFiltered

In [13]:
// Takens embedding of the filtered series with arguments 5 and 2, materialised
// as an array so its components can be indexed.
let embeddedWeather =
    DataFrameColumn.TakensEmbedding<float> 5 2 weatherDataMontrealFiltered
    |> Array.ofSeq

// Scatter of embedding component 0 (x) against component 1 (y).
Scatter(x = embeddedWeather.[0], y = embeddedWeather.[1])
|> Chart.Plot

In [14]:
// Takens embedding of the filtered series with arguments 6 and 3, as an array.
let embeddedWeather3 =
    DataFrameColumn.TakensEmbedding<float> 6 3 weatherDataMontrealFiltered
    |> Array.ofSeq

// 3-D line plot of the first three embedding components.
Scatter3d(x = embeddedWeather3.[0], y = embeddedWeather3.[1], z = embeddedWeather3.[2], mode = "lines")
|> Chart.Plot

In [15]:
(* There is no DropNulls for columns, so the counts below either go through a
   DataFrame or count the non-null values by hand. *)

let k =
    {| l0 = temperatureDF.DropNulls(DropNullOptions.Any).Rows.Count
       ln = temperatureDF.Rows.Count
       l1 = weatherDataMontreal.Length
       l2 = DataFrame(weatherDataMontreal).DropNulls(DropNullOptions.Any).Rows.Count
       // Number of Montreal values that are not null.
       l3 =
           !> weatherDataMontreal
           |> DataFrameColumn<float>.Values
           |> Seq.filter (fun v -> v.HasValue)
           |> Seq.length
           |> int64
       l4 = DataFrameColumn.Length lowPassFilterSignalCol |}

{| k with diff = k.l3 - k.l4 |} //l0,ln,l1,l2,l3,l4, l3-l4

diff,l0,l1,l2,l3,l4,ln
0,44430,1681,1681,1681,1681,45253


In [16]:
/// Scatter chart of a single series, supplied as the y values only.
let inline plot' (data: ^a[]) =
    Scatter(y = data) |> Chart.Plot

/// Scatter chart of row 0 of `data` (x) against row 1 (y).
let inline plot'' (data: ^a[][]) =
    let xs, ys = data.[0], data.[1]
    Scatter(x = xs, y = ys) |> Chart.Plot

/// 3-D scatter chart of rows 0, 1 and 2 of `data` as x, y and z.
let inline plot''' (data: ^a[][]) =
    let xs, ys, zs = data.[0], data.[1], data.[2]
    Scatter3d(x = xs, y = ys, z = zs) |> Chart.Plot

In [17]:
%%time

// MutualInformation of the filtered series for first-argument values 1L..20L
// (second argument fixed at 16), plotted in that order.
[| 1L .. 20L |]
|> Array.map (fun lag -> DataFrameColumn.MutualInformation lag 16 weatherDataMontrealFiltered)
|> plot'

Wall time: 3788.6314ms

https://github.com/fslaborg/XPlot/issues/49

In [18]:

// Layout with two x/y axis pairs; its only visible use is the commented-out
// side-by-side chart at the end of this cell.
let layout =
    Layout(title = "Heatmap and Time Series Plot beside each other",
           xaxis = Xaxis(), yaxis = Yaxis(),
           xaxis2 = Xaxis(), yaxis2 = Yaxis())

// Takens embeddings with second argument 2 and first argument 5 and 1 respectively.
let embeddedWeather2d_delay5 =
    weatherDataMontrealFiltered
    |> DataFrameColumn.TakensEmbedding 5 2
    |> Array.ofSeq

let embeddedWeather2d_delay1 =
    weatherDataMontrealFiltered
    |> DataFrameColumn.TakensEmbedding 1 2
    |> Array.ofSeq

// [    Scatter(x=embeddedWeather2d_delay5.[0], y=embeddedWeather2d_delay5.[1],  xaxis="x2", yaxis="y2")
//      Scatter(x=embeddedWeather2d_delay1.[0], y=embeddedWeather2d_delay1.[1],  xaxis="x1", yaxis="y1") ]
// |> Chart.Plot
// |> Chart.WithLayout layout

In [19]:
// Scatter of the two components of the delay-5 embedding.
embeddedWeather2d_delay5 |> plot''

In [20]:
// Scatter of the two components of the delay-1 embedding.
embeddedWeather2d_delay1 |> plot''

In [21]:
// FalseNearestNeighbours result for second-argument values 1..6 (first argument 1),
// each divided by the length of the filtered series.
let nFNN =
    let sampleCount = float weatherDataMontrealFiltered.Length
    let falseNeighbourRatio dim =
        float (DataFrameColumn.FalseNearestNeighbours 1 dim weatherDataMontrealFiltered) / sampleCount
    Array.init 6 (fun i -> falseNeighbourRatio (i + 1))

// x: the values 1.0 .. 6.0, y: the ratios computed above.
[| [| 1. .. 6. |]; nFNN |] |> plot''

In [22]:
// Takens embedding of the filtered series with arguments 1 and 4, as an array.
let embeddedFinal =
    DataFrameColumn.TakensEmbedding 1 4 weatherDataMontrealFiltered
    |> Array.ofSeq

// One trace per embedding component 0..3, drawn on a single chart.
[| for i in 0 .. 3 -> Scatter(y = embeddedFinal.[i]) |]
|> Chart.Plot

In [23]:
/// 3-D line chart of three rows of `data`; the rows used for x, y and z are
/// selected by the first three entries of `indices`.
let inline plot'''' (indices: int[]) (data: ^a[][]) =
    let axis k = data.[indices.[k]]
    Chart.Plot(Scatter3d(x = axis 0, y = axis 1, z = axis 2, mode = "lines"))
    
// Show every choice of three out of the four embedding components.
display (plot'''' [| 0; 1; 2 |] embeddedFinal)
display (plot'''' [| 0; 1; 3 |] embeddedFinal)
display (plot'''' [| 1; 2; 3 |] embeddedFinal)
display (plot'''' [| 0; 2; 3 |] embeddedFinal)

In [24]:
// Slice of the filtered series with bounds 0L and 10L.
DataFrameColumn.Slice(0L,10L) weatherDataMontrealFiltered //.ApplyElementwise(fun x y -> x + y)

index,value
0,-2.0370416666667097
1,-2.200791666666703
2,0.8894583333332093
3,0.8471666666665669
4,1.6098749999999311
5,2.287583333333316
6,2.3107499999999845
7,2.333916666666653
8,3.818083333333391
9,3.8189583333333994


In [25]:
// Slice (bounds 0L and 50L) of the timestamp column. The column is looked up through
// `indexName` — the column that was converted to DateTime earlier in the notebook —
// rather than through a repeated string literal, so the two cannot drift apart.
let temp = !> temperatureDF.[indexName] |> DataFrameColumn<DateTime>.Slice(0L, 50L)

In [51]:
// Add to every timestamp as many years as its second lambda argument (the row index, it appears).
// Missing values are passed through unchanged instead of throwing on `.Value`.
// NOTE(review): the recorded output (`<null>` printed, years advancing by 4 per row)
// suggests ApplyElementwise mutates `temp` in place and returns nothing, so re-running
// this cell compounds the shift — confirm before relying on it.
temp.ApplyElementwise (fun x y -> if x.HasValue then Nullable(x.Value.AddYears(int y)) else x) |> printfn "%A"
temp

<null>


index,Date,Day,DayOfWeek,DayOfYear,Hour,Kind,Millisecond,Minute,Month,Second,Ticks,TimeOfDay,Year
0,2012-10-01 00:00:00Z,1,{ System.DayOfWeek: value__: 1 },275,12,{ System.DateTimeKind: value__: 0 },0,0,10,0,634846896000000000,"{ System.TimeSpan: Ticks: 432000000000, Days: 0, Hours: 12, Milliseconds: 0, Minutes: 0, Seconds: 0, TotalDays: 0.5, TotalHours: 12, TotalMilliseconds: 43200000, TotalMinutes: 720, TotalSeconds: 43200 }",2012
1,2016-10-01 00:00:00Z,1,{ System.DayOfWeek: value__: 6 },275,13,{ System.DateTimeKind: value__: 0 },0,0,10,0,636109236000000000,"{ System.TimeSpan: Ticks: 468000000000, Days: 0, Hours: 13, Milliseconds: 0, Minutes: 0, Seconds: 0, TotalDays: 0.5416666666666666, TotalHours: 13, TotalMilliseconds: 46800000, TotalMinutes: 780, TotalSeconds: 46800 }",2016
2,2020-10-01 00:00:00Z,1,{ System.DayOfWeek: value__: 4 },275,14,{ System.DateTimeKind: value__: 0 },0,0,10,0,637371576000000000,"{ System.TimeSpan: Ticks: 504000000000, Days: 0, Hours: 14, Milliseconds: 0, Minutes: 0, Seconds: 0, TotalDays: 0.5833333333333333, TotalHours: 14, TotalMilliseconds: 50400000, TotalMinutes: 840, TotalSeconds: 50400 }",2020
3,2024-10-01 00:00:00Z,1,{ System.DayOfWeek: value__: 2 },275,15,{ System.DateTimeKind: value__: 0 },0,0,10,0,638633916000000000,"{ System.TimeSpan: Ticks: 540000000000, Days: 0, Hours: 15, Milliseconds: 0, Minutes: 0, Seconds: 0, TotalDays: 0.625, TotalHours: 15, TotalMilliseconds: 54000000, TotalMinutes: 900, TotalSeconds: 54000 }",2024
4,2028-10-01 00:00:00Z,1,{ System.DayOfWeek: value__: 0 },275,16,{ System.DateTimeKind: value__: 0 },0,0,10,0,639896256000000000,"{ System.TimeSpan: Ticks: 576000000000, Days: 0, Hours: 16, Milliseconds: 0, Minutes: 0, Seconds: 0, TotalDays: 0.6666666666666666, TotalHours: 16, TotalMilliseconds: 57600000, TotalMinutes: 960, TotalSeconds: 57600 }",2028
5,2032-10-01 00:00:00Z,1,{ System.DayOfWeek: value__: 5 },275,17,{ System.DateTimeKind: value__: 0 },0,0,10,0,641158596000000000,"{ System.TimeSpan: Ticks: 612000000000, Days: 0, Hours: 17, Milliseconds: 0, Minutes: 0, Seconds: 0, TotalDays: 0.7083333333333333, TotalHours: 17, TotalMilliseconds: 61200000, TotalMinutes: 1020, TotalSeconds: 61200 }",2032
6,2036-10-01 00:00:00Z,1,{ System.DayOfWeek: value__: 3 },275,18,{ System.DateTimeKind: value__: 0 },0,0,10,0,642420936000000000,"{ System.TimeSpan: Ticks: 648000000000, Days: 0, Hours: 18, Milliseconds: 0, Minutes: 0, Seconds: 0, TotalDays: 0.75, TotalHours: 18, TotalMilliseconds: 64800000, TotalMinutes: 1080, TotalSeconds: 64800 }",2036
7,2040-10-01 00:00:00Z,1,{ System.DayOfWeek: value__: 1 },275,19,{ System.DateTimeKind: value__: 0 },0,0,10,0,643683276000000000,"{ System.TimeSpan: Ticks: 684000000000, Days: 0, Hours: 19, Milliseconds: 0, Minutes: 0, Seconds: 0, TotalDays: 0.7916666666666666, TotalHours: 19, TotalMilliseconds: 68400000, TotalMinutes: 1140, TotalSeconds: 68400 }",2040
8,2044-10-01 00:00:00Z,1,{ System.DayOfWeek: value__: 6 },275,20,{ System.DateTimeKind: value__: 0 },0,0,10,0,644945616000000000,"{ System.TimeSpan: Ticks: 720000000000, Days: 0, Hours: 20, Milliseconds: 0, Minutes: 0, Seconds: 0, TotalDays: 0.8333333333333333, TotalHours: 20, TotalMilliseconds: 72000000, TotalMinutes: 1200, TotalSeconds: 72000 }",2044
9,2048-10-01 00:00:00Z,1,{ System.DayOfWeek: value__: 4 },275,21,{ System.DateTimeKind: value__: 0 },0,0,10,0,646207956000000000,"{ System.TimeSpan: Ticks: 756000000000, Days: 0, Hours: 21, Milliseconds: 0, Minutes: 0, Seconds: 0, TotalDays: 0.875, TotalHours: 21, TotalMilliseconds: 75600000, TotalMinutes: 1260, TotalSeconds: 75600 }",2048


In [40]:
// Slice (bounds 0L and 50L) of the Atlanta column, converted to a float column.
let atlanta =
    !> temperatureDF.["Atlanta"]
    |> DataFrameColumn<float>.Slice(0L, 50L) //|> DataFrameColumn.Values |> Seq.cast<int>

In [48]:
// Print the values of the Atlanta slice.
printfn "%A" (DataFrameColumn.Values atlanta)

[|null; 294.03; 294.0353414; 294.0497022; 294.064063; 294.0784237; 294.0927845;
  294.1071453; 294.1215061; 294.1358668; 294.1502276; 294.1645884; 294.1789492;
  294.1933099; 294.2076707; 294.2220315; 294.2363923; 294.250753; 294.2651138;
  294.2794746; 294.2938354; 294.3081961; 294.3225569; 294.3369177; 294.3512785;
  294.3656392; 294.38; 295.01; 295.04; 295.26; 295.14; 295.46; 295.77; 296.28;
  295.97; 295.41; 295.36; 295.51; 295.16; 295.21; 295.1; 294.93; 295.04; 294.12;
  294.07; 293.51; 292.65; 292.06; 291.6; 291.5; 291.98|]


In [100]:
// NOTE(review): an untyped empty list presumably gives FromValues no element type to
// infer; the output recorded for this cell is a type-check error. Confirm the intended
// element type before reusing this line.
[]    |> DataFrameColumn.FromValues ""

Unhandled Exception: input.fsx (3,117)-(3,119) typecheck error This type parameter cannot be instantiated to 'Nullable'. This is a restriction imposed in order to ensure the meaning of 'null' in some CLI languages is not confusing when used in conjunction with 'Nullable' values.
input.fsx (3,117)-(3,119) typecheck error Type mismatch. Expecting a
    'Nullable<float> [] -> Nullable<float> []'    
but given a
    'Nullable<float> [] -> PrimitiveDataFrameColumn<Nullable<float>>'    
The type 'Nullable<float> []' does not match the type 'PrimitiveDataFrameColumn<Nullable<float>>'

In [67]:
// Print `t` (defined in a cell that is not part of this export).
printfn "%A" t

[|0.0; 1.0; 2.0; 3.0; 4.0; 5.0; 6.0; 7.0; 8.0; 9.0; 10.0; null; null; null; null;
  null; null; null; null; null; null; null; null; null; null; null; null; null;
  null; null; null; null; null; null; null; null; null; null; null; null; null;
  null; null; null; null; null; null; null; null; null|]


In [75]:
// NOTE(review): `Nullable()` carries no type argument here, and the output recorded for
// this cell is a type-check error; presumably an explicit argument such as
// `Nullable<float>()` was intended — confirm.
let f = Nullable()

Unhandled Exception: input.fsx (1,11)-(1,17) typecheck error The value, constructor, namespace or type 'FSharp' is not defined. Maybe you want one of the following:
   CSharp