[View this notebook on GitHub](https://github.com/dotnet/interactive/tree/main/samples/notebooks/csharp/Samples)


In [3]:

#r "nuget:Microsoft.Data.Analysis"
#r "nuget:XPlot.Plotly.Interactive"

using Microsoft.Data.Analysis;

In [4]:
// The three columns that will make up the DataFrame.
var dateTimes = new PrimitiveDataFrameColumn<DateTime>("DateTimes"); // No length given, so the column starts with length 0.
var ints = new PrimitiveDataFrameColumn<int>("Ints", 3); // Length 3; every entry is null to begin with.
var strings = new StringDataFrameColumn("Strings", 3); // Length 3; every entry is null to begin with.

In [5]:
// Append 3 values to dateTimes.
// The dates are built with the DateTime(year, month, day) constructor rather than
// DateTime.Parse: parsing "2019/01/01" uses the current culture of the machine running
// the notebook, whereas the constructor is unambiguous everywhere.
dateTimes.Append(new DateTime(2019, 1, 1));
dateTimes.Append(new DateTime(2019, 1, 1));
dateTimes.Append(new DateTime(2019, 1, 2));

In [6]:
// Combine the columns into a DataFrame. This throws if the columns are of different lengths.
var df = new DataFrame(dateTimes, ints, strings);

In [7]:
// Evaluating the DataFrame as the cell's last expression renders it as a table.
df

index,DateTimes,Ints,Strings
0,2019-01-01 00:00:00Z,<null>,<null>
1,2019-01-01 00:00:00Z,<null>,<null>
2,2019-01-02 00:00:00Z,<null>,<null>


In [8]:
// To change a value directly through df, use the [rowIndex, columnIndex] indexer.
df[0, 1] = 10; // 0 is the rowIndex, and 1 is the columnIndex. This sets the 0th value in the Ints column to 10
df

index,DateTimes,Ints,Strings
0,2019-01-01 00:00:00Z,10,<null>
1,2019-01-01 00:00:00Z,<null>,<null>
2,2019-01-02 00:00:00Z,<null>,<null>


In [9]:
// Modify the ints and strings columns by indexing into them directly.
// df was built from these same column objects, so the changes show up in df below.
ints[1] = 100;
strings[1] = "Foo!";
df

index,DateTimes,Ints,Strings
0,2019-01-01 00:00:00Z,10,<null>
1,2019-01-01 00:00:00Z,100,Foo!
2,2019-01-02 00:00:00Z,<null>,<null>


In [10]:
// Indexing can throw when types don't match.
// ints[1] = "this will throw because I am a string";
// Info() can be used to figure out the type of data in each column.
// NOTE(review): the output below reports 3 for Strings although only Strings[1] has been
// set at this point — confirm how Info() counts nulls for string columns.
df.Info()

index,Info,DateTimes,Ints,Strings
0,DataType,System.DateTime,System.Int32,System.String
1,Length (excluding null values),3,2,3


In [11]:
// Add 5 to every value of the Ints column through the DataFrame.
// With inPlace: true the column held by df is updated; the null at index 2 stays null (see output).
df["Ints"].Add(5, inPlace: true);
df

index,DateTimes,Ints,Strings
0,2019-01-01 00:00:00Z,15,<null>
1,2019-01-01 00:00:00Z,105,Foo!
2,2019-01-02 00:00:00Z,<null>,<null>


In [12]:
// We can also use binary operators. Binary operators produce a copy, so assign it back to our Ints column.
// (ints / 5) is evaluated first, then multiplied by 100: 15 -> 300 and 105 -> 2100.
// NOTE(review): after this assignment df["Ints"] presumably holds the new copy while the
// ints variable still refers to the original column — confirm before reusing ints.
df["Ints"] = (ints / 5) * 100;
df

index,DateTimes,Ints,Strings
0,2019-01-01 00:00:00Z,300,<null>
1,2019-01-01 00:00:00Z,2100,Foo!
2,2019-01-02 00:00:00Z,<null>,<null>


In [13]:
// Fill nulls in our columns, if any. Ints[2], Strings[0] and Strings[2] are null
// (Strings[1] was set to "Foo!" earlier).
df["Ints"].FillNulls(-1, inPlace: true);
df["Strings"].FillNulls("Bar", inPlace: true);
df

index,DateTimes,Ints,Strings
0,2019-01-01 00:00:00Z,300,Bar
1,2019-01-01 00:00:00Z,2100,Foo!
2,2019-01-02 00:00:00Z,-1,Bar


In [14]:
// To inspect the first row: df.Rows[0] exposes the values of row 0, one per column.
DataFrameRow row0 = df.Rows[0];
row0

index,type,value
(values),indextypevalue0System.DateTime2019-01-01 00:00:00Z1System.Int323002System.StringBar,
index,type,value
0,System.DateTime,2019-01-01 00:00:00Z
1,System.Int32,300
2,System.String,Bar

index,type,value
0,System.DateTime,2019-01-01 00:00:00Z
1,System.Int32,300
2,System.String,Bar


In [15]:
// Filter rows based on equality.
// ElementwiseEquals compares each value of the Strings column with "Bar" and produces
// a boolean column with one entry per row, which is used as the filter in the next cell.
PrimitiveDataFrameColumn<bool> boolFilter = df["Strings"].ElementwiseEquals("Bar");
boolFilter

Unnamed: 0,Unnamed: 1
NullCount,0
Length,3
Name,Strings
DataType,System.Boolean
(values),"[ True, False, True ]"


In [16]:
// Keep only the rows whose boolFilter entry is true. The result is a separate DataFrame;
// df still has all three rows in the cells that follow.
DataFrame filtered = df.Filter(boolFilter);
filtered

index,DateTimes,Ints,Strings
0,2019-01-01 00:00:00Z,300,Bar
1,2019-01-02 00:00:00Z,-1,Bar


In [17]:
// Sort our dataframe using the Ints column (ascending, as the output shows).
// The sorted rows are returned as a DataFrame.
DataFrame sorted = df.OrderBy("Ints");
sorted

index,DateTimes,Ints,Strings
0,2019-01-02 00:00:00Z,-1,Bar
1,2019-01-01 00:00:00Z,300,Bar
2,2019-01-01 00:00:00Z,2100,Foo!


In [18]:
// GroupBy: group the rows of df by their DateTimes value.
GroupBy groupBy = df.GroupBy("DateTimes");
// Count of values in each group, reported for each of the remaining columns.
DataFrame groupCounts = groupBy.Count();
groupCounts

index,DateTimes,Ints,Strings
0,2019-01-01 00:00:00Z,2,2
1,2019-01-02 00:00:00Z,1,1


In [19]:
// Alternatively find the sum of the values in each group in Ints.
// Passing the column name limits the result to the grouping key plus that column (see output).
DataFrame intsGroupSum = groupBy.Sum("Ints");
intsGroupSum

index,DateTimes,Ints
0,2019-01-01 00:00:00Z,2400
1,2019-01-02 00:00:00Z,-1


In [20]:
using XPlot.Plotly;
using System.Linq;

In [21]:
#r "nuget:MathNet.Numerics"

In [22]:
using MathNet.Numerics.Distributions;
// Parameters of the normal distribution to sample from.
double mean = 0.0;
double stdDev = 0.1;

// Normal comes from the MathNet.Numerics.Distributions namespace.
var normalDist = new Normal(mean, stdDev);

In [23]:
// Draw 1000 samples from the distribution into a DataFrame column.
var doubles = new PrimitiveDataFrameColumn<double>(
    "Normal Distribution",
    normalDist.Samples().Take(1000));

// Plot the samples as a histogram with 30 bins along x.
var histogram = new Histogram
{
    x = doubles,
    nbinsx = 30
};
display(Chart.Plot(histogram));

Unnamed: 0,Unnamed: 1
Height,500
Id,963cab1d-c631-4f93-afad-bdf616c33d1a
PlotlySrc,https://cdn.plot.ly/plotly-latest.min.js
Width,900
