In [1]:
#r "nuget:Microsoft.Data.Analysis"
#r "nuget:XPlot.Plotly.Interactive"
#r "nuget:MathNet.Numerics"

Loading extensions from `C:\Users\admin\.nuget\packages\xplot.plotly.interactive\4.0.6\interactive-extensions\dotnet\XPlot.Plotly.Interactive.dll`

Configuring PowerShell Kernel for XPlot.Plotly integration.

Installed support for XPlot.Plotly.

Loading extensions from `C:\Users\admin\.nuget\packages\microsoft.data.analysis\0.20.0\interactive-extensions\dotnet\Microsoft.Data.Analysis.Interactive.dll`

In [2]:
using XPlot.Plotly;
using Microsoft.Data.Analysis;
using Microsoft.AspNetCore.Html;
using Microsoft.DotNet.Interactive.Formatting;
using static Microsoft.DotNet.Interactive.Formatting.PocketViewTags;
using MathNet.Numerics.Statistics;
using System.Linq;
using System.Data;
using Histogram = XPlot.Plotly.Histogram;

In [3]:
// Registers an HTML formatter for DataFrame values: the frame is rendered as a
// table with a leading "index" column and at most the first 10 rows.
Formatter.Register<DataFrame>(formatter: (df, writer) =>
{
    const int maxRows = 10;

    // Header row: an italic "index" cell followed by one cell per column name.
    var headerCells = new List<IHtmlContent> { th(i("index")) };
    headerCells.AddRange(df.Columns.Select(column => (IHtmlContent) th(column.Name)));

    // Body rows: the row number first, then every value of that row.
    var bodyRows = new List<List<IHtmlContent>>();
    var rowLimit = Math.Min(maxRows, df.Rows.Count);
    for (var rowIndex = 0; rowIndex < rowLimit; rowIndex++)
    {
        var rowCells = new List<IHtmlContent>();
        rowCells.Add(td(rowIndex));
        foreach (var cellValue in df.Rows[rowIndex])
        {
            rowCells.Add(td(cellValue));
        }
        bodyRows.Add(rowCells);
    }

    var header = thead(headerCells);
    var body = tbody(bodyRows.Select(rowCells => tr(rowCells)));
    var htmlTable = table(header, body);

    writer.Write(htmlTable);
}, mimeType: "text/html");

In [4]:
// Read bank.csv (comma-separated, resolved relative to the kernel's working directory)
// into a DataFrame that the following cells use as `data`.
DataFrame data = DataFrame.LoadCsv("bank.csv", separator: ',');

// Trailing expression: the cell's output is the frame itself.
data

index,age,job,marital,education,default,balance,housing,loan,contact,day,month,duration,campaign,pdays,previous,poutcome,deposit
0,59,admin.,married,secondary,no,2343,yes,no,unknown,5,may,1042,1,-1,0,unknown,yes
1,56,admin.,married,secondary,no,45,no,no,unknown,5,may,1467,1,-1,0,unknown,yes
2,41,technician,married,secondary,no,1270,yes,no,unknown,5,may,1389,1,-1,0,unknown,yes
3,55,services,married,secondary,no,2476,yes,no,unknown,5,may,579,1,-1,0,unknown,yes
4,54,admin.,married,tertiary,no,184,no,no,unknown,5,may,673,2,-1,0,unknown,yes
5,42,management,single,tertiary,no,0,yes,yes,unknown,5,may,562,2,-1,0,unknown,yes
6,56,management,married,tertiary,no,830,yes,yes,unknown,6,may,1201,1,-1,0,unknown,yes
7,60,retired,divorced,secondary,no,545,yes,no,unknown,6,may,1030,1,-1,0,unknown,yes
8,37,technician,married,secondary,no,1,yes,no,unknown,6,may,608,1,-1,0,unknown,yes
9,28,services,single,secondary,no,5090,yes,no,unknown,6,may,1297,3,-1,0,unknown,yes


In [5]:
// Histogram of the values in the "age" column.
var ageHistogram = new Histogram { x = data.Columns["age"] };

Chart.Plot(ageHistogram)

In [6]:
// Histogram of the values in the "balance" column.
var balanceHistogram = new Histogram { x = data.Columns["balance"] };

Chart.Plot(balanceHistogram)

In [7]:
// Group the rows by "job" and count them. Count() yields a count column for each
// non-key column, so (assuming no missing values) any of them can supply the bar
// height; "age" is used here.
var jobs = data.GroupBy("job").Count();

// "job" is categorical, so the counts are shown as a bar chart with one bar per category.
var jobCountBars = new Bar
{
    x = jobs.Columns["job"],
    y = jobs.Columns["age"]
};

Chart.Plot(jobCountBars)

In [8]:
// Row counts per job category and per education level, drawn as two bar traces
// on the same chart. The "age" column of each grouped frame holds the count.
var jobs = data.GroupBy("job").Count();
var education = data.GroupBy("education").Count();

var jobBars = new Bar
{
    name = "Job",
    x = jobs.Columns["job"],
    y = jobs.Columns["age"]
};

var educationBars = new Bar
{
    name = "Education",
    x = education.Columns["education"],
    y = education.Columns["age"]
};

var barPlots = new List<Bar> { jobBars, educationBars };

Chart.Plot(barPlots)

In [9]:
// Area chart of the number of rows per job category.
var jobs = data.GroupBy("job").Count();

Chart.Plot(
    new Scatter
    {
        x = jobs.Columns["job"],
        y = jobs.Columns["age"], // per-job count produced by Count(); used as the height
        // Fill between the trace and y = 0 so the shaded area sits under the counts.
        // The previous value "tozerox" filled sideways toward x = 0 instead.
        fill = "tozeroy",
        fillcolor = "green",
        mode = "markers" // markers only; "none" would leave just the filled area
    }
)

In [10]:
// Box plot of the "age" column, labelled "Age".
var ageBox = new Box
{
    name = "Age",
    y = data.Columns["age"]
};

Chart.Plot(ageBox)

In [11]:
// Side-by-side box plots of the "age" and "day" columns.
var ageBoxTrace = new Box
{
    name = "Age",
    y = data.Columns["age"]
};

var dayBoxTrace = new Box
{
    name = "Day",
    y = data.Columns["day"]
};

var boxPlots = new List<Box> { ageBoxTrace, dayBoxTrace };

Chart.Plot(boxPlots)

In [12]:
// Pearson correlation heatmap for the numeric features named in featureColumns.
var featureColumns = new string[]{"age", "balance", "duration"};

// Copies one DataFrame column into a double[].
// Note: Convert.ToDouble(object) returns 0 for a null cell, so missing values become 0.
Func<string, double[]> columnAsDoubles = columnName =>
{
    var column = data[columnName];
    return Enumerable.Range(0, (int)data.Rows.Count)
        .Select(rowNumber => Convert.ToDouble(column[rowNumber]))
        .ToArray();
};

var ageColumn = columnAsDoubles("age");
var balanceColumn = columnAsDoubles("balance");
var durationColumn = columnAsDoubles("duration");

// One series per feature; the order must match featureColumns.
var correlationMatrix = new List<List<double>>
{
    ageColumn.ToList(),
    balanceColumn.ToList(),
    durationColumn.ToList()
};

var length = featureColumns.Length;

// pairwise[m, n] = Pearson(feature m, feature n). The matrix is symmetric, so each
// pair is computed once; the diagonal is set to exactly 1.
var pairwise = new double[length, length];
for (int featureA = 0; featureA < length; ++featureA)
{
    pairwise[featureA, featureA] = 1;
    for (int featureB = featureA + 1; featureB < length; ++featureB)
    {
        var value = Correlation.Pearson(correlationMatrix[featureA], correlationMatrix[featureB]);
        pairwise[featureA, featureB] = value;
        pairwise[featureB, featureA] = value;
    }
}

// The y axis lists the features in reverse order, which puts the 1s on the anti-diagonal.
var yAxis = featureColumns.Reverse().ToArray();

// A 2-D heatmap z is read as z[rowIndex, colIndex] -> cell at (y[rowIndex], x[colIndex]).
// Row r therefore belongs to feature (length - 1 - r) and column c to feature c.
// The previous loop filled z as [xIndex, yIndex], which plotted the transpose and
// mislabelled the off-anti-diagonal cells.
var z = new double[length, length];
for (int rowIndex = 0; rowIndex < length; ++rowIndex)
{
    for (int colIndex = 0; colIndex < length; ++colIndex)
    {
        z[rowIndex, colIndex] = pairwise[length - 1 - rowIndex, colIndex];
    }
}

var correlationMatrixHeatmap = Chart.Plot(
    new Heatmap 
    {
        x = featureColumns,
        y = yAxis,
        z = z,
        zmin = -1, // Pearson correlation lies in [-1, 1]; fix the colour scale to that range
        zmax = 1
    }
);
display(correlationMatrixHeatmap);


In [13]:
// Heatmap built straight from the raw columns: "job" on x, "marital" on y,
// and "balance" as the cell value.
var balanceHeatmap = new Heatmap
{
    name = "Plot",
    x = data.Columns["job"],
    y = data.Columns["marital"],
    z = data.Columns["balance"],
    autocolorscale = true,
    opacity = 0.7
};

Chart.Plot(balanceHeatmap)

In [14]:
// Contour plot built straight from the raw columns: "job" on x, "marital" on y,
// and "balance" as the contoured value.
var balanceContour = new Contour
{
    name = "Plot",
    x = data.Columns["job"],
    y = data.Columns["marital"],
    z = data.Columns["balance"],
    autocolorscale = true,
    opacity = 0.7
};

Chart.Plot(balanceContour)

In [15]:
// 3-D scatter of every row: "job" on x, "marital" on y, "balance" on z.
var balanceScatter3d = new Scatter3d
{
    x = data.Columns["job"],
    y = data.Columns["marital"],
    z = data.Columns["balance"]
};

Chart.Plot(balanceScatter3d)