# ML.Net - Dataframe with AutoML

In [None]:
#r "nuget:Microsoft.ML"
#r "nuget:Microsoft.ML.AutoML"
#r "nuget:Microsoft.Data.Analysis"

In [2]:
using Microsoft.ML;
using Microsoft.ML.AutoML;
using Microsoft.Data.Analysis;

In [3]:
using Microsoft.AspNetCore.Html;
Formatter<DataFrame>.Register((df, writer) =>
{
    var headers = new List<IHtmlContent>();
    headers.Add(th(i("index")));
    headers.AddRange(df.Columns.Select(c => (IHtmlContent) th(c.Name)));
    var rows = new List<List<IHtmlContent>>();
    var take = 10;
    for (var i = 0; i < Math.Min(take, df.Rows.Count); i++)
    {
        var cells = new List<IHtmlContent>();
        cells.Add(td(i));
        foreach (var obj in df.Rows[i])
        {
            cells.Add(td(obj));
        }
        rows.Add(cells);
    }

    var t = table(
        thead(
            headers),
        tbody(
            rows.Select(
                r => tr(r))));

    writer.Write(t);
}, "text/html");

In [4]:
var data = DataFrame.LoadCsv("./datasets/housing/housing.csv");
data

index,longitude,latitude,housing_median_age,total_rooms,total_bedrooms,population,households,median_income,median_house_value,ocean_proximity
0,-12223,3788,410,8800,1290,3220,1260,83252,4526000,NEAR BAY
1,-12222,3786,210,70990,11060,24010,11380,83014,3585000,NEAR BAY
2,-12224,3785,520,14670,1900,4960,1770,72574,3521000,NEAR BAY
3,-12225,3785,520,12740,2350,5580,2190,56431,3413000,NEAR BAY
4,-12225,3785,520,16270,2800,5650,2590,38462,3422000,NEAR BAY
5,-12225,3785,520,9190,2130,4130,1930,40368,2697000,NEAR BAY
6,-12225,3784,520,25350,4890,10940,5140,36591,2992000,NEAR BAY
7,-12225,3784,520,31040,6870,11570,6470,312,2414000,NEAR BAY
8,-12226,3784,420,25550,6650,12060,5950,20804,2267000,NEAR BAY
9,-12225,3784,520,35490,7070,15510,7140,36912,2611000,NEAR BAY


In [5]:
data.Description()

index,Description,longitude,latitude,housing_median_age,total_rooms,total_bedrooms,population,households,median_income,median_house_value
0,Length (excluding null values),20640.0,20640.0,20640.0,20640.0,20433.0,20640.0,20640.0,20640.0,20640.0
1,Max,-1146.0,4195.0,520.0,393200.0,64450.0,356820.0,60820.0,150001.0,5000010.0
2,Min,-12435.0,326.0,10.0,20.0,0.0,30.0,10.0,9.0,149990.0
3,Mean,-10910.303,3245.5671,286.39487,26357.584,5324.7554,14254.769,4995.3945,33542.402,2068548.2


In [6]:
data = data.DropNulls()

index,longitude,latitude,housing_median_age,total_rooms,total_bedrooms,population,households,median_income,median_house_value,ocean_proximity
0,-12223,3788,410,8800,1290,3220,1260,83252,4526000,NEAR BAY
1,-12222,3786,210,70990,11060,24010,11380,83014,3585000,NEAR BAY
2,-12224,3785,520,14670,1900,4960,1770,72574,3521000,NEAR BAY
3,-12225,3785,520,12740,2350,5580,2190,56431,3413000,NEAR BAY
4,-12225,3785,520,16270,2800,5650,2590,38462,3422000,NEAR BAY
5,-12225,3785,520,9190,2130,4130,1930,40368,2697000,NEAR BAY
6,-12225,3784,520,25350,4890,10940,5140,36591,2992000,NEAR BAY
7,-12225,3784,520,31040,6870,11570,6470,312,2414000,NEAR BAY
8,-12226,3784,420,25550,6650,12060,5950,20804,2267000,NEAR BAY
9,-12225,3784,520,35490,7070,15510,7140,36912,2611000,NEAR BAY


In [7]:
var context = new MLContext();

var experiment = context.Auto().CreateRegressionExperiment(maxExperimentTimeInSeconds: 10);

var result = experiment.Execute((IDataView)data, labelColumnName:"median_house_value");

In [8]:
result.BestRun.TrainerName

FastTreeRegression

In [9]:
result.BestRun.ValidationMetrics.RSquared