
### This Interactive Notebook was generated by ML.NET Tooling.

The code below demonstrates how to

1. Define the model input and output schema
1. Load data from a text file into an IDataView
1. Set up the training pipeline with data transforms
1. Choose an algorithm and append it to the pipeline
1. Train the model
1. Evaluate the model
1. Consume the model


## Install the necessary NuGet packages for training an ML.NET model and plotting:

In [None]:
// ML.NET Model Builder generated Notebook file. Notebook files contain both code snippets and rich text elements.
// Use the "run" button in the left margin to execute each code snippet and explore ML.NET.

#i "nuget:https://pkgs.dev.azure.com/dnceng/public/_packaging/dotnet5/nuget/v3/index.json" 
#i "nuget:https://pkgs.dev.azure.com/dnceng/public/_packaging/dotnet-tools/nuget/v3/index.json" 

#r "nuget:Microsoft.ML,1.5.5"
#r "nuget:Microsoft.ML.LightGbm,1.5.5"
#r "nuget:Microsoft.Data.Analysis,0.4.0"


In [None]:
using static Microsoft.DotNet.Interactive.Formatting.PocketViewTags;
using Microsoft.DotNet.Interactive.Formatting;
using Microsoft.Data.Analysis;
using System;
using System.Collections.Generic;
using System.IO;
using System.Net;
using System.Linq;
using Microsoft.ML;
using Microsoft.ML.Data;

In [None]:
// Register an HTML formatter so that DataFrame values are displayed as a table in cell output

using Microsoft.AspNetCore.Html;
using Microsoft.DotNet.Interactive.Formatting;
using static Microsoft.DotNet.Interactive.Formatting.PocketViewTags;

// Renders a DataFrame as an HTML table for "text/html" output: one header row with the
// column names, followed by at most the first 20 rows, each prefixed with its row index.
Formatter.Register<DataFrame>((df, writer) =>
{
    const int maxRows = 20;

    // Header: an "index" label wrapped in an <i> tag, then one cell per column name.
    var headerCells = new List<IHtmlContent>();
    headerCells.Add(th(i("index")));
    foreach (var column in df.Columns)
    {
        headerCells.Add((IHtmlContent) th(column.Name));
    }

    // Body: never emit more rows than the frame actually contains.
    var visibleRowCount = Math.Min(maxRows, df.Rows.Count);
    var bodyRows = new List<List<IHtmlContent>>();
    for (var rowIndex = 0; rowIndex < visibleRowCount; rowIndex++)
    {
        var rowCells = new List<IHtmlContent>();
        rowCells.Add(td(rowIndex));
        foreach (var value in df.Rows[rowIndex])
        {
            rowCells.Add(td(value));
        }

        bodyRows.Add(rowCells);
    }

    var htmlTable = table(
        thead(headerCells),
        tbody(bodyRows.Select(cells => tr(cells))));

    writer.Write(htmlTable);
}, "text/html");

## Load dataset

In [None]:
// Download the dataset once and load the first 100000 rows into a DataFrame.
// Re-running this cell reuses the local copy instead of downloading it again;
// delete "abert.csv" to force a fresh download.
string trainDataPath = @"abert.csv";
if (!File.Exists(trainDataPath))
{
    // Download to a temporary name first so that an interrupted transfer never
    // leaves a truncated file behind under the final name.
    string partialDownloadPath = trainDataPath + ".partial";
    using (var client = new WebClient())
    {
        client.DownloadFile(@"https://automlbenchmark.blob.core.windows.net/dataset/Albert_1.csv", partialDownloadPath);
    }

    File.Move(partialDownloadPath, trainDataPath);
}

var df = DataFrame.LoadCsv(trainDataPath, numRows: 100000);
// The trailing expression (no semicolon) is what the notebook displays as the cell output.
df.Head(10)

## Split dataset

In [None]:
// Create the ML.NET context, shuffle the loaded rows, then split them into a
// training set and a test set using TrainTestSplit's default settings.
MLContext mlContext = new MLContext();
IDataView dataset = mlContext.Data.ShuffleRows(df);
DataOperationsCatalog.TrainTestData trainTestSplit = mlContext.Data.TrainTestSplit(dataset);
IDataView train = trainTestSplit.TrainSet;
IDataView test = trainTestSplit.TestSet;

## Create, train and evaluate the training pipeline.

In [None]:
using Microsoft.ML.Data;
using Microsoft.ML.Trainers.LightGbm;
using Microsoft.ML.Trainers;
using Microsoft.ML;
using Microsoft.ML.Runtime;

// Echo ML.NET's informational log messages to the cell output; other message kinds are ignored.
mlContext.Log += (sender, args) =>
{
    if (args.Kind != ChannelMessageKind.Info)
    {
        return;
    }

    Console.WriteLine(args.Message);
};

// Feature columns col_1 .. col_78. The names are generated once so that the
// missing-value replacement and the Concatenate step always use the same list.
var featureColumnNames = Enumerable.Range(1, 78)
    .Select(n => "col_" + n.ToString(System.Globalization.CultureInfo.InvariantCulture))
    .ToArray();

// Each feature column is replaced in place (input and output column share a name).
var missingValueColumnPairs = featureColumnNames
    .Select(name => new InputOutputColumnPair(name, name))
    .ToArray();

// Data processing steps followed by the trainer:
//   1. replace missing values in every feature column,
//   2. concatenate the feature columns into a single "Features" column,
//   3. convert the "label" column to Boolean,
//   4. append the LightGBM binary classification trainer.
var pipeline = mlContext.Transforms.ReplaceMissingValues(missingValueColumnPairs)
    .Append(mlContext.Transforms.Concatenate(@"Features", featureColumnNames))
    .Append(mlContext.Transforms.Conversion.ConvertType(@"label", "label", DataKind.Boolean))
    .Append(mlContext.BinaryClassification.Trainers.LightGbm(new LightGbmBinaryTrainer.Options()
    {
        NumberOfLeaves = 667,
        MinimumExampleCountPerLeaf = 14,
        NumberOfIterations = 1433,
        MaximumBinCountPerFeature = 902,
        LearningRate = 0.598833188763671F,
        LabelColumnName = @"label",
        FeatureColumnName = @"Features",
        Booster = new GradientBooster.Options()
        {
            SubsampleFraction = 0.73979377803767F,
            FeatureFraction = 0.997659156941006F,
            L1Regularization = 1.89444501612553E-09F,
            L2Regularization = 0.00655912177918019F
        }
    }));

// Fit the pipeline on the training split, apply the fitted model to the test split,
// and compute binary classification metrics against the "label" column.
var model = pipeline.Fit(train);
IDataView data = model.Transform(test);
CalibratedBinaryClassificationMetrics eval = mlContext.BinaryClassification.Evaluate(data, labelColumnName: "label");
// The trailing expression (no semicolon) is what the notebook displays as the cell output.
eval

## Save the model

In [None]:
// Write the trained model, together with the schema of the training data, to "abert.zip".
mlContext.Model.Save(model, inputSchema: train.Schema, filePath: "abert.zip");