## Install AutoML NuGet package

In [1]:
#i "nuget:https://pkgs.dev.azure.com/dnceng/public/_packaging/MachineLearning/nuget/v3/index.json"

#r "nuget:Microsoft.ML.AutoML,0.20.0-preview.22424.1"

Loading extensions from `Microsoft.ML.AutoML.Interactive.dll`

## Import packages

In [1]:
using Microsoft.ML;
using Microsoft.ML.Data;
using Microsoft.ML.AutoML;

## Define input schema

In [1]:
/// <summary>
/// Row schema used when loading the delimited text file into an IDataView.
/// </summary>
public class Input
{
	/// <summary>
	/// File columns 0 through 149999, loaded as one fixed-size vector of 150000 floats.
	/// </summary>
	[LoadColumn(0, 149999), VectorType(150000)]
	public float[] Features { get; set; }

	/// <summary>
	/// File column 150000, loaded as a boolean value.
	/// </summary>
	[LoadColumn(150000)]
	public bool Label { get; set; }
}

## Define data path

In [1]:
// Absolute, machine-specific location of the CSV file; adjust to where the dataset lives locally.
string dataPath = @"C:\Datasets\BBD_Full.csv";

## Initialize MLContext

In [1]:
// Fixed seed so that seed-dependent operations on this context (e.g. row shuffling
// and train/test splitting) give the same result each time the notebook is re-run.
// NOTE(review): the time-budgeted AutoML search may still vary between runs — confirm.
var mlContext = new MLContext(seed: 0);

## Load data into IDataView

In [1]:
// Read the comma-separated file at dataPath using the Input schema; the first line is a header.
IDataView data = mlContext.Data.LoadFromTextFile<Input>(
	dataPath,
	hasHeader: true,
	separatorChar: ',');

## Shuffle data

**NOTE: Shuffling is not needed when using the full dataset. It is done here so that both 0 and 1 values appear in the *Label* column, since only 5k rows are sampled to simplify training in this sample.**

In [1]:
// Shuffle the rows of the loaded data before a subset is taken from it.
IDataView shuffledData = mlContext.Data.ShuffleRows(data);

## Take 5k rows

Taking a 5k-row sample makes training faster. It is not needed when using the entire dataset.

In [1]:
// Keep only the first 5000 rows of the shuffled data.
IDataView sample = mlContext.Data.TakeRows(shuffledData, count: 5000);

In [1]:
// Split the sampled rows into a training set and a test set, holding out 10% for testing.
DataOperationsCatalog.TrainTestData trainTestData =
	mlContext.Data.TrainTestSplit(sample, testFraction: 0.1);

## Display IDataView Schema

In [1]:
// Trailing expression: shows the schema of the loaded data as the cell output.
data.Schema

index,Name,Index,IsHidden,Type,Annotations
Dimensions,IsKnownSize,ItemType,Size,RawType,Unnamed: 5_level_1
Schema,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
RawType,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3
Schema,Unnamed: 1_level_4,Unnamed: 2_level_4,Unnamed: 3_level_4,Unnamed: 4_level_4,Unnamed: 5_level_4
0,Features,0,False,DimensionsIsKnownSizeItemTypeSizeRawType[ 150000 ]True{ Single: RawType: System.Single }150000Microsoft.ML.Data.VBuffer<System.Single>,Schema[ ]
Dimensions,IsKnownSize,ItemType,Size,RawType,
[ 150000 ],True,{ Single: RawType: System.Single },150000,Microsoft.ML.Data.VBuffer<System.Single>,
Schema,,,,,
[ ],,,,,
1,Label,1,False,RawTypeSystem.Boolean,Schema[ ]
RawType,,,,,
System.Boolean,,,,,
Schema,,,,,
[ ],,,,,

Dimensions,IsKnownSize,ItemType,Size,RawType
[ 150000 ],True,{ Single: RawType: System.Single },150000,Microsoft.ML.Data.VBuffer<System.Single>

Schema
[ ]

RawType
System.Boolean

Schema
[ ]


## Define pipeline

In [1]:
// Sweepable pipeline: an AutoML featurizer built from the training set, treating
// "Features" as a numeric column, followed by an AutoML binary classification step.
var pipeline = mlContext.Auto()
	.Featurizer(
		trainTestData.TrainSet,
		numericColumns: new[] { "Features" })
	.Append(
		mlContext.Auto().BinaryClassification());

## Initialize AutoML experiment and configure settings

In [1]:
// Create the experiment, then configure it in place through its fluent setters.
AutoMLExperiment experiment = mlContext.Auto().CreateExperiment();

experiment
	.SetPipeline(pipeline)                 // pipeline to search over
	.SetTrainingTimeInSeconds(60)          // training time setting, in seconds
	.SetBinaryClassificationMetric(        // score by accuracy against the "Label" column
		BinaryClassificationMetric.Accuracy,
		labelColumn: "Label")
	.SetDataset(                           // train set first, test set second
		trainTestData.TrainSet,
		trainTestData.TestSet);

## Run AutoML experiment

In [1]:
// Run the configured experiment asynchronously and wait for its result.
TrialResult result = await experiment.RunAsync();

## Display metric

In [1]:
// Trailing expression: shows the Metric value of the experiment result as the cell output.
result.Metric

## Save model

In [1]:
// Write the model from the experiment result, together with the input schema of the
// loaded data, to "BBDModel.zip".
mlContext.Model.Save(
	model: result.Model,
	inputSchema: data.Schema,
	filePath: "BBDModel.zip");