<img align="Left" alt="Dados_ao_Cubo" src="../shared_content/DADOS-AO-CUBO-Versão-04-1.png" />
<img align="right" alt="MLNet" src="../shared_content/EJ3KsfJWsAEtDaZ.png" />

## Davi Ramos -> Cientista de Dados 👋
(davi.info@gmail.com)

[![Linkedin Badge](https://img.shields.io/badge/-LinkedIn-blue?style=flat-square&logo=Linkedin&logoColor=white&link=https://www.linkedin.com/in/davi-ramos/)](https://www.linkedin.com/in/davi-ramos/)
[![Twitter Badge](https://img.shields.io/badge/-Twitter-1DA1F2?style=flat-square&logo=Twitter&logoColor=white&link=https://twitter.com/Daviinfo/)](https://twitter.com/Daviinfo/)
<a href="https://github.com/DaviRamos"><img src="https://img.shields.io/github/followers/DaviRamos.svg?label=GitHub&style=social" alt="GitHub"></a>

# ML.Net - Auto ML

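## Este exemplo irá usar o AutoML do ML.NET para treinar e avaliar um modelo de regressão que prevê o valor mediano das casas (Median_house_value)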

In [1]:
// Instalar os Pacotes do Nuget

// ML.NET
#r "nuget:Microsoft.ML"  

// ML.NET AutoML
#r "nuget:Microsoft.ML.AutoML" 

Installed package Microsoft.ML.AutoML version 0.17.2

Installed package Microsoft.ML version 1.5.2

In [2]:
using System;
using System.IO;
using System.Linq;
using Microsoft.ML;
using Microsoft.ML.AutoML;
using Microsoft.ML.Data;

using static Microsoft.ML.TrainCatalogBase;
using static Microsoft.ML.DataOperationsCatalog;

In [10]:
// File paths and experiment settings
// CSV dataset that is loaded and split into training and test sets
string housingPath = "./Datasets/Housing/housing.csv";
// File the best model found by the experiment is saved to
string ModelPath = @"./HouseModelv3.zip";
// Value assigned to MaxExperimentTimeInSeconds in the AutoML experiment settings
private static uint ExperimentTime = 10;

In [4]:
/// <summary>
/// One row of the housing dataset. Each property is bound to a column of the
/// input file by its position (see the <c>LoadColumn</c> index on each member).
/// </summary>
public class ModelInput
{
    [LoadColumn(0)] public float Longitude { get; set; }

    [LoadColumn(1)] public float Latitude { get; set; }

    [LoadColumn(2)] public float Housing_median_age { get; set; }

    [LoadColumn(3)] public float Total_rooms { get; set; }

    [LoadColumn(4)] public float Total_bedrooms { get; set; }

    [LoadColumn(5)] public float Population { get; set; }

    [LoadColumn(6)] public float Households { get; set; }

    [LoadColumn(7)] public float Median_income { get; set; }

    /// <summary>
    /// Value to be predicted; surfaced under the column name "Label".
    /// </summary>
    [LoadColumn(8)]
    [ColumnName("Label")]
    public float Median_house_value { get; set; }

    [LoadColumn(9)] public string Ocean_proximity { get; set; }
}

/// <summary>
/// Prediction result type; carries the value of the "Score" column.
/// </summary>
public class ModelOutput
{
    /// <summary>Value read from the "Score" column.</summary>
    [ColumnName("Score")] public float Score { get; set; }
}

In [5]:
/// <summary>
/// Writes four regression metrics to the console, one "Name: value" line each,
/// in the order MeanAbsoluteError, MeanSquaredError, RootMeanSquaredError, RSquared.
/// </summary>
/// <param name="metrics">Metrics whose values are printed.</param>
private static void PrintMetrics(RegressionMetrics metrics)
{
    var rows = new (string Name, double Value)[]
    {
        ("MeanAbsoluteError", metrics.MeanAbsoluteError),
        ("MeanSquaredError", metrics.MeanSquaredError),
        ("RootMeanSquaredError", metrics.RootMeanSquaredError),
        ("RSquared", metrics.RSquared),
    };

    foreach (var (name, value) in rows)
    {
        Console.WriteLine($"{name}: {value}");
    }
}

In [6]:
// Create the MLContext that is shared by the objects of the model-building workflow.
// A fixed random seed is set for repeatable/deterministic results across training runs.
MLContext mlContext = new MLContext(seed: 1);

In [13]:
// Training and test partitions produced by the split below.
private static IDataView trainDataView;
private static IDataView testDataView;

// Load the housing file into ModelInput rows: comma-separated, first line is a header.
IDataView fullData = mlContext.Data.LoadFromTextFile<ModelInput>(
    path: housingPath,
    hasHeader: true,
    separatorChar: ',',
    allowQuoting: true,
    allowSparse: false);

// Split the dataset in two parts: 80% for training, 20% held out for testing.
var trainTestData = mlContext.Data.TrainTestSplit(fullData, testFraction: 0.2);
testDataView = trainTestData.TestSet;
trainDataView = trainTestData.TrainSet;

// Run the AutoML regression experiment.
Console.WriteLine($"Running AutoML regression experiment for {ExperimentTime} seconds...");

// Experiment configuration: time budget and the metric to optimize.
var settings = new RegressionExperimentSettings();
settings.MaxExperimentTimeInSeconds = ExperimentTime;
settings.OptimizingMetric = RegressionMetric.MeanAbsoluteError;

// Tell AutoML which column holds the value to predict.
var labelColumnInfo = new ColumnInformation { LabelColumnName = "Label" };

// Progress callback: print trainer name, RSquared and MeanAbsoluteError for each
// reported run, skipping runs that carry no validation metrics.
var progress = new Progress<RunDetail<RegressionMetrics>>(p =>
{
    if (p.ValidationMetrics == null)
    {
        return;
    }

    Console.WriteLine($"Current Result - {p.TrainerName}, {p.ValidationMetrics.RSquared}, {p.ValidationMetrics.MeanAbsoluteError}");
});

var experiment = mlContext.Auto().CreateRegressionExperiment(settings);

var experimentResult = experiment.Execute(
    trainDataView,
    labelColumnInfo,
    progressHandler: progress);

Running AutoML regression experiment for 10 seconds...
Current Result - SdcaRegression, 0,6275558016003226, 51551,89921636086
Current Result - FastTreeRegression, 0,8276824825461914, 31970,344956517583
Current Result - FastTreeTweedieRegression, 0,8216709829703559, 32635,116353688838
Current Result - FastForestRegression, 0,6647401290673212, 49037,0146358945
Current Result - LbfgsPoissonRegression, 0,623092389750161, 50304,22777976873
Current Result - OnlineGradientDescentRegression, 0,44966252559781805, 64364,18815462538
Current Result - FastTreeRegression, 0,5919585518519238, 51804,475554281344
Current Result - FastTreeTweedieRegression, -2,8748512195422906, 196081,4350484399
Current Result - FastForestRegression, 0,6049637958273402, 54135,36702025994
Current Result - FastTreeRegression, 0,8171954514353129, 33366,53864439985
Current Result - FastTreeTweedieRegression, 0,7530260645732515, 39691,57006163991
Current Result - FastForestRegression, 0,6443569359697736, 50944,01627006881
Cu

In [15]:
// Report every run produced by the experiment, then the metrics of the best one.
var bestRun = experimentResult.BestRun;
Console.WriteLine("=====================================================");
Console.WriteLine($"Total models produced: {experimentResult.RunDetails.Count()}");

var i = 0;
foreach (var experimentResultRunDetail in experimentResult.RunDetails)
{
    i++;
    Console.WriteLine($"  {i} – TrainerName: {experimentResultRunDetail.TrainerName}");
    Console.WriteLine($"      Runtime In Seconds: {experimentResultRunDetail.RuntimeInSeconds}");
    Console.WriteLine(Environment.NewLine);
}

Console.WriteLine("");
Console.WriteLine("=====================================================");
Console.WriteLine($"Best model's trainer: {bestRun.TrainerName}");
PrintMetrics(bestRun.ValidationMetrics);


// Evaluate the best model on the held-out test data.
IDataView testDataViewWithBestScore = bestRun.Model.Transform(testDataView);
RegressionMetrics testMetrics = mlContext.Regression.Evaluate(testDataViewWithBestScore, labelColumnName: "Label");
Console.WriteLine(Environment.NewLine);
Console.WriteLine("Metrics of best model on test data --");
Console.WriteLine(Environment.NewLine);
PrintMetrics(testMetrics);

// Save the best model for later deployment and inferencing.
// The braces make the lifetime of the file stream explicit.
using (FileStream fs = File.Create(ModelPath))
{
    mlContext.Model.Save(bestRun.Model, trainDataView.Schema, fs);
}

// Create a prediction engine from the best trained model.
var predictionEngine = mlContext.Model.CreatePredictionEngine<ModelInput, ModelOutput>(bestRun.Model);

// Build a single sample house and get its predicted Median_house_value.
// Median_house_value is left unset on the sample, so only the prediction is printed.
var sampleData = new ModelInput()
{
    Longitude = -122.23F,
    Latitude = 37.88F,
    Housing_median_age = 41F,
    Total_rooms = 880F,
    Total_bedrooms = 129F,
    Population = 322F,
    Households = 126F,
    Median_income = 8.3252F,
    Ocean_proximity = @"NEAR BAY",
};

var resultprediction = predictionEngine.Predict(sampleData);

Console.WriteLine("--");
// Message fixed: the original text was garbled ("previsto de para") and announced a
// comparison with an actual value that this cell never sets or prints.
Console.WriteLine("Usando o modelo para fazer predição única - Valor previsto de Median_house_value para os dados de amostra ... \n \n");
Console.WriteLine($"Longitude: {sampleData.Longitude}");
Console.WriteLine($"Latitude: {sampleData.Latitude}");
Console.WriteLine($"Housing_median_age: {sampleData.Housing_median_age}");
Console.WriteLine($"Total_rooms: {sampleData.Total_rooms}");
Console.WriteLine($"Total_bedrooms: {sampleData.Total_bedrooms}");
Console.WriteLine($"Population: {sampleData.Population}");
Console.WriteLine($"Households: {sampleData.Households}");
Console.WriteLine($"Median_income: {sampleData.Median_income}");
Console.WriteLine($"Ocean_proximity: {sampleData.Ocean_proximity}");
// Message fixed: the prediction is for the sample house, not for a "size" ("tamanho").
Console.WriteLine($"\n \nO Preço previsto para este imóvel é: {resultprediction.Score} \n \n");
Console.WriteLine("=============== Fim do processo ===============");

Total models produced: 22
  1 – TrainerName: SdcaRegression
      Runtime In Seconds: 0,1744149


  2 – TrainerName: LightGbmRegression
      Runtime In Seconds: 0,283941


  3 – TrainerName: FastTreeRegression
      Runtime In Seconds: 0,2409188


  4 – TrainerName: FastTreeTweedieRegression
      Runtime In Seconds: 0,3046249


  5 – TrainerName: FastForestRegression
      Runtime In Seconds: 0,32777


  6 – TrainerName: LbfgsPoissonRegression
      Runtime In Seconds: 0,1478876


  7 – TrainerName: OnlineGradientDescentRegression
      Runtime In Seconds: 0,0751289


  8 – TrainerName: OlsRegression
      Runtime In Seconds: 0,0717289


  9 – TrainerName: FastTreeRegression
      Runtime In Seconds: 0,1814217


  10 – TrainerName: FastTreeTweedieRegression
      Runtime In Seconds: 0,1565188


  11 – TrainerName: FastForestRegression
      Runtime In Seconds: 0,2505778


  12 – TrainerName: FastTreeRegression
      Runtime In Seconds: 0,5682993


  13 – TrainerName: FastTreeTweedieR