## Davi Ramos -> Cientista de Dados 👋
(davi.info@gmail.com)

[![Linkedin Badge](https://img.shields.io/badge/-LinkedIn-blue?style=flat-square&logo=Linkedin&logoColor=white&link=https://www.linkedin.com/in/davi-ramos/)](https://www.linkedin.com/in/davi-ramos/)
[![Twitter Badge](https://img.shields.io/badge/-Twitter-1DA1F2?style=flat-square&logo=Twitter&logoColor=white&link=https://twitter.com/Daviinfo/)](https://twitter.com/Daviinfo/)
<a href="https://github.com/DaviRamos"><img src="https://img.shields.io/github/followers/DaviRamos.svg?label=GitHub&style=social" alt="GitHub"></a>

# ML.Net - Análise de Sentimentos
## Este exemplo irá criar um modelo para Análise  de sentimentos em Comentários Tóxicos

In [1]:
// Instalar os Pacotes do Nuget

// ML.NET
#r "nuget:Microsoft.ML"  

Installed package Microsoft.ML version 1.5.2

In [2]:
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using Microsoft.ML;
using Microsoft.ML.Data;

In [3]:
public class ModelInput
{
    [ColumnName("Sentiment"), LoadColumn(0)]
    public bool Sentiment { get; set; }


    [ColumnName("SentimentText"), LoadColumn(1)]
    public string SentimentText { get; set; }
}

public class ModelOutput
{
    // ColumnName attribute is used to change the column name from
    // its default value, which is the name of the field.
    [ColumnName("PredictedLabel")]
    public bool Prediction { get; set; }

    public float Score { get; set; }
}

In [23]:
private static string TRAIN_DATA_FILEPATH = @"./datasets/wikipedia_detox_250/wikipedia-detox-250-line-data.tsv";
private static string MODEL_FILEPATH = @"./datasets/wikipedia_detox_250/MLModel.zip";

In [24]:
public static void PrintBinaryClassificationMetrics(BinaryClassificationMetrics metrics)
{
    Console.WriteLine($"************************************************************");
    Console.WriteLine($"*       Metrics for binary classification model      ");
    Console.WriteLine($"*-----------------------------------------------------------");
    Console.WriteLine($"*       Accuracy: {metrics.Accuracy:P2}");
    Console.WriteLine($"*       Auc:      {metrics.AreaUnderRocCurve:P2}");
    Console.WriteLine($"************************************************************");
}


public static void PrintBinaryClassificationFoldsAverageMetrics(IEnumerable<TrainCatalogBase.CrossValidationResult<BinaryClassificationMetrics>> crossValResults)
{
    var metricsInMultipleFolds = crossValResults.Select(r => r.Metrics);

    var AccuracyValues = metricsInMultipleFolds.Select(m => m.Accuracy);
    var AccuracyAverage = AccuracyValues.Average();
    var AccuraciesStdDeviation = CalculateStandardDeviation(AccuracyValues);
    var AccuraciesConfidenceInterval95 = CalculateConfidenceInterval95(AccuracyValues);


    Console.WriteLine($"*************************************************************************************************************");
    Console.WriteLine($"*       Metrics for Binary Classification model      ");
    Console.WriteLine($"*------------------------------------------------------------------------------------------------------------");
    Console.WriteLine($"*       Average Accuracy:    {AccuracyAverage:0.###}  - Standard deviation: ({AccuraciesStdDeviation:#.###})  - Confidence Interval 95%: ({AccuraciesConfidenceInterval95:#.###})");
    Console.WriteLine($"*************************************************************************************************************");
}

public static double CalculateStandardDeviation(IEnumerable<double> values)
{
    double average = values.Average();
    double sumOfSquaresOfDifferences = values.Select(val => (val - average) * (val - average)).Sum();
    double standardDeviation = Math.Sqrt(sumOfSquaresOfDifferences / (values.Count() - 1));
    return standardDeviation;
}

public static double CalculateConfidenceInterval95(IEnumerable<double> values)
{
    double confidenceInterval95 = 1.96 * CalculateStandardDeviation(values) / Math.Sqrt((values.Count() - 1));
    return confidenceInterval95;
}

In [25]:
private static MLContext mlContext = new MLContext(seed: 1);

In [26]:
// Load Data
IDataView trainingDataView = mlContext.Data.LoadFromTextFile<ModelInput>(
                                path: TRAIN_DATA_FILEPATH,
                                hasHeader: true,
                                separatorChar: '\t',
                                allowQuoting: true,
                                allowSparse: false);

In [32]:
// Data process configuration with pipeline data transformations 
var dataProcessPipeline = mlContext.Transforms.Text.FeaturizeText("SentimentText_tf", "SentimentText")
                          .Append(mlContext.Transforms.CopyColumns("Features", "SentimentText_tf"))
                          .Append(mlContext.Transforms.NormalizeMinMax("Features", "Features"))
                          .AppendCacheCheckpoint(mlContext);

// Set the training algorithm 
var trainer = mlContext.BinaryClassification.Trainers.AveragedPerceptron(labelColumnName: "Sentiment", numberOfIterations: 10, featureColumnName: "Features");
var trainingPipeline = dataProcessPipeline.Append(trainer);





In [28]:
// Cross-Validate with single dataset (since we don't have two datasets, one for training and for evaluate)
// in order to evaluate and get the model's accuracy metrics
Console.WriteLine("=============== Cross-validating to get model's accuracy metrics ===============");
var crossValidationResults = mlContext.BinaryClassification.CrossValidateNonCalibrated(trainingDataView, trainingPipeline, numberOfFolds: 5, labelColumnName: "Sentiment");
PrintBinaryClassificationFoldsAverageMetrics(crossValidationResults);

*************************************************************************************************************
*       Metrics for Binary Classification model      
*------------------------------------------------------------------------------------------------------------
*       Average Accuracy:    0,666  - Standard deviation: (,092)  - Confidence Interval 95%: (,09)
*************************************************************************************************************


In [29]:
Console.WriteLine("=============== Training  model ===============");

model = trainingPipeline.Fit(trainingDataView);

Console.WriteLine("=============== End of training process ===============");
return model;



index,type,Unnamed: 2,FeatureColumnName,FeatureColumnType,Model
0,Microsoft.ML.Transforms.Text.TextFeaturizingEstimator+Transformer,Microsoft.ML.Transforms.Text.TextFeaturizingEstimator+Transformer,,,
1,Microsoft.ML.Transforms.ColumnCopyingTransformer,Microsoft.ML.Transforms.ColumnCopyingTransformer,,,
2,Microsoft.ML.Transforms.NormalizingTransformer,Microsoft.ML.Transforms.NormalizingTransformer,,,
3,Microsoft.ML.Data.BinaryPredictionTransformer<Microsoft.ML.Trainers.LinearBinaryModelParameters>,,Features,"Vector<Single, 18068>",Microsoft.ML.Trainers.LinearBinaryModelParameters


In [30]:
// Save/persist the trained model to a .ZIP file
Console.WriteLine($"=============== Saving the model  ===============");
mlContext.Model.Save(model, trainingDataView.Schema, MODEL_FILEPATH);
Console.WriteLine("The model is saved to {0}", MODEL_FILEPATH);

The model is saved to ./datasets/wikipedia_detox_250/MLModel.zip
