## Davi Ramos -> Cientista de Dados 👋
(davi.info@gmail.com)

[![Linkedin Badge](https://img.shields.io/badge/-LinkedIn-blue?style=flat-square&logo=Linkedin&logoColor=white&link=https://www.linkedin.com/in/davi-ramos/)](https://www.linkedin.com/in/davi-ramos/)
[![Twitter Badge](https://img.shields.io/badge/-Twitter-1DA1F2?style=flat-square&logo=Twitter&logoColor=white&link=https://twitter.com/Daviinfo/)](https://twitter.com/Daviinfo/)
<a href="https://github.com/DaviRamos"><img src="https://img.shields.io/github/followers/DaviRamos.svg?label=GitHub&style=social" alt="GitHub"></a>

# ML.NET - Análise de Sentimentos
## Este exemplo cria um modelo de análise de sentimentos a partir dos comentários rotulados do Yelp.

In [3]:
// Instalar os Pacotes do Nuget

// ML.NET
#r "nuget:Microsoft.ML"  

In [4]:
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using Microsoft.ML;
using Microsoft.ML.Data;
using static Microsoft.ML.DataOperationsCatalog;
using Microsoft.ML.Trainers;
using Microsoft.ML.Transforms.Text;

In [5]:
/// <summary>
/// One labelled review row, as mapped from the training text file.
/// </summary>
public class SentimentData
{
    /// <summary>Review text, taken from source column index 0.</summary>
    [LoadColumn(0)]
    public string SentimentText;

    /// <summary>
    /// Sentiment flag, taken from source column index 1 and exposed to the
    /// pipeline under the column name "Label".
    /// </summary>
    [LoadColumn(1)]
    [ColumnName("Label")]
    public bool Sentiment;
}

/// <summary>
/// Output row produced by the model for one review. Inherits the input
/// members of <see cref="SentimentData"/>, so the original text is available
/// next to the prediction when results are printed.
/// </summary>
public class SentimentPrediction : SentimentData
{
    /// <summary>Predicted class, mapped from the "PredictedLabel" column.</summary>
    [ColumnName("PredictedLabel")]
    public bool Prediction { get; set; }

    /// <summary>
    /// Probability value reported with the prediction. NOTE(review): presumably the
    /// calibrated probability of the positive class; confirm against the trainer docs.
    /// </summary>
    public float Probability { get; set; }

    /// <summary>
    /// Score value reported with the prediction. NOTE(review): presumably the raw,
    /// uncalibrated model output; confirm against the trainer docs.
    /// </summary>
    public float Score { get; set; }
}






In [6]:
// Relative path of the labelled Yelp reviews file that is loaded as the dataset.
private static string TRAIN_DATA_FILEPATH = "./datasets/yelp_labelled/yelp_labelled.txt";

// Relative path for a model archive; not referenced by the cells visible in this notebook.
private static string MODEL_FILEPATH = "./datasets/yelp_labelled/MLModel.zip";

In [8]:
// Shared ML.NET context used by every later cell for data loading, transforms,
// training and evaluation.
// A fixed seed is supplied so that the context's random operations (such as the
// train/test split) are repeatable between notebook runs; without it every
// re-execution produces a different split and different metrics.
// NOTE(review): multi-threaded trainers may still show small run-to-run variation.
MLContext mlContext = new MLContext(seed: 0);

In [13]:
// Read the review file into an IDataView, mapping its columns onto SentimentData.
// Loading from a file is the simplest way to get started; ML.NET can also load
// data from databases or in-memory collections.
IDataView dataView = mlContext.Data.LoadFromTextFile<SentimentData>(
    path: TRAIN_DATA_FILEPATH,
    hasHeader: false);

In [17]:
// Training and evaluation need separate data: the model is fitted on the train
// set and judged on the test set. `testFraction` is the share of rows held out
// for testing (0.2 here).
TrainTestData splitDataView = mlContext.Data.TrainTestSplit(
    data: dataView,
    testFraction: 0.2);

In [20]:
// The training pipeline is a chain of estimators, built here in two stages.

// Stage 1: convert the raw review text into a numeric vector stored in "Features".
var textFeaturizer = mlContext.Transforms.Text.FeaturizeText(
    outputColumnName: "Features",
    inputColumnName: nameof(SentimentData.SentimentText));

// Stage 2: the binary classification trainer, reading "Features" and learning "Label".
var sdcaTrainer = mlContext.BinaryClassification.Trainers.SdcaLogisticRegression(
    labelColumnName: "Label",
    featureColumnName: "Features");

// Append the learning task to the featurization step.
var estimator = textFeaturizer.Append(sdcaTrainer);

// Fit the whole chain on the training portion of the split.
Console.WriteLine("=============== Create and Train the Model ===============");
var model = estimator.Fit(splitDataView.TrainSet);
Console.WriteLine("=============== End of training ===============");
Console.WriteLine();








In [22]:
// Score the held-out test rows with the trained model and report quality metrics.
Console.WriteLine("=============== Evaluating Model accuracy with Test data===============");
IDataView predictions = model.Transform(splitDataView.TestSet);

// Evaluate compares the scored rows against the "Label" column and returns the
// overall metrics for the test set.
var metrics = mlContext.BinaryClassification.Evaluate(
    data: predictions,
    labelColumnName: "Label");

// Metrics printed below:
//   Accuracy          - proportion of correct predictions in the test set.
//   AreaUnderRocCurve - probability that the model ranks a randomly chosen positive
//                       instance higher than a randomly chosen negative one.
//   F1Score           - harmonic mean of precision and recall:
//                       2 * precision * recall / (precision + recall).
Console.WriteLine();
Console.WriteLine("Model quality metrics evaluation");
Console.WriteLine("--------------------------------");
Console.WriteLine($"Accuracy: {metrics.Accuracy:P2}");
Console.WriteLine($"Auc: {metrics.AreaUnderRocCurve:P2}");
Console.WriteLine($"F1Score: {metrics.F1Score:P2}");
Console.WriteLine("=============== End of model evaluation ===============");



Model quality metrics evaluation
--------------------------------
Accuracy: 83,96%
Auc: 90,04%
F1Score: 84,38%


In [23]:
// Build a prediction engine from the trained model so a single SentimentData
// instance can be scored directly.
var predictionFunction = mlContext.Model.CreatePredictionEngine<SentimentData, SentimentPrediction>(model);

// One hand-written review to try the model on.
var sampleStatement = new SentimentData { SentimentText = "This was a very bad steak" };
var resultPrediction = predictionFunction.Predict(sampleStatement);

Console.WriteLine();
Console.WriteLine("=============== Prediction Test of model with a single sample and test dataset ===============");
Console.WriteLine();

// Map the boolean prediction to the label shown in the output.
string singleLabel = resultPrediction.Prediction ? "Positive" : "Negative";
Console.WriteLine($"Sentiment: {resultPrediction.SentimentText} | Prediction: {singleLabel} | Probability: {resultPrediction.Probability} ");

Console.WriteLine("=============== End of Predictions ===============");
Console.WriteLine();



Sentiment: This was a very bad steak | Prediction: Negative | Probability: 0,02867834 



In [24]:
// Adds some comments to test the trained model's data points.
// A small hand-written batch of reviews to score with the trained model.
IEnumerable<SentimentData> sentiments = new SentimentData[]
{
    new SentimentData { SentimentText = "This was a horrible meal" },
    new SentimentData { SentimentText = "I love this spaghetti." },
};

// Wrap the in-memory batch as an IDataView and run it through the model.
IDataView batchComments = mlContext.Data.LoadFromEnumerable(sentiments);
IDataView predictions = model.Transform(batchComments);

// Read the scored rows back as SentimentPrediction objects; reuseRowObject: false
// is passed so each row is materialized as its own instance.
IEnumerable<SentimentPrediction> predictedResults =
    mlContext.Data.CreateEnumerable<SentimentPrediction>(predictions, reuseRowObject: false);

Console.WriteLine();
Console.WriteLine("=============== Prediction Test of loaded model with multiple samples ===============");
Console.WriteLine();

foreach (var row in predictedResults)
{
    // true is reported as "Positive", false as "Negative".
    string rowLabel = row.Prediction ? "Positive" : "Negative";
    Console.WriteLine($"Sentiment: {row.SentimentText} | Prediction: {rowLabel} | Probability: {row.Probability} ");
}

Console.WriteLine("=============== End of predictions ===============");



Sentiment: This was a horrible meal | Prediction: Negative | Probability: 0,040245872 
Sentiment: I love this spaghetti. | Prediction: Positive | Probability: 0,9973949 
