# ML.Net - Titanic

In [2]:
// ML.NET Nuget packages installation
#r "nuget:Microsoft.ML"

Installed package Microsoft.ML version 1.5.0

In [8]:
using Microsoft.ML;
using Microsoft.ML.Data;
using Microsoft.ML.Transforms;
using System;
using System.Linq;

In [12]:
public class TitanicData
{
    [LoadColumn(0)]
    public float PassengerId;

    [LoadColumn(1)]
    public bool HasSurvived;

    [LoadColumn(2)]
    public float Pclass;

    [LoadColumn(3)]
    public string Name;

    [LoadColumn(4)]
    public string Sex;

    [LoadColumn(5)]
    public float Age;

    [LoadColumn(6)]
    public float SibSp;

    [LoadColumn(7)]
    public float Parch;

    [LoadColumn(8)]
    public string Ticket;

    [LoadColumn(9)]
    public float Fare;

    [LoadColumn(10)]
    public string Cabin;

    [LoadColumn(11)]
    public string Embarked;
}

public class TitanicPrediction
{
    [LoadColumn("PredictedLabel")]
    public bool Prediction;

    public float Score;
}

Unhandled exception: (42,17): error CS1503: Argumento 1: não é possível converter de "string" para "int"

In [6]:
var context = new MLContext();

var textLoader = context.Data.TextReader(new TextLoader.Arguments()
{
    Separator = ",",
    HasHeader = true,
    Column = new[]
    {
        new TextLoader.Column("PassengerId", DataKind.R4, 0),
        new TextLoader.Column("Label", DataKind.Bool, 1),
        new TextLoader.Column("Pclass", DataKind.R4, 2),
        new TextLoader.Column("Name", DataKind.Text, 3),
        new TextLoader.Column("Sex", DataKind.Text, 4),
        new TextLoader.Column("Age", DataKind.R4, 5),
        new TextLoader.Column("SibSp", DataKind.R4, 6),
        new TextLoader.Column("Parch", DataKind.R4, 7),
        new TextLoader.Column("Ticket", DataKind.Text, 8),
        new TextLoader.Column("Fare", DataKind.R4, 9),
        new TextLoader.Column("Cabin", DataKind.Text, 10),
        new TextLoader.Column("Embarked", DataKind.Text, 11)
    }
});

IDataView data = textLoader.Read(".Datasets/Titanic_Data/titanic.csv");

var (trainData, testData) = context.BinaryClassification.TrainTestSplit(data, testFraction: 0.2);

var pipeline = context.Transforms.Concatenate("Text", "Name", "Sex", "Embarked")
    .Append(context.Transforms.Text.FeaturizeText("Text", "TextFeatures"))
    .Append(context.Transforms.Concatenate("Features", "TextFeatures", "Pclass", "Age", "Fare", "SibSp", "Parch"))
    .Append(context.BinaryClassification.Trainers.LogisticRegression("Label", "Features"));

Console.WriteLine("Cross validating...");

var crossValidateResults = context.BinaryClassification.CrossValidate(testData, pipeline);

var averageAuc = crossValidateResults.Average(i => i.metrics.Auc);

Console.WriteLine($"Average AUC - {averageAuc}");

var model = pipeline.Fit(trainData);

var predictionFunction = model.MakePredictionFunction<TitanicData, TitanicPrediction>(context);

var prediction = predictionFunction.Predict(new TitanicData { Sex = "F" });

Console.WriteLine($"Prediction - {prediction.Prediction}");

Console.ReadLine();

Starting to train model
Reading data...
Transforming data...
Training data...
************************************************************
*       Metrics for Microsoft.ML.Data.EstimatorChain`1[Microsoft.ML.Data.BinaryPredictionTransformer`1[Microsoft.ML.Calibrators.CalibratedModelParametersBase`2[Microsoft.ML.Trainers.LinearBinaryModelParameters,Microsoft.ML.Calibrators.PlattCalibrator]]] binary classification model      
*-----------------------------------------------------------
*       Accuracy: 85,39%
*       F1Score:  77,19%
*       LogLoss:  ,52
*       LogLossReduction:  ,44
*       PositivePrecision:  ,81
*       PositiveRecall:  ,73
*       NegativePrecision:  ,87
*       NegativeRecall:  91,53%
************************************************************
