# ML.Net - Titanic

In [1]:
// ML.NET Nuget packages installation
#r "nuget:Microsoft.ML"

Installed package Microsoft.ML version 1.5.0

In [2]:
using System;
using Microsoft.ML;
using Microsoft.ML.Data;
using Microsoft.ML.Transforms;

In [3]:
public sealed class Passenger
{
    [LoadColumn(0)]
    public bool Survived { get; set; }

    [LoadColumn(1)]
    public float Pclass { get; set; }

    [LoadColumn(2)]
    public string Name { get; set; }

    [LoadColumn(3)]
    public string Sex { get; set; }

    [LoadColumn(4)]
    public float Age { get; set; }

    [LoadColumn(5)]
    public float SiblingsAboard { get; set; }

    [LoadColumn(6)]
    public float ParentsAboard { get; set; }
}

public sealed class SurvivalPrediction
{
    [ColumnName("PredictedLabel")]
    public bool Survived { get; set; }
}

In [4]:
public static void PrintBinaryClassificationMetrics(string name, CalibratedBinaryClassificationMetrics metrics)
{
    Console.WriteLine($"************************************************************");
    Console.WriteLine($"*       Metrics for {name} binary classification model      ");
    Console.WriteLine($"*-----------------------------------------------------------");
    Console.WriteLine($"*       Accuracy: {metrics.Accuracy:P2}");
    Console.WriteLine($"*       F1Score:  {metrics.F1Score:P2}");
    Console.WriteLine($"*       LogLoss:  {metrics.LogLoss:#.##}");
    Console.WriteLine($"*       LogLossReduction:  {metrics.LogLossReduction:#.##}");
    Console.WriteLine($"*       PositivePrecision:  {metrics.PositivePrecision:#.##}");
    Console.WriteLine($"*       PositiveRecall:  {metrics.PositiveRecall:#.##}");
    Console.WriteLine($"*       NegativePrecision:  {metrics.NegativePrecision:#.##}");
    Console.WriteLine($"*       NegativeRecall:  {metrics.NegativeRecall:P2}");
    Console.WriteLine($"************************************************************");
}

In [5]:
private const string DataPath = "Datasets/Titanic_Data/data.csv";

In [6]:
Console.WriteLine("Starting to train model");

var mlContext = new MLContext(seed: 1);

//Load
Console.WriteLine("Reading data...");
var data = mlContext.Data.LoadFromTextFile<Passenger>(DataPath, hasHeader: true, separatorChar: ',');
var testTrainDataSet = mlContext.Data.TrainTestSplit(data);

//Transform
Console.WriteLine("Transforming data...");
var dataProcessPipeline = mlContext.Transforms.Categorical.OneHotEncoding("Sex")
    .Append(mlContext.Transforms.Text.FeaturizeText("Name"))
    .Append(mlContext.Transforms.ReplaceMissingValues("Age", replacementMode: MissingValueReplacingEstimator.ReplacementMode.Mean)
    .Append(mlContext.Transforms.Concatenate("Features", "Pclass", "Sex", "Name",
            "SiblingsAboard", "ParentsAboard")));

//Train
Console.WriteLine("Training data...");
var trainingPipeline = dataProcessPipeline
    .Append(mlContext.BinaryClassification.Trainers.LbfgsLogisticRegression(labelColumnName: "Survived"));
ITransformer trainedModel = trainingPipeline.Fit(testTrainDataSet.TrainSet);

//Evaluate
var predictions = trainedModel.Transform(testTrainDataSet.TestSet);
var metrics = mlContext.BinaryClassification.Evaluate(predictions, labelColumnName: "Survived");

//Print metrics
PrintBinaryClassificationMetrics(trainingPipeline.ToString(), metrics);

//Save model
mlContext.Model.Save(trainedModel, testTrainDataSet.TrainSet.Schema, "model.zip");

Console.ReadLine();

Starting to train model
Reading data...
Transforming data...
Training data...
************************************************************
*       Metrics for Microsoft.ML.Data.EstimatorChain`1[Microsoft.ML.Data.BinaryPredictionTransformer`1[Microsoft.ML.Calibrators.CalibratedModelParametersBase`2[Microsoft.ML.Trainers.LinearBinaryModelParameters,Microsoft.ML.Calibrators.PlattCalibrator]]] binary classification model      
*-----------------------------------------------------------
*       Accuracy: 85,39%
*       F1Score:  77,19%
*       LogLoss:  ,52
*       LogLossReduction:  ,44
*       PositivePrecision:  ,81
*       PositiveRecall:  ,73
*       NegativePrecision:  ,87
*       NegativeRecall:  91,53%
************************************************************
