# Hands-On ML.NET: Linear Regression

In [17]:
// ML.NET Nuget packages installation
#r "nuget:Microsoft.ML" 
#r "nuget:Newtonsoft.Json" 

## Using C# Class

In [18]:
using Microsoft.ML;
using Microsoft.ML.Data;
using System;
using System.IO;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using Newtonsoft.Json;

## Extension methods

In [19]:
/// <summary>
/// Returns the names of all public properties of <paramref name="objType"/> except the one
/// whose name equals <paramref name="labelName"/>, in the order reported by reflection.
/// </summary>
/// <typeparam name="T">Not used by the implementation; kept so existing call sites compile.</typeparam>
/// <param name="objType">Type whose public properties are enumerated.</param>
/// <param name="labelName">Property name to leave out of the result (exact, case-sensitive match).</param>
/// <returns>An array of the remaining property names.</returns>
public static string[] ToPropertyList<T>(this Type objType, string labelName)
{
    var featureNames =
        from property in objType.GetProperties()
        where property.Name != labelName
        select property.Name;

    return featureNames.ToArray();
}

## Declare data-classes for input data and predictions

In [20]:
/// <summary>
/// One input row for the model. Every property is bound to a column of the delimited
/// training file by its zero-based index through <c>LoadColumn</c>.
/// Declaration order matters: the feature list is built by reflecting over these properties.
/// </summary>
public class EmploymentHistory
{
    /// <summary>Column 0. The training pipeline copies this value into the "Label" column.</summary>
    [LoadColumn(0)] public float DurationInMonths { get; set; }

    /// <summary>Column 1.</summary>
    [LoadColumn(1)] public float IsMarried { get; set; }

    /// <summary>Column 2.</summary>
    [LoadColumn(2)] public float BSDegree { get; set; }

    /// <summary>Column 3.</summary>
    [LoadColumn(3)] public float MSDegree { get; set; }

    /// <summary>Column 4.</summary>
    [LoadColumn(4)] public float YearsExperience { get; set; }

    /// <summary>Column 5.</summary>
    [LoadColumn(5)] public float AgeAtHire { get; set; }

    /// <summary>Column 6.</summary>
    [LoadColumn(6)] public float HasKids { get; set; }

    /// <summary>Column 7.</summary>
    [LoadColumn(7)] public float WithinMonthOfVesting { get; set; }

    /// <summary>Column 8.</summary>
    [LoadColumn(8)] public float DeskDecorations { get; set; }

    /// <summary>Column 9.</summary>
    [LoadColumn(9)] public float LongCommute { get; set; }
}

/// <summary>
/// Output row produced by the prediction engine.
/// </summary>
public class EmploymentHistoryPrediction
{
    /// <summary>Populated from the model's "Score" output column.</summary>
    [ColumnName("Score")] public float DurationInMonths;
}

In [21]:
// File name under which the trained model is saved and later reloaded.
public const string MODEL_FILENAME = "chapter3.mdl";

// JSON document holding the single example used for prediction.
// Resolved against the current working directory.
static readonly string inputDataFile =
    Path.Combine(Environment.CurrentDirectory, "./Datasets/LinearRegression/input.json");

// Comma-separated training data. Resolved against the current working directory.
static readonly string trainingFileName =
    Path.Combine(Environment.CurrentDirectory, "./Datasets/LinearRegression/sampledata.csv");

// Full path of the persisted model. Note that this is rooted at AppContext.BaseDirectory,
// not at the working directory used for the dataset paths above.
protected static string ModelPath
{
    get { return Path.Combine(AppContext.BaseDirectory, MODEL_FILENAME); }
}

// Shared ML.NET context; the fixed seed keeps the split and training reproducible between runs.
MLContext MlContext = new MLContext(seed: 2020);

## Training

In [22]:
// Load the comma-separated training file into an IDataView typed by EmploymentHistory.
var trainingDataView = MlContext.Data.LoadFromTextFile<EmploymentHistory>(trainingFileName, ',');

// Hold out 40% of the rows for evaluation; the remaining 60% is used for training.
var dataSplit = MlContext.Data.TrainTestSplit(trainingDataView, testFraction: 0.4);

// Data preparation:
//   1. copy the target (DurationInMonths) into the "Label" column,
//   2. mean/variance-normalize every other column,
//   3. concatenate those columns into the "Features" vector.
// Each step is appended to the same flat chain. The previous version was missing a closing
// parenthesis after the YearsExperience normalizer, which nested all following steps inside
// that Append call instead of chaining them at the top level.
var dataProcessPipeline = MlContext.Transforms.CopyColumns("Label", nameof(EmploymentHistory.DurationInMonths))
    .Append(MlContext.Transforms.NormalizeMeanVariance(nameof(EmploymentHistory.IsMarried)))
    .Append(MlContext.Transforms.NormalizeMeanVariance(nameof(EmploymentHistory.BSDegree)))
    .Append(MlContext.Transforms.NormalizeMeanVariance(nameof(EmploymentHistory.MSDegree)))
    .Append(MlContext.Transforms.NormalizeMeanVariance(nameof(EmploymentHistory.YearsExperience)))
    .Append(MlContext.Transforms.NormalizeMeanVariance(nameof(EmploymentHistory.AgeAtHire)))
    .Append(MlContext.Transforms.NormalizeMeanVariance(nameof(EmploymentHistory.HasKids)))
    .Append(MlContext.Transforms.NormalizeMeanVariance(nameof(EmploymentHistory.WithinMonthOfVesting)))
    .Append(MlContext.Transforms.NormalizeMeanVariance(nameof(EmploymentHistory.DeskDecorations)))
    .Append(MlContext.Transforms.NormalizeMeanVariance(nameof(EmploymentHistory.LongCommute)))
    .Append(MlContext.Transforms.Concatenate("Features",
        typeof(EmploymentHistory).ToPropertyList<EmploymentHistory>(nameof(EmploymentHistory.DurationInMonths))));

// SDCA regression trainer reading the columns produced above.
var trainer = MlContext.Regression.Trainers.Sdca(labelColumnName: "Label", featureColumnName: "Features");

var trainingPipeline = dataProcessPipeline.Append(trainer);

// Fit the full pipeline (preparation + trainer) on the training portion only.
ITransformer trainedModel = trainingPipeline.Fit(dataSplit.TrainSet);

// Persist the model together with the input schema so it can be reloaded later.
MlContext.Model.Save(trainedModel, dataSplit.TrainSet.Schema, ModelPath);

// Score the held-out rows and compute regression metrics on them.
var testSetTransform = trainedModel.Transform(dataSplit.TestSet);

var modelMetrics = MlContext.Regression.Evaluate(testSetTransform);

// "0.##" is used for every metric: "#.##" renders 0 as an empty string and drops the
// leading zero for values below 1.
Console.WriteLine($"Loss Function: {modelMetrics.LossFunction:0.##}{Environment.NewLine}" +
                  $"Mean Absolute Error: {modelMetrics.MeanAbsoluteError:0.##}{Environment.NewLine}" +
                  $"Mean Squared Error: {modelMetrics.MeanSquaredError:0.##}{Environment.NewLine}" +
                  $"RSquared: {modelMetrics.RSquared:0.##}{Environment.NewLine}" +
                  $"Root Mean Squared Error: {modelMetrics.RootMeanSquaredError:0.##}");

Loss Function: 480,68
Mean Absolute Error: 19,26
Mean Squared Error: 480,68
RSquared: -0,31
Root Mean Squared Error: 21,92


In [23]:
// Schema of the data the saved model expects; filled in by Load below.
DataViewSchema modelSchema;

// Reload the model that the training cell persisted to ModelPath.
ITransformer mlModel = MlContext.Model.Load(ModelPath, out modelSchema);

// Engine for scoring a single EmploymentHistory instance at a time.
var predictionEngine = MlContext.Model.CreatePredictionEngine<EmploymentHistory, EmploymentHistoryPrediction>(mlModel);

// Raw JSON text of the example to score; also echoed back when the result is printed.
var json = File.ReadAllText(inputDataFile);

// DeserializeObject returns null for empty or literal "null" content; fail with a clear
// message here instead of passing null into the prediction engine.
var inputExample = JsonConvert.DeserializeObject<EmploymentHistory>(json)
    ?? throw new InvalidOperationException(
        $"Could not deserialize an EmploymentHistory instance from '{inputDataFile}'.");

var prediction = predictionEngine.Predict(inputExample);



In [24]:
// Echo the input JSON followed by the predicted duration.
// "0.##" keeps a leading zero and prints "0" for a zero prediction; the former "#.##"
// rendered 0 as an empty string and 0.5 without its leading digit.
Console.WriteLine(
    $"Based on input json:{System.Environment.NewLine}" +
    $"{json}{System.Environment.NewLine}" +
    $"The employee is predicted to work {prediction.DurationInMonths:0.##} months");

Based on input json:
{
	"durationInMonths":0.0,
	"isMarried":0,
	"bSDegree":1,
	"mSDegree":0,
	"yearsExperience":2,
	"ageAtHire":29,
	"hasKids":0,
	"withinMonthOfVesting":0,
	"deskDecorations":1,
	"longCommute":1	
}
The employee is predicted to work 39,36 months
