# ML.Net - Building a Fraud Classifier in ML.NET with Jupyter Notebooks

## Import Nuget Packages

In [1]:
// ML.NET Nuget packages 
#r "nuget:Microsoft.ML"     

// ML.NET FastTree Nuget packages 
#r "nuget:Microsoft.ML.FastTree"

// ML.NET LightGbm Nuget packages     
#r "nuget:Microsoft.ML.LightGbm"

// ML.NET AutoML Nuget packages   
#r "nuget:Microsoft.ML.AutoML"

// ML.NET Data.Analysis Nuget packages      
#r "nuget:Microsoft.Data.Analysis  

Installed package Microsoft.ML version 1.4.0

Installed package Microsoft.ML.FastTree version 1.5.0

Installed package Microsoft.Data.Analysis version 0.4.0

Installed package Microsoft.ML.AutoML version 0.17.0

Installed package Microsoft.ML.LightGbm version 1.5.0

In [2]:
// Declare Using
using Microsoft.ML;
using Microsoft.ML.Trainers.FastTree;
using System;
using System.Diagnostics;
using Microsoft.ML.Data;
using XPlot.Plotly;
using Microsoft.ML.AutoML;

In [3]:
// Create new stopwatch.
Stopwatch stopwatch = new Stopwatch();

// Begin timing.
stopwatch.Start();
Console.WriteLine("Time elapsed: {0}", stopwatch.Elapsed);

Time elapsed: 00:00:00.0000027


In [4]:
var mlContext = new MLContext(seed: 1);

var DataPath = "./Datasets/fraudulent-classifier/data.csv";

In [5]:
public sealed class Transaction
{
     [ColumnName("Step"), LoadColumn(0)]
     public float Step { get; set; }

     [ColumnName("Type"), LoadColumn(1)]
     public string Type { get; set; }

     [ColumnName("Amount"), LoadColumn(2)]
     public float Amount { get; set; }

     [ColumnName("NameOrig"), LoadColumn(3)]
     public string NameOrig { get; set; }

     [ColumnName("OldbalanceOrg"), LoadColumn(4)]
     public float OldbalanceOrg { get; set; }

     [ColumnName("NewbalanceOrig"), LoadColumn(5)]
     public float NewbalanceOrig { get; set; }

     [ColumnName("NameDest"), LoadColumn(6)]
     public string NameDest { get; set; }

     [ColumnName("OldbalanceDest"), LoadColumn(7)]
     public float OldbalanceDest { get; set; }

     [ColumnName("NewbalanceDest"), LoadColumn(8)]
     public float NewbalanceDest { get; set; }

     [ColumnName("IsFraud"), LoadColumn(9)]
     public bool IsFraud { get; set; }

     [ColumnName("IsFlaggedFraud"), LoadColumn(10)]
     public float IsFlaggedFraud { get; set; }

}

In [6]:
var data = mlContext.Data.LoadFromTextFile<Transaction>(DataPath, hasHeader: true, separatorChar: ',');

In [7]:
var testTrainData = mlContext.Data.TrainTestSplit(data);

Console.WriteLine("Time elapsed: {0}-TrainTestSplit", stopwatch.Elapsed);

Time elapsed: 00:00:03.7999626-TrainTestSplit


In [8]:
mlContext.Transforms.Categorical.OneHotEncoding(nameof(Transaction.Type))

In [9]:
var dataProcessingPipeline = mlContext.Transforms.Categorical.OneHotEncoding(nameof(Transaction.Type))
            .Append(mlContext.Transforms.Categorical.OneHotHashEncoding(nameof(Transaction.NameDest))
            .Append(mlContext.Transforms.Concatenate("Features", nameof(Transaction.Type), nameof(Transaction.NameDest), 
            nameof(Transaction.Amount), nameof(Transaction.OldbalanceOrg), nameof(Transaction.OldbalanceDest), 
            nameof(Transaction.NewbalanceOrig), nameof(Transaction.NewbalanceDest))));

Console.WriteLine("Time elapsed: {0}-BuildDataProcessingPipeline", stopwatch.Elapsed);

Time elapsed: 00:00:05.4337496-BuildDataProcessingPipeline


In [10]:
var trainingPipeline = dataProcessingPipeline
  .Append(mlContext.BinaryClassification.Trainers.LbfgsLogisticRegression(labelColumnName: nameof(Transaction.IsFraud)));

Console.WriteLine("Time elapsed: {0}-BuildTrainingPipeline", stopwatch.Elapsed);

Time elapsed: 00:00:06.0010811-BuildTrainingPipeline


In [11]:
var trainedModel = trainingPipeline.Fit(testTrainData.TrainSet);

Console.WriteLine("Time elapsed: {0}-Fit", stopwatch.Elapsed);

Time elapsed: 00:00:09.9533032-Fit


In [12]:
var predictions = trainedModel.Transform(testTrainData.TestSet);

Console.WriteLine("Time elapsed: {0}-Transform", stopwatch.Elapsed);

Time elapsed: 00:00:09.9934016-Transform


In [13]:
//Evaluate
var metrics = mlContext.BinaryClassification.Evaluate(predictions, labelColumnName: nameof(Transaction.IsFraud));  

Console.WriteLine("Time elapsed: {0}-Evaluate", stopwatch.Elapsed);

Time elapsed: 00:00:10.3931196-Evaluate


In [14]:
Console.WriteLine($"Accuracy: {metrics.Accuracy}");
Console.WriteLine($"AUCPC: {metrics.AreaUnderPrecisionRecallCurve}");
Console.WriteLine($"Recall: {metrics.PositiveRecall}");
Console.WriteLine($"Precision: {metrics.PositivePrecision}");
Console.WriteLine($"F1Score: {metrics.F1Score}");

Accuracy: 0,9996751218293141
AUCPC: 0,6176167113303728
Recall: 0,5714285714285714
Precision: 0,75
F1Score: 0,6486486486486486


In [15]:
//Save
mlContext.Model.Save(trainedModel, data.Schema, @"./Datasets/fraudulent-classifier/LModel_LbfgsLogisticRegression.zip");
Console.WriteLine("End Program Time elapsed: {0}", stopwatch.Elapsed);
//stopwatch.Stop();

End Program Time elapsed: 00:00:10.7051855


In [16]:
var trainingPipeline = dataProcessingPipeline
  .Append(mlContext.BinaryClassification.Trainers.FastTree(new FastTreeBinaryTrainer.Options 
{ 
  NumberOfLeaves = 10, 
  NumberOfTrees = 50,  
  LabelColumnName = nameof(Transaction.IsFraud),
  FeatureColumnName = "Features" 
}));

In [17]:
var trainedModel = trainingPipeline.Fit(testTrainData.TrainSet);

Console.WriteLine("Time elapsed: {0}-Fit", stopwatch.Elapsed);

Time elapsed: 00:00:16.8928715-Fit


In [18]:
var predictions = trainedModel.Transform(testTrainData.TestSet);

Console.WriteLine("Time elapsed: {0}-Transform", stopwatch.Elapsed);

Time elapsed: 00:00:16.9427905-Transform


In [19]:
var metrics = mlContext.BinaryClassification.Evaluate(predictions, labelColumnName: nameof(Transaction.IsFraud));  

Console.WriteLine("Time elapsed: {0}-Evaluate", stopwatch.Elapsed);

Time elapsed: 00:00:17.3736906-Evaluate


In [20]:
Console.WriteLine($"Accuracy: {metrics.Accuracy}");
Console.WriteLine($"AUCPC: {metrics.AreaUnderPrecisionRecallCurve}");
Console.WriteLine($"Recall: {metrics.PositiveRecall}");
Console.WriteLine($"Precision: {metrics.PositivePrecision}");
Console.WriteLine($"F1Score: {metrics.F1Score}");

Accuracy: 0,9995001874297138
AUCPC: 0,3238944952685146
Recall: 0,14285714285714285
Precision: 0,6
F1Score: 0,23076923076923073


In [21]:
//Save
mlContext.Model.Save(trainedModel, data.Schema, @"./Datasets/fraudulent-classifier/LModel_FastTree.zip");
Console.WriteLine("End Program Time elapsed: {0}", stopwatch.Elapsed);
//stopwatch.Stop();

End Program Time elapsed: 00:00:17.5426170


# AutoML

In [None]:
using Microsoft.ML.AutoML;


var result = mlContext.Auto()
                .CreateBinaryClassificationExperiment(50)
                .Execute(testTrainData.TrainSet, labelColumnName: "IsFraud");

In [None]:
var scatters = result.RunDetails.Where(d => d.ValidationMetrics != null).GroupBy(    
    r => r.TrainerName,
    (name, details) => new Graph.Scatter()
    {
        name = name,
        x = details.Select(r => r.RuntimeInSeconds),
        y = details.Select(r => r.ValidationMetrics.Accuracy),
        mode = "markers",
        marker = new Graph.Marker() { size = 12 }
    });

var chart = Chart.Plot(scatters);
chart.WithXTitle("Training Time");
chart.WithYTitle("Accuracy");
display(chart);

Console.WriteLine($"Best Trainer:{result.BestRun.TrainerName}");

## Evaluate AutoML

In [None]:
var predictions = result.BestRun.Model.Transform(testTrainData.TestSet);
var metrics = mlContext.BinaryClassification.Evaluate(predictions, labelColumnName: "IsFraud");
display(metrics)

In [None]:
Console.WriteLine($"Accuracy: {metrics.Accuracy}");
Console.WriteLine($"AUCPC: {metrics.AreaUnderPrecisionRecallCurve}");
Console.WriteLine($"Recall: {metrics.PositiveRecall}");
Console.WriteLine($"Precision: {metrics.PositivePrecision}");
Console.WriteLine($"F1Score: {metrics.F1Score}");

## Save

In [None]:
mlContext.Model.Save(trainedModel, data.Schema, "./Datasets/fraudulent-classifier/ML_Autoodel.zip"); 

## Test Prediction

In [None]:
public class FraudPrediction
{
    // ColumnName attribute is used to change the column name from
    // its default value, which is the name of the field.
    [ColumnName("PredictedLabel")]
    public bool IsFraud { get; set; }

    [ColumnName("Score")]
    public float Score { get; set; }
}

In [None]:
PredictionEngine<Transaction, FraudPrediction> predictionEngine;

In [None]:
//Define DataViewSchema for data preparation pipeline and trained model
DataViewSchema modelSchema;

var model = mlContext.Model.Load(@"./Datasets/fraudulent-classifier/ML_Autoodel.zip", out modelSchema);
            
predictionEngine = mlContext.Model.CreatePredictionEngine<Transaction, FraudPrediction>(model);



In [31]:
var transaction = new Transaction 
{
        Amount = 1500f,
        OldbalanceDest = 100,
        NewbalanceDest = 300,
        NameDest = "C123",
        NameOrig = "B123"
};
          
var result = predictionEngine.Predict(transaction);

result

IsFraud,Score
False,-20.170843
