# Using ML.NET and XPlot in Jupyter notebooks
## Scenario: Regression model for Taxi fares

Regression is a type of supervised machine learning task. 
A regression ML model predicts continuous value outputs (such as numbers). 
For instance, predicting the fare of a Taxi trip or predicting the price of a car is a regression problem.

# Install the NuGet packages you use in the notebook

In [None]:
// Install the ML.NET NuGet packages
#r "nuget:Microsoft.ML,1.4.0"
#r "nuget:Microsoft.ML.AutoML,0.16.0"
    
//Install XPlot package
#r "nuget:XPlot.Plotly,3.0.1"

using Microsoft.ML;
using Microsoft.ML.AutoML;
using Microsoft.ML.Data;
using XPlot.Plotly;

# Declare data-classes for input data and predictions

In [None]:
/// <summary>
/// One row of the taxi-fare dataset. Each field is bound by position to a column of the
/// CSV file (vendor_id, rate_code, passenger_count, trip_time_in_secs, trip_distance,
/// payment_type, fare_amount).
/// </summary>
public class TaxiTrip
{
    /// <summary>Column 0: vendor identifier (for example "VTS").</summary>
    [LoadColumn(0)] public string VendorId;

    /// <summary>Column 1: rate code, loaded as text.</summary>
    [LoadColumn(1)] public string RateCode;

    /// <summary>Column 2: number of passengers.</summary>
    [LoadColumn(2)] public float PassengerCount;

    /// <summary>Column 3: trip time (the trip_time_in_secs column).</summary>
    [LoadColumn(3)] public float TripTime;

    /// <summary>Column 4: trip distance.</summary>
    [LoadColumn(4)] public float TripDistance;

    /// <summary>Column 5: payment type (for example "CRD").</summary>
    [LoadColumn(5)] public string PaymentType;

    /// <summary>Column 6: fare amount; the value the regression model learns to predict.</summary>
    [LoadColumn(6)] public float FareAmount;
}

/// <summary>
/// Output of the regression model for a single <c>TaxiTrip</c>.
/// </summary>
public class TaxiTripFarePrediction
{
    /// <summary>Predicted fare, read from the model's "Score" output column.</summary>
    [ColumnName("Score")] public float FareAmount;
}

# Load datasets into IDataView and display the schema 

In [None]:
// ML.NET context created with a fixed seed (0); the later cells reuse it.
var mlContext = new MLContext(seed: 0);

// Comma-separated training and test files.
string TrainDataPath = "./data/taxifare/taxi-fare-train.csv";
string TestDataPath = "./data/taxifare/taxi-fare-test.csv";

// Both files use the TaxiTrip column mapping and start with a header line.
IDataView trainDataView = mlContext.Data.LoadFromTextFile<TaxiTrip>(
    TrainDataPath,
    separatorChar: ',',
    hasHeader: true);
IDataView testDataView = mlContext.Data.LoadFromTextFile<TaxiTrip>(
    TestDataPath,
    separatorChar: ',',
    hasHeader: true);

// Render the schema of the training data in the cell output.
display(h2("Schema of training DataView:"));
display(trainDataView.Schema);


## Show a few rows of loaded data 

In [None]:
/// <summary>
/// Displays a short message and returns the first rows of a DataView as <c>TaxiTrip</c> objects.
/// </summary>
/// <param name="mlContext">Context used to enumerate the DataView.</param>
/// <param name="dataView">Data to read the rows from.</param>
/// <param name="numberOfRows">Maximum number of rows to return (defaults to 4).</param>
/// <returns>A list holding at most <paramref name="numberOfRows"/> rows.</returns>
public static List<TaxiTrip> Head(MLContext mlContext, IDataView dataView, int numberOfRows = 4)
{
    display($"DataView: Showing {numberOfRows} rows with the columns");

    // reuseRowObject: false gives every row its own TaxiTrip instance, so the list can keep them.
    IEnumerable<TaxiTrip> allRows = mlContext.Data.CreateEnumerable<TaxiTrip>(dataView, reuseRowObject: false);

    return allRows.Take(numberOfRows).ToList();
}

display(h3("Showing a few rows from training DataView"));

// Take the first five training rows and render them in the cell output.
var fewRows = Head(mlContext, trainDataView, numberOfRows: 5);
display(fewRows);

In [None]:
// Total width, in characters, of one printed table row including both '|' borders.
private const int Width = 114;

/// <summary>
/// Writes <paramref name="message"/> to the console as a table row framed by '|' characters.
/// </summary>
/// <param name="message">Row content; padded with spaces on the right.</param>
/// <param name="width">Total row width, borders included.</param>
private static void CreateRow(string message, int width)
{
    // Two characters of the width are taken by the borders.
    string paddedContent = message.PadRight(width - 2);
    Console.WriteLine($"|{paddedContent}|");
}

/// <summary>
/// Prints the column titles of the regression metrics table.
/// The column widths match the ones used by <c>PrintIterationMetrics</c>.
/// </summary>
public static void PrintRegressionMetricsHeader()
{
    string header = string.Format(
        "{0,-4} {1,-35} {2,8} {3,13} {4,12} {5,8} {6,9}",
        "", "Trainer", "RSquared", "Absolute-loss", "Squared-loss", "RMS-loss", "Duration");

    CreateRow(header, Width);
}
        
/// <summary>
/// Prints one row of the regression metrics table.
/// </summary>
/// <param name="iteration">Row number shown in the first column.</param>
/// <param name="trainerName">Name of the trainer that produced the run.</param>
/// <param name="metrics">Metrics of the run; when null, every metric is printed as NaN.</param>
/// <param name="runtimeInSeconds">Duration of the run; when null, it is printed as NaN.</param>
public static void PrintIterationMetrics(int iteration, string trainerName, RegressionMetrics metrics, double? runtimeInSeconds)
{
    // Missing values fall back to NaN so a run without metrics or duration still prints a row
    // (reading .Value on a null duration would throw InvalidOperationException).
    double rSquared = metrics?.RSquared ?? double.NaN;
    double absoluteLoss = metrics?.MeanAbsoluteError ?? double.NaN;
    double squaredLoss = metrics?.MeanSquaredError ?? double.NaN;
    double rmsLoss = metrics?.RootMeanSquaredError ?? double.NaN;
    double duration = runtimeInSeconds ?? double.NaN;

    CreateRow($"{iteration,-4} {trainerName,-35} {rSquared,8:F4} {absoluteLoss,13:F2} {squaredLoss,12:F2} {rmsLoss,8:F2} {duration,9:F1}", Width);
}

/// <summary>
/// Prints a table with the (up to) three best runs of an AutoML regression experiment,
/// ranked by R-Squared on the validation data.
/// </summary>
/// <param name="experimentResult">Result of the experiment whose runs are ranked.</param>
public static void PrintTopModels(ExperimentResult<RegressionMetrics> experimentResult)
{
    // Get top few runs ranked by R-Squared.
    // R-Squared is a metric to maximize, so OrderByDescending() is correct.
    // For RMSE and other regression metrics to minimize, use OrderBy() instead.
    // ToList() runs the query once; without it, every Count()/ElementAt() call in the loop
    // would filter and sort all runs again.
    var topRuns = experimentResult.RunDetails
        .Where(r => r.ValidationMetrics != null && !double.IsNaN(r.ValidationMetrics.RSquared))
        .OrderByDescending(r => r.ValidationMetrics.RSquared)
        .Take(3)
        .ToList();

    Console.WriteLine("Top models ranked by R-Squared --");
    PrintRegressionMetricsHeader();
    for (var i = 0; i < topRuns.Count; i++)
    {
        var run = topRuns[i];
        PrintIterationMetrics(i + 1, run.TrainerName, run.ValidationMetrics, run.RuntimeInSeconds);
    }
}

/// <summary>
/// Writes a framed summary of a regression model's evaluation metrics to the console.
/// </summary>
/// <param name="name">Model or trainer name shown in the banner line.</param>
/// <param name="metrics">Evaluation metrics to print.</param>
public static void PrintRegressionMetrics(string name, RegressionMetrics metrics)
{
    // Every value uses "0.##" so results below 1 keep their leading zero;
    // "#.##" would print 0.45 as ".45" and 0 as an empty string.
    Console.WriteLine("*************************************************");
    Console.WriteLine($"*       Metrics for {name} regression model      ");
    Console.WriteLine("*------------------------------------------------");
    Console.WriteLine($"*       LossFn:        {metrics.LossFunction:0.##}");
    Console.WriteLine($"*       R2 Score:      {metrics.RSquared:0.##}");
    Console.WriteLine($"*       Absolute loss: {metrics.MeanAbsoluteError:0.##}");
    Console.WriteLine($"*       Squared loss:  {metrics.MeanSquaredError:0.##}");
    Console.WriteLine($"*       RMS loss:      {metrics.RootMeanSquaredError:0.##}");
    Console.WriteLine("*************************************************");
}

In [None]:
// Time budget handed to the AutoML experiment, in seconds.
private const int ExperimentTime = 60;
// Name of the column the model is trained to predict.
private const string LabelColumnName = "FareAmount";

Console.WriteLine("=============== Training the model ===============");
Console.WriteLine($"Running AutoML regression experiment for {ExperimentTime} seconds...");

// Run the regression experiment on the training data.
var experimentResult =
    mlContext.Auto()
             .CreateRegressionExperiment(ExperimentTime)
             .Execute(trainDataView, LabelColumnName);

Console.WriteLine();
PrintTopModels(experimentResult);

In [None]:
Console.WriteLine("===== Evaluating model's accuracy with test data =====");

// Take the model of the experiment's best run and score the test data with it.
var best = experimentResult.BestRun;
ITransformer trainedModel = best.Model;
var predictions = trainedModel.Transform(testDataView);

// Compare the "Score" column of the predictions with the actual fare column.
var metrics = mlContext.Regression.Evaluate(
    predictions,
    labelColumnName: LabelColumnName,
    scoreColumnName: "Score");

// Print metrics from top model
Console.WriteLine();
PrintRegressionMetrics(best.TrainerName, metrics);

In [None]:
// Relative path of the zip file the trained model is written to.
private const string ModelFilePath = "./AutoMLRegressionModel.zip";

// Save the trained model together with the schema of the training DataView.
mlContext.Model.Save(trainedModel, trainDataView.Schema, ModelFilePath);
Console.WriteLine($"The model is saved to {ModelFilePath}");

In [None]:
Console.WriteLine("=============== Testing prediction engine ===============");

// Load the model back from the saved file and build a prediction engine on top of it.
ITransformer trainedModel = mlContext.Model.Load(ModelFilePath, out var modelInputSchema);
var predEngine = mlContext.Model.CreatePredictionEngine<TaxiTrip, TaxiTripFarePrediction>(trainedModel);

// Sample row, taken from the dataset:
// vendor_id,rate_code,passenger_count,trip_time_in_secs,trip_distance,payment_type,fare_amount
// VTS,1,1,1140,3.75,CRD,15.5
var taxiTripSample = new TaxiTrip
{
    VendorId = "VTS",
    RateCode = "1",
    PassengerCount = 1f,
    TripTime = 1140f,
    TripDistance = 3.75f,
    PaymentType = "CRD",
    FareAmount = 0f // Value to predict. Actual/observed fare = 15.5
};

// Score the single sample.
var predictedResult = predEngine.Predict(taxiTripSample);

Console.WriteLine("**********************************************************************");
Console.WriteLine($"Predicted fare: {predictedResult.FareAmount:0.####}, actual fare: 15.5");
Console.WriteLine("**********************************************************************");