
### This Interactive Notebook was generated by ML.NET Tooling.

The code below demonstrates how to:

1. Define the model input and output schema
1. Load data from a CSV text file into a DataFrame (which is then used as an IDataView)
1. Set up the training pipeline with data transforms
1. Choose an algorithm and append it to the pipeline
1. Train the model
1. Evaluate the model
1. Consume the model


## Install the NuGet packages needed to train an ML.NET model and plot results:

In [None]:

/* ML.NET Model Builder generated Notebook file. Notebook files contain both code snippets and rich text elements.
Use the "run" button in the left margin to execute each code snippet and explore ML.NET. */

#i "nuget:https://pkgs.dev.azure.com/dnceng/public/_packaging/dotnet5/nuget/v3/index.json" 
#i "nuget:https://pkgs.dev.azure.com/dnceng/public/_packaging/dotnet-tools/nuget/v3/index.json" 

#r "nuget:Microsoft.ML,1.5.5"
#r "nuget:Microsoft.Data.Analysis,0.4.0"
#r "nuget:XPlot.Plotly.Interactive, 4.0.1"
#r "nuget:Microsoft.ML.FastTree,1.5.5"


In [None]:
using static Microsoft.DotNet.Interactive.Formatting.PocketViewTags;
using Microsoft.DotNet.Interactive.Formatting;
using Microsoft.Data.Analysis;

In [None]:
// Register an HTML formatter so that a DataFrame is displayed as a table (first 20 rows) in cell output

using Microsoft.AspNetCore.Html;
using Microsoft.DotNet.Interactive.Formatting;
using static Microsoft.DotNet.Interactive.Formatting.PocketViewTags;

// Renders a DataFrame as an HTML table when it is displayed in a cell.
// At most `maxRows` rows are written, each one led by its zero-based row number.
Formatter.Register<DataFrame>((frame, output) =>
{
    // Upper bound on the number of rows rendered.
    var maxRows = 20;

    // Header cells: an italic "index" caption followed by one <th> per column name.
    var headerCells = new List<IHtmlContent>();
    headerCells.Add(th(i("index")));
    foreach (var column in frame.Columns)
    {
        headerCells.Add((IHtmlContent) th(column.Name));
    }

    // Body: one list of <td> cells per rendered row.
    var bodyRows = new List<List<IHtmlContent>>();
    var rowIndex = 0;
    while (rowIndex < Math.Min(maxRows, frame.Rows.Count))
    {
        var rowCells = new List<IHtmlContent>();
        rowCells.Add(td(rowIndex));
        foreach (var value in frame.Rows[rowIndex])
        {
            rowCells.Add(td(value));
        }

        bodyRows.Add(rowCells);
        rowIndex++;
    }

    // Assemble <table><thead>…</thead><tbody><tr>…</tr>…</tbody></table>.
    var htmlTable = table(
        thead(headerCells),
        tbody(bodyRows.Select(cells => tr(cells))));

    output.Write(htmlTable);
}, "text/html");

In [None]:
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using Microsoft.ML;
using Microsoft.ML.Data;

## Load dataset

In [None]:
// Location of the training CSV.
// NOTE(review): this is an absolute path into one user's Desktop folder; adjust it for your machine.
var trainDataPath = @"C:\Users\xiaoyuz\Desktop\taxi-fare-train.csv";

// Read the CSV into a DataFrame and display its first 10 rows as the cell result.
DataFrame df = DataFrame.LoadCsv(trainDataPath);
df.Head(10)

index,vendor_id,rate_code,passenger_count,trip_time_in_secs,trip_distance,payment_type,fare_amount
0,CMT,1,1,1271,3.8,CRD,17.5
1,CMT,1,1,474,1.5,CRD,8.0
2,CMT,1,1,637,1.4,CRD,8.5
3,CMT,1,1,181,0.6,CSH,4.5
4,CMT,1,1,661,1.1,CRD,8.5
5,CMT,1,1,935,9.6,CSH,27.5
6,CMT,1,1,869,2.3,CRD,11.5
7,CMT,1,1,454,1.4,CRD,7.5
8,CMT,1,1,366,1.5,CSH,7.5
9,CMT,1,1,252,0.6,CSH,5.0


## Split dataset

In [None]:
// Shared ML.NET context used by every later cell.
MLContext mlContext = new MLContext();

// Shuffle the loaded rows, then partition them using the library's default test fraction.
IDataView dataset = mlContext.Data.ShuffleRows(df);
DataOperationsCatalog.TrainTestData trainTestSplit = mlContext.Data.TrainTestSplit(dataset);

// Expose the two partitions under short names for the training and evaluation cells.
IDataView train = trainTestSplit.TrainSet;
IDataView test = trainTestSplit.TestSet;

## Create, train and evaluate the training pipeline.

In [None]:
using Microsoft.ML.Data;
using Microsoft.ML.Trainers;
using Microsoft.ML;
using Microsoft.ML.Runtime;
using Microsoft.ML.Trainers.FastTree;
using Microsoft.ML.FastTree;

// Echo ML.NET log messages of kind Info to the cell output; every other kind is ignored.
mlContext.Log += (object sender, LoggingEventArgs args) =>
{
    if (args.Kind != ChannelMessageKind.Info)
    {
        return;
    }

    Console.WriteLine(args.Message);
};

// Columns that are one-hot encoded in place (input and output names are the same).
var categoricalColumns = new[]
{
    new InputOutputColumnPair(@"vendor_id", @"vendor_id"),
    new InputOutputColumnPair(@"payment_type", @"payment_type"),
};

// Columns whose missing values are replaced in place.
var numericColumns = new[]
{
    new InputOutputColumnPair(@"rate_code", @"rate_code"),
    new InputOutputColumnPair(@"passenger_count", @"passenger_count"),
    new InputOutputColumnPair(@"trip_time_in_secs", @"trip_time_in_secs"),
    new InputOutputColumnPair(@"trip_distance", @"trip_distance"),
};

// Input columns merged, in this order, into the single "Features" column.
var featureColumns = new[]
{
    @"vendor_id",
    @"payment_type",
    @"rate_code",
    @"passenger_count",
    @"trip_time_in_secs",
    @"trip_distance",
};

// FastForest regression settings: predicts "fare_amount" from "Features".
var trainerOptions = new FastForestRegressionTrainer.Options()
{
    NumberOfTrees = 4,
    FeatureFraction = 1F,
    LabelColumnName = @"fare_amount",
    FeatureColumnName = @"Features",
};

// Data-processing stages followed by the trainer, chained into one estimator.
var pipeline = mlContext.Transforms.Categorical.OneHotEncoding(categoricalColumns)
    .Append(mlContext.Transforms.ReplaceMissingValues(numericColumns))
    .Append(mlContext.Transforms.Concatenate(@"Features", featureColumns))
    .Append(mlContext.Regression.Trainers.FastForest(trainerOptions));

// Fit the whole pipeline (transforms + trainer) on the training partition.
var model = pipeline.Fit(train);

// Run the fitted model over the held-out test partition.
IDataView data = model.Transform(test);

// Compute regression metrics against the "fare_amount" label and display them as the cell result.
RegressionMetrics eval = mlContext.Regression.Evaluate(data, labelColumnName: "fare_amount");
eval


[Source=Converter; InitDataset, Kind=Info] Making per-feature arrays


[Source=Converter; InitBoundariesAndLabels, Kind=Info] Changing data from row-wise to column-wise


[Source=Converter; InitDataset, Kind=Info] Processed 90016 instances


[Source=Converter; InitDataset, Kind=Info] Binning and forming Feature objects


[Source=FastTreeTraining; Training, Kind=Info] Reserved memory for tree learner: 85384 bytes


[Source=FastTreeTraining; Training, Kind=Info] Starting to train ...


MeanAbsoluteError,MeanSquaredError,RootMeanSquaredError,LossFunction,RSquared
0.910319966240189,6.471575990296979,2.543929242391969,6.471576159433325,0.9283817714406402


## Save the model

In [None]:
// Persist the trained model, together with the training data's input schema, to "taxi-fare.zip".
mlContext.Model.Save(model, inputSchema: train.Schema, filePath: "taxi-fare.zip");