# Regression using LightGBM

Use LightGBM to train a regression model on a dummy dataset located at taxi-fare_train.csv. First, peek at the first 10 rows of the dataset; then train a regression model that predicts fare_amount. Once training is complete, save the model to regression.mlnet and print 'task resolved' at the end.

In [2]:
#r "nuget:Microsoft.ML"
#r "nuget:Microsoft.ML.LightGbm"


In [3]:
using System;
using System.Linq;
using Microsoft.ML;
using Microsoft.ML.Data;
using Microsoft.ML.Transforms;
using Microsoft.ML.Trainers.LightGbm;

// Define data structure
public class TaxiTrip
{
    [LoadColumn(0)] public string VendorId;
    [LoadColumn(1)] public string RateCode;
    [LoadColumn(2)] public float PassengerCount;
    [LoadColumn(3)] public float TripTime;
    [LoadColumn(4)] public float TripDistance;
    [LoadColumn(5)] public string PaymentType;
    [LoadColumn(6)] public float FareAmount;
}

public class TaxiTripFarePrediction
{
    [ColumnName("Score")]
    public float FareAmount;
}

var mlContext = new MLContext(seed: 0);

// Load data
var dataView = mlContext.Data.LoadFromTextFile<TaxiTrip>("taxi-fare_train.csv", separatorChar: ',');

// Peek the first 10 rows of the dataset
var peek = mlContext.Data.CreateEnumerable<TaxiTrip>(dataView, reuseRowObject: false).Take(10);
foreach (var row in peek)
{
    Console.WriteLine($"VendorId: {row.VendorId}, RateCode: {row.RateCode}, PassengerCount: {row.PassengerCount}, TripTime: {row.TripTime}, TripDistance: {row.TripDistance}, PaymentType: {row.PaymentType}, FareAmount: {row.FareAmount}");
}

// Define pipeline
var pipeline = mlContext.Transforms.CopyColumns(outputColumnName: "Label", inputColumnName: "FareAmount")
    .Append(mlContext.Transforms.Categorical.OneHotEncoding("VendorId"))
    .Append(mlContext.Transforms.Categorical.OneHotEncoding("RateCode"))
    .Append(mlContext.Transforms.Categorical.OneHotEncoding("PaymentType"))
    .Append(mlContext.Transforms.Concatenate("Features", "VendorId", "RateCode", "PassengerCount", "TripTime", "TripDistance", "PaymentType"))
    .Append(mlContext.Transforms.NormalizeMinMax("Features"))
    .Append(mlContext.Regression.Trainers.LightGbm());

// Train model
var model = pipeline.Fit(dataView);

// Save model
mlContext.Model.Save(model, dataView.Schema, "regression.mlnet");

Console.WriteLine("task resolved");

VendorId: vendor_id, RateCode: rate_code, PassengerCount: NaN, TripTime: NaN, TripDistance: NaN, PaymentType: payment_type, FareAmount: NaN
VendorId: CMT, RateCode: 1, PassengerCount: 1, TripTime: 1271, TripDistance: 3.8, PaymentType: CRD, FareAmount: 17.5
VendorId: CMT, RateCode: 1, PassengerCount: 1, TripTime: 474, TripDistance: 1.5, PaymentType: CRD, FareAmount: 8
VendorId: CMT, RateCode: 1, PassengerCount: 1, TripTime: 637, TripDistance: 1.4, PaymentType: CRD, FareAmount: 8.5
VendorId: CMT, RateCode: 1, PassengerCount: 1, TripTime: 181, TripDistance: 0.6, PaymentType: CSH, FareAmount: 4.5
VendorId: CMT, RateCode: 1, PassengerCount: 1, TripTime: 661, TripDistance: 1.1, PaymentType: CRD, FareAmount: 8.5
VendorId: CMT, RateCode: 1, PassengerCount: 1, TripTime: 935, TripDistance: 9.6, PaymentType: CSH, FareAmount: 27.5
VendorId: CMT, RateCode: 1, PassengerCount: 1, TripTime: 869, TripDistance: 2.3, PaymentType: CRD, FareAmount: 11.5
VendorId: CMT, RateCode: 1, PassengerCount: 1, TripTi