In [1]:
#r "nuget:Microsoft.ML"
#r "nuget:Microsoft.ML.LightGbm"
#r "Classes.dll"
#r "nuget:Microsoft.AspNetCore.Html.Abstractions, 2.2.0"

Installed package Microsoft.ML version 1.5.2

Installed package Microsoft.AspNetCore.Html.Abstractions version 2.2.0

Installed package Microsoft.ML.LightGbm version 1.5.2

In [17]:
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using Microsoft.ML;
using Classes;
using Microsoft.ML.Trainers.LightGbm;
using XPlot.Plotly;
using Microsoft.ML.Data;

In [3]:
// Paths of the training and test CSV files, resolved relative to the working directory.
string TRAIN_DATA_FILEPATH = "train.csv",
       TEST_DATA_FILEPATH = "test.csv";

In [4]:
// Fixed seed on the ML context that every later cell shares.
var context = new MLContext(seed: 1);

// Read the comma-separated training file (with a header row) into a data view.
var trainingDataView = context.Data.LoadFromTextFile<ModelInput>(
    TRAIN_DATA_FILEPATH,
    separatorChar: ',',
    hasHeader: true,
    allowQuoting: true,
    allowSparse: false);

// Materialise every row as a ModelInput object and preview the first five.
var samples = context.Data
    .CreateEnumerable<ModelInput>(trainingDataView, reuseRowObject: false)
    .ToArray();
display(samples[..5]);

index,Id,Elevation,Aspect,Slope,Horizontal_Distance_To_Hydrology,Vertical_Distance_To_Hydrology,Horizontal_Distance_To_Roadways,Hillshade_9am,Hillshade_Noon,Hillshade_3pm,Horizontal_Distance_To_Fire_Points,Wilderness_Area1,Wilderness_Area2,Wilderness_Area3,Wilderness_Area4,Soil_Type1,Soil_Type2,Soil_Type3,Soil_Type4,Soil_Type5,..
0,1,2596,51,3,258,0,510,221,232,148,6279,1,0,0,0,0,0,0,0,0,
1,2,2590,56,2,212,-6,390,220,235,151,6225,1,0,0,0,0,0,0,0,0,
2,3,2804,139,9,268,65,3180,234,238,135,6121,1,0,0,0,0,0,0,0,0,
3,4,2785,155,18,242,118,3090,238,238,122,6211,1,0,0,0,0,0,0,0,0,
4,5,2595,45,2,153,-1,391,220,234,150,6172,1,0,0,0,0,0,0,0,0,


In [5]:
// Pie chart of how many training samples fall into each Cover_Type value.
var types = samples.Select(item => item.Cover_Type);

// Distinct cover types in sorted order. These exact values are used as the slice labels,
// so each label is guaranteed to sit next to its own count (a hard-coded 1..7 range would
// only line up if the values happened to first appear in ascending order in the file).
var unique = types.Distinct().OrderBy(coverType => coverType).ToArray();

// Group once instead of re-scanning the whole sample set for every distinct value.
var samplesByType = types.ToLookup(coverType => coverType);
int[] valuesVal = unique.Select(coverType => samplesByType[coverType].Count()).ToArray();

var graph = new Graph.Pie()
{
    labels = unique,
    values = valuesVal
};
var chart = Chart.Plot(graph);
display(chart);

In [6]:
// Input columns that are packed into the single "Features" vector, in this order.
var featureColumns = new[]
{
    "Elevation", "Aspect", "Slope",
    "Horizontal_Distance_To_Hydrology", "Vertical_Distance_To_Hydrology",
    "Horizontal_Distance_To_Roadways",
    "Hillshade_9am", "Hillshade_Noon", "Hillshade_3pm",
    "Horizontal_Distance_To_Fire_Points",
    "Wilderness_Area1", "Wilderness_Area2", "Wilderness_Area3", "Wilderness_Area4",
    "Soil_Type1", "Soil_Type2", "Soil_Type3", "Soil_Type4", "Soil_Type5",
    "Soil_Type6", "Soil_Type7", "Soil_Type8", "Soil_Type9", "Soil_Type10",
    "Soil_Type11", "Soil_Type12", "Soil_Type13", "Soil_Type14", "Soil_Type15",
    "Soil_Type16", "Soil_Type17", "Soil_Type18", "Soil_Type19", "Soil_Type20",
    "Soil_Type21", "Soil_Type22", "Soil_Type23", "Soil_Type24", "Soil_Type25",
    "Soil_Type26", "Soil_Type27", "Soil_Type28", "Soil_Type29", "Soil_Type30",
    "Soil_Type31", "Soil_Type32", "Soil_Type33", "Soil_Type34", "Soil_Type35",
    "Soil_Type36", "Soil_Type37", "Soil_Type38", "Soil_Type39", "Soil_Type40",
};

// Data preparation: turn the label into a key column, then concatenate the features.
var dataProcessPipeline = context.Transforms.Conversion
    .MapValueToKey("Cover_Type", "Cover_Type")
    .Append(context.Transforms.Concatenate("Features", featureColumns));

// LightGBM multiclass trainer settings.
var lightGbmOptions = new LightGbmMulticlassTrainer.Options()
{
    LabelColumnName = "Cover_Type",
    FeatureColumnName = "Features",
    NumberOfIterations = 200,
    LearningRate = 0.1104043f,
    NumberOfLeaves = 94,
    MinimumExampleCountPerLeaf = 20,
    UseCategoricalSplit = true,
    HandleMissingValue = true,
    UseZeroAsMissingValue = false,
    MinimumExampleCountPerGroup = 10,
    MaximumCategoricalSplitPointCount = 16,
    CategoricalSmoothing = 20,
    L2CategoricalRegularization = 5,
    UseSoftmax = false,
    Booster = new GradientBooster.Options()
    {
        L2Regularization = 0,
        L1Regularization = 0
    }
};

// Trainer followed by a key-to-value mapping of the predicted label.
var trainer = context.MulticlassClassification.Trainers
    .LightGbm(lightGbmOptions)
    .Append(context.Transforms.Conversion.MapKeyToValue("PredictedLabel", "PredictedLabel"));

// Full training pipeline: data preparation followed by the trainer chain.
var pipeline = dataProcessPipeline.Append(trainer);

In [7]:
// Fit the complete pipeline (data preparation + LightGBM trainer) on the whole training data view.
var model = pipeline.Fit(trainingDataView);

In [8]:
// 5-fold cross-validation of the same pipeline on the training data.
var crossValResults = context.MulticlassClassification.CrossValidate(
    trainingDataView,
    pipeline,
    labelColumnName: "Cover_Type",
    numberOfFolds: 5);

// Per-fold metrics, then the mean micro-accuracy across the folds.
var metricsInMultipleFolds = crossValResults.Select(fold => fold.Metrics);
var microAA = metricsInMultipleFolds.Average(metrics => metrics.MicroAccuracy);
Console.WriteLine($"Accuracy:{microAA}");

Accuracy:0,875607294618443


In [9]:
// Prediction engine built from the fitted model, used for row-by-row scoring.
var engine = context.Model.CreatePredictionEngine<ModelInput, ModelOutput>(model);

// Read the comma-separated test file (with a header row) and materialise its rows.
var testDataView = context.Data.LoadFromTextFile<ModelInputTest>(
    TEST_DATA_FILEPATH,
    separatorChar: ',',
    hasHeader: true);
var testSamples = context.Data
    .CreateEnumerable<ModelInputTest>(testDataView, reuseRowObject: false)
    .ToArray();

// Preview the first five test rows.
display(testSamples[..5]);

index,Id,Elevation,Aspect,Slope,Horizontal_Distance_To_Hydrology,Vertical_Distance_To_Hydrology,Horizontal_Distance_To_Roadways,Hillshade_9am,Hillshade_Noon,Hillshade_3pm,Horizontal_Distance_To_Fire_Points,Wilderness_Area1,Wilderness_Area2,Wilderness_Area3,Wilderness_Area4,Soil_Type1,Soil_Type2,Soil_Type3,Soil_Type4,Soil_Type5,..
0,15121,2680,354,14,0,0,2684,196,214,156,6645,1,0,0,0,0,0,0,0,0,
1,15122,2683,0,13,0,0,2654,201,216,152,6675,1,0,0,0,0,0,0,0,0,
2,15123,2713,16,15,0,0,2980,206,208,137,6344,1,0,0,0,0,0,0,0,0,
3,15124,2709,24,17,0,0,2950,208,201,125,6374,1,0,0,0,0,0,0,0,0,
4,15125,2706,29,19,0,0,2920,210,195,115,6404,1,0,0,0,0,0,0,0,0,


In [10]:
// Score every test row and index the predicted label by the row's Id.
// A duplicate Id raises an ArgumentException, exactly as Dictionary.Add would.
var predictions = testSamples.ToDictionary<ModelInputTest, int, string>(
    item => (int)item.Id,
    item => engine.Predict(new ModelInput(item)).Prediction);

In [11]:
// Preview the first five Id -> predicted label pairs.
display(predictions.Take(5));

index,Key,Value
0,15121,5
1,15122,1
2,15123,1
3,15124,1
4,15125,1


In [12]:
// Write the predictions as an "Id,Cover_Type" CSV.
var path = "achifal.csv";

// Header and rows are written in a single call, so every line (header included) ends with
// the same platform line terminator and the file is opened only once.
var header = new[] { "Id,Cover_Type" };
var rows = predictions.Select(keyval => $"{keyval.Key},{keyval.Value}");
File.WriteAllLines(path, header.Concat(rows));

In [29]:
/// <summary>
/// Reads the slot names attached to the column called <paramref name="name"/> in
/// <paramref name="schema"/> and returns them as strings, in slot order.
/// </summary>
/// <param name="schema">Schema that contains the column.</param>
/// <param name="name">Name of the column whose slot names are read.</param>
/// <returns>One string per slot of the column.</returns>
/// <exception cref="InvalidOperationException">
/// The schema has no column named <paramref name="name"/>.
/// </exception>
public static string[] GetLabels(DataViewSchema schema, string name)
{
    var column = schema.GetColumnOrNull(name);
    if (!column.HasValue)
    {
        // Fail with a message that names the column instead of the generic
        // "Nullable object must have a value" raised by Nullable<T>.Value.
        throw new InvalidOperationException($"Column '{name}' was not found in the schema.");
    }

    var slotNames = new VBuffer<ReadOnlyMemory<char>>();
    column.Value.GetSlotNames(ref slotNames);

    // DenseValues yields one entry per slot, so this produces slotNames.Length strings.
    return slotNames.DenseValues()
        .Select(slotName => slotName.ToString())
        .ToArray();
}

// Slot names of the "Score" column, in the order the model emits its scores.
var Labels = GetLabels(engine.OutputSchema, "Score");
display(Labels);

index,value
0,5
1,2
2,1
3,7
4,3
5,6
6,4


In [30]:
// Hand-built input row used to try a single prediction.
ModelInput sample = new ModelInput()
{
    Elevation = 2596F,
    Aspect = 51F,
    Slope = 3F,
    Horizontal_Distance_To_Hydrology = 258F,
    Vertical_Distance_To_Hydrology = 0F,
    Horizontal_Distance_To_Roadways = 510F,
    Hillshade_9am = 221F,
    Hillshade_Noon = 232F,
    Hillshade_3pm = 148F,
    Horizontal_Distance_To_Fire_Points = 6279F,
    Wilderness_Area1 = 1F,
    Wilderness_Area2 = 0F,
    Wilderness_Area3 = 0F,
    Wilderness_Area4 = 0F,
    Soil_Type1 = 0F,
    Soil_Type2 = 0F,
    Soil_Type3 = 0F,
    Soil_Type4 = 0F,
    Soil_Type5 = 0F,
    Soil_Type6 = 0F,
    Soil_Type7 = 0F,
    Soil_Type8 = 0F,
    Soil_Type9 = 0F,
    Soil_Type10 = 0F,
    Soil_Type11 = 0F,
    Soil_Type12 = 0F,
    Soil_Type13 = 0F,
    Soil_Type14 = 0F,
    Soil_Type15 = 0F,
    Soil_Type16 = 0F,
    Soil_Type17 = 0F,
    Soil_Type18 = 0F,
    Soil_Type19 = 0F,
    Soil_Type20 = 0F,
    Soil_Type21 = 0F,
    Soil_Type22 = 0F,
    Soil_Type23 = 0F,
    Soil_Type24 = 0F,
    Soil_Type25 = 0F,
    Soil_Type26 = 0F,
    Soil_Type27 = 0F,
    Soil_Type28 = 0F,
    Soil_Type29 = 1F,
    Soil_Type30 = 0F,
    Soil_Type31 = 0F,
    Soil_Type32 = 0F,
    Soil_Type33 = 0F,
    Soil_Type34 = 0F,
    Soil_Type35 = 0F,
    Soil_Type36 = 0F,
    Soil_Type37 = 0F,
    Soil_Type38 = 0F,
    Soil_Type39 = 0F,
    Soil_Type40 = 0F,
};
var prediction = engine.Predict(sample);
Console.WriteLine(prediction.Prediction);

// Print the scores ordered by their label. The ordering is done on a paired copy rather
// than with Array.Sort(Labels, prediction.Score), which would permanently reorder the
// shared Labels array and the prediction's own score array, leaving Labels out of step
// with the model's Score slot order for anything that uses it afterwards.
var scoresByLabel = Labels
    .Zip(prediction.Score, (label, score) => (Label: label, Score: score))
    .OrderBy(pair => pair.Label)
    .Select(pair => pair.Score);
Console.WriteLine(string.Join(' ', scoresByLabel));

5
0,0067545255 0,008419351 1,3238149E-05 5,7354077E-06 0,98476785 3,625538E-05 3,0507554E-06
