## Using ML.NET in F# with Polyglot Notebooks
By Matt Eland

### Importing ML.NET

In [1]:
// Install the Microsoft.ML packages we're going to use for ML.NET
#r "nuget:Microsoft.ML"

In [2]:
open Microsoft.ML;
open Microsoft.ML.Data;

In [3]:
// Shared ML.NET entry point: the later cells reach the Data, Transforms,
// Regression and Model catalogs through this one context.
let context = MLContext()

### Loading Data
Source: [London Weather Data](https://www.kaggle.com/datasets/emmanuelfwerr/london-weather-data)

In [4]:
/// One row of the weather CSV, as ML.NET should load it.
/// The date column is deliberately left unmapped (mapping starts at column 1).
type WeatherInput =
    {
        /// Cloud cover, in oktas
        [<LoadColumn(1)>] CloudCover: float32
        /// Sunshine, in hours
        [<LoadColumn(2)>] Sunshine: float32
        /// Global radiation, in watts per square meter
        [<LoadColumn(3)>] GlobalRads: float32
        /// Maximum temperature, in degrees Celsius
        [<LoadColumn(4)>] MaxTemp: float32
        /// Mean temperature, in degrees Celsius
        [<LoadColumn(5)>] MeanTemp: float32
        /// Minimum temperature, in degrees Celsius
        [<LoadColumn(6)>] MinTemp: float32
        /// Precipitation, in millimeters. This is the value we want to predict
        [<LoadColumn(7)>] Precipitation: float32
        /// Pressure, in pascals
        [<LoadColumn(8)>] Pressure: float32
        // Snow depth is left out: it won't be known at prediction time
        // and it is related to Precipitation
    }

In [5]:
// Read london_weather.csv as comma-separated WeatherInput rows,
// treating the first line as a header
let data =
    context.Data.LoadFromTextFile<WeatherInput>(
        "london_weather.csv",
        separatorChar = ',',
        hasHeader = true)

// Show the schema of the loaded data as the cell output
data.Schema

index,Name,Index,IsHidden,Type,Annotations
RawType,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Schema,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
RawType,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3
Schema,Unnamed: 1_level_4,Unnamed: 2_level_4,Unnamed: 3_level_4,Unnamed: 4_level_4,Unnamed: 5_level_4
RawType,Unnamed: 1_level_5,Unnamed: 2_level_5,Unnamed: 3_level_5,Unnamed: 4_level_5,Unnamed: 5_level_5
Schema,Unnamed: 1_level_6,Unnamed: 2_level_6,Unnamed: 3_level_6,Unnamed: 4_level_6,Unnamed: 5_level_6
RawType,Unnamed: 1_level_7,Unnamed: 2_level_7,Unnamed: 3_level_7,Unnamed: 4_level_7,Unnamed: 5_level_7
Schema,Unnamed: 1_level_8,Unnamed: 2_level_8,Unnamed: 3_level_8,Unnamed: 4_level_8,Unnamed: 5_level_8
RawType,Unnamed: 1_level_9,Unnamed: 2_level_9,Unnamed: 3_level_9,Unnamed: 4_level_9,Unnamed: 5_level_9
Schema,Unnamed: 1_level_10,Unnamed: 2_level_10,Unnamed: 3_level_10,Unnamed: 4_level_10,Unnamed: 5_level_10
RawType,Unnamed: 1_level_11,Unnamed: 2_level_11,Unnamed: 3_level_11,Unnamed: 4_level_11,Unnamed: 5_level_11
Schema,Unnamed: 1_level_12,Unnamed: 2_level_12,Unnamed: 3_level_12,Unnamed: 4_level_12,Unnamed: 5_level_12
RawType,Unnamed: 1_level_13,Unnamed: 2_level_13,Unnamed: 3_level_13,Unnamed: 4_level_13,Unnamed: 5_level_13
Schema,Unnamed: 1_level_14,Unnamed: 2_level_14,Unnamed: 3_level_14,Unnamed: 4_level_14,Unnamed: 5_level_14
RawType,Unnamed: 1_level_15,Unnamed: 2_level_15,Unnamed: 3_level_15,Unnamed: 4_level_15,Unnamed: 5_level_15
Schema,Unnamed: 1_level_16,Unnamed: 2_level_16,Unnamed: 3_level_16,Unnamed: 4_level_16,Unnamed: 5_level_16
0,CloudCover,0.0,False,RawTypeSystem.Single,SchemaDataViewSchema
RawType,,,,,
System.Single,,,,,
Schema,,,,,
DataViewSchema,,,,,
1,Sunshine,1.0,False,RawTypeSystem.Single,SchemaDataViewSchema
RawType,,,,,
System.Single,,,,,
Schema,,,,,
DataViewSchema,,,,,

RawType
System.Single

Schema
DataViewSchema

RawType
System.Single

Schema
DataViewSchema

RawType
System.Single

Schema
DataViewSchema

RawType
System.Single

Schema
DataViewSchema

RawType
System.Single

Schema
DataViewSchema

RawType
System.Single

Schema
DataViewSchema

RawType
System.Single

Schema
DataViewSchema

RawType
System.Single

Schema
DataViewSchema


In [6]:
// Number of rows to include in the preview
let numRows = 3

// Show the row view of a preview limited to numRows rows
data.Preview(maxRows = numRows).RowView

index,Values
0,"KeyValuePair<String,Object>[] CloudCover: 2 Sunshine: 7 GlobalRads: 52 MaxTemp: 2.3 MeanTemp: -4.1 MinTemp: -7.5 Precipitation: 0.4 Pressure: 101900"
1,"KeyValuePair<String,Object>[] CloudCover: 6 Sunshine: 1.7 GlobalRads: 27 MaxTemp: 1.6 MeanTemp: -2.6 MinTemp: -7.5 Precipitation: 0 Pressure: 102530"
2,"KeyValuePair<String,Object>[] CloudCover: 5 Sunshine: 0 GlobalRads: 13 MaxTemp: 1.3 MeanTemp: -2.8 MinTemp: -7.2 Precipitation: 0 Pressure: 102050"


In [7]:
// Hold back a test split so that overfitting can be detected
let split = context.Data.TrainTestSplit(data, testFraction = 0.2)

// trainSet: 80 % of the data (0.8); testSet: 20 % of the data (0.2)
let trainSet, testSet = split.TrainSet, split.TestSet

In [8]:
// Data-preparation steps that run before the trainer:
//   1. copy Precipitation into the "Label" column,
//   2. mean-variance normalize each input column under its own name,
//   3. concatenate the input columns into a single "Features" column,
//   4. append a cache checkpoint to the end of the chain.
let processPipeline =
    // Normalizes one column, writing the result back under the same name
    let normalize (column: string) =
        context.Transforms.NormalizeMeanVariance(column, column)

    // Columns that make up the feature vector (Precipitation is the label, so it is excluded)
    let featureColumns =
        [| "CloudCover"; "Sunshine"; "GlobalRads"; "MaxTemp"; "MeanTemp"; "MinTemp"; "Pressure" |]

    EstimatorChain()
        .Append(context.Transforms.CopyColumns("Label", "Precipitation"))
        .Append(normalize "CloudCover")
        .Append(normalize "Sunshine")
        .Append(normalize "GlobalRads")
        .Append(normalize "MaxTemp")
        .Append(normalize "MeanTemp")
        .Append(normalize "MinTemp")
        .Append(normalize "Pressure")
        .Append(context.Transforms.Concatenate("Features", featureColumns))
        .AppendCacheCheckpoint(context)

In [9]:
// Regression algorithm: online gradient descent, reading the "Features"
// column and learning the "Label" column
let trainer =
    context.Regression.Trainers.OnlineGradientDescent(
        labelColumnName = "Label",
        featureColumnName = "Features")

// Full training pipeline: the processing pipeline followed by the trainer
let trainingPipeline = processPipeline.Append(trainer)

In [10]:
// Fit the training pipeline on the training split
let model = trainingPipeline.Fit(trainSet)

// Display the fitted model as the cell output
model

index,type,FeatureColumnName,FeatureColumnType,Model
Dimensions,IsKnownSize,ItemType,Size,RawType
Weights,Bias,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
0,Microsoft.ML.Transforms.ColumnCopyingTransformer,,,
1,Microsoft.ML.Transforms.NormalizingTransformer,,,
2,Microsoft.ML.Transforms.NormalizingTransformer,,,
3,Microsoft.ML.Transforms.NormalizingTransformer,,,
4,Microsoft.ML.Transforms.NormalizingTransformer,,,
5,Microsoft.ML.Transforms.NormalizingTransformer,,,
6,Microsoft.ML.Transforms.NormalizingTransformer,,,
7,Microsoft.ML.Transforms.NormalizingTransformer,,,
8,Microsoft.ML.Data.ColumnConcatenatingTransformer,,,
9,Microsoft.ML.Data.RegressionPredictionTransformer<Microsoft.ML.Trainers.LinearRegressionModelParameters>,Features,DimensionsIsKnownSizeItemTypeSizeRawType[ 7 ]TrueNumberDataViewType  RawType: System.Single7Microsoft.ML.Data.VBuffer<System.Single>,"WeightsBias[ 1.2622939, -0.8087399, -0.0071948916, -0.43742964, 0.51491094, 0.28776076, 0.27060327 ]0.5830907"

Dimensions,IsKnownSize,ItemType,Size,RawType
[ 7 ],True,NumberDataViewType  RawType: System.Single,7,Microsoft.ML.Data.VBuffer<System.Single>

Weights,Bias
"[ 1.2622939, -0.8087399, -0.0071948916, -0.43742964, 0.51491094, 0.28776076, 0.27060327 ]",0.5830907


In [11]:
// Run the trained model over the held-out test split
let testResults = model.Transform(testSet)

// Evaluate the "Score" column against the "Label" column
let testMetrics =
    context.Regression.Evaluate(
        testResults,
        labelColumnName = "Label",
        scoreColumnName = "Score")

// Display the regression metrics as the cell output
testMetrics

MeanAbsoluteError,MeanSquaredError,RootMeanSquaredError,LossFunction,RSquared
2.1125501652077654,13.199192653001656,3.633069315744149,13.199192490766798,0.0667895803734361


In [12]:
// Write the trained model, together with the input data's schema, to Model.zip
context.Model.Save(model, data.Schema, filePath = "Model.zip")

In [13]:
// Read the model back from Model.zip; the call yields the model and its schema as a pair
let (loadedModel, loadedSchema) = context.Model.Load("Model.zip")

In [14]:
/// Result row returned by the prediction engine.
[<CLIMutable>]
type WeatherPrediction =
    {
        /// Predicted precipitation, bound to the "Score" column
        [<ColumnName("Score")>] Precipitation: float32
    }

// Engine for single predictions: takes a WeatherInput, returns a WeatherPrediction.
// Note: this is built from the in-memory `model`, not from `loadedModel`.
let predictionEngine =
    context.Model.CreatePredictionEngine<WeatherInput, WeatherPrediction>(model)

In [15]:
// One set of weather conditions to run through the prediction engine
let conditions: WeatherInput =
    { CloudCover = 4f
      Sunshine = 5.8f
      GlobalRads = 50f
      MaxTemp = 5.2f
      MeanTemp = 3.7f
      MinTemp = 1.6f
      Precipitation = 0f // Not used, but part of the input schema
      Pressure = 101170f }

// Predict precipitation for those conditions
let prediction = predictionEngine.Predict(conditions)

// Display the prediction as the cell output
prediction

Precipitation
1.0180497
