# Prepare for data analyze

In [1]:
// ML.NET Nuget packages installation
#r "nuget:Microsoft.ML"
    
//Install XPlot package
#r "nuget:XPlot.Plotly"

//Install Recommender
#r "nuget:Microsoft.ML.Recommender"

using Microsoft.ML;
using Microsoft.ML.Recommender;
using Microsoft.ML.Data;
using Microsoft.ML.Trainers;
using System.Linq;
using XPlot.Plotly;

Installing package Microsoft.ML.Recommender...

Installing package Microsoft.ML...

Installing package XPlot.Plotly...

Error: [object Object]

## Models

In [1]:
public class MovieRating
{
    [LoadColumn(0)]
    public string ProductId;
    [LoadColumn(1)]
    public float UserId;
    [LoadColumn(2)]
    public float Label;
    [LoadColumn(3)]
    public long Time;
}

public class MovieRatingPrediction
{
    public float Label;
    public float Score;
}


## Load Data

In [1]:

string trainDataPath = "./Data/ratings_Home_and_Kitchen.csv";
string testDataPath = "./Data/ratings_Home_and_Kitchen_test.csv";
MLContext mlContext = new MLContext();
IDataView trainDataView = mlContext.Data.LoadFromTextFile<MovieRating>(trainDataPath, hasHeader: false, separatorChar: ',');
IDataView testDataView = mlContext.Data.LoadFromTextFile<MovieRating>(testDataPath, hasHeader: false, separatorChar: ',');

display("Schema of training DataView:");
display(trainDataView.Preview(10).RowView);

Schema of training DataView:

index,Values
0,"[ ProductId: A210NOCSTBT4OD, UserId: 76144010, Label: 4, Time: 1349308800 ]"
1,"[ ProductId: A28ILV4TOG8BH2, UserId: 130350590, Label: 5, Time: 1300752000 ]"
2,"[ ProductId: A31B4D7URW4DNZ, UserId: 307394530, Label: 2, Time: 1214784000 ]"
3,"[ ProductId: A2HU0RPDRZZOP1, UserId: 307394530, Label: 5, Time: 1277337600 ]"
4,"[ ProductId: A7J0XOW7DYBBD, UserId: 307394530, Label: 5, Time: 1393113600 ]"
5,"[ ProductId: A9QY9F2Y7KDFW, UserId: 307394530, Label: 5, Time: 1218499200 ]"
6,"[ ProductId: A2MGJ4CELCSG10, UserId: 307394530, Label: 5, Time: 1234137600 ]"
7,"[ ProductId: A1S0DWONNWB8NH, UserId: 307394530, Label: 5, Time: 1266278400 ]"
8,"[ ProductId: A1FUE4TEATOQVV, UserId: 307394530, Label: 3, Time: 1220659200 ]"
9,"[ ProductId: A399RHVLQVXMA1, UserId: 307394530, Label: 5, Time: 1312675200 ]"


In [1]:
# Histograms

Error: [object Object]

## Get columns

In [1]:
//Extract some data into arrays for plotting:

int numberOfRows = 1000;
float[] users = trainDataView.GetColumn<float>(nameof(MovieRating.UserId)).Take(numberOfRows).ToArray();
float[] scores = trainDataView.GetColumn<float>(nameof(MovieRating.Label)).Take(numberOfRows).ToArray();
string[] productIds = trainDataView.GetColumn<string>(nameof(MovieRating.ProductId)).Take(numberOfRows).ToArray();
long[] times = trainDataView.GetColumn<long>(nameof(MovieRating.Time)).Take(numberOfRows).ToArray();
var products =  mlContext.Data.CreateEnumerable<MovieRating>(trainDataView, reuseRowObject: true);

## Histogram numbers of scores

In [1]:
// Distribution of number of scores
//XPlot Histogram reference: http://tpetricek.github.io/XPlot/reference/xplot-plotly-graph-histogram.html

var faresHistogram = Chart.Plot(new Histogram(){x = scores, autobinx = false, nbinsx = 20});
var layout = new Layout.Layout(){title="Number of scores"};
faresHistogram.WithLayout(layout);
faresHistogram.WithXTitle("Scores");
faresHistogram.WithYTitle("Numbers");
faresHistogram.Show();
display(faresHistogram);

Height,Id,PlotlySrc,Width
500,b8d40416-51f0-458f-8ccb-fefbb7088a20,https://cdn.plot.ly/plotly-latest.min.js,900


## Scatter

In [1]:
//var collection = products.GroupBy(x=>x.ProductId).OrderByDescending(x=>x.Count());
//var scorses = collection.First(x=>x.Any(y=>y.Label<5)).Select(x=>x.Label).Take(500);
//display(scorses);
var chartFareVsTime = Chart.Plot(
    new Scatter()
    {
        x = times.Select(x=>new DateTime(TimeSpan.FromMilliseconds(x).Ticks)).ToArray(),
        y = scores.Take(20),
        mode = "markers",
        marker = new Marker()
        {
            color = scores,
            colorscale = "Jet"
        }
    }
);

var layout = new Layout.Layout(){title="Plot Scores and dateTime publish"};
chartFareVsTime.WithLayout(layout);
chartFareVsTime.Width = 500;
chartFareVsTime.Height = 500;
chartFareVsTime.WithXTitle("User");
chartFareVsTime.WithYTitle("Scores");
chartFareVsTime.WithLegend(false);
chartFareVsTime.Show();
display(chartFareVsTime);

Height,Id,PlotlySrc,Width
500,3e87e1cd-46e5-43f7-a765-b6eaca3a52d2,https://cdn.plot.ly/plotly-latest.min.js,500


# Build and train model

In [1]:
IEstimator<ITransformer> estimator = mlContext
    .Transforms
    .Conversion
    .MapValueToKey(outputColumnName: "UserIdEncoded", inputColumnName: "UserId")
    .Append(mlContext
        .Transforms
        .Conversion
        .MapValueToKey(outputColumnName: "ProductIdEncoded", inputColumnName: "ProductId"));
var options = new MatrixFactorizationTrainer.Options
{
    MatrixColumnIndexColumnName = "UserIdEncoded",
    MatrixRowIndexColumnName = "ProductIdEncoded",
    LabelColumnName = "Label",
    NumberOfIterations = 20,
    ApproximationRank = 100
};
var trainerEstimator = estimator
    .Append(mlContext
        .Recommendation()
        .Trainers
        .MatrixFactorization(options));
Console.WriteLine("========================== Training the model =============================");
ITransformer model = trainerEstimator.Fit(trainDataView);
display(model.GetOutputSchema(trainDataView.Schema))



index,Name,Index,IsHidden,Type,Annotations
0,ProductId,0,False,String,
1,UserId,1,False,Single,
2,Label,2,False,Single,
3,Time,3,False,Int64,
4,UserIdEncoded,4,False,"Key<UInt32, 0-280>",KeyValues
5,ProductIdEncoded,5,False,"Key<UInt32, 0-999999>",KeyValues
6,Score,6,False,Single,"ScoreColumnSetId, ScoreColumnKind, ScoreValueKind"


## Evaluate model

In [1]:
public static void EvaluateModel(MLContext mlContext, IDataView testDataView, ITransformer model)
{
    Console.WriteLine("========================== Evaluating the model =============================");
    var prediction = model.Transform(testDataView);
    var metrics = mlContext
        .Regression
        .Evaluate(prediction, labelColumnName: "Label", scoreColumnName: "Score");
    Console.WriteLine("Root Mean Squared Error : " + metrics.RootMeanSquaredError.ToString());
    Console.WriteLine("RSquared: " + metrics.RSquared.ToString());
}
EvaluateModel(mlContext, testDataView, model);



Root Mean Squared Error : 0.15041039992187646


RSquared: 0.9720697876108068


# Predict by model

## Single prediction

In [1]:
public static void UseModelForSinglePrediction(MLContext mlContext, ITransformer model, IEnumerable<MovieRating> products)
{
    Console.WriteLine("=========================== Making a prediction =============================");
    var predictionEngine = mlContext
        .Model
        .CreatePredictionEngine<MovieRating, MovieRatingPrediction>(model);

    foreach(var product in products.GroupBy(x=>x.ProductId).Select(x=>x.Key).Take(10))
    {
        var testInput = new MovieRating { UserId = 0307394530, ProductId = product};
        var movieRatingPrediction = predictionEngine.Predict(testInput);
        if (Math.Round(movieRatingPrediction.Score, 1) > 3.5)
        {
            Console.WriteLine("Product " + testInput.ProductId + " is recommended for user " + testInput.UserId + ". Score=" + movieRatingPrediction.Score);
        }
        else
        {
            Console.WriteLine("Product " + testInput.ProductId + " is not recommended for user " + testInput.UserId + ". Score=" + movieRatingPrediction.Score);
        }
    }
}
UseModelForSinglePrediction(mlContext, model, products);



Product A210NOCSTBT4OD is recommended for user 307394530. Score=4.6681795


Product A28ILV4TOG8BH2 is recommended for user 307394530. Score=5.512721


Product A31B4D7URW4DNZ is not recommended for user 307394530. Score=1.9500381


Product A2HU0RPDRZZOP1 is recommended for user 307394530. Score=4.8403964


Product A7J0XOW7DYBBD is recommended for user 307394530. Score=4.791581


Product A9QY9F2Y7KDFW is recommended for user 307394530. Score=4.836619


Product A2MGJ4CELCSG10 is recommended for user 307394530. Score=4.794322


Product A1S0DWONNWB8NH is recommended for user 307394530. Score=4.832559


Product A1FUE4TEATOQVV is not recommended for user 307394530. Score=2.9350376


Product A399RHVLQVXMA1 is recommended for user 307394530. Score=4.8055415


## SaveModel

In [1]:
var modelPath = "./Data/MovieRecommenderModel.zip";
public static void SaveModel(MLContext mlContext, DataViewSchema trainDataViewSchema, ITransformer model, string modelPath)
{

    Console.WriteLine("========================== Saving the model to a file ==================================");
    mlContext
        .Model
        .Save(model, trainDataViewSchema, modelPath);
}
SaveModel(mlContext, trainDataView.Schema, model, modelPath);



## Get Model from a file

In [1]:
MLContext mLContext = new MLContext();
DataViewSchema schema;
var model1 = mlContext.Model.Load(modelPath, out schema);

## Evaluate model from a file

In [1]:
EvaluateModel(mlContext, testDataView, model1);



Root Mean Squared Error : 0.15041039992187646


RSquared: 0.9720697876108068
