# Prepare for data analyze

In [None]:
// ML.NET Nuget packages installation
#r "nuget:Microsoft.ML"
    
//Install XPlot package
#r "nuget:XPlot.Plotly"

//Install Recommender
#r "nuget:Microsoft.ML.Recommender"

using Microsoft.ML;
using Microsoft.ML.Recommender;
using Microsoft.ML.Data;
using Microsoft.ML.Trainers;
using System.Linq;
using System.Text.Json;
using System.IO;
using XPlot.Plotly;

## Models

In [None]:
public class ReviewModel
{
    public string reviewerID { get; set; }
    public string asin { get; set; }
    public string reviewerName { get; set; }
    public int[] helpful { get; set; }
    public string reviewText { get; set; }
    /// <summary>
    /// Оценка
    /// </summary>
    public float overall { get; set; }
    public string summary { get; set; }
    public int unixReviewTime { get; set; }
    public string reviewTime { get; set; }
}
public class ProductPrediction
{
    public string asin;
    public float Score;
}

## Load Data

In [None]:
var fileName = "./Data/Clothing_Shoes_and_Jewelry_5.json";
var jsonString = File.ReadAllText(fileName);
jsonString = jsonString.Replace('\n', ',');
var reviewsSrc = Newtonsoft.Json.JsonConvert.DeserializeObject<List<ReviewModel>>("["+jsonString+"]");
MLContext mlContext = new MLContext();
int countTest = reviewsSrc.Count()/5;
var trainDataView = mlContext.Data.LoadFromEnumerable<ReviewModel>(reviewsSrc);
var testDataView = mlContext.Data.LoadFromEnumerable<ReviewModel>(reviewsSrc.Take(countTest));
display("Schema of training DataView:");
display(trainDataView.Preview(1).RowView);
display(reviewsSrc.Skip(countTest).ToList().Count);
display(reviewsSrc.Count);

Schema of training DataView:

index,Values
0,"[ reviewerID: A1KLRMWW2FWPL4, asin: 0000031887, reviewerName: Amazon Customer ""cameramom"", helpful: { Dense vector of size 2: IsDense: True, Length: 2 }, reviewText: This is a great tutu and at a really great price. It doesn't look cheap at all. I'm so glad I looked on Amazon and found such an affordable tutu that isn't made poorly. A++, overall: 5, summary: Great tutu- not cheaply made, unixReviewTime: 1297468800, reviewTime: 02 12, 2011 ]"


# Histograms

## Get columns

In [None]:
//Extract some data into arrays for plotting:

int numberOfRows = 1000;
float[] scores = trainDataView.GetColumn<float>(nameof(ReviewModel.overall)).Take(numberOfRows).ToArray();
int[] times = trainDataView.GetColumn<int>(nameof(ReviewModel.unixReviewTime)).Take(numberOfRows).ToArray();
string[] reviews = trainDataView.GetColumn<string>(nameof(ReviewModel.reviewText)).Take(numberOfRows).ToArray();
string[] names = trainDataView.GetColumn<string>(nameof(ReviewModel.reviewerName)).Take(numberOfRows).ToArray();

## Histogram numbers of scores

In [None]:
// Distribution of number of scores
//XPlot Histogram reference: http://tpetricek.github.io/XPlot/reference/xplot-plotly-graph-histogram.html

var faresHistogram = Chart.Plot(new Histogram(){x = scores, autobinx = false, nbinsx = 20});
var layout = new Layout.Layout(){title="Number of scores"};
faresHistogram.WithLayout(layout);
faresHistogram.WithXTitle("Scores");
faresHistogram.WithYTitle("Numbers");
faresHistogram.Show();
display(faresHistogram);

Height,Id,PlotlySrc,Width
500,3285a307-3868-4117-9aef-bb7b5bea08fa,https://cdn.plot.ly/plotly-latest.min.js,900


## Score and Review Length

In [None]:
var chartFareVsTime = Chart.Plot(
    new Scatter()
    {
        x =scores.Take(200) ,
        y = reviews.Select(x=>x.Length).Take(200).ToArray(),
        mode = "markers",
        marker = new Marker()
        {
            color = scores,
            colorscale = "Jet"
        }
    }
);

var layout = new Layout.Layout(){title="Plot score and review length"};
chartFareVsTime.WithLayout(layout);
chartFareVsTime.Width = 500;
chartFareVsTime.Height = 500;
chartFareVsTime.WithXTitle("Scores");
chartFareVsTime.WithYTitle("Review Length");
chartFareVsTime.WithLegend(false);
chartFareVsTime.Show();
display(chartFareVsTime);

Height,Id,PlotlySrc,Width
500,12639852-f618-4601-85fc-3f69cc927994,https://cdn.plot.ly/plotly-latest.min.js,500


In [None]:
//var collection = products.GroupBy(x=>x.ProductId).OrderByDescending(x=>x.Count());
//var scorses = collection.First(x=>x.Any(y=>y.Label<5)).Select(x=>x.Label).Take(500);
//display(scorses);
var chartFareVsTime = Chart.Plot(
    new Box()
    {
        x =scores ,
        y = reviews.Select(x=>x.Length).Where(x=>x<2000).ToArray()
    }
);

var layout = new Layout.Layout(){title="Plot score and review length"};
chartFareVsTime.WithLayout(layout);
chartFareVsTime.Width = 500;
chartFareVsTime.Height = 500;
chartFareVsTime.WithXTitle("Scores");
chartFareVsTime.WithYTitle("Review Length");
chartFareVsTime.WithLegend(false);
chartFareVsTime.Show();
display(chartFareVsTime);

Height,Id,PlotlySrc,Width
500,aefceed3-d0db-474e-bf33-fb883e095353,https://cdn.plot.ly/plotly-latest.min.js,500


# Build and train model

In [None]:
IEstimator<ITransformer> estimator = mlContext
    .Transforms
    .Conversion
    .MapValueToKey(outputColumnName: "ReviewerIdEncoded", inputColumnName: $"{nameof(ReviewModel.reviewerID)}")
    .Append(mlContext
        .Transforms
        .Conversion
        .MapValueToKey(outputColumnName: "ProductIdEncoded", inputColumnName: $"{nameof(ReviewModel.asin)}"));
var options = new MatrixFactorizationTrainer.Options
{
    MatrixColumnIndexColumnName = "ReviewerIdEncoded",
    MatrixRowIndexColumnName = "ProductIdEncoded",
    LabelColumnName = $"{nameof(ReviewModel.overall)}",
    NumberOfIterations = 20,
    ApproximationRank = 100
};
var trainerEstimator = estimator
    .Append(mlContext
        .Recommendation()
        .Trainers
        .MatrixFactorization(options));
Console.WriteLine("========================== Training the model =============================");
ITransformer model = trainerEstimator.Fit(trainDataView);
display(model.GetOutputSchema(trainDataView.Schema))



index,Name,Index,IsHidden,Type,Annotations
RawType,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Schema,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
RawType,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3
Schema,Unnamed: 1_level_4,Unnamed: 2_level_4,Unnamed: 3_level_4,Unnamed: 4_level_4,Unnamed: 5_level_4
RawType,Unnamed: 1_level_5,Unnamed: 2_level_5,Unnamed: 3_level_5,Unnamed: 4_level_5,Unnamed: 5_level_5
Schema,Unnamed: 1_level_6,Unnamed: 2_level_6,Unnamed: 3_level_6,Unnamed: 4_level_6,Unnamed: 5_level_6
Dimensions,IsKnownSize,ItemType,Size,RawType,Unnamed: 5_level_7
Schema,Unnamed: 1_level_8,Unnamed: 2_level_8,Unnamed: 3_level_8,Unnamed: 4_level_8,Unnamed: 5_level_8
RawType,Unnamed: 1_level_9,Unnamed: 2_level_9,Unnamed: 3_level_9,Unnamed: 4_level_9,Unnamed: 5_level_9
Schema,Unnamed: 1_level_10,Unnamed: 2_level_10,Unnamed: 3_level_10,Unnamed: 4_level_10,Unnamed: 5_level_10
RawType,Unnamed: 1_level_11,Unnamed: 2_level_11,Unnamed: 3_level_11,Unnamed: 4_level_11,Unnamed: 5_level_11
Schema,Unnamed: 1_level_12,Unnamed: 2_level_12,Unnamed: 3_level_12,Unnamed: 4_level_12,Unnamed: 5_level_12
RawType,Unnamed: 1_level_13,Unnamed: 2_level_13,Unnamed: 3_level_13,Unnamed: 4_level_13,Unnamed: 5_level_13
Schema,Unnamed: 1_level_14,Unnamed: 2_level_14,Unnamed: 3_level_14,Unnamed: 4_level_14,Unnamed: 5_level_14
RawType,Unnamed: 1_level_15,Unnamed: 2_level_15,Unnamed: 3_level_15,Unnamed: 4_level_15,Unnamed: 5_level_15
Schema,Unnamed: 1_level_16,Unnamed: 2_level_16,Unnamed: 3_level_16,Unnamed: 4_level_16,Unnamed: 5_level_16
RawType,Unnamed: 1_level_17,Unnamed: 2_level_17,Unnamed: 3_level_17,Unnamed: 4_level_17,Unnamed: 5_level_17
Schema,Unnamed: 1_level_18,Unnamed: 2_level_18,Unnamed: 3_level_18,Unnamed: 4_level_18,Unnamed: 5_level_18
Count,RawType,Unnamed: 2_level_19,Unnamed: 3_level_19,Unnamed: 4_level_19,Unnamed: 5_level_19
Schema,Unnamed: 1_level_20,Unnamed: 2_level_20,Unnamed: 3_level_20,Unnamed: 4_level_20,Unnamed: 5_level_20
Count,RawType,Unnamed: 2_level_21,Unnamed: 3_level_21,Unnamed: 4_level_21,Unnamed: 5_level_21
Schema,Unnamed: 1_level_22,Unnamed: 2_level_22,Unnamed: 3_level_22,Unnamed: 4_level_22,Unnamed: 5_level_22
RawType,Unnamed: 1_level_23,Unnamed: 2_level_23,Unnamed: 3_level_23,Unnamed: 4_level_23,Unnamed: 5_level_23
Schema,Unnamed: 1_level_24,Unnamed: 2_level_24,Unnamed: 3_level_24,Unnamed: 4_level_24,Unnamed: 5_level_24
0,reviewerID,0,False,RawTypeSystem.ReadOnlyMemory<System.Char>,Schema[ ]
RawType,,,,,
System.ReadOnlyMemory<System.Char>,,,,,
Schema,,,,,
[ ],,,,,
1,asin,1,False,RawTypeSystem.ReadOnlyMemory<System.Char>,Schema[ ]
RawType,,,,,
System.ReadOnlyMemory<System.Char>,,,,,
Schema,,,,,
[ ],,,,,

RawType
System.ReadOnlyMemory<System.Char>

Schema
[ ]

RawType
System.ReadOnlyMemory<System.Char>

Schema
[ ]

RawType
System.ReadOnlyMemory<System.Char>

Schema
[ ]

Dimensions,IsKnownSize,ItemType,Size,RawType
[ 0 ],False,{ Int32: RawType: System.Int32 },0,Microsoft.ML.Data.VBuffer<System.Int32>

Schema
[ ]

RawType
System.ReadOnlyMemory<System.Char>

Schema
[ ]

RawType
System.Single

Schema
[ ]

RawType
System.ReadOnlyMemory<System.Char>

Schema
[ ]

RawType
System.Int32

Schema
[ ]

RawType
System.ReadOnlyMemory<System.Char>

Schema
[ ]

Count,RawType
39387,System.UInt32

Schema
"[ { KeyValues: Vector<String, 39387>: Name: KeyValues, Index: 0, IsHidden: False, Type: { Vector<String, 39387>: Dimensions: [ 39387 ], IsKnownSize: True, ItemType: { String: RawType: System.ReadOnlyMemory`1[System.Char] }, Size: 39387, RawType: Microsoft.ML.Data.VBuffer<System.ReadOnlyMemory<System.Char>> }, Annotations: { : Schema: [ ] } } ]"

Count,RawType
23033,System.UInt32

Schema
"[ { KeyValues: Vector<String, 23033>: Name: KeyValues, Index: 0, IsHidden: False, Type: { Vector<String, 23033>: Dimensions: [ 23033 ], IsKnownSize: True, ItemType: { String: RawType: System.ReadOnlyMemory`1[System.Char] }, Size: 23033, RawType: Microsoft.ML.Data.VBuffer<System.ReadOnlyMemory<System.Char>> }, Annotations: { : Schema: [ ] } } ]"

RawType
System.Single

Schema
"[ { ScoreColumnSetId: Key<UInt32, 0-2147483646>: Name: ScoreColumnSetId, Index: 0, IsHidden: False, Type: { Key<UInt32, 0-2147483646>: Count: 2147483647, RawType: System.UInt32 }, Annotations: { : Schema: [ ] } }, { ScoreColumnKind: String: Name: ScoreColumnKind, Index: 1, IsHidden: False, Type: { String: RawType: System.ReadOnlyMemory<System.Char> }, Annotations: { : Schema: [ ] } }, { ScoreValueKind: String: Name: ScoreValueKind, Index: 2, IsHidden: False, Type: { String: RawType: System.ReadOnlyMemory<System.Char> }, Annotations: { : Schema: [ ] } } ]"


## Evaluate model

In [None]:
public static void EvaluateModel(MLContext mlContext, IDataView testDataView, ITransformer model)
{
    Console.WriteLine("========================== Evaluating the model =============================");
    var prediction = model.Transform(testDataView);
    var metrics = mlContext
        .Regression
        .Evaluate(prediction, labelColumnName: $"{nameof(ReviewModel.overall)}", scoreColumnName: $"{nameof(ProductPrediction.Score)}");
    Console.WriteLine("Root Mean Squared Error : " + metrics.RootMeanSquaredError.ToString());
    Console.WriteLine("RSquared: " + metrics.RSquared.ToString());
}
EvaluateModel(mlContext, testDataView, model);



Root Mean Squared Error : 0,36354854529594377


RSquared: 0,8871808716399533


# Predict by model

## Single prediction

In [None]:
public static void UseModelForSinglePrediction(MLContext mlContext, ITransformer model, IEnumerable<ReviewModel> products)
{
    Console.WriteLine("=========================== Making a prediction =============================");
    var predictionEngine = mlContext
        .Model
        .CreatePredictionEngine<ReviewModel, ProductPrediction>(model);

    foreach(var product in products.GroupBy(x=>x.asin).Select(x=>x.Key).Take(10))
    {
        var testInput = new ReviewModel { reviewerID = "A1KLRMWW2FWPL4", asin = product};
        var movieRatingPrediction = predictionEngine.Predict(testInput);
        if (Math.Round(movieRatingPrediction.Score, 1) > 3.5)
        {
            Console.WriteLine("Product " + testInput.asin + " is recommended for user " + testInput.reviewerID + ". Score=" + movieRatingPrediction.Score);
        }
        else
        {
            Console.WriteLine("Product " + testInput.asin + " is not recommended for user " + testInput.reviewerID + ". Score=" + movieRatingPrediction.Score);
        }
    }
}
UseModelForSinglePrediction(mlContext, model, reviewsSrc);



Product 0000031887 is recommended for user A1KLRMWW2FWPL4. Score=4,799408


Product 0123456479 is recommended for user A1KLRMWW2FWPL4. Score=4,8967004


Product 1608299953 is recommended for user A1KLRMWW2FWPL4. Score=3,7692087


Product 1617160377 is recommended for user A1KLRMWW2FWPL4. Score=4,961419


Product B00001W0KA is recommended for user A1KLRMWW2FWPL4. Score=4,985507


Product B00001WRHJ is recommended for user A1KLRMWW2FWPL4. Score=4,12987


Product B00004SR8W is recommended for user A1KLRMWW2FWPL4. Score=4,3379045


Product B00004SR8Z is recommended for user A1KLRMWW2FWPL4. Score=4,1612067


Product B00004SR9P is recommended for user A1KLRMWW2FWPL4. Score=3,9366279


Product B00004U1J2 is not recommended for user A1KLRMWW2FWPL4. Score=3,4680696


## SaveModel

In [None]:
var modelPath = "./Data/MovieRecommenderModel.zip";
public static void SaveModel(MLContext mlContext, DataViewSchema trainDataViewSchema, ITransformer model, string modelPath)
{

    Console.WriteLine("========================== Saving the model to a file ==================================");
    mlContext
        .Model
        .Save(model, trainDataViewSchema, modelPath);
}
SaveModel(mlContext, trainDataView.Schema, model, modelPath);



## Get Model from a file

In [None]:
MLContext mLContext = new MLContext();
DataViewSchema schema;
var model1 = mlContext.Model.Load(modelPath, out schema);

## Evaluate model from a file

In [None]:
EvaluateModel(mlContext, testDataView, model1);



Root Mean Squared Error : 0,36354854529594377


RSquared: 0,8871808716399533
