task name: Resolve Ngram Extraction Task
task description: Implement character n-gram extraction (unigrams and bigrams) with ML.NET (Microsoft.ML) using the provided C# code

In [4]:
#r "nuget:Microsoft.ML"


In [5]:
using System;
using System.Collections.Generic;
using Microsoft.ML;
using Microsoft.ML.Data;

In [6]:
/// <summary>
/// One in-memory example row: a piece of text together with a boolean flag.
/// </summary>
public class SampleSentimentData
{
    /// <summary>Boolean flag assigned to the text (true or false in the sample rows).</summary>
    public bool Sentiment { get; set; }

    /// <summary>Raw text; the character tokenizer reads it as the "SentimentText" input column.</summary>
    public string SentimentText { get; set; }
}

// Context object through which the data loader and the text transforms are reached.
MLContext ml = new MLContext();

// Three hand-written rows that make up the whole data set.
List<SampleSentimentData> data = new List<SampleSentimentData>
{
    new SampleSentimentData { SentimentText = "Best game I've ever played.", Sentiment = true },
    new SampleSentimentData { SentimentText = "==RUDE== Dude, 2", Sentiment = false },
    new SampleSentimentData { SentimentText = "Until the next game, this is the best Xbox game!", Sentiment = true }
};

// Expose the list as a data view so the pipelines can be fitted on it.
IDataView trainData = ml.Data.LoadFromEnumerable(data);

In [7]:
// Step shared by both pipelines: split every "SentimentText" value into characters
// and store them as keys in the "Chars" column.
// NOTE(review): useMarkerCharacters: false presumably suppresses start/end-of-text
// marker characters — confirm against the ML.NET documentation.
var charsPipeline = ml.Transforms.Text.TokenizeIntoCharactersAsKeys(
    outputColumnName: "Chars",
    inputColumnName: "SentimentText",
    useMarkerCharacters: false);

// Character unigrams only.
var ngramOnePipeline = ml.Transforms.Text.ProduceNgrams(
    outputColumnName: "CharsUnigrams",
    inputColumnName: "Chars",
    ngramLength: 1);

// Character n-grams up to length 2. ngramLength and useAllLengths are the values the
// original call picked up implicitly as defaults; they are spelled out here because
// useAllLengths: true means the column holds unigrams AND bigrams (the recorded
// CharsTwograms output lists both 'B' and 'B|e').
var ngramTwoPipeline = ml.Transforms.Text.ProduceNgrams(
    outputColumnName: "CharsTwograms",
    inputColumnName: "Chars",
    ngramLength: 2,
    useAllLengths: true);

// Alias for the original, misspelled variable name, kept so that any other cell
// that still refers to it continues to work.
var ngramTwpPipeline = ngramTwoPipeline;

// Full pipelines: character tokenization followed by the respective n-gram step.
var oneCharsPipeline = charsPipeline.Append(ngramOnePipeline);
var twoCharsPipeline = charsPipeline.Append(ngramTwoPipeline);

In [8]:
// Fit each pipeline on trainData, then run the fitted result over the same data.
var transformedData_onechars = oneCharsPipeline
    .Fit(trainData)
    .Transform(trainData);

var transformedData_twochars = twoCharsPipeline
    .Fit(trainData)
    .Transform(trainData);

In [9]:
// Writes an n-gram column to the console: a header line, then one line per row made of
// 'slot name' - value pairs, then a separator line.
//   columnName - label used in the header line.
//   column     - the rows of the column, one float vector per row.
//   names      - slot names of the column; entry i names slot i of every row vector.
Action<string, IEnumerable<VBuffer<float>>, VBuffer<ReadOnlyMemory<char>>> printHelper = (columnName, column, names) =>
{
    Console.WriteLine($"{columnName} column obtained post-transformation.");

    foreach (var featureRow in column)
    {
        // NOTE(review): Items() presumably yields only the explicitly stored entries,
        // which would explain why rows in the recorded output differ in length — confirm.
        foreach (var item in featureRow.Items())
        {
            // item.Key is a logical slot index. Resolve the name through the buffer's
            // index-based lookup instead of indexing GetValues(), which exposes only the
            // explicitly stored values and therefore lines up with slot indices only
            // when the name buffer is dense.
            var slotName = names.GetItemOrDefault(item.Key);
            Console.Write($"'{slotName}' - {item.Value} ");
        }

        // Terminate the current row's line.
        Console.WriteLine();
    }

    Console.WriteLine("===================================================");
};

// Buffer that GetSlotNames fills through its ref parameter; reused for both columns.
VBuffer<ReadOnlyMemory<char>> slotNames = default;

// CharsUnigrams: fetch the row vectors and the slot names, then print them.
var charsOneGramColumn = transformedData_onechars.GetColumn<VBuffer<float>>("CharsUnigrams");
transformedData_onechars.Schema["CharsUnigrams"].GetSlotNames(ref slotNames);
printHelper("CharsUnigrams", charsOneGramColumn, slotNames);

// CharsTwograms: same steps against the second transformed data view.
var charsTwoGramColumn = transformedData_twochars.GetColumn<VBuffer<float>>("CharsTwograms");
transformedData_twochars.Schema["CharsTwograms"].GetSlotNames(ref slotNames);
printHelper("CharsTwograms", charsTwoGramColumn, slotNames);

CharsUnigrams column obtained post-transformation.
'B' - 1 'e' - 6 's' - 1 't' - 1 '<␠>' - 4 'g' - 1 'a' - 2 'm' - 1 'I' - 1 ''' - 1 'v' - 2 'r' - 1 'p' - 1 'l' - 1 'y' - 1 'd' - 1 '.' - 1 '=' - 0 'R' - 0 'U' - 0 'D' - 0 'E' - 0 'u' - 0 ',' - 0 '2' - 0 'n' - 0 'i' - 0 'h' - 0 'x' - 0 'b' - 0 'X' - 0 'o' - 0 '!' - 0 
'e' - 1 '<␠>' - 2 'd' - 1 '=' - 4 'R' - 1 'U' - 1 'D' - 2 'E' - 1 'u' - 1 ',' - 1 '2' - 1 
'B' - 0 'e' - 6 's' - 3 't' - 6 '<␠>' - 9 'g' - 2 'a' - 2 'm' - 2 'I' - 0 ''' - 0 'v' - 0 'r' - 0 'p' - 0 'l' - 1 'y' - 0 'd' - 0 '.' - 0 '=' - 0 'R' - 0 'U' - 1 'D' - 0 'E' - 0 'u' - 0 ',' - 1 '2' - 0 'n' - 2 'i' - 3 'h' - 3 'x' - 2 'b' - 2 'X' - 1 'o' - 1 '!' - 1 
CharsTwograms column obtained post-transformation.
'B' - 1 'B|e' - 1 'e' - 6 'e|s' - 1 's' - 1 's|t' - 1 't' - 1 't|<␠>' - 1 '<␠>' - 4 '<␠>|g' - 1 'g' - 1 'g|a' - 1 'a' - 2 'a|m' - 1 'm' - 1 'm|e' - 1 'e|<␠>' - 2 '<␠>|I' - 1 'I' - 1 'I|'' - 1 ''' - 1 ''|v' - 1 'v' - 2 'v|e' - 2 '<␠>|e' - 1 'e|v' - 1 'e|r' - 1 'r' - 1 'r|<␠