# ML.NET - Stop Words

In [25]:
// Install the ML.NET NuGet package
//#r "nuget:Microsoft.ML,1.3.1" 
#r "nuget:Microsoft.ML" 

## Import the required namespaces

In [26]:
using Microsoft.ML;
using Microsoft.ML.Data;
using System;
using System.Collections.Generic;
using System.Text;

## Declare data-classes for input data and predictions

In [27]:
/// <summary>
/// Input row for the text pipelines in this notebook: one raw string to be tokenized.
/// </summary>
public class TextData
{
    /// <summary>Raw text; the pipelines read it through the "Text" column name.</summary>
    public string Text { get; set; }
}
  
/// <summary>
/// Output row of the text pipelines in this notebook: the tokens produced for one input text.
/// </summary>
public class TextTokens
{
    /// <summary>Tokens the pipelines write to the "Tokens" column.</summary>
    public string[] Tokens { get; set; }
}

## Helper function to print the tokens

In [28]:
/// <summary>
/// Writes every token of <paramref name="tokens"/> to the console, one per line,
/// preceded and followed by blank lines so successive calls are visually separated.
/// </summary>
/// <param name="tokens">Prediction result whose <c>Tokens</c> array is printed.</param>
private static void PrintTokens(TextTokens tokens)
{
    // WriteLine adds its own terminator, so this emits two line breaks.
    Console.WriteLine(Environment.NewLine);

    string[] words = tokens.Tokens;
    var output = new StringBuilder();

    for (var i = 0; i < words.Length; i++)
    {
        output.Append(words[i]).Append(Environment.NewLine);
    }

    Console.WriteLine(output.ToString());
}

In [29]:
// Entry point for every ML.NET operation used in this cell.
var context = new MLContext();

// No training rows are supplied: the pipelines are fitted on an empty data view
// only to obtain transformers that the prediction engines can use.
var emptyData = new List<TextData>();
var data = context.Data.LoadFromEnumerable(emptyData);

// Both pipelines share the same separators and the same sample sentence, so any
// difference in the printed tokens comes only from the stop-word step.
var wordSeparators = new[] { ' ', '.', ',' };
var sampleText = "This is a test sentence, and it is a long one.";

// Pipeline 1: tokenize, then drop the built-in English stop words.
var tokenization = context.Transforms.Text.TokenizeIntoWords("Tokens", "Text", separators: wordSeparators)
    .Append(context.Transforms.Text.RemoveDefaultStopWords("Tokens", "Tokens",
        Microsoft.ML.Transforms.Text.StopWordsRemovingEstimator.Language.English));

var stopWordsModel = tokenization.Fit(data);

var engine = context.Model.CreatePredictionEngine<TextData, TextTokens>(stopWordsModel);

var newText = engine.Predict(new TextData { Text = sampleText });

// The captured notebook output for this call is: test, sentence, long.
PrintTokens(newText);

// Pipeline 2: tokenize, then drop only the explicitly listed words "and" and "a".
var customTokenization = context.Transforms.Text.TokenizeIntoWords("Tokens", "Text", separators: wordSeparators)
    .Append(context.Transforms.Text.RemoveStopWords("Tokens", "Tokens", new[] { "and", "a" }));

var customStopWordsModel = customTokenization.Fit(data);

var customEngine = context.Model.CreatePredictionEngine<TextData, TextTokens>(customStopWordsModel);

var newCustomText = customEngine.Predict(new TextData { Text = sampleText });

// The captured notebook output for this call is: This, is, test, sentence, it, is, long, one.
PrintTokens(newCustomText);

// The trailing Console.ReadLine() was removed: it is a console-application idiom for
// keeping the window open and serves no purpose in a notebook cell.



test
sentence
long



This
is
test
sentence
it
is
long
one

