task name: LoadingText
task description: Implement the provided C# code to load and process text data

In [None]:
#r "nuget:Microsoft.ML"


In [None]:
using System;
using System.Collections.Generic;
using System.IO;
using System.Text;
using Microsoft.ML;
using Microsoft.ML.Data;

In [None]:
// ML.NET context object shared by the cells in this notebook.
MLContext mlContext = new MLContext();

In [None]:
// Create 5 tab-separated data files to illustrate different loading methods.
// Each file holds 10 rows of 10 numeric columns and has no header row.
var dataFiles = new List<string>();
var random = new Random(1); // Fixed seed keeps the generated data repeatable.
var dataDirectoryName = "DataDir";
Directory.CreateDirectory(dataDirectoryName);
for (int i = 0; i < 5; i++)
{
    var fileName = Path.Combine(dataDirectoryName, $"Data_{i}.csv");
    dataFiles.Add(fileName);
    using (var fs = File.CreateText(fileName))
    {
        for (int line = 0; line < 10; line++)
        {
            var fields = new string[10];
            for (int pos = 0; pos < fields.Length; pos++)
            {
                // Force approximately 80% of values to be 0.
                var value = random.NextDouble();

                // Format with the invariant culture: these files are read
                // back by a parser, so the decimal separator must not depend
                // on the machine's regional settings (e.g. "0,93" vs "0.93").
                fields[pos] = (value < 0.8 ? 0 : value)
                    .ToString(System.Globalization.CultureInfo.InvariantCulture);
            }

            // Join with tabs so no trailing separator has to be trimmed.
            fs.WriteLine(string.Join("\t", fields));
        }
    }
}

In [None]:
/// <summary>
/// Counts the rows of <paramref name="idv"/> by walking a row cursor over it
/// and writes the total to the console.
/// </summary>
/// <param name="idv">The data view whose rows are counted.</param>
private static void PrintRowCount(IDataView idv)
{
    long total = 0;

    // The row count is obtained by advancing a cursor to the end,
    // incrementing once per row; the cursor is disposed afterwards.
    using (var rows = idv.GetRowCursor(idv.Schema))
    {
        while (rows.MoveNext())
        {
            total += 1;
        }
    }

    Console.WriteLine(total);
}

In [None]:
// Build a TextLoader for header-less files that maps source column
// indices 0 through 9 to a column named "Features" of kind Single.
var loader = mlContext.Data.CreateTextLoader(
    new TextLoader.Column[]
    {
        new TextLoader.Column("Features", DataKind.Single, 0, 9),
    },
    hasHeader: false);

// Load only the first generated file and print how many rows it has.
var singleFileData = loader.Load(dataFiles[0]);
PrintRowCount(singleFileData);