# ML.NET - Null Values

In [1]:
// ML.NET NuGet package installation
#r "nuget:Microsoft.ML" 

Installed package Microsoft.ML version 1.5.0

## Using C# Class

In [2]:
using System;
using Microsoft.ML;
using Microsoft.ML.Data;

using Microsoft.ML;
using System;
using System.Collections.Generic;
using System.Linq;

## Declare data-classes for input data and predictions

In [3]:
/// <summary>
/// One row of the housing dataset. Every property is bound to a column of the
/// input text file by the index given in its <c>LoadColumn</c> attribute.
/// </summary>
public class HousingData
{
    /// <summary>Value loaded from column 0.</summary>
    [LoadColumn(0)] public float Longitude { get; set; }

    /// <summary>Value loaded from column 1.</summary>
    [LoadColumn(1)] public float Latitude { get; set; }

    /// <summary>Value loaded from column 2.</summary>
    [LoadColumn(2)] public float HousingMedianAge { get; set; }

    /// <summary>Value loaded from column 3.</summary>
    [LoadColumn(3)] public float TotalRooms { get; set; }

    /// <summary>Value loaded from column 4.</summary>
    [LoadColumn(4)] public float TotalBedrooms { get; set; }

    /// <summary>Value loaded from column 5.</summary>
    [LoadColumn(5)] public float Population { get; set; }

    /// <summary>Value loaded from column 6.</summary>
    [LoadColumn(6)] public float Households { get; set; }

    /// <summary>Value loaded from column 7.</summary>
    [LoadColumn(7)] public float MedianIncome { get; set; }

    /// <summary>Value loaded from column 8.</summary>
    [LoadColumn(8)] public float MedianHouseValue { get; set; }

    /// <summary>Text value loaded from column 9 (the only non-numeric column).</summary>
    [LoadColumn(9)] public string OceanProximity { get; set; }
}

/// <summary>
/// A <see cref="HousingData"/> row extended with per-feature missing-value flags.
/// </summary>
public class MissingData : HousingData
{
    /// <summary>
    /// Boolean flags read from the "MissingValues" column that the notebook creates
    /// with <c>IndicateMissingValues</c> over the concatenated "Features" vector.
    /// A <c>true</c> entry is treated by the notebook as a missing feature value.
    /// </summary>
    public bool[] MissingValues { get; set; }
}

/// <summary>
/// A <see cref="HousingData"/> row extended with the feature vector after
/// missing-value replacement.
/// </summary>
public class ReplacedValues : HousingData
{
    /// <summary>
    /// Values read from the "NewValues" column that the notebook creates with
    /// <c>ReplaceMissingValues</c> over the concatenated "Features" vector.
    /// </summary>
    public float[] NewValues { get; set; }
}

## Evaluate

In [4]:
// Maps a row index of the loaded data to the index of a missing entry in that
// row's "Features" vector. It starts empty and is filled by the reporting loop.
private static readonly Dictionary<int, int> MISSING_INDEXES = new Dictionary<int, int>();

MLContext context = new MLContext();

// The CSV has a header row and comma-separated fields.
IDataView data = context.Data.LoadFromTextFile<HousingData>(
    "./datasets/housing/housing.csv",
    separatorChar: ',',
    hasHeader: true);

// Names of every schema column except "Label" and the text column "OceanProximity".
string[] columns = (from col in data.Schema
                    let name = col.Name
                    where !(name == "Label" || name == "OceanProximity")
                    select name).ToArray();

// Indicate missing values: pack the selected columns into one "Features" vector,
// then derive a parallel "MissingValues" vector of flags from it.
var nullTransform = context.Transforms
    .Concatenate(outputColumnName: "Features", inputColumnNames: columns)
    .Append(context.Transforms.IndicateMissingValues(
        outputColumnName: "MissingValues",
        inputColumnName: "Features"));

IDataView nullValues = nullTransform.Fit(data).Transform(data);

// Materialize all rows so they can be indexed by position later on.
MissingData[] nullData = context.Data
    .CreateEnumerable<MissingData>(nullValues, reuseRowObject: false)
    .ToArray();

In [6]:
// Report every missing feature value and remember where it was found.
//
// MISSING_INDEXES outlives this cell, so start from an empty map: without the
// reset, running the cell a second time would throw ArgumentException because
// every row key is already present.
MISSING_INDEXES.Clear();

for (int i = 0; i < nullData.Length; i++)
{
    var missingFlags = nullData[i].MissingValues;

    // Single pass over the flags; positions line up with the entries of `columns`
    // because both come from the same concatenated "Features" vector.
    for (int index = 0; index < missingFlags.Length; index++)
    {
        if (!missingFlags[index])
        {
            continue;
        }

        var feature = columns[index];

        // The map holds one feature index per row. Add() would throw on a second
        // missing feature in the same row, so TryAdd keeps the first index recorded
        // for the row while every missing feature is still reported below.
        MISSING_INDEXES.TryAdd(i, index);

        Console.WriteLine($"Feature {feature} in row {i + 1} has missing value");
    }
}

In [7]:
// Replace missing values: rebuild the "Features" vector from the selected columns
// and write a copy to "NewValues" in which missing entries are replaced using the
// Mean replacement mode.
var replaceTransform = context.Transforms.Concatenate("Features", columns)
    .Append(context.Transforms.ReplaceMissingValues("NewValues", "Features", 
        Microsoft.ML.Transforms.MissingValueReplacingEstimator.ReplacementMode.Mean));

var replacedValues = replaceTransform.Fit(data).Transform(data);

// Materialize all rows so they can be indexed by the row numbers stored in MISSING_INDEXES.
var replacedData = context.Data.CreateEnumerable<ReplacedValues>(replacedValues,
    reuseRowObject: false).ToArray();

// Print the replacement for each recorded (row, feature index) pair in row order.
// A keyed lookup per row replaces scanning the whole dictionary for every row;
// dictionary keys are unique, so the output is unchanged.
for (int i = 0; i < replacedData.Length; i++)
{
    if (MISSING_INDEXES.TryGetValue(i, out var featureIndex))
    {
        Console.WriteLine($"New value - {replacedData[i].NewValues[featureIndex]}");
    }
}