# This notebook analyzes income data and trains a binary classifier that predicts whether a person's annual income is more than 50k

In [None]:
#r "nuget:Microsoft.ML,1.7.0"

using System.IO;
using Microsoft.ML.Data;
using Microsoft.ML;
using static Microsoft.ML.DataOperationsCatalog;

In [None]:
// Resolve the dataset locations relative to the kernel's current working directory.
// Path.Combine inserts the platform's directory separator, so the notebook also runs
// on Linux/macOS, where a hard-coded "\\" would become part of the file name instead
// of separating directories.
string dir = Directory.GetCurrentDirectory();
string dataDir = Path.Combine(dir, "data");
string testDatasetFilePath = Path.Combine(dataDir, "test.csv");
string trainDatasetFilePath = Path.Combine(dataDir, "train.csv");

In [None]:
// Single ML.NET context shared by every later cell; created with a fixed seed of 0.
var mlContext = new MLContext(seed: 0);

In [None]:
/// <summary>
/// Input row schema used when loading the income CSV.
/// Only file columns 0, 1, 3 and 14 are mapped; all other columns are not loaded.
/// </summary>
public class IncomeData
{
    /// <summary>Age, read from file column 0 as a float.</summary>
    [LoadColumn(0)]
    public float Age { get; set; }

    /// <summary>Work class category, read from file column 1 (e.g. "Private", "State-gov" in the previews).</summary>
    [LoadColumn(1)]
    public string Workclass { get; set; }

    /// <summary>Education category, read from file column 3 (e.g. "Bachelors", "HS-grad" in the previews).</summary>
    [LoadColumn(3)]
    public string Education { get; set;}

    /// <summary>
    /// Training target, read from file column 14 and exposed to the pipeline as the "Label" column.
    /// </summary>
    // NOTE(review): the name "Sentiment" looks carried over from another sample; going by the
    // notebook title this presumably means "income is more than 50k" — confirm before renaming,
    // because the single-prediction cell assigns this property by name.
    [LoadColumn(14), ColumnName("Label")]
    public bool Sentiment { get; set; }
}

/// <summary>
/// Output row for a single prediction: the inherited <see cref="IncomeData"/> fields
/// plus the values produced by the trained model.
/// </summary>
public class IncomeDataPrediction : IncomeData
{
    /// <summary>Predicted class, bound to the "PredictedLabel" column.</summary>
    [ColumnName("PredictedLabel")]
    public bool Prediction { get; set; }

    // No ColumnName attribute here or on Score; presumably each binds to the column with the
    // same name as the property (the scored rows in this notebook contain "Probability" and
    // "Score" columns) — confirm against the ML.NET schema-binding rules.
    public float Probability { get; set; }
    
    public float Score { get; set; }
}

In [None]:
// Load the training CSV using the IncomeData column mapping.
// The file is comma-separated and its first line is a header row.
var dataView = mlContext.Data.LoadFromTextFile<IncomeData>(
    trainDatasetFilePath,
    hasHeader: true,
    separatorChar: ',');

In [None]:
// Hold out 20% of the loaded rows as the test set; the rest becomes the training set.
var splitDataView = mlContext.Data.TrainTestSplit(
    dataView,
    testFraction: 0.2);

In [None]:
// Display the first five rows of the training partition as a sanity check.
var trainPreview = splitDataView.TrainSet.Preview(5);
trainPreview.RowView

index,Values
0,"[ Age: 67, Workclass: Private, Education: Doctorate, Label: True ]"
1,"[ Age: 17, Workclass: Private, Education: 12th, Label: False ]"
2,"[ Age: 31, Workclass: Private, Education: Bachelors, Label: True ]"
3,"[ Age: 58, Workclass: State-gov, Education: 7th-8th, Label: False ]"
4,"[ Age: 25, Workclass: State-gov, Education: Some-college, Label: False ]"


In [None]:
// Stage 1: one-hot encode the Workclass text column.
var workclassEncoder = mlContext.Transforms.Categorical.OneHotEncoding(
    outputColumnName: "WorkclassEncoded",
    inputColumnName: nameof(IncomeData.Workclass));

// Stage 2: one-hot encode the Education text column.
var educationEncoder = mlContext.Transforms.Categorical.OneHotEncoding(
    outputColumnName: "EducationEncoded",
    inputColumnName: nameof(IncomeData.Education));

// Stage 3: assemble Age and both encoded vectors into the single "Features" column.
// NOTE(review): Age is concatenated as a raw value with no normalization step —
// confirm whether the SDCA trainer expects normalized features.
var featureAssembler = mlContext.Transforms.Concatenate(
    "Features",
    nameof(IncomeData.Age),
    "WorkclassEncoded",
    "EducationEncoded");

// Stage 4: SDCA logistic regression trained on "Features" against "Label".
var trainer = mlContext.BinaryClassification.Trainers.SdcaLogisticRegression(
    labelColumnName: "Label",
    featureColumnName: "Features");

// Chain the stages in the order they must run.
var estimator = workclassEncoder
    .Append(educationEncoder)
    .Append(featureAssembler)
    .Append(trainer);

In [None]:
// Fit the whole pipeline (encoders, concatenation and trainer) on the training partition only.
IDataView trainingRows = splitDataView.TrainSet;
var model = estimator.Fit(trainingRows);

In [None]:
// Display the first five rows of the held-out test partition before scoring.
var testPreview = splitDataView.TestSet.Preview(5);
testPreview.RowView

index,Values
0,"[ Age: 28, Workclass: Private, Education: HS-grad, Label: False ]"
1,"[ Age: 76, Workclass: Private, Education: 7th-8th, Label: False ]"
2,"[ Age: 54, Workclass: Federal-gov, Education: HS-grad, Label: True ]"
3,"[ Age: 59, Workclass: Private, Education: Bachelors, Label: True ]"
4,"[ Age: 49, Workclass: Private, Education: Some-college, Label: True ]"


In [None]:
// Run the fitted pipeline over the held-out test partition to get scored rows.
var predictions = model.Transform(splitDataView.TestSet);

// Display the first five scored rows, including the columns added by the pipeline.
predictions.Preview(5).RowView

index,Values
0,"[ Age: 28, Workclass: Private, Education: HS-grad, Label: False, WorkclassEncoded: 1, WorkclassEncoded: { Sparse vector of size 8, 1 explicit values: IsDense: False, Length: 8 }, EducationEncoded: 6, EducationEncoded: { Sparse vector of size 16, 1 explicit values: IsDense: False, Length: 16 }, Features: { Sparse vector of size 25, 3 explicit values: IsDense: False, Length: 25 }, PredictedLabel: False, Score: -3.203324, Probability: 0.039040823 ]"
1,"[ Age: 76, Workclass: Private, Education: 7th-8th, Label: False, WorkclassEncoded: 1, WorkclassEncoded: { Sparse vector of size 8, 1 explicit values: IsDense: False, Length: 8 }, EducationEncoded: 4, EducationEncoded: { Sparse vector of size 16, 1 explicit values: IsDense: False, Length: 16 }, Features: { Sparse vector of size 25, 3 explicit values: IsDense: False, Length: 25 }, PredictedLabel: False, Score: -1.060967, Probability: 0.25712472 ]"
2,"[ Age: 54, Workclass: Federal-gov, Education: HS-grad, Label: True, WorkclassEncoded: 6, WorkclassEncoded: { Sparse vector of size 8, 1 explicit values: IsDense: False, Length: 8 }, EducationEncoded: 6, EducationEncoded: { Sparse vector of size 16, 1 explicit values: IsDense: False, Length: 16 }, Features: { Sparse vector of size 25, 3 explicit values: IsDense: False, Length: 25 }, PredictedLabel: False, Score: -1.3198283, Probability: 0.21084687 ]"
3,"[ Age: 59, Workclass: Private, Education: Bachelors, Label: True, WorkclassEncoded: 1, WorkclassEncoded: { Sparse vector of size 8, 1 explicit values: IsDense: False, Length: 8 }, EducationEncoded: 3, EducationEncoded: { Sparse vector of size 16, 1 explicit values: IsDense: False, Length: 16 }, Features: { Sparse vector of size 25, 3 explicit values: IsDense: False, Length: 25 }, PredictedLabel: True, Score: 0.32380056, Probability: 0.5802502 ]"
4,"[ Age: 49, Workclass: Private, Education: Some-college, Label: True, WorkclassEncoded: 1, WorkclassEncoded: { Sparse vector of size 8, 1 explicit values: IsDense: False, Length: 8 }, EducationEncoded: 5, EducationEncoded: { Sparse vector of size 16, 1 explicit values: IsDense: False, Length: 16 }, Features: { Sparse vector of size 25, 3 explicit values: IsDense: False, Length: 25 }, PredictedLabel: False, Score: -1.6060679, Probability: 0.16713525 ]"


In [None]:
// Compute binary-classification metrics for the scored test rows against the "Label" column.
var metrics = mlContext.BinaryClassification.Evaluate(
    predictions,
    labelColumnName: "Label");

In [None]:
// Build the report line by line, then print it; values are formatted as percentages (P2).
string[] reportLines =
{
    string.Empty,
    "Model quality metrics evaluation",
    "--------------------------------",
    $"Accuracy: {metrics.Accuracy:P2}",
    $"Auc: {metrics.AreaUnderRocCurve:P2}",
    $"F1Score: {metrics.F1Score:P2}",
    "=============== End of model evaluation ===============",
};

foreach (string reportLine in reportLines)
{
    Console.WriteLine(reportLine);
}


Model quality metrics evaluation
--------------------------------
Accuracy: 77.21%
Auc: 76.95%
F1Score: 26.41%


In [None]:
// Hand-built example row to score with the trained model.
IncomeData sample = new IncomeData
{
    Age = 23.0f,
    Workclass = "Private",
    Education = "Bachelors",
    Sentiment = false,
};

// Single-row scorer created from the trained model.
var predictionEngine = mlContext.Model.CreatePredictionEngine<IncomeData, IncomeDataPrediction>(model);

// Score the example and display the result as the cell output.
IncomeDataPrediction result = predictionEngine.Predict(sample);
result

Prediction,Probability,Score,Age,Workclass,Education,Sentiment
False,0.1326429,-1.8777902,23,Private,Bachelors,False
