# _PREDICTIONS HEART DISEASE DATA SET_

<hr style="height:2px;">

[Official Link to the Data Set](https://archive.ics.uci.edu/ml/datasets/Heart+Disease)

<br>

## Sumary
- [Preparing the ambient](#Preparing-the-ambient)

<hr>

## _Preparing the ambient_

First, we need prepare the packages that we will use.
The packages are "DataFrames", "MLJ", "MLJLinearModels", "Plots".

In [3]:
using DataFrames, MLJ, MLJLinearModels, Plots, CSV, DecisionTree, MLJFlux;

If some of the packages is not instaled in your computer just use:
```julia
    using Pkg;
    Pkg.add("PackageName");
```   
<br>

[Back to the top](#Sumary)

In [4]:
function trainer(LRC_model, X, y, train::Array{Int, 1}, test::Array{Int, 1})    
    # Training
    LRC = machine(LRC_model, X, categorical(y));
    MLJ.fit!(LRC, rows=train);

    return LRC
end

trainer (generic function with 1 method)

In [5]:
struct Evaluation
    Accuracy::Float64
    Recall::Float64
    Precision::Float64
    F1::Float64
    MCC::Float64
    ConfusionMatrix
end

In [6]:
function _evaluation(LRC, X, y, test; binary=false)
    ŷ = MLJ.predict(LRC, X[test,:]);
    result = mode.(ŷ);
    
    # Accuracy
    _accuracy = accuracy(result, categorical(y[test]));
    
    
    # Confusion Matrix
    _confusion_matrix = confusion_matrix(result, categorical(y[test]));
    
    TN = _confusion_matrix[1];
    FN = _confusion_matrix[2];
    FP = _confusion_matrix[3];
    TP = _confusion_matrix[4];
    
    # Recall
    _recall = TP / (TP + FN)
    _precision = TP / (TP + FP)
    _f1 = (2 * _precision) / (_precision + _recall)
    
    _mcc = 0
    if(binary)
        _mcc = (TP * TN - FP * FN) / ((TP + FP)*(TP + FN)*(TN + FP)*(TN + FN))^(1/2)
    end
    
    return Evaluation(_accuracy, _recall, _precision, _f1, _mcc, _confusion_matrix)
end

_evaluation (generic function with 1 method)

# DataFrame

In [7]:
cleveland = DataFrame(CSV.File("Data/AnalysisData/AnalysisData.data"))

# Categorical

cleveland.sex = categorical(cleveland.sex);
cleveland.cp = categorical(cleveland.cp);
cleveland.fbs = categorical(cleveland.fbs);
cleveland.restecg = categorical(cleveland.restecg);
cleveland.exang = categorical(cleveland.exang);
cleveland.slope = categorical(cleveland.slope);
cleveland.thal = categorical(cleveland.thal);
cleveland.target = categorical(cleveland.target);



In [8]:
y, X = unpack(df, ==(:target), colname -> true);
train, test = partition(eachindex(y), 0.7, stratify=y);

UndefVarError: UndefVarError: y not defined

# Logistic Regression

In [141]:
LRC_model =  MLJLinearModels.LogisticClassifier();
LRC = trainer(LRC_model, cleveland, train, test);
__evaLRC = _evaluation(LRC, cleveland, test; binary=true);

└ @ MLJBase C:\Users\yancf\.julia\packages\MLJBase\8FWJ9\src\machines.jl:73
┌ Info: Training [34mMachine{LogisticClassifier} @676[39m.
└ @ MLJBase C:\Users\yancf\.julia\packages\MLJBase\8FWJ9\src\machines.jl:317
│ using: negative='0' and positive='1'.
└ @ MLJBase C:\Users\yancf\.julia\packages\MLJBase\8FWJ9\src\measures\confusion_matrix.jl:83


In [142]:
__evaLRC.ConfusionMatrix

              ┌───────────────────────────┐
              │       Ground Truth        │
┌─────────────┼─────────────┬─────────────┤
│  Predicted  │      0      │      1      │
├─────────────┼─────────────┼─────────────┤
│      0      │     44      │     12      │
├─────────────┼─────────────┼─────────────┤
│      1      │      4      │     29      │
└─────────────┴─────────────┴─────────────┘


In [143]:
println("Accuracy: $(round(__evaLRC.Accuracy, digits=2))")
println("Recall: $(round(__evaLRC.Recall, digits=2))")
println("Precision: $(round(__evaLRC.Precision, digits=2))")
println("F1: $(round(__evaLRC.F1, digits=2))")
println("MCC: $(round(__evaLRC.MCC, digits=2))")

Accuracy: 0.82
Recall: 0.88
Precision: 0.71
F1: 0.89
MCC: 0.64


# Tree

In [151]:
Tree_model = @load DecisionTreeClassifier verbosity=1
Tree = trainer(Tree_model, cleveland, train, test);
__evaTree = _evaluation(Tree, cleveland, test; binary=true);

┌ Info: Model code for DecisionTreeClassifier already loaded
└ @ MLJModels C:\Users\yancf\.julia\packages\MLJModels\5DFoi\src\loading.jl:54
└ @ MLJBase C:\Users\yancf\.julia\packages\MLJBase\8FWJ9\src\machines.jl:73
┌ Info: Training [34mMachine{DecisionTreeClassifier} @577[39m.
└ @ MLJBase C:\Users\yancf\.julia\packages\MLJBase\8FWJ9\src\machines.jl:317
│ using: negative='0' and positive='1'.
└ @ MLJBase C:\Users\yancf\.julia\packages\MLJBase\8FWJ9\src\measures\confusion_matrix.jl:83


In [152]:
__evaTree.ConfusionMatrix

              ┌───────────────────────────┐
              │       Ground Truth        │
┌─────────────┼─────────────┬─────────────┤
│  Predicted  │      0      │      1      │
├─────────────┼─────────────┼─────────────┤
│      0      │     39      │     16      │
├─────────────┼─────────────┼─────────────┤
│      1      │      9      │     25      │
└─────────────┴─────────────┴─────────────┘


In [153]:
println("Accuracy: $(round(__evaTree.Accuracy, digits=2))")
println("Recall: $(round(__evaTree.Recall, digits=2))")
println("Precision: $(round(__evaTree.Precision, digits=2))")
println("F1: $(round(__evaTree.F1, digits=2))")
println("MCC: $(round(__evaTree.MCC, digits=2))")

Accuracy: 0.72
Recall: 0.74
Precision: 0.61
F1: 0.91
MCC: 0.43


# Neural Network

In [156]:
NNC_model = NeuralNetworkClassifier()
NNC = trainer(NNC_model, cleveland, train, test);
__evaNNC = _evaluation(NNC, cleveland, test; binary=true);

└ @ MLJBase C:\Users\yancf\.julia\packages\MLJBase\8FWJ9\src\machines.jl:73
┌ Info: Training [34mMachine{NeuralNetworkClassifier{Short,…}} @472[39m.
└ @ MLJBase C:\Users\yancf\.julia\packages\MLJBase\8FWJ9\src\machines.jl:317
│ using: negative='0' and positive='1'.
└ @ MLJBase C:\Users\yancf\.julia\packages\MLJBase\8FWJ9\src\measures\confusion_matrix.jl:83


In [157]:
__evaNNC.ConfusionMatrix

              ┌───────────────────────────┐
              │       Ground Truth        │
┌─────────────┼─────────────┬─────────────┤
│  Predicted  │      0      │      1      │
├─────────────┼─────────────┼─────────────┤
│      0      │     48      │     41      │
├─────────────┼─────────────┼─────────────┤
│      1      │      0      │      0      │
└─────────────┴─────────────┴─────────────┘


In [158]:
println("Accuracy: $(round(__evaNNC.Accuracy, digits=2))")
println("Recall: $(round(__evaNNC.Recall, digits=2))")
println("Precision: $(round(__evaNNC.Precision, digits=2))")
println("F1: $(round(__evaNNC.F1, digits=2))")
println("MCC: $(round(__evaNNC.MCC, digits=2))")

Accuracy: 0.54
Recall: NaN
Precision: 0.0
F1: NaN
MCC: NaN
