In [1]:
using DataFrames, CSV, Statistics, MLJ, Random, RDatasets, Flux

In [2]:
df = DataFrame(CSV.File("processed.cleveland.csv"))
rename!(df, [:age, :sex, :cp, :trestbps, :chol, :fbs, :restecg, :thalach, :exang, :oldpeak, :slope, :ca, :thal, :target]);

## Limpando dados faltosos

In [3]:
#limpando os dados

#permitindo missing data no df
allowmissing!(df)

#substituindo todos os dados faltosos "?" por missing values
df .= ifelse.(df .=="?", missing, df)

#removendo as linhas com missing values
dropmissing!(df)

#transformando as duas colunas em float64
df.ca = parse.(Float64, df.ca)
df.thal = parse.(Float64, df.thal);

#passando os dados categóricos para int
df.sex = trunc.(Int, df.sex)
df.cp = trunc.(Int, df.cp)
df.restecg = trunc.(Int, df.restecg)
df.exang = trunc.(Int, df.exang)
df.slope = trunc.(Int, df.slope)
df.ca = trunc.(Int, df.ca)
df.thal = trunc.(Int, df.thal)
df.fbs = trunc.(Int, df.thal);

## Particionando os dados de forma stratificada e passando o target para binário

In [4]:
#dividindo o target dos demais atributos
y, X = unpack(df, ==(:target), colname -> true);

#passando para variaveis binárias
for i = 1: size(y)[1]
    y[i] == 0 ? y[i] = 0 : y[i] = 1
end

#particionando para train test 80/20
train, test = partition(eachindex(y), 0.8, stratify=y);

## Testando a partição

In [5]:
distTest0 = (size(filter(x -> x == 0, y[test]))[1] / size(y[test])[1]) * 100
distTest1 = (size(filter(x -> x == 1, y[test]))[1] / size(y[test])[1]) * 100

println(distTest0, "%")
println(distTest1, "%")

54.23728813559322%
45.76271186440678%


## Carregando o modelo Decision Tree

In [6]:
tree_model = @load DecisionTreeClassifier verbosity=1

import MLJModels ✔
import DecisionTree 

┌ Info: Loading into module "Main": 
└ @ MLJModels /home/brainiac/.julia/packages/MLJModels/BQAzu/src/loading.jl:70


✔
import MLJModels.DecisionTree_ ✔


DecisionTreeClassifier(
    max_depth = -1,
    min_samples_leaf = 1,
    min_samples_split = 2,
    min_purity_increase = 0.0,
    n_subfeatures = 0,
    post_prune = false,
    merge_purity_threshold = 1.0,
    pdf_smoothing = 0.0,
    display_depth = 5)[34m @538[39m

In [7]:

tree = machine(tree_model, X, categorical(y))

[34mMachine{DecisionTreeClassifier} @266[39m trained 0 times.
  args: 
    1:	[34mSource @655[39m ⏎ `Table{Union{AbstractArray{Continuous,1}, AbstractArray{Count,1}}}`
    2:	[34mSource @040[39m ⏎ `AbstractArray{Multiclass{2},1}`


## Treinando o modelo

In [8]:
MLJ.fit!(tree, rows=train);
ŷ = MLJ.predict(tree, X[test,:]);

┌ Info: Training [34mMachine{DecisionTreeClassifier} @266[39m.
└ @ MLJBase /home/brainiac/.julia/packages/MLJBase/2yoMe/src/machines.jl:317


## Avaliando

In [9]:
resultado = mode.(ŷ);

In [10]:
accuracy(resultado, categorical(y[test]))

0.728813559322034

In [11]:
confusion_matrix(resultado, categorical(y[test]))

│ using: negative='0' and positive='1'.
└ @ MLJBase /home/brainiac/.julia/packages/MLJBase/2yoMe/src/measures/confusion_matrix.jl:83


              ┌───────────────────────────┐
              │       Ground Truth        │
┌─────────────┼─────────────┬─────────────┤
│  Predicted  │      0      │      1      │
├─────────────┼─────────────┼─────────────┤
│      0      │     27      │     11      │
├─────────────┼─────────────┼─────────────┤
│      1      │      5      │     16      │
└─────────────┴─────────────┴─────────────┘


In [58]:
TP = 16
FN = 11
FP = 5
recall = TP/TP+FN
precision = TP/TP+FP
f1 = (2 * recall * precision) / (precision + recall)

print("Recall: ", recall, "\nPrecision: ", precision, "\nF1: ", f1)

Recall: 12.0
Precision: 6.0
F1: 8.0

## Treinando o modelo Logistic Regression

In [21]:
using MLJLinearModels

┌ Info: Precompiling MLJLinearModels [6ee0df7b-362f-4a72-a706-9e79364fb692]
└ @ Base loading.jl:1260


In [22]:
LRC_model =  MLJLinearModels.LogisticClassifier()

LogisticClassifier(
    lambda = 1.0,
    gamma = 0.0,
    penalty = :l2,
    fit_intercept = true,
    penalize_intercept = false,
    solver = nothing)[34m @267[39m

In [23]:
LRC = machine(LRC_model, X, categorical(y))

└ @ MLJBase /home/brainiac/.julia/packages/MLJBase/2yoMe/src/machines.jl:73


[34mMachine{LogisticClassifier} @831[39m trained 0 times.
  args: 
    1:	[34mSource @019[39m ⏎ `Table{Union{AbstractArray{Continuous,1}, AbstractArray{Count,1}}}`
    2:	[34mSource @986[39m ⏎ `AbstractArray{Multiclass{2},1}`


In [24]:
MLJ.fit!(LRC, rows=train);
ŷ = MLJ.predict(LRC, X[test,:]);

┌ Info: Training [34mMachine{LogisticClassifier} @831[39m.
└ @ MLJBase /home/brainiac/.julia/packages/MLJBase/2yoMe/src/machines.jl:317


In [25]:
resultado = mode.(ŷ);

In [26]:
accuracy(resultado, categorical(y[test]))

0.8135593220338984

In [29]:
confusion_matrix(resultado, categorical(y[test]))

│ using: negative='0' and positive='1'.
└ @ MLJBase /home/brainiac/.julia/packages/MLJBase/2yoMe/src/measures/confusion_matrix.jl:83


              ┌───────────────────────────┐
              │       Ground Truth        │
┌─────────────┼─────────────┬─────────────┤
│  Predicted  │      0      │      1      │
├─────────────┼─────────────┼─────────────┤
│      0      │     29      │      8      │
├─────────────┼─────────────┼─────────────┤
│      1      │      3      │     19      │
└─────────────┴─────────────┴─────────────┘


In [57]:
TP = 19
FN = 8
FP = 3
recall = TP/TP+FN
precision = TP/TP+FP
f1 = (2 * recall * precision) / (precision + recall)

print("Recall: ", recall, "\nPrecision: ", precision, "\nF1: ", f1)

Recall: 9.0
Precision: 4.0
F1: 5.538461538461538