In [1]:
using DataFrames, CSV, Statistics, MLJ, Random, RDatasets

In [2]:
# Read the CSV into a DataFrame and assign the 14 column names in a single
# expression (`rename!` returns the DataFrame it renamed).
# NOTE(review): `CSV.File` treats the first row as a header by default; if this
# file has no header row, the first record is consumed as column names — confirm.
df = rename!(
    DataFrame(CSV.File("processed.cleveland.csv")),
    [
        :age, :sex, :cp, :trestbps, :chol, :fbs, :restecg,
        :thalach, :exang, :oldpeak, :slope, :ca, :thal, :target,
    ],
);

## Limpando dados faltantes

In [3]:
# Clean the data: turn the "?" placeholders into `missing`, drop the incomplete
# rows, then convert the `ca` and `thal` columns to Float64.

# Widen every column's element type so that it can hold `missing`.
allowmissing!(df)

# Replace every "?" cell with `missing`.
# `isequal` is used instead of `==` because `missing == "?"` evaluates to
# `missing`, which `ifelse` rejects as a non-Bool condition; `isequal` always
# returns a Bool, so cells that are already missing no longer cause an error.
df .= ifelse.(isequal.(df, "?"), missing, df)

# Remove the rows that contain missing values.
dropmissing!(df)

# Parse the two columns into Float64.
df.ca = parse.(Float64, df.ca)
df.thal = parse.(Float64, df.thal);


## Particionando os dados de forma estratificada (o target não é convertido para binário aqui: permanecem as 5 classes, 0–4)

In [4]:
# Separate the `target` column (y) from all remaining columns (X).
# NOTE: the target is used as-is; no binarization happens in this cell.
y, X = unpack(df, ==(:target), _ -> true);

# Stratified split of the row indices on `y`, with fraction 0.8 going to `train`
# and the remainder to `test`.
train, test = partition(eachindex(y), 0.8; stratify=y);

## Testando a partição

In [14]:
# Percentage of each target class (0 through 4) among the test rows, used to
# eyeball whether the partition kept the class proportions.
ytest = y[test]  # index once instead of once per expression

# Share (in percent) of test labels equal to `k`.
classpct(k) = count(==(k), ytest) / length(ytest) * 100

distTest0, distTest1, distTest2, distTest3, distTest4 = classpct.(0:4)

for pct in (distTest0, distTest1, distTest2, distTest3, distTest4)
    println(pct, "%")
end

53.333333333333336%
18.333333333333332%
11.666666666666666%
11.666666666666666%
5.0%


## Carregando o modelo

In [5]:
# Load the decision-tree classifier model code. `pkg` is given explicitly so the
# lookup does not depend on the model name being unique across registered packages.
tree_model = @load DecisionTreeClassifier pkg=DecisionTree verbosity=1

import MLJModels ✔
import DecisionTree 

┌ Info: Loading into module "Main": 
└ @ MLJModels /home/brainiac/.julia/packages/MLJModels/BQAzu/src/loading.jl:70


✔
import MLJModels.DecisionTree_ ✔


DecisionTreeClassifier(
    max_depth = -1,
    min_samples_leaf = 1,
    min_samples_split = 2,
    min_purity_increase = 0.0,
    n_subfeatures = 0,
    post_prune = false,
    merge_purity_threshold = 1.0,
    pdf_smoothing = 0.0,
    display_depth = 5)[34m @341[39m

In [6]:

# Bind the model to the data: features `X` and the target converted to a
# categorical vector.
ycat = categorical(y)
tree = machine(tree_model, X, ycat)

[34mMachine{DecisionTreeClassifier} @626[39m trained 0 times.
  args: 
    1:	[34mSource @423[39m ⏎ `Table{AbstractArray{Continuous,1}}`
    2:	[34mSource @381[39m ⏎ `AbstractArray{Multiclass{5},1}`


## Treinando o modelo

In [7]:
 # 80:20 split: `train`/`test` were produced by `partition(..., 0.8, ...)`.
 # Fit the machine on the training rows only, then predict for the test rows of `X`.
fit!(tree, rows=train);
yhat = predict(tree, X[test,:]);


┌ Info: Training [34mMachine{DecisionTreeClassifier} @626[39m.
└ @ MLJBase /home/brainiac/.julia/packages/MLJBase/2yoMe/src/machines.jl:317


## Avaliando

In [8]:
# Reduce each prediction in `yhat` to a single value by broadcasting `mode` over it.
resultado = mode.(yhat);

In [9]:
# Accuracy of the predicted values against the true test labels, with the labels
# converted to a categorical vector for the comparison.
accuracy(resultado, categorical(y[test]))

0.5