In [1]:
using Plots, StatsPlots
include("src/SFGP_classification.jl")
using Flux
using StatsBase
using CSV, DataFrames
using ScikitLearn
using KnetMetrics
using Tables

In [2]:
df = CSV.File("./data/heart.csv",header = false) |> DataFrame
df = df[(df[:,12].!="?").*(df[:,13].!="?"),:]

df[!,12] = parse.(Float64,string.(df[:,12]))
df[!,13] = parse.(Float64,string.(df[:,13]))

df = Matrix(df)

297×14 Matrix{Float64}:
 63.0  1.0  1.0  145.0  233.0  1.0  2.0  150.0  0.0  2.3  3.0  0.0  6.0  0.0
 67.0  1.0  4.0  160.0  286.0  0.0  2.0  108.0  1.0  1.5  2.0  3.0  3.0  2.0
 67.0  1.0  4.0  120.0  229.0  0.0  2.0  129.0  1.0  2.6  2.0  2.0  7.0  1.0
 37.0  1.0  3.0  130.0  250.0  0.0  0.0  187.0  0.0  3.5  3.0  0.0  3.0  0.0
 41.0  0.0  2.0  130.0  204.0  0.0  2.0  172.0  0.0  1.4  1.0  0.0  3.0  0.0
 56.0  1.0  2.0  120.0  236.0  0.0  0.0  178.0  0.0  0.8  1.0  0.0  3.0  0.0
 62.0  0.0  4.0  140.0  268.0  0.0  2.0  160.0  0.0  3.6  3.0  2.0  3.0  3.0
 57.0  0.0  4.0  120.0  354.0  0.0  0.0  163.0  1.0  0.6  1.0  0.0  3.0  0.0
 63.0  1.0  4.0  130.0  254.0  0.0  2.0  147.0  0.0  1.4  2.0  1.0  7.0  2.0
 53.0  1.0  4.0  140.0  203.0  1.0  2.0  155.0  1.0  3.1  3.0  0.0  7.0  1.0
 57.0  1.0  4.0  140.0  192.0  0.0  0.0  148.0  0.0  0.4  2.0  0.0  6.0  0.0
 56.0  0.0  2.0  140.0  294.0  0.0  2.0  153.0  0.0  1.3  2.0  0.0  3.0  0.0
 56.0  1.0  3.0  130.0  256.0  1.0  2.0  142.0  1.0 

In [3]:
X = transpose(df)[1:end-1,:]
y = transpose(df)[end:end,:]
y[y.!=0] .= 1 

137-element view(::Vector{Float64}, [2, 3, 7, 9, 10, 13, 17, 23, 24, 25  …  286, 288, 289, 290, 292, 293, 294, 295, 296, 297]) with eltype Float64:
 1.0
 1.0
 1.0
 1.0
 1.0
 1.0
 1.0
 1.0
 1.0
 1.0
 1.0
 1.0
 1.0
 ⋮
 1.0
 1.0
 1.0
 1.0
 1.0
 1.0
 1.0
 1.0
 1.0
 1.0
 1.0
 1.0

In [4]:
function getLoglike(mm::SFGP,X,y)
    
    m,S = mm.gp(X)
    s = Matrix(transpose(sqrt.(diag(S))[:,:]))
    
    probs = getProbs.(m,s,mm.lower,mm.upper)
    vals = (mm.vals)
    
    return mean(logpdf.([MixtureModel(Bernoulli.(vals),p) for p in Flux.unstack(probs,2)],y[:]))
end


function getF1score(mm::SFGP,X,y)
    
    m,S = mm.gp(X)
    s = Matrix(transpose(sqrt.(diag(S))[:,:]))
    
    probs = getProbs.(m,s,mm.lower,mm.upper)
    vals = (mm.vals)
    
    predictions = round.(mean.([MixtureModel(Bernoulli.(vals),p) for p in Flux.unstack(probs,2)]))
    
    return KnetMetrics.f1_score(KnetMetrics.confusion_matrix(Int.(y[:]),Int.(predictions), labels=[0,1]),class_name=1)
    
end

getF1score (generic function with 1 method)

In [5]:
import Random
Random.seed!(321)

folds = ScikitLearn.CrossValidation.KFold(size(X,2),n_folds=10)

lls = []
f1s = []
i = 0

for (train, test) in folds
    
    Xm = mean(X,dims=2)
    Xs = std(X,dims=2)
    sfgp = SFGP(SVGP((X[:,1:10] .- Xm)./Xs),100,5)
    
    params = Flux.params(sfgp)
    opt = ADAM(0.05)
    
    Xtrain = X[:,train]
    
    mean_train = mean(Xtrain,dims=2)
    std_train = std(Xtrain,dims=2)
    Xtrain = (Xtrain.-mean_train) ./ std_train
    
    Xtest = X[:,test]
    Xtest = (Xtest.-mean_train) ./ std_train
    ytrain = y[:,train]
    ytest = y[:,test]
    
    for i in 1:350
        grads = Zygote.gradient(() -> sample_elbo(sfgp,Xtrain,ytrain),params)
        Flux.Optimise.update!(opt,params,grads)
    end
    
    push!(lls, getLoglike(sfgp,Xtest,ytest))
    push!(f1s, getF1score(sfgp,Xtest,ytest))
    
    i = i+1
    println(i)
end

1
2
3
4
5
6
7
8
9
10


└ @ KnetMetrics.Classification /Users/saremseitz/.julia/packages/KnetMetrics/9L9oI/src/classification/confusion_matrix.jl:209
└ @ KnetMetrics.Classification /Users/saremseitz/.julia/packages/KnetMetrics/9L9oI/src/classification/confusion_matrix.jl:212
└ @ KnetMetrics.Classification /Users/saremseitz/.julia/packages/KnetMetrics/9L9oI/src/classification/confusion_matrix.jl:209
└ @ KnetMetrics.Classification /Users/saremseitz/.julia/packages/KnetMetrics/9L9oI/src/classification/confusion_matrix.jl:212


In [6]:
println(mean(lls))
println(std(lls))

-0.397242499896138
0.08255946052460975


In [7]:
println(mean(f1s))
println(std(f1s))

0.8219841701594858
0.07494024227370344


In [8]:
df = DataFrame(hcat(lls,f1s),[:loglike,:f1score])
CSV.write("./evals/sfgp_classification_heart.csv",  df)

"./evals/sfgp_classification_heart.csv"