In [2]:
using LIBSVM
using JLD2, FileIO
using Statistics
using Printf
using Random

# Loads accuracy, oneHotEncoding, normalizeMinMax, etc.
include("unit2-multilayer-perceptron.jl") 
# Loads confusionMatrix and printConfusionMatrix
include("unit4-metrics.jl")

# --- 1. Data Loading Function ---
function load_svm_data(filename::String)
    data = load(filename)

    # Extract raw matrices
    x_train_raw = copy(data["x_train"])
    y_train_raw = copy(data["y_train"])
    x_test_raw  = copy(data["x_test"])
    y_test_raw  = copy(data["y_test"])

    # LIBSVM needs (Features x Samples). We transpose the matrices.
    # Convert to Float64 which is the standard for LIBSVM
    x_train = Float64.(permutedims(x_train_raw))
    x_test  = Float64.(permutedims(x_test_raw))

    # LIBSVM needs labels as a simple Vector of integers, NOT One-Hot encoded
    # Ensure they are one-dimensional vectors
    y_train = vec(Int.(y_train_raw))
    y_test  = vec(Int.(y_test_raw))

    n_inputs = size(x_train, 1)
    println(" Dataset Loaded: $filename")
    println("   > Features: $n_inputs | Samples Train: $(size(x_train, 2)) | Samples Test: $(size(x_test, 2))")

    return (x_train=x_train, y_train=y_train, x_test=x_test, y_test=y_test)
end

calculate_metrics (generic function with 1 method)

In [3]:
# --- 3. Experiment Configuration ---

# Define SVM configurations (Hyperparameters) to test
# Format: (Name, Kernel, Cost, Gamma, Degree)
svm_configs = [
    ("Linear (C=1)",        Kernel.Linear,      1.0,   0.0, 0),   # Baseline Linear
    ("Linear (C=10)",       Kernel.Linear,      10.0,  0.0, 0),   # Harder margin
    ("RBF (C=1, G=Auto)",   Kernel.RadialBasis, 1.0,   0.0, 0),   # 0.0 in Gamma usually means 1/k (Auto)
    ("RBF (C=10, G=0.1)",   Kernel.RadialBasis, 10.0,  0.1, 0),   # Manual Tuning
    ("RBF (C=100, G=0.01)", Kernel.RadialBasis, 100.0, 0.01, 0),  # High cost, low gamma
    ("RBF (C=10, G=1.0)",   Kernel.RadialBasis, 10.0,  1.0, 0),   # High gamma
    ("Poly (Deg=2, C=1)",   Kernel.Polynomial,  1.0,   0.1, 2),   # Polynomial degree 2
    ("Poly (Deg=3, C=1)",   Kernel.Polynomial,  1.0,   0.1, 3),   # Polynomial degree 3
]

println("SVM configurations loaded: $(length(svm_configs)) scenarios.")

SVM configurations loaded: 8 scenarios.


In [4]:
# --- 4. Experimental Loop (Training & Evaluation) ---

function run_svm_experiment(dataset, approach_name, configs)
    println("\n=================================================================")
    println(" EXPERIMENT: $approach_name (Goal: Maximize Sensitivity/Recall)")
    println("=================================================================")
    
    x_train, y_train = dataset.x_train, dataset.y_train
    x_test, y_test   = dataset.x_test, dataset.y_test
    
    # Identify classes for metrics calculation (0, 1, 2, 3, 4)
    classes = sort(unique(y_train))
    
    results = []

    for (i, conf) in enumerate(configs)
        (name, k_type, c_val, g_val, deg_val) = conf
        
        print(" Testing Config $i/$(length(configs)): $name ... ")
        start_time = time()
        
        # 1. Train SVM
        # Gamma=0.0 defaults to 1/k in LIBSVM
        model = svmtrain(x_train, y_train, 
                         kernel = k_type, cost = c_val, gamma = g_val, degree = deg_val)
        
        train_time = time() - start_time
        
        # 2. Predict
        y_hat, _ = svmpredict(model, x_test)
        y_hat_int = Int.(y_hat) # Convert to Int for compatibility
        
        # 3. Calculate Metrics (Reusing Unit 4 & Unit 2 Functions)
        # [cite_start]This calls confusionMatrix(outputs::Vector, targets::Vector, classes::Vector) from unit4-metrics.jl [cite: 534]
        # [cite_start]which internally calls oneHotEncoding from unit2-multilayer-perceptron.jl [cite: 816]
        metrics = confusionMatrix(y_hat_int, y_test, classes; weighted=false)
        
        # Extract scalar values
        sens = metrics.sensitivity
        acc  = metrics.accuracy * 100
        f1   = metrics.f_score
        spec = metrics.specificity
        prec = metrics.ppv
        
        push!(results, (
            config = name,
            sens = sens,
            acc = acc,
            f1 = f1,
            spec = spec,
            prec = prec,
            time = train_time
        ))
        
        println("Done. Sens: $(round(sens, digits=4)) | Time: $(round(train_time, digits=3))s")
    end
    
    # Sort by Sensitivity (Descending) - Priority for medical diagnosis
    sort!(results, by = x -> x.sens, rev = true)
    
    # Print Table
    println("\n --- FINAL RESULTS FOR $approach_name ---")
    @printf(" %-20s | %-8s | %-8s | %-8s | %-8s | %-8s\n", 
            "Configuration", "Sens", "Acc(%)", "F1", "Spec", "Time(s)")
    println("-"^80)
    
    for res in results
        @printf(" %-20s | %-8.4f | %-8.2f | %-8.4f | %-8.4f | %-8.3f\n", 
                res.config, res.sens, res.acc, res.f1, res.spec, res.time)
    end
    
    return results[1]
end

run_svm_experiment (generic function with 1 method)

In [6]:
# --- 5. Main Execution ---

# Load Data
# Ensure .jld2 files are in the correct path relative to this script
path_minmax = "data_checkpoints/approach_1_minmax.jld2"
path_pca    = "data_checkpoints/approach_2_pca.jld2"
path_ica    = "data_checkpoints/approach_3_ica.jld2"

# Load Datasets
data_minmax = load_svm_data(path_minmax)
data_pca    = load_svm_data(path_pca)
data_ica    = load_svm_data(path_ica)

# Run Experiments
winner_minmax = run_svm_experiment(data_minmax, "APPROACH 1: MINMAX", svm_configs)
winner_pca    = run_svm_experiment(data_pca,    "APPROACH 2: PCA",    svm_configs)
winner_ica    = run_svm_experiment(data_ica,    "APPROACH 3: ICA",    svm_configs)

# Final Summary Table
println("\n\n****************************************************************")
println("      GLOBAL COMPARISON (SVM WINNERS)")
println("****************************************************************")
println(" Approach | Best Config          | Sensib | Acc(%) | Time(s)")
println("----------|----------------------|--------|--------|---------")

@printf(" MinMax   | %-20s | %.4f | %-6.2f | %.3f\n", winner_minmax.config, winner_minmax.sens, winner_minmax.acc, winner_minmax.time)
@printf(" PCA      | %-20s | %.4f | %-6.2f | %.3f\n", winner_pca.config,    winner_pca.sens,    winner_pca.acc,    winner_pca.time)
@printf(" ICA      | %-20s | %.4f | %-6.2f | %.3f\n", winner_ica.config,    winner_ica.sens,    winner_ica.acc,    winner_ica.time)

[33m[1m└ [22m[39m[90m@ JLD2 /opt/julia/packages/JLD2/WDhXU/src/data/reconstructing_datatypes.jl:588[39m
[33m[1m└ [22m[39m[90m@ JLD2 /opt/julia/packages/JLD2/WDhXU/src/data/reconstructing_datatypes.jl:588[39m
[33m[1m└ [22m[39m[90m@ JLD2 /opt/julia/packages/JLD2/WDhXU/src/data/reconstructing_datatypes.jl:588[39m


LoadError: MethodError: no method matching copy(::JLD2.ReconstructedMutable{Symbol("CategoricalArray{Int64,1,UInt32,Int64,JLD2.ReconstructedMutable{Symbol(\"CategoricalValue{Int64,UInt32}\"), (:pool, :ref), Tuple{Any, UInt32}},Union{}}"), (:refs, :pool), Tuple{Any, Any}})

[0mClosest candidates are:
[0m  copy([91m::LinearAlgebra.Hessenberg{<:Any, <:LinearAlgebra.UpperHessenberg}[39m)
[0m[90m   @[39m [32mLinearAlgebra[39m [90m/opt/julia-1.9.3/share/julia/stdlib/v1.9/LinearAlgebra/src/[39m[90m[4mhessenberg.jl:422[24m[39m
[0m  copy([91m::LinearAlgebra.Hessenberg{<:Any, <:LinearAlgebra.SymTridiagonal}[39m)
[0m[90m   @[39m [32mLinearAlgebra[39m [90m/opt/julia-1.9.3/share/julia/stdlib/v1.9/LinearAlgebra/src/[39m[90m[4mhessenberg.jl:423[24m[39m
[0m  copy([91m::LinearAlgebra.LQ[39m)
[0m[90m   @[39m [32mLinearAlgebra[39m [90m/opt/julia-1.9.3/share/julia/stdlib/v1.9/LinearAlgebra/src/[39m[90m[4mlq.jl:128[24m[39m
[0m  ...
