In [14]:
using LIBSVM
using JLD2, FileIO
using Statistics
using Printf
using Random
using CategoricalArrays

# Loads accuracy, oneHotEncoding, normalizeMinMax, etc.
include("unit2-multilayer-perceptron.jl") 
# Loads confusionMatrix and printConfusionMatrix
include("unit4-metrics.jl")

# --- 1. Data Loading Function ---
function load_svm_data(filename::String)
    data = load(filename)

    x_train_raw = copy(data["x_train"])
    y_train_raw = copy(data["y_train"])
    x_test_raw  = copy(data["x_test"])
    y_test_raw  = copy(data["y_test"])

    # 1. PROCESAR X (FEATURES)
    # Convertimos a matriz y trasponemos para LIBSVM (Features x Samples)
    x_train = Float64.(permutedims(Matrix(x_train_raw)))
    x_test  = Float64.(permutedims(Matrix(x_test_raw)))

    # 2. PROCESAR Y (LABELS) - SOLUCIÓN ROBUSTA
    # Definimos una función auxiliar para extraer el valor entero
    # Si es CategoricalValue, lo 'desenvuelve'. Si ya es número, lo deja pasar.
    get_val(x) = (typeof(x) <: CategoricalValue) ? unwrap(x) : x

    # Aplicamos la función a cada elemento y convertimos a Int
    # Esto funciona tanto para Vector{Int} como para Vector{CategoricalValue}
    y_train = vec(Int.(get_val.(y_train_raw)))
    y_test  = vec(Int.(get_val.(y_test_raw)))

    n_inputs = size(x_train, 1)
    println(" Dataset Loaded: $filename")
    println("   > Features: $n_inputs | Samples Train: $(size(x_train, 2))")

    return (x_train=x_train, y_train=y_train, x_test=x_test, y_test=y_test)
end

load_svm_data (generic function with 1 method)

In [9]:
# --- 3. Experiment Configuration ---

# Define SVM configurations (Hyperparameters) to test
# Format: (Name, Kernel, Cost, Gamma, Degree)
svm_configs = [
    ("Linear (C=1)",        Kernel.Linear,      1.0,   0.0, 0),   # Baseline Linear
    ("Linear (C=10)",       Kernel.Linear,      10.0,  0.0, 0),   # Harder margin
    ("RBF (C=1, G=Auto)",   Kernel.RadialBasis, 1.0,   0.0, 0),   # 0.0 in Gamma usually means 1/k (Auto)
    ("RBF (C=10, G=0.1)",   Kernel.RadialBasis, 10.0,  0.1, 0),   # Manual Tuning
    ("RBF (C=100, G=0.01)", Kernel.RadialBasis, 100.0, 0.01, 0),  # High cost, low gamma
    ("RBF (C=10, G=1.0)",   Kernel.RadialBasis, 10.0,  1.0, 0),   # High gamma
    ("Poly (Deg=2, C=1)",   Kernel.Polynomial,  1.0,   0.1, 2),   # Polynomial degree 2
    ("Poly (Deg=3, C=1)",   Kernel.Polynomial,  1.0,   0.1, 3),   # Polynomial degree 3
]

println("SVM configurations loaded: $(length(svm_configs)) scenarios.")

SVM configurations loaded: 8 scenarios.


In [10]:
# --- 4. Experimental Loop (Training & Evaluation) ---

function run_svm_experiment(dataset, approach_name, configs)
    println("\n=================================================================")
    println(" EXPERIMENT: $approach_name (Goal: Maximize Sensitivity/Recall)")
    println("=================================================================")
    
    x_train, y_train = dataset.x_train, dataset.y_train
    x_test, y_test   = dataset.x_test, dataset.y_test
    
    # Identify classes for metrics calculation (0, 1, 2, 3, 4)
    classes = sort(unique(y_train))
    
    results = []

    for (i, conf) in enumerate(configs)
        (name, k_type, c_val, g_val, deg_val) = conf
        
        print(" Testing Config $i/$(length(configs)): $name ... ")
        start_time = time()
        
        # 1. Train SVM
        # Gamma=0.0 defaults to 1/k in LIBSVM
        model = svmtrain(x_train, y_train, 
                         kernel = k_type, cost = c_val, gamma = g_val, degree = deg_val)
        
        train_time = time() - start_time
        
        # 2. Predict
        y_hat, _ = svmpredict(model, x_test)
        y_hat_int = Int.(y_hat) # Convert to Int for compatibility
        
        # 3. Calculate Metrics (Reusing Unit 4 & Unit 2 Functions)
        # [cite_start]This calls confusionMatrix(outputs::Vector, targets::Vector, classes::Vector) from unit4-metrics.jl [cite: 534]
        # [cite_start]which internally calls oneHotEncoding from unit2-multilayer-perceptron.jl [cite: 816]
        metrics = confusionMatrix(y_hat_int, y_test, classes; weighted=false)
        
        # Extract scalar values
        sens = metrics.sensitivity
        acc  = metrics.accuracy * 100
        f1   = metrics.f_score
        spec = metrics.specificity
        prec = metrics.ppv
        
        push!(results, (
            config = name,
            sens = sens,
            acc = acc,
            f1 = f1,
            spec = spec,
            prec = prec,
            time = train_time
        ))
        
        println("Done. Sens: $(round(sens, digits=4)) | Time: $(round(train_time, digits=3))s")
    end
    
    # Sort by Sensitivity (Descending) - Priority for medical diagnosis
    sort!(results, by = x -> x.sens, rev = true)
    
    # Print Table
    println("\n --- FINAL RESULTS FOR $approach_name ---")
    @printf(" %-20s | %-8s | %-8s | %-8s | %-8s | %-8s\n", 
            "Configuration", "Sens", "Acc(%)", "F1", "Spec", "Time(s)")
    println("-"^80)
    
    for res in results
        @printf(" %-20s | %-8.4f | %-8.2f | %-8.4f | %-8.4f | %-8.3f\n", 
                res.config, res.sens, res.acc, res.f1, res.spec, res.time)
    end
    
    return results[1]
end

run_svm_experiment (generic function with 1 method)

In [15]:
# --- 5. Main Execution ---

# Load Data
# Ensure .jld2 files are in the correct path relative to this script
path_minmax = "data_checkpoints/approach_1_minmax.jld2"
path_pca    = "data_checkpoints/approach_2_pca.jld2"
path_ica    = "data_checkpoints/approach_3_ica.jld2"

# Load Datasets
data_minmax = load_svm_data(path_minmax)
data_pca    = load_svm_data(path_pca)
data_ica    = load_svm_data(path_ica)

# Run Experiments
winner_minmax = run_svm_experiment(data_minmax, "APPROACH 1: MINMAX", svm_configs)
winner_pca    = run_svm_experiment(data_pca,    "APPROACH 2: PCA",    svm_configs)
winner_ica    = run_svm_experiment(data_ica,    "APPROACH 3: ICA",    svm_configs)

# Final Summary Table
println("\n\n****************************************************************")
println("      GLOBAL COMPARISON (SVM WINNERS)")
println("****************************************************************")
println(" Approach | Best Config          | Sensib | Acc(%) | Time(s)")
println("----------|----------------------|--------|--------|---------")

@printf(" MinMax   | %-20s | %.4f | %-6.2f | %.3f\n", winner_minmax.config, winner_minmax.sens, winner_minmax.acc, winner_minmax.time)
@printf(" PCA      | %-20s | %.4f | %-6.2f | %.3f\n", winner_pca.config,    winner_pca.sens,    winner_pca.acc,    winner_pca.time)
@printf(" ICA      | %-20s | %.4f | %-6.2f | %.3f\n", winner_ica.config,    winner_ica.sens,    winner_ica.acc,    winner_ica.time)

 Dataset Loaded: data_checkpoints/approach_1_minmax.jld2
   > Features: 31 | Samples Train: 579
 Dataset Loaded: data_checkpoints/approach_2_pca.jld2
   > Features: 17 | Samples Train: 579
 Dataset Loaded: data_checkpoints/approach_3_ica.jld2
   > Features: 28 | Samples Train: 579

 EXPERIMENT: APPROACH 1: MINMAX (Goal: Maximize Sensitivity/Recall)
 Testing Config 1/8: Linear (C=1) ... Done. Sens: 0.3327 | Time: 0.03s
 Testing Config 2/8: Linear (C=10) ... Done. Sens: 0.3245 | Time: 0.06s
 Testing Config 3/8: RBF (C=1, G=Auto) ... Done. Sens: 0.2 | Time: 0.04s
 Testing Config 4/8: RBF (C=10, G=0.1) ... Done. Sens: 0.6002 | Time: 0.047s
 Testing Config 5/8: RBF (C=100, G=0.01) ... Done. Sens: 0.331 | Time: 0.051s
 Testing Config 6/8: RBF (C=10, G=1.0) ... Done. Sens: 0.784 | Time: 0.088s
 Testing Config 7/8: Poly (Deg=2, C=1) ... Done. Sens: 0.3302 | Time: 0.036s
 Testing Config 8/8: Poly (Deg=3, C=1) ... Done. Sens: 0.311 | Time: 0.037s

 --- FINAL RESULTS FOR APPROACH 1: MINMAX ---
 C