In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

from Utils import visualization as viz
from Utils import model
from Utils import explainer

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Load the dataset through the project helper in Utils.model.
# NOTE(review): `ide="local"` presumably selects a local file location rather
# than a hosted-notebook path — confirm against Utils.model.load_dataset.
df = model.load_dataset(ide="local")

In [3]:
# Remove every row that has at least one missing value, then renumber the
# index so it is contiguous again. This is a chained reassignment rather than
# `inplace=True`: the object returned by model.load_dataset is left untouched
# and `df` is simply rebound to the cleaned copy.
df = df.dropna().reset_index(drop=True)

In [4]:
# Label targeting and feature selection.
# The target is the last column of the frame; the features are the
# float64/int64 columns other than the target.
label = df.columns[-1]
numeric_cols = df.select_dtypes(include=['float64', 'int64']).columns.tolist()
feature_cols = [c for c in numeric_cols if c != label]
# Fail fast here instead of deep inside the split/training helpers.
assert feature_cols, "no float64/int64 feature columns found in df"

# Feature matrix and raw (un-encoded) target vector.
X = df[feature_cols].values
y_raw = df[label].values

# Encode the target as integer class ids. The encoder is fed `y_raw` so the
# label column is read once and the raw/encoded vectors share one source.
le = LabelEncoder()
y = le.fit_transform(y_raw)

# Original label values; position i holds the label encoded as integer i.
class_names = le.classes_

In [5]:
# Stage 1: hold out 30% of the rows (stratified on the encoded label) as a
# combined validation+test pool; the remaining 70% is the training split.
X_train, X_val_test, y_train, y_val_test = train_test_split(
    X,
    y,
    test_size=0.3,
    stratify=y,
    random_state=42,
)

# Stage 2: cut the held-out pool in half (again stratified), giving separate
# validation and test splits of roughly 15% of the data each.
X_val, X_test, y_val, y_test = train_test_split(
    X_val_test,
    y_val_test,
    test_size=0.5,
    stratify=y_val_test,
    random_state=42,
)

In [6]:
# Preprocessing: run the training and validation matrices through the project
# helper. It returns four values; only the first two (the processed train and
# validation matrices) are kept, the last two are discarded.
# NOTE(review): the discarded values presumably include the fitted
# transformer(s) — confirm in Utils.model.preprocess_data. X_test is not
# transformed in this cell, so it needs the same treatment before any test-set
# evaluation.
X_train_s, X_val_s, _, _ = model.preprocess_data(X_train, X_val)

In [7]:
# Fit the SVM on the preprocessed training split (RBF kernel, C = 1.0).
svm_model = model.train_svm(
    X_train_s,
    y_train,
    C=1.0,
    kernel="rbf",
)

# Evaluate on the validation split. The call stays as the cell's final
# expression so the notebook also echoes the value it returns.
model.evaluate(
    svm_model,
    X_val_s,
    y_val,
    class_names,
    "SVM (RBF Kernel)",
)

Training SVM (rbf kernel)...
--- Evaluasi SVM (RBF Kernel) ---
Akurasi: 0.9021

Classification Report:
              precision    recall  f1-score   support

       Basal       0.91      0.95      0.93        21
        Her2       1.00      0.50      0.67        10
        LumA       0.89      0.97      0.93        65
        LumB       0.86      0.83      0.84        29
      Normal       1.00      0.94      0.97        18

    accuracy                           0.90       143
   macro avg       0.93      0.84      0.87       143
weighted avg       0.91      0.90      0.90       143



0.9020979020979021

In [12]:
# Compute SHAP values for the SVM via the project helper.
# WARNING: very slow. In the saved run each of the 50 samples took roughly
# 15-39 minutes, so re-executing this cell costs many hours.
# `n_samples` must stay in sync with the slice size used by the plotting cell.
# NOTE(review): `n_background` / `nsamples_calc` presumably set the background
# set size and the per-sample evaluation budget — confirm in Utils.explainer.
explainer_svm, shap_svm = explainer.compute_shap_general(
    svm_model, X_train_s, X_val_s,
    n_background=30, n_samples=50, nsamples_calc=1000,
)

--- Menghitung SHAP (Kernel/SVM) Mode Hemat RAM ---
Target: 50 sampel. Limit permutasi: 1000.
⏳ Memproses sampel 1/50...


100%|██████████| 1/1 [22:56<00:00, 1376.77s/it]


⏳ Memproses sampel 2/50...


100%|██████████| 1/1 [22:36<00:00, 1356.59s/it]


⏳ Memproses sampel 3/50...


100%|██████████| 1/1 [22:00<00:00, 1320.26s/it]


⏳ Memproses sampel 4/50...


100%|██████████| 1/1 [23:52<00:00, 1432.20s/it]


⏳ Memproses sampel 5/50...


100%|██████████| 1/1 [23:47<00:00, 1427.49s/it]


⏳ Memproses sampel 6/50...


100%|██████████| 1/1 [23:24<00:00, 1404.28s/it]


⏳ Memproses sampel 7/50...


100%|██████████| 1/1 [21:08<00:00, 1268.39s/it]


⏳ Memproses sampel 8/50...


100%|██████████| 1/1 [16:01<00:00, 961.84s/it]


⏳ Memproses sampel 9/50...


100%|██████████| 1/1 [15:31<00:00, 931.44s/it]


⏳ Memproses sampel 10/50...


100%|██████████| 1/1 [15:36<00:00, 936.88s/it]


⏳ Memproses sampel 11/50...


100%|██████████| 1/1 [15:12<00:00, 912.80s/it]


⏳ Memproses sampel 12/50...


100%|██████████| 1/1 [15:41<00:00, 941.57s/it]


⏳ Memproses sampel 13/50...


100%|██████████| 1/1 [16:33<00:00, 993.25s/it]


⏳ Memproses sampel 14/50...


100%|██████████| 1/1 [15:33<00:00, 933.38s/it]


⏳ Memproses sampel 15/50...


100%|██████████| 1/1 [15:32<00:00, 932.81s/it]


⏳ Memproses sampel 16/50...


100%|██████████| 1/1 [15:16<00:00, 916.96s/it]


⏳ Memproses sampel 17/50...


100%|██████████| 1/1 [15:42<00:00, 942.81s/it]


⏳ Memproses sampel 18/50...


100%|██████████| 1/1 [15:26<00:00, 926.45s/it]


⏳ Memproses sampel 19/50...


100%|██████████| 1/1 [15:32<00:00, 932.95s/it]


⏳ Memproses sampel 20/50...


100%|██████████| 1/1 [15:26<00:00, 926.05s/it]


⏳ Memproses sampel 21/50...


100%|██████████| 1/1 [15:21<00:00, 921.78s/it]


⏳ Memproses sampel 22/50...


100%|██████████| 1/1 [17:13<00:00, 1033.28s/it]


⏳ Memproses sampel 23/50...


100%|██████████| 1/1 [15:27<00:00, 927.23s/it]


⏳ Memproses sampel 24/50...


100%|██████████| 1/1 [15:24<00:00, 924.61s/it]


⏳ Memproses sampel 25/50...


100%|██████████| 1/1 [15:08<00:00, 908.75s/it]


⏳ Memproses sampel 26/50...


100%|██████████| 1/1 [15:23<00:00, 923.10s/it]


⏳ Memproses sampel 27/50...


100%|██████████| 1/1 [16:05<00:00, 965.62s/it]


⏳ Memproses sampel 28/50...


100%|██████████| 1/1 [15:24<00:00, 924.72s/it]


⏳ Memproses sampel 29/50...


100%|██████████| 1/1 [16:42<00:00, 1002.86s/it]


⏳ Memproses sampel 30/50...


100%|██████████| 1/1 [15:27<00:00, 927.70s/it]


⏳ Memproses sampel 31/50...


100%|██████████| 1/1 [15:27<00:00, 927.74s/it]


⏳ Memproses sampel 32/50...


100%|██████████| 1/1 [15:14<00:00, 914.05s/it]


⏳ Memproses sampel 33/50...


100%|██████████| 1/1 [15:35<00:00, 936.00s/it]


⏳ Memproses sampel 34/50...


100%|██████████| 1/1 [15:41<00:00, 941.54s/it]


⏳ Memproses sampel 35/50...


100%|██████████| 1/1 [15:43<00:00, 943.56s/it]


⏳ Memproses sampel 36/50...


100%|██████████| 1/1 [15:47<00:00, 947.63s/it]


⏳ Memproses sampel 37/50...


100%|██████████| 1/1 [17:13<00:00, 1033.22s/it]


⏳ Memproses sampel 38/50...


100%|██████████| 1/1 [15:49<00:00, 949.89s/it]


⏳ Memproses sampel 39/50...


100%|██████████| 1/1 [19:27<00:00, 1167.78s/it]


⏳ Memproses sampel 40/50...


100%|██████████| 1/1 [15:48<00:00, 948.52s/it]


⏳ Memproses sampel 41/50...


100%|██████████| 1/1 [15:38<00:00, 938.61s/it]


⏳ Memproses sampel 42/50...


100%|██████████| 1/1 [15:31<00:00, 931.58s/it]


⏳ Memproses sampel 43/50...


100%|██████████| 1/1 [16:05<00:00, 965.19s/it]


⏳ Memproses sampel 44/50...


100%|██████████| 1/1 [16:01<00:00, 961.97s/it]


⏳ Memproses sampel 45/50...


100%|██████████| 1/1 [15:43<00:00, 943.51s/it]


⏳ Memproses sampel 46/50...


100%|██████████| 1/1 [16:14<00:00, 974.48s/it]


⏳ Memproses sampel 47/50...


100%|██████████| 1/1 [39:00<00:00, 2340.70s/it]


⏳ Memproses sampel 48/50...


100%|██████████| 1/1 [15:17<00:00, 917.86s/it]


⏳ Memproses sampel 49/50...


100%|██████████| 1/1 [15:49<00:00, 949.09s/it]


⏳ Memproses sampel 50/50...


100%|██████████| 1/1 [15:22<00:00, 922.58s/it]

✅ Selesai. Terdeteksi 5 kelas.





In [13]:
if shap_svm is not None:
    # IMPORTANT: this must equal the `n_samples` value that was passed to
    # compute_shap_general, so the plotted rows line up with the SHAP values.
    n_samples_shap = 50
    X_viz = X_val_s[:n_samples_shap]

    print("\n=== Visualisasi SVM ===")
    viz.plot_beeswarm(
        shap_svm,
        X_viz,
        feature_cols,
        class_names,
        target_class="all",
    )

    # Waterfall plot for the first patient (sample 0), class index 0.
    viz.plot_waterfall(
        shap_svm,
        explainer_svm,
        X_viz,
        feature_cols,
        class_names,
        sample_idx=0,
        class_idx=0,
    )