In [None]:
# 0. Mounting Google Drive
# Colab-only step: exposes the user's Google Drive under /content/drive so the
# dataset path passed to pd.read_csv further down can be resolved.
from google.colab import drive
drive.mount('/content/drive')

# 1. Import Libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import random
import os

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, RobustScaler
from sklearn.metrics import classification_report, roc_auc_score
from scipy.stats.mstats import winsorize
from imblearn.over_sampling import SMOTE

import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, MultiHeadAttention, LayerNormalization, Lambda
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.utils import to_categorical

# 2. Fix the random seeds for reproducibility
SEED = 44

# Environment flags related to determinism.
# NOTE(review): Python documents PYTHONHASHSEED as being read at interpreter
# startup, so assigning it here probably only affects child processes --
# confirm whether it should be set before the kernel starts instead.
os.environ.update({
    'PYTHONHASHSEED': str(SEED),
    'TF_DETERMINISTIC_OPS': '1',
})

# Seed Python's `random`, NumPy and TensorFlow with the same value.
for seed_fn in (random.seed, np.random.seed, tf.random.set_seed):
    seed_fn(SEED)

# 3. Load Dataset
# Read the CSV from Drive, then separate the label column from the remaining
# feature columns.
target_column = 'Diabetes_012'
data = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/Skripsi/Dataset/V2 Hasil Eksperimen/AE/AEB1_dataset_latent.csv')
y = data[target_column]
X = data.drop(columns=[target_column])

# 5. Split Data (Train/Test)
# Stratified 80/20 hold-out; the test set is never resampled.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=SEED, stratify=y
)

# 6. Split Train -> Train Main & Validation (BEFORE resampling)
# The validation rows must be carved out before SMOTE runs. SMOTE creates
# synthetic points by interpolating between neighbouring minority samples, so
# resampling first and splitting afterwards puts synthetic points in the
# validation set whose "parents" are in the training set. That leaks
# information and makes the val_loss used for early stopping optimistic.
X_train_main, X_val, y_train_main_labels, y_val_labels = train_test_split(
    X_train, y_train, test_size=0.2, random_state=SEED, stratify=y_train
)

# 7. SMOTE on the main training portion only
# Validation and test keep their original (imbalanced) class distribution.
smote = SMOTE(random_state=SEED)
X_train_smote, y_train_smote = smote.fit_resample(X_train_main, y_train_main_labels)
X_train_main = X_train_smote

# 8. One-hot Encoding
# Pin the number of classes so train, validation and test targets always have
# the same width as the 3-unit softmax output layer, even if a class happened
# to be absent from one of the subsets.
NUM_CLASSES = 3
y_train_encoded = to_categorical(y_train_smote, num_classes=NUM_CLASSES)
y_train_main = y_train_encoded
y_val = to_categorical(y_val_labels, num_classes=NUM_CLASSES)
y_test_encoded = to_categorical(y_test, num_classes=NUM_CLASSES)

# 9. Build DNN + Self-Attention Model
# Three-layer dense encoder: input features -> 64 -> 32 -> 16 units.
input_layer = Input(shape=(X.shape[1],))
x = Dense(64, activation='relu')(input_layer)
x = Dense(32, activation='relu')(x)
x = Dense(16, activation='relu')(x)

# Add a sequence axis of length 1 so the 16-d vector can be fed to attention:
# (batch, 16) -> (batch, 1, 16). A built-in Reshape layer is used instead of a
# Lambda wrapping an anonymous function, because Lambda layers are serialized
# as Python bytecode and get in the way of saving/reloading the model safely.
x_expanded = tf.keras.layers.Reshape((1, 16))(x)

# Self-Attention Layer with a residual connection followed by layer norm.
# NOTE(review): the sequence length is 1, so the attention softmax is taken
# over a single key and is always 1.0. The block therefore acts as a learned
# linear projection of x rather than real attention -- confirm this is the
# intended design.
attn_output = MultiHeadAttention(num_heads=4, key_dim=4)(x_expanded, x_expanded)
attn_output = tf.keras.layers.Add()([attn_output, x_expanded])
attn_output = LayerNormalization()(attn_output)

# Drop the length-1 sequence axis again: (batch, 1, 16) -> (batch, 16).
attn_output_squeezed = tf.keras.layers.Flatten()(attn_output)

# Output Layer: 3-way softmax, one unit per target class.
output_layer = Dense(3, activation='softmax')(attn_output_squeezed)

# Final Model
model = Model(inputs=input_layer, outputs=output_layer)

# Categorical cross-entropy expects one-hot encoded targets.
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

# 10. Train Model
# Stop once val_loss has not improved for 5 consecutive epochs and roll the
# weights back to the best epoch seen.
early_stop = EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True,
)

# Fit for at most 100 epochs in mini-batches of 50, validating after each epoch.
history = model.fit(
    x=X_train_main,
    y=y_train_main,
    batch_size=50,
    epochs=100,
    validation_data=(X_val, y_val),
    callbacks=[early_stop],
    verbose=1,
)

# 11. Evaluate on Test Set
# Class probabilities from the model; the predicted and true class ids are the
# arg-max over the class axis of the probabilities / one-hot targets.
y_pred_prob = model.predict(X_test)
y_pred_class = y_pred_prob.argmax(axis=1)
y_true_class = y_test_encoded.argmax(axis=1)

# 12. Evaluation metrics
# Per-class precision/recall/F1 report, then the macro-averaged ROC AUC
# computed from the one-hot targets and predicted probabilities.
print("\n=== Classification Report (Macro) Model DNN + AE + SMOTE + SA ===")
report = classification_report(y_true_class, y_pred_class, digits=4)
print(report)
roc_auc = roc_auc_score(
    y_test_encoded,
    y_pred_prob,
    multi_class='ovr',
    average='macro',
)
print(f"\nMacro-average ROC AUC: {roc_auc:.4f}")


Mounted at /content/drive
Epoch 1/100
8207/8207 ━━━━━━━━━━━━━━━━━━━━ 46s 5ms/step - accuracy: 0.4906 - loss: 0.9881 - val_accuracy: 0.5143 - val_loss: 0.9616
Epoch 2/100
8207/8207 ━━━━━━━━━━━━━━━━━━━━ 41s 5ms/step - accuracy: 0.5104 - loss: 0.9653 - val_accuracy: 0.5254 - val_loss: 0.9468
Epoch 3/100
8207/8207 ━━━━━━━━━━━━━━━━━━━━ 41s 5ms/step - accuracy: 0.5195 - loss: 0.9513 - val_accuracy: 0.5307 - val_loss: 0.9377
Epoch 4/100
8207/8207 ━━━━━━━━━━━━━━━━━━━━ 39s 5ms/step - accuracy: 0.5254 - loss: 0.9433 - val_accuracy: 0.5330 - val_loss: 0.9349
Epoch 5/100
8207/8207 ━━━━━━━━━━━━━━━━━━━━ 42s 5ms/step - accuracy: 0.5288 - loss: 0.9386 - val_accuracy: 0.5385 - val_loss: 0.9287
Epoch 6/100
[1m8207/8207[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m43s[0m 5ms/step - accuracy: 0.5325 - loss: 0.9344 - val_accuracy: 0.54