In [1]:
# 0. Mounting Google Drive
from google.colab import drive
drive.mount('/content/drive')

# 1. Import Libraries
import os
import random
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, RobustScaler
from sklearn.metrics import classification_report, roc_auc_score
from scipy.stats.mstats import winsorize
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, MultiHeadAttention, LayerNormalization, Dropout, Lambda
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.utils import to_categorical
import matplotlib.pyplot as plt
import seaborn as sns

# 2. Set Seed untuk Konsistensi
SEED = 44
random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)
os.environ['PYTHONHASHSEED'] = str(SEED)
os.environ['TF_DETERMINISTIC_OPS'] = '1'

# 3. Load Dataset
data = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/Skripsi/Dataset/V2 Hasil Eksperimen/AE/AEB1_dataset_latent.csv')
X = data.drop('Diabetes_012', axis=1)
y = data['Diabetes_012']

# 5. One-hot Encoding untuk Target
y_encoded = to_categorical(y)

# 6. Split Data (Train/Test -> Train/Val)
X_train, X_test, y_train, y_test = train_test_split(
    X, y_encoded, test_size=0.2, random_state=SEED, stratify=y
)
X_train_main, X_val, y_train_main, y_val = train_test_split(
    X_train, y_train, test_size=0.2, random_state=SEED, stratify=y_train.argmax(axis=1)
)

# 7. Build DNN + Self-Attention Layer (Moved before Output Layer)
input_layer = Input(shape=(X.shape[1],))
x = Dense(64, activation='relu')(input_layer)
x = Dense(32, activation='relu')(x)
x = Dense(16, activation='relu')(x)

# Expand dimensi untuk Self-Attention
x_expanded = Lambda(lambda t: tf.expand_dims(t, axis=1))(x)

# Self-Attention sebelum output
attn_output = MultiHeadAttention(num_heads=4, key_dim=4)(x_expanded, x_expanded)
attn_output = LayerNormalization()(attn_output + x_expanded)

# Kembalikan ke bentuk 2D
attn_output_squeezed = Lambda(lambda t: tf.squeeze(t, axis=1))(attn_output)

# Output layer
output_layer = Dense(3, activation='softmax')(attn_output_squeezed)

# Final Model
model_sa = Model(inputs=input_layer, outputs=output_layer)
model_sa.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

# 8. Train Model (Updated Epochs: 100)
early_stop = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

history_sa = model_sa.fit(
    X_train_main, y_train_main,
    validation_data=(X_val, y_val),
    epochs=100,
    batch_size=50,
    callbacks=[early_stop],
    verbose=1
)

# 9. Evaluate
y_pred_prob_sa = model_sa.predict(X_test)
y_pred_class_sa = np.argmax(y_pred_prob_sa, axis=1)
y_true_class_sa = np.argmax(y_test, axis=1)

print("\n=== Classification Report (Macro) DNN AE + SA ===")
print(classification_report(y_true_class_sa, y_pred_class_sa, digits=4))
roc_auc_sa = roc_auc_score(y_test, y_pred_prob_sa, average='macro', multi_class='ovr')
print(f"\nMacro-average ROC AUC: {roc_auc_sa:.4f}")


Mounted at /content/drive
Epoch 1/100
[1m3248/3248[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 5ms/step - accuracy: 0.8414 - loss: 0.4332 - val_accuracy: 0.8448 - val_loss: 0.4311
Epoch 2/100
[1m3248/3248[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 5ms/step - accuracy: 0.8464 - loss: 0.4135 - val_accuracy: 0.8458 - val_loss: 0.4252
Epoch 3/100
[1m3248/3248[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 5ms/step - accuracy: 0.8467 - loss: 0.4121 - val_accuracy: 0.8458 - val_loss: 0.4182
Epoch 4/100
[1m3248/3248[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 5ms/step - accuracy: 0.8470 - loss: 0.4111 - val_accuracy: 0.8464 - val_loss: 0.4165
Epoch 5/100
[1m3248/3248[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 5ms/step - accuracy: 0.8476 - loss: 0.4104 - val_accuracy: 0.8463 - val_loss: 0.4137
Epoch 6/100
[1m3248/3248[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 5ms/step - accuracy: 0.8477 - loss: 0.4096 - val_accuracy: 0.84

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
