In [1]:
# 0. Mount Google Drive; the dataset and output paths used below live under it
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

# 1. Import Libraries
import numpy as np
import pandas as pd
import os
import random
import tensorflow as tf
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, RobustScaler
from scipy.stats.mstats import winsorize
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, LeakyReLU
from tensorflow.keras.callbacks import EarlyStopping

# 2. Set seeds for reproducibility
SEED = 44

# Export the environment switches first. PYTHONHASHSEED cannot change hash
# randomisation of the interpreter that is already running; it is kept so
# that any child process started from here inherits the same setting.
os.environ['PYTHONHASHSEED'] = str(SEED)
# Legacy TensorFlow determinism switch, kept for older TF releases.
os.environ['TF_DETERMINISTIC_OPS'] = '1'

# Seed every random number generator this script relies on.
random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)

# Newer TensorFlow releases expose op determinism through this API instead of
# the environment variable above; the guard keeps older releases working.
if hasattr(tf.config.experimental, 'enable_op_determinism'):
    tf.config.experimental.enable_op_determinism()

# 3. Load the dataset and separate the features from the target column
DATASET_CSV = '/content/drive/MyDrive/Colab Notebooks/Skripsi/Dataset/diabetes_012_health_indicators_BRFSS2015.csv'
data = pd.read_csv(DATASET_CSV)

# X holds every column except the 'Diabetes_012' label; `data` keeps the label
# so it can be re-attached to the latent features later.
X = data.drop(columns='Diabetes_012')

# 4. Preprocessing (kept identical to the default model)
# NOTE(review): winsorisation and both scalers are fitted on the full dataset
# before the split in step 5, so the validation rows contribute to the scaling
# statistics. Confirm this is intended for parity with the default model.

# Winsorise BMI: the lowest and highest 0.5% of values are pulled to the
# corresponding percentile.
X['BMI'] = winsorize(X['BMI'], limits=[0.005, 0.005])

# Cap the day-count columns at 30.
for capped_col in ('MentHlth', 'PhysHlth'):
    X[capped_col] = np.where(X[capped_col] > 30, 30, X[capped_col])

robust_features = ['BMI', 'MentHlth', 'PhysHlth']
minmax_features = ['Age', 'Education', 'Income', 'GenHlth']

scaler_robust = RobustScaler()
scaler_minmax = MinMaxScaler()

# Fit each scaler on its own (disjoint) column group and write the scaled
# values back in place.
for scaler, columns in (
    (scaler_robust, robust_features),
    (scaler_minmax, minmax_features),
):
    X[columns] = scaler.fit_transform(X[columns])

# 5. Split the data for the autoencoder (80% train / 20% validation)
AE_TEST_FRACTION = 0.2
X_train_ae, X_test_ae = train_test_split(
    X,
    test_size=AE_TEST_FRACTION,
    random_state=SEED,
)

# 6. Build autoencoder AEC1 with LeakyReLU activations
# Layer widths: input -> 14 -> 12 -> 10 (latent) -> 12 -> 14 -> input.
# NOTE(review): newer Keras releases may prefer `negative_slope` over `alpha`
# for LeakyReLU - confirm against the installed version before renaming.
LEAKY_SLOPE = 0.1


def _dense_leaky(tensor, units):
    """Apply a Dense layer with `units` outputs, then a LeakyReLU activation."""
    projected = Dense(units)(tensor)
    return LeakyReLU(alpha=LEAKY_SLOPE)(projected)


input_dim = X.shape[1]
input_layer = Input(shape=(input_dim,))

# Encoder
encoded = input_layer
for width in (14, 12):
    encoded = _dense_leaky(encoded, width)

# Latent space (taken after the activation)
latent = _dense_leaky(encoded, 10)

# Decoder
decoded = latent
for width in (12, 14):
    decoded = _dense_leaky(decoded, width)

# Output: linear reconstruction with the same width as the input
output_layer = Dense(input_dim, activation='linear')(decoded)

# Model
autoencoder = Model(inputs=input_layer, outputs=output_layer)
autoencoder.compile(optimizer='adam', loss='mse')

# 7. Train the autoencoder to reconstruct its own input
# Training stops once val_loss has not improved for 5 epochs, and the weights
# from the best epoch are restored.
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True,
)
history = autoencoder.fit(
    x=X_train_ae,
    y=X_train_ae,
    validation_data=(X_test_ae, X_test_ae),
    epochs=100,
    batch_size=50,
    verbose=1,
    callbacks=[early_stopping],
)

# 8. Prepare the model output directory
# Only the directory is created here; the actual save call below is disabled.
save_dir = '/content/drive/MyDrive/Colab Notebooks/Skripsi/Model/Model Autoencoders'
os.makedirs(save_dir, exist_ok=True)
# autoencoder.save(f'{save_dir}/aec1_model_leakyrelu.h5')

# 9. Extract the encoder: the sub-graph from the input up to the latent layer
encoder = Model(inputs=input_layer, outputs=latent)

# 10. Project the whole feature matrix X into the latent space
X_latent = encoder.predict(X)

# 11. Build a DataFrame of the reduced features, named Latent_1 .. Latent_k
latent_columns = [f'Latent_{idx}' for idx in range(1, X_latent.shape[1] + 1)]
latent_df = pd.DataFrame(X_latent, columns=latent_columns)

# 12. Re-attach the label column from the original data
latent_df['Diabetes_012'] = data['Diabetes_012']

# 13. Write the latent dataset to the experiment output directory
output_path = '/content/drive/MyDrive/Colab Notebooks/Skripsi/Dataset/V2 Hasil Eksperimen/AE'
os.makedirs(output_path, exist_ok=True)

# The row index is omitted from the CSV.
latent_csv_path = f'{output_path}/AEC1_dataset_latent.csv'
latent_df.to_csv(latent_csv_path, index=False)

print("✅ Dataset latent berhasil disimpan.")


Mounted at /content/drive
Epoch 1/100




4059/4059 ━━━━━━━━━━━━━━━━━━━━ 14s 3ms/step - loss: 0.2889 - val_loss: 0.0603
Epoch 2/100
4059/4059 ━━━━━━━━━━━━━━━━━━━━ 19s 3ms/step - loss: 0.0568 - val_loss: 0.0475
Epoch 3/100
4059/4059 ━━━━━━━━━━━━━━━━━━━━ 10s 3ms/step - loss: 0.0453 - val_loss: 0.0416
Epoch 4/100
4059/4059 ━━━━━━━━━━━━━━━━━━━━ 10s 2ms/step - loss: 0.0409 - val_loss: 0.0401
Epoch 5/100
4059/4059 ━━━━━━━━━━━━━━━━━━━━ 11s 3ms/step - loss: 0.0401 - val_loss: 0.0398
Epoch 6/100
4059/4059 ━━━━━━━━━━━━━━━━━━━━ 10s 3ms/step - loss: 0.0398 - val_loss: 0.0395
Epoch 7/100
4059/4059 ━━━━━━━━━━━━━━━━━━━━ 10s 2ms/step - loss: 0.0396 - val_loss: 0.0391
Epoch 8/100
4059/4059 ━━━━━━━━━━━━━━━━━━━━ 11s 3ms/step - loss: 0.0369 - val_loss: 0.0335
Epoch 9/100
[1m4059