In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/data-baru/Normalisasi_data.csv


In [3]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, Subset
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
from tabulate import tabulate  # Untuk tampilan tabel lebih rapi

# **1. Load Dataset**
df = pd.read_csv("/kaggle/input/data-baru/Normalisasi_data.csv")

# **2. Preprocessing**
df["datetime"] = pd.to_datetime(df["datetime"])
df["hour"] = df["datetime"].dt.hour  
df["weekday"] = df["datetime"].dt.weekday  
df["day"] = df["datetime"].dt.day  
df["month"] = df["datetime"].dt.month  
df["year"] = df["datetime"].dt.year  

# **Konversi Participant ID (pid) ke Kode Numerik**
group_column = "pid"
df[group_column] = df[group_column].astype("category").cat.codes  

feature_columns = [
    "AU24", "hour", "weekday", "day", "month", "Eye_Open_Avg", "Facial_Structure", 
    "AU23", "AU01", "AU07", "AU02", "AU_Smile", "AU10", "AU14"
]
target_column = "depression_episode"

X = df[feature_columns]
y = df[target_column]

# **3. Normalisasi Data**
scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X)

# **4. Custom Dataset Class**
class DepressionDataset(Dataset):
    def __init__(self, X, y, pids):
        self.X = torch.tensor(X, dtype=torch.float32)
        self.y = torch.tensor(y.values, dtype=torch.float32).unsqueeze(1)
        self.pids = torch.tensor(pids.to_numpy(), dtype=torch.int32)

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        return self.X[idx], self.y[idx], self.pids[idx]

# **5. Inisialisasi Model Neural Network**
class DepressionMLP(nn.Module):
    def __init__(self, input_size):
        super(DepressionMLP, self).__init__()
        self.model = nn.Sequential(
            nn.Linear(input_size, 64),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(32, 1),
            nn.Sigmoid()
        )

    def forward(self, x):
        return self.model(x)

# **6. Stratified K-Fold Cross-Validation**
kf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

fold_results = []
participant_results = []  # Menyimpan hasil evaluasi per participant
input_dim = X_scaled.shape[1]

for fold, (train_idx, test_idx) in enumerate(kf.split(X_scaled, y)):
    print(f"\n🔹 Fold {fold+1}/5")

    # **Bagi dataset berdasarkan index dari K-Fold**
    train_dataset = Subset(DepressionDataset(X_scaled, y, df[group_column]), train_idx)
    test_dataset = Subset(DepressionDataset(X_scaled, y, df[group_column]), test_idx)

    train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

    # **Inisialisasi ulang model setiap fold**
    model = DepressionMLP(input_dim).to(device)
    criterion = nn.BCELoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    # **Training Loop**
    num_epochs = 25
    for epoch in range(num_epochs):
        model.train()
        total_loss = 0

        for X_batch, y_batch, _ in train_loader:
            X_batch, y_batch = X_batch.to(device), y_batch.to(device)

            optimizer.zero_grad()
            y_pred = model(X_batch)
            loss = criterion(y_pred, y_batch)
            loss.backward()
            optimizer.step()

            total_loss += loss.item()

        # **Evaluasi Setiap Epoch**
        model.eval()
        y_pred_list, y_true_list = [], []

        with torch.no_grad():
            for X_batch, y_batch, _ in test_loader:
                X_batch = X_batch.to(device)
                y_pred = model(X_batch).cpu().numpy()
                y_pred_list.extend(y_pred)
                y_true_list.extend(y_batch.numpy())

        y_pred_list = np.array(y_pred_list).flatten()
        y_pred_labels = (y_pred_list > 0.5).astype(int)

        acc = accuracy_score(y_true_list, y_pred_labels)
        precision = precision_score(y_true_list, y_pred_labels, zero_division=0)
        recall = recall_score(y_true_list, y_pred_labels, zero_division=0)
        f1 = f1_score(y_true_list, y_pred_labels, zero_division=0)
        auc = roc_auc_score(y_true_list, y_pred_list) if len(np.unique(y_true_list)) > 1 else np.nan

        print(f"Epoch {epoch+1}/{num_epochs} - Loss: {total_loss/len(train_loader):.4f} "
              f"- Accuracy: {acc:.4f} - Precision: {precision:.4f} - Recall: {recall:.4f} "
              f"- F1 Score: {f1:.4f} - AUC: {auc:.4f}")

    # **Evaluasi Per Participant**
    model.eval()
    y_pred_list, y_true_list, pid_list = [], [], []

    with torch.no_grad():
        for X_batch, y_batch, pids in test_loader:
            X_batch = X_batch.to(device)
            y_pred = model(X_batch).cpu().numpy()
            y_pred_list.extend(y_pred)
            y_true_list.extend(y_batch.numpy())
            pid_list.extend(pids.numpy())

    y_pred_list = np.array(y_pred_list).flatten()
    y_pred_labels = (y_pred_list > 0.5).astype(int)

    unique_pids = np.unique(pid_list)
    for pid in unique_pids:
        mask = np.array(pid_list) == pid
        y_true_pid = np.array(y_true_list)[mask]
        y_pred_pid = np.array(y_pred_labels)[mask]
        y_score_pid = np.array(y_pred_list)[mask]

        if len(np.unique(y_true_pid)) > 1:
            auc_score = roc_auc_score(y_true_pid, y_score_pid)
        else:
            auc_score = np.nan

        participant_results.append({
            "Participant": pid,
            "Fold": fold + 1,
            "Accuracy": accuracy_score(y_true_pid, y_pred_pid),
            "Precision": precision_score(y_true_pid, y_pred_pid, zero_division=0),
            "Recall": recall_score(y_true_pid, y_pred_pid, zero_division=0),
            "F1 Score": f1_score(y_true_pid, y_pred_pid, zero_division=0),
            "AUC": auc_score
        })

# **7. Simpan & Tampilkan Hasil Evaluasi**
participant_df = pd.DataFrame(participant_results)

# **Hitung Rata-rata Hasil Evaluasi**
avg_results = {
    "Participant": "Average",
    "Fold": "-",
    "Accuracy": participant_df["Accuracy"].mean(),
    "Precision": participant_df["Precision"].mean(),
    "Recall": participant_df["Recall"].mean(),
    "F1 Score": participant_df["F1 Score"].mean(),
    "AUC": participant_df["AUC"].mean()
}

# **Gabungkan Hasil Individual dengan Rata-rata**
participant_df = pd.concat([participant_df, pd.DataFrame([avg_results])], ignore_index=True)

# **Tampilkan Hasil Evaluasi dengan Rata-rata**
print("\n🔍 Hasil Evaluasi Per Participant + Average:")
print(tabulate(participant_df, headers="keys", tablefmt="grid", floatfmt=".4f"))

# **Simpan Hasil Evaluasi ke CSV**
participant_df.to_csv("hasil_evaluasi_per_participant.csv", index=False)
print("✅ Hasil evaluasi per participant disimpan dalam 'hasil_evaluasi_per_participant.csv'")


🔹 Fold 1/5
Epoch 1/25 - Loss: 0.3062 - Accuracy: 0.8885 - Precision: 0.8924 - Recall: 0.8500 - F1 Score: 0.8707 - AUC: 0.9623
Epoch 2/25 - Loss: 0.2679 - Accuracy: 0.8940 - Precision: 0.8904 - Recall: 0.8666 - F1 Score: 0.8783 - AUC: 0.9669
Epoch 3/25 - Loss: 0.2583 - Accuracy: 0.8986 - Precision: 0.8937 - Recall: 0.8745 - F1 Score: 0.8840 - AUC: 0.9688
Epoch 4/25 - Loss: 0.2526 - Accuracy: 0.9047 - Precision: 0.9148 - Recall: 0.8648 - F1 Score: 0.8891 - AUC: 0.9708
Epoch 5/25 - Loss: 0.2482 - Accuracy: 0.9067 - Precision: 0.9115 - Recall: 0.8738 - F1 Score: 0.8922 - AUC: 0.9722
Epoch 6/25 - Loss: 0.2454 - Accuracy: 0.9079 - Precision: 0.9054 - Recall: 0.8839 - F1 Score: 0.8945 - AUC: 0.9724
Epoch 7/25 - Loss: 0.2436 - Accuracy: 0.9068 - Precision: 0.9079 - Recall: 0.8782 - F1 Score: 0.8928 - AUC: 0.9731
Epoch 8/25 - Loss: 0.2409 - Accuracy: 0.9073 - Precision: 0.9011 - Recall: 0.8875 - F1 Score: 0.8942 - AUC: 0.9725
Epoch 9/25 - Loss: 0.2395 - Accuracy: 0.9101 - Precision: 0.9103 - R