In [None]:
import numpy as np
import os
import sys

# --- Paths ---
# Everything is read from / written to the same feature directory.
FEATURE_DIR = "/content/drive/MyDrive/NNDL/Features"
y_ocean_path = os.path.join(FEATURE_DIR, "seq_y.npy")      # input: OCEAN score array
mbti_y_path = os.path.join(FEATURE_DIR, "seq_y_mbti.npy")  # output: 4-column binary MBTI array

# --- 1. Load the OCEAN ground truth ---
print("--- Loading OCEAN Ground Truth Scores ---")
try:
    seq_y_ocean = np.load(y_ocean_path)
    N = len(seq_y_ocean)

    # The mapping below indexes five trait columns, so anything else is rejected.
    if seq_y_ocean.shape[1] != 5:
        raise ValueError(f"Expected 5 columns, found {seq_y_ocean.shape[1]}")

    # ASSUMPTION (not checked by the code): columns are stored in the order
    # Openness, Conscientiousness, Extraversion, Agreeableness, Neuroticism.
    # Neuroticism is only reported in the log line below; it feeds no MBTI column.
    O_IDX, C_IDX, E_IDX, A_IDX, N_IDX = range(5)

    print(f"‚úÖ Loaded {N} videos with OCEAN scores.")
    print(f"   Assumed order: O({O_IDX}), C({C_IDX}), E({E_IDX}), A({A_IDX}), N({N_IDX})")

except Exception as e:
    # Any load/shape problem is reported and the cell stops here.
    print(f"‚ùå ERROR loading OCEAN data from {y_ocean_path}: {e}. Exiting.")
    sys.exit()

# --- 2. Mapping parameters ---
# Output column order: [E/I, S/N, T/F, J/P]
mbti_labels = np.zeros((N, 4), dtype=np.float32)
THRESHOLD = 0.5
print(f"Applying binary mapping using a threshold of {THRESHOLD}...")

# --- 3. Threshold mapping ---
# One OCEAN source column per MBTI output column; a score strictly above
# THRESHOLD becomes 1.0, everything else stays 0.0:
#   col 0  E/I  <- Extraversion        (1 = E, 0 = I)
#   col 1  S/N  <- Openness            (1 = N, 0 = S)
#   col 2  T/F  <- Agreeableness       (1 = F, 0 = T)
#   col 3  J/P  <- Conscientiousness   (1 = J, 0 = P)
MBTI_SOURCE_COLUMNS = (E_IDX, O_IDX, A_IDX, C_IDX)
for mbti_col, ocean_col in enumerate(MBTI_SOURCE_COLUMNS):
    # Assigning the boolean comparison into the float32 array stores 0.0 / 1.0.
    mbti_labels[:, mbti_col] = seq_y_ocean[:, ocean_col] > THRESHOLD


# --- 4. Persist the MBTI labels and report ---
np.save(mbti_y_path, mbti_labels)

print("\n--- Mapping Complete ---")
print(f"MBTI Label array shape: {mbti_labels.shape}")
print("Output MBTI order: [E/I, S/N, T/F, J/P]")
print(f"Video 1 Labels: {mbti_labels[0]}")
print(f"Saved MBTI labels to: {mbti_y_path}")

--- Loading OCEAN Ground Truth Scores ---
‚úÖ Loaded 1000 videos with OCEAN scores.
   Assumed order: O(0), C(1), E(2), A(3), N(4)
Applying binary mapping using a threshold of 0.5...

--- Mapping Complete ---
MBTI Label array shape: (1000, 4)
Output MBTI order: [E/I, S/N, T/F, J/P]
Video 1 Labels: [0. 0. 0. 1.]
Saved MBTI labels to: /content/drive/MyDrive/NNDL/Features/seq_y_mbti.npy


In [None]:
import numpy as np
import os
import sys
import torch
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader, random_split
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score

# --- Define Paths ---
FEATURE_DIR = "/content/drive/MyDrive/NNDL/Features"

# Input X for the MLP is the 5-dim OCEAN Ground Truth
y_ocean_path = os.path.join(FEATURE_DIR, "seq_y.npy")
# Target Y for the MLP is the 4-dim MBTI Ground Truth
mbti_y_path = os.path.join(FEATURE_DIR, "seq_y_mbti.npy")

MODEL_SAVE_PATH_MBTI = os.path.join(FEATURE_DIR, "best_mbti_mlp.pth") # Final MLP Checkpoint

# --- Reproducibility ---
# Without a seed, random_split draws a different permutation on every run, so
# the train/val/test membership (and every reported metric) is not repeatable
# and no other cell can reconstruct the same test subset.
SEED = 42
torch.manual_seed(SEED)  # weight init, dropout and DataLoader shuffling

# --- 2. Load Data ---
print("--- Loading OCEAN Input and MBTI Target Data ---")
try:
    seq_y_ocean = np.load(y_ocean_path)
    mbti_labels = np.load(mbti_y_path)
    N = len(seq_y_ocean)

    # Both arrays must describe the same N samples: 5 OCEAN inputs, 4 MBTI targets.
    if seq_y_ocean.shape != (N, 5) or mbti_labels.shape != (N, 4):
         raise ValueError(f"Shape mismatch: OCEAN {seq_y_ocean.shape}, MBTI {mbti_labels.shape}")

    print(f"‚úÖ Data loaded successfully. N={N} sequences.")

except Exception as e:
    print(f"‚ùå Error loading data: {e}. Exiting.")
    sys.exit()

# --- 3. Prepare Tensors and Split ---
ocean_input_tensor = torch.tensor(seq_y_ocean, dtype=torch.float32) # (N, 5)
mbti_target_tensor = torch.tensor(mbti_labels, dtype=torch.float32)  # (N, 4)

dataset = TensorDataset(ocean_input_tensor, mbti_target_tensor)

# 70 / 15 / 15 split; the test share absorbs any rounding remainder.
train_size = int(0.7 * N)
val_size   = int(0.15 * N)
test_size  = N - train_size - val_size

# A dedicated generator keeps the partition independent of how much of the
# global RNG stream other code has consumed; the same seed and the same three
# lengths always yield the same index permutation.
split_generator = torch.Generator().manual_seed(SEED)
train_ds, val_ds, test_ds = random_split(
    dataset, [train_size, val_size, test_size], generator=split_generator
)

BATCH_SIZE = 32
train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True)
val_loader   = DataLoader(val_ds, batch_size=BATCH_SIZE, shuffle=False)
test_loader  = DataLoader(test_ds, batch_size=1, shuffle=False)

print(f"Train: {len(train_ds)} | Val: {len(val_ds)} | Test: {len(test_ds)}")
# --- 4. Define the Small MLP Classifier ---
class MBTIMLPClassifier(nn.Module):
    """Small feed-forward network: ``input_dim`` features -> ``output_dim`` sigmoid outputs.

    Layout: Linear(input_dim, 32) -> ReLU -> Dropout(0.2) -> Linear(32, 16)
    -> ReLU -> Linear(16, output_dim) -> Sigmoid.  Because the final layer is a
    sigmoid, every output lies in (0, 1).
    """

    def __init__(self, input_dim=5, output_dim=4):
        super().__init__()
        # Positions in this list become the "net.<index>" keys of the state
        # dict, so the order must not change if checkpoints are to stay loadable.
        stack = [
            nn.Linear(input_dim, 32),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(32, 16),
            nn.ReLU(),
            nn.Linear(16, output_dim),
            nn.Sigmoid(),
        ]
        self.net = nn.Sequential(*stack)

    def forward(self, x):
        """Run ``x`` through the stack and return the sigmoid activations."""
        activations = self.net(x)
        return activations

# --- 5. Training Loop ---
# Trains the MLP with early stopping on validation loss, restores the best
# weights, then evaluates once on the held-out test loader.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

model = MBTIMLPClassifier(input_dim=5, output_dim=4).to(DEVICE)
criterion = nn.BCELoss() # Binary Cross-Entropy Loss (model already applies Sigmoid)
optimizer = torch.optim.AdamW(model.parameters(), lr=20e-4, weight_decay=1e-5)

best_val_loss = float("inf")
best_state = None  # stays None if validation loss never improves (e.g. NaN losses)
patience = 10
pat_ctr = 0
EPOCHS = 100

print(f"\nStarting MLP training on {DEVICE}...")

for epoch in range(1, EPOCHS + 1):
    model.train()
    train_loss = 0
    for x, y in train_loader:
        x, y = x.to(DEVICE), y.to(DEVICE)

        optimizer.zero_grad()
        preds = model(x)
        loss = criterion(preds, y)
        loss.backward()
        optimizer.step()
        # Weight by batch size so the epoch average is a true per-sample mean.
        train_loss += loss.item() * x.size(0)

    # -------- Validation --------
    model.eval()
    val_loss = 0
    y_true_val, y_pred_val = [], []

    with torch.no_grad():
        for x, y in val_loader:
            x, y = x.to(DEVICE), y.to(DEVICE)
            preds = model(x)
            loss = criterion(preds, y)
            val_loss += loss.item() * x.size(0)

            y_true_val.append(y.cpu().numpy())
            y_pred_val.append((preds > 0.5).float().cpu().numpy())

    avg_train_loss = train_loss / len(train_ds)
    avg_val_loss = val_loss / len(val_ds)

    y_true_val = np.vstack(y_true_val)
    y_pred_val = np.vstack(y_pred_val)

    # On 2-D label arrays accuracy_score is subset accuracy: a sample counts
    # only when all four traits match.
    val_accuracy = accuracy_score(y_true_val, y_pred_val)
    val_f1_macro = f1_score(y_true_val, y_pred_val, average='macro', zero_division=0)

    print(f"Epoch {epoch:02d} | Train Loss: {avg_train_loss:.4f} | Val Loss: {avg_val_loss:.4f} | "
          f"Val Acc: {val_accuracy:.4f} | Val F1 (Macro): {val_f1_macro:.4f}")

    if avg_val_loss < best_val_loss:
        best_val_loss = avg_val_loss
        # state_dict() hands back references to the live parameter tensors, so
        # keeping it directly would let later optimizer steps overwrite the
        # "best" weights. Clone every tensor to freeze a real snapshot.
        best_state = {
            name: tensor.detach().clone()
            for name, tensor in model.state_dict().items()
        }
        pat_ctr = 0
        # The on-disk checkpoint is written immediately, in the same format as before.
        torch.save(model.state_dict(), MODEL_SAVE_PATH_MBTI)
    else:
        pat_ctr += 1
        if pat_ctr >= patience:
            print("‚èπ Early stopping triggered")
            break

# Restore the best-validation weights before testing.
if best_state is not None:
    model.load_state_dict(best_state)
else:
    print("No validation improvement was recorded; evaluating the final weights.")

# --- 6. Final Test Evaluation ---
model.eval()
true_list, pred_list = [], []

with torch.no_grad():
    for x, y in test_loader:
        x, y = x.to(DEVICE), y.to(DEVICE)
        preds = model(x)

        true_list.append(y.cpu().numpy())
        pred_list.append((preds > 0.5).float().cpu().numpy())

y_true_test = np.vstack(true_list)
y_pred_test = np.vstack(pred_list)

test_accuracy = accuracy_score(y_true_test, y_pred_test)
test_f1_macro = f1_score(y_true_test, y_pred_test, average='macro', zero_division=0)
test_precision = precision_score(y_true_test, y_pred_test, average='macro', zero_division=0)
test_recall = recall_score(y_true_test, y_pred_test, average='macro', zero_division=0)


print("\n================= FINAL MBTI MLP TEST RESULTS =================")
print(f"Accuracy (Total Label Match): {test_accuracy:.4f}")
print(f"F1 Score (Macro, per trait): {test_f1_macro:.4f}")
print(f"Precision (Macro): {test_precision:.4f}")
print(f"Recall (Macro): {test_recall:.4f}")
print("=============================================================")

--- Loading OCEAN Input and MBTI Target Data ---
‚úÖ Data loaded successfully. N=1000 sequences.
Train: 700 | Val: 150 | Test: 150

Starting MLP training on cpu...
Epoch 01 | Train Loss: 0.6845 | Val Loss: 0.6728 | Val Acc: 0.1067 | Val F1 (Macro): 0.5714
Epoch 02 | Train Loss: 0.6650 | Val Loss: 0.6436 | Val Acc: 0.2067 | Val F1 (Macro): 0.6688
Epoch 03 | Train Loss: 0.6304 | Val Loss: 0.6044 | Val Acc: 0.3267 | Val F1 (Macro): 0.7727
Epoch 04 | Train Loss: 0.5936 | Val Loss: 0.5523 | Val Acc: 0.3533 | Val F1 (Macro): 0.8120
Epoch 05 | Train Loss: 0.5447 | Val Loss: 0.4889 | Val Acc: 0.5067 | Val F1 (Macro): 0.8501
Epoch 06 | Train Loss: 0.4945 | Val Loss: 0.4293 | Val Acc: 0.5200 | Val F1 (Macro): 0.8650
Epoch 07 | Train Loss: 0.4493 | Val Loss: 0.3901 | Val Acc: 0.5333 | Val F1 (Macro): 0.8711
Epoch 08 | Train Loss: 0.4191 | Val Loss: 0.3604 | Val Acc: 0.5400 | Val F1 (Macro): 0.8778
Epoch 09 | Train Loss: 0.3967 | Val Loss: 0.3482 | Val Acc: 0.5200 | Val F1 (Macro): 0.8757
Epoch 10

In [None]:
import numpy as np
import os
import sys
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader, random_split
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score

# ====================================================================
# ‚ö†Ô∏è NECESSARY CLASS DEFINITIONS
# ====================================================================

# 1. OceanDataset (The data wrapper for splitting)
class OceanDataset(Dataset):
    """Index-aligned container for sequences (``X``), targets (``Y``) and identifiers (``IDs``).

    Items come back as plain dicts holding whatever objects were stored;
    padding and tensor conversion are left to the DataLoader's collate function.
    """

    def __init__(self, X, Y, IDs):
        # The three containers are stored as given (no copies, no validation).
        self.X, self.Y, self.IDs = X, Y, IDs

    def __len__(self):
        # The sequence container defines the dataset size.
        return len(self.X)

    def __getitem__(self, idx):
        # Same position in all three containers -> one sample.
        return dict(x=self.X[idx], y=self.Y[idx], id=self.IDs[idx])

# 2. collate_fn (The padding function for DataLoader)
def collate_fn(batch):
    """Zero-pad a batch of 2-D sequences to a common length and build a validity mask.

    Args:
        batch: list of dicts with at least the keys ``"x"`` (array indexed as
            (time, features)) and ``"id"``.

    Returns:
        dict with
          ``"x"``    float32 tensor, shape (batch, max_time, features), zero padded at the end;
          ``"mask"`` bool tensor, shape (batch, max_time), True on real time steps;
          ``"id"``   list of the ids in batch order.
        The ``"y"`` entries of the items are not part of the result.
    """
    ids = [sample["id"] for sample in batch]
    lengths = [len(sample["x"]) for sample in batch]
    longest = max(lengths)

    # Append (longest - length) all-zero rows to each sequence; the feature
    # axis is never padded.
    padded = np.stack([
        np.pad(
            sample["x"],
            ((0, longest - length), (0, 0)),
            mode='constant',
            constant_values=0.0,
        )
        for sample, length in zip(batch, lengths)
    ])

    # Row i is True for the first lengths[i] positions and False afterwards.
    valid = np.arange(longest)[None, :] < np.asarray(lengths)[:, None]

    return {
        "x": torch.tensor(padded, dtype=torch.float32),
        "mask": torch.tensor(valid, dtype=torch.bool),
        "id": ids,
    }


# 3. OceanEncBiGRUTransformer (Phase 1 Model - OCEAN Score Regression)
# Using the BiGRU version which successfully loaded previously with the given checkpoint.
class OceanEncBiGRUTransformer(nn.Module):
    """Per-modality encoders -> fusion -> BiGRU -> CLS-token Transformer -> 5-value head.

    The last axis of the input is sliced into four modality blocks (clip, fer,
    wav, prosody). Each block is projected by its own Linear->ReLU->LayerNorm
    stack, the projections are concatenated and fused to ``d_model``, passed
    through a bidirectional GRU and a Transformer encoder with a learned CLS
    token, and the CLS output is mapped to 5 values by the head.

    ``input_dim`` is accepted for call-site compatibility but is not read; the
    slicing is driven only by the four ``*_dim`` arguments.
    """

    def __init__(
        self,
        input_dim=1291,
        clip_dim=512, fer_dim=7, wav_dim=768, prosody_dim=4,
        clip_enc_dim=128, fer_enc_dim=32, wav_enc_dim=128, prosody_enc_dim=16,
        d_model=256, nhead=8, num_layers=2, dim_feedforward=512, dropout=0.2,
    ):
        super().__init__()

        # Widths of the four modality blocks, needed again when slicing in forward().
        self.clip_dim, self.fer_dim = clip_dim, fer_dim
        self.wav_dim, self.prosody_dim = wav_dim, prosody_dim

        # --- Modality encoders (attribute names are the checkpoint key prefixes) ---
        self.clip_encoder = self._projection(clip_dim, clip_enc_dim)
        self.fer_encoder = self._projection(fer_dim, fer_enc_dim)
        self.wav_encoder = self._projection(wav_dim, wav_enc_dim)
        self.prosody_encoder = self._projection(prosody_dim, prosody_enc_dim)

        # --- Fusion of the concatenated encodings down to d_model ---
        self.fuse_proj = self._projection(
            clip_enc_dim + fer_enc_dim + wav_enc_dim + prosody_enc_dim, d_model
        )

        # --- BiGRU: two directions of d_model // 2 concatenate back to d_model ---
        self.bigru = nn.GRU(
            input_size=d_model,
            hidden_size=d_model // 2,
            num_layers=1,
            batch_first=True,
            bidirectional=True,
        )
        self.gru_ln = nn.LayerNorm(d_model)

        # --- Transformer with a learned CLS token prepended to every sequence ---
        self.cls_token = nn.Parameter(torch.randn(1, 1, d_model))
        self.encoder = nn.TransformerEncoder(
            nn.TransformerEncoderLayer(
                d_model=d_model,
                nhead=nhead,
                dim_feedforward=dim_feedforward,
                dropout=dropout,
                batch_first=True,
            ),
            num_layers=num_layers,
        )

        # --- Regression head: d_model -> d_model -> 5 ---
        self.head = nn.Sequential(
            nn.Linear(d_model, d_model),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(d_model, 5),
        )

    @staticmethod
    def _projection(in_features, out_features):
        """Build the Linear -> ReLU -> LayerNorm stack shared by all encoders."""
        return nn.Sequential(
            nn.Linear(in_features, out_features),
            nn.ReLU(),
            nn.LayerNorm(out_features),
        )

    def forward(self, x, mask):
        """Map padded sequences to 5 outputs per sample.

        Args:
            x: tensor of shape (batch, time, features); the last axis is sliced
               as [clip | fer | wav | ...] from the front, and the prosody block
               is taken from the final ``prosody_dim`` entries.
            mask: (batch, time) tensor that is non-zero / True on real steps and
               zero / False on padding.

        Returns:
            Tensor of shape (batch, 5) produced from the CLS position.
        """
        batch_size, _, _ = x.shape

        # Slice boundaries of the first three modality blocks.
        fer_start = self.clip_dim
        wav_start = fer_start + self.fer_dim
        wav_end = wav_start + self.wav_dim

        encoded = [
            self.clip_encoder(x[:, :, :fer_start]),
            self.fer_encoder(x[:, :, fer_start:wav_start]),
            self.wav_encoder(x[:, :, wav_start:wav_end]),
            self.prosody_encoder(x[:, :, -self.prosody_dim:]),
        ]
        fused = self.fuse_proj(torch.cat(encoded, dim=-1))

        # Recurrent pass followed by layer normalisation.
        recurrent, _ = self.bigru(fused)
        recurrent = self.gru_ln(recurrent)

        # Prepend the CLS token; it is always marked valid in the mask.
        tokens = torch.cat(
            [self.cls_token.expand(batch_size, 1, -1), recurrent], dim=1
        )
        cls_valid = torch.ones(batch_size, 1, device=mask.device)
        # The encoder expects True where a position must be ignored.
        padding_mask = torch.cat([cls_valid, mask], dim=1) == 0

        encoded_tokens = self.encoder(tokens, src_key_padding_mask=padding_mask)

        # The CLS position (index 0) feeds the head.
        return self.head(encoded_tokens[:, 0])


# 4. MBTIMLPClassifier (Phase 2 Model - MBTI Classification)
# ‚ö†Ô∏è FIXED: hidden_dim changed to 32 and one Dropout layer removed to match the saved checkpoint architecture.
class MBTIMLPClassifier(nn.Module):
    """Phase 2 model: maps ``input_dim`` scores to ``output_dim`` sigmoid outputs.

    The stack has exactly one Dropout layer, so its parameters live under the
    state-dict keys ``net.0`` (input_dim -> hidden_dim), ``net.3``
    (hidden_dim -> hidden_dim // 2) and ``net.5`` (hidden_dim // 2 -> output_dim).
    A second Dropout after the second ReLU would shift the last Linear to
    ``net.6`` and break loading of checkpoints saved with this layout.
    """

    def __init__(self, input_dim=5, output_dim=4, hidden_dim=32, dropout=0.5):
        super().__init__()
        bottleneck_dim = hidden_dim // 2
        stack = (
            nn.Linear(input_dim, hidden_dim),        # net.0
            nn.ReLU(),                               # net.1
            nn.Dropout(dropout),                     # net.2
            nn.Linear(hidden_dim, bottleneck_dim),   # net.3
            nn.ReLU(),                               # net.4
            nn.Linear(bottleneck_dim, output_dim),   # net.5
            nn.Sigmoid(),                            # net.6
        )
        self.net = nn.Sequential(*stack)

    def forward(self, x):
        """Return the sigmoid activations for ``x``."""
        activations = self.net(x)
        return activations

# --------------------------------------------------------------------
# --- 1. Define Final Test Paths and Parameters ---
# --------------------------------------------------------------------
FEATURE_DIR = "/content/drive/MyDrive/NNDL/Features"

# Data paths for Test Loader creation (as per original script)
X_final_path = os.path.join(FEATURE_DIR, "seq_X.npy")
mbti_y_path = os.path.join(FEATURE_DIR, "seq_y_mbti.npy")
seq_id_path = os.path.join(FEATURE_DIR, "seq_id.npy")

# Model checkpoint paths - Using the uploaded file names
P1_MODEL_PATH = "/content/drive/MyDrive/NNDL/Features/ocean_enc_bigru_trans_best.pth" # Phase 1 (OCEAN)
# NOTE(review): the MLP training cell in this notebook writes "best_mbti_mlp.pth";
# confirm that this "(1)" copy is the checkpoint that is meant to be evaluated.
P2_MODEL_PATH = "/content/drive/MyDrive/NNDL/Features/best_mbti_mlp(1).pth"# Phase 2 (MBTI)

DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# Seed for the train/val/test partition. random_split draws a fresh random
# permutation on every call unless it is given a seeded generator, so without
# this the "test" subset changes from run to run and can contain samples the
# models were trained on.
# NOTE(review): this only reproduces the training-time partition if training
# used the same seed and the same three lengths - confirm against the training code.
SPLIT_SEED = 42

# --------------------------------------------------------------------
# --- 2. Load Test Data & Prepare DataLoader ---
# --------------------------------------------------------------------
print("--- Loading Full Test Dataset (X and MBTI Y) ---")
try:
    # Load all data needed.
    # allow_pickle=True unpickles objects from the file: only use with trusted files.
    seq_X_raw = np.load(X_final_path, allow_pickle=True)
    seq_y_mbti = np.load(mbti_y_path)
    seq_id = np.load(seq_id_path)
    N = len(seq_X_raw)

    # All three arrays are indexed with the same sample indices below, so a
    # length mismatch would silently pair sequences with the wrong labels/ids.
    if len(seq_y_mbti) != N or len(seq_id) != N:
        raise ValueError(
            f"Length mismatch: X has {N} sequences, "
            f"MBTI labels {len(seq_y_mbti)}, ids {len(seq_id)}"
        )

    # Pre-process X list and determine input dimension
    seq_X = [np.array(x, dtype=np.float32) for x in seq_X_raw]
    INPUT_DIM = seq_X[0].shape[1] if N > 0 and seq_X[0].ndim == 2 else 0

except Exception as e:
    print(f"‚ùå Error loading final data for test: {e}. Exiting.")
    sys.exit()

# --- Build a deterministic 70 / 15 / 15 split and keep only the test part ---
# The dataset's Y slot is filled with zeros; the MBTI ground truth is looked up
# separately through the test indices.
dummy_y = np.zeros((N, 5), dtype=np.float32)
dataset = OceanDataset(seq_X, dummy_y, seq_id)

train_size = int(0.7 * N)
val_size   = int(0.15 * N)
test_size  = N - train_size - val_size

# Split the full dataset to get the test set indices
split_generator = torch.Generator().manual_seed(SPLIT_SEED)
_, _, test_ds_split = random_split(
    dataset, [train_size, val_size, test_size], generator=split_generator
)

# Filter the MBTI ground truth array to match the test set indices
test_indices = test_ds_split.indices
y_true_mbti_test = seq_y_mbti[test_indices]

# batch_size=1 and shuffle=False keep predictions in the same order as test_indices.
test_loader_full = DataLoader(test_ds_split, batch_size=1, shuffle=False, collate_fn=collate_fn)


# --------------------------------------------------------------------
# --- 3. Load Models ---
# --------------------------------------------------------------------
# Each model is built, its weights are read from the checkpoint onto DEVICE,
# and it is switched to eval mode. Any failure is reported and stops the cell.
print("\n--- Loading Phase 1 (OCEAN) and Phase 2 (MBTI) Models ---")

# Phase 1: BiGRU-Transformer
p1_model = OceanEncBiGRUTransformer(input_dim=INPUT_DIM).to(DEVICE)
try:
    p1_weights = torch.load(P1_MODEL_PATH, map_location=DEVICE)
    p1_model.load_state_dict(p1_weights)
    p1_model.eval()
except Exception as e:
    print(f"‚ùå Error loading Phase 1 model from {P1_MODEL_PATH}: {e}")
    sys.exit()
else:
    print(f"‚úÖ Loaded Phase 1 Model (BiGRU-T) from {P1_MODEL_PATH}")

# Phase 2: MLP classifier, built with hidden_dim=32 so the layer shapes are
# (5 -> 32 -> 16 -> 4).
p2_model = MBTIMLPClassifier(input_dim=5, output_dim=4, hidden_dim=32).to(DEVICE)
try:
    p2_weights = torch.load(P2_MODEL_PATH, map_location=DEVICE)
    p2_model.load_state_dict(p2_weights)
    p2_model.eval()
except Exception as e:
    print(f"‚ùå Error loading Phase 2 model from {P2_MODEL_PATH}: {e}")
    sys.exit()
else:
    print(f"‚úÖ Loaded Phase 2 Model (MLP) from {P2_MODEL_PATH}")


# --------------------------------------------------------------------
# --- 4. Two-Phase Inference ---
# --------------------------------------------------------------------
print("\n--- Running Two-Phase Inference on Test Set ---")
predicted_mbti_list = []

with torch.no_grad():
    for batch in test_loader_full:
        x = batch["x"].to(DEVICE)
        mask = batch["mask"].to(DEVICE)

        # Phase 1 output (5 values per sample) is fed straight into phase 2,
        # which returns 4 sigmoid probabilities per sample.
        predicted_mbti_probs = p2_model(p1_model(x, mask))

        # A probability strictly above 0.5 becomes label 1, otherwise 0.
        predicted_mbti_list.append(
            (predicted_mbti_probs > 0.5).float().cpu().numpy()
        )

# --------------------------------------------------------------------
# --- 5. Final Evaluation ---
# --------------------------------------------------------------------
# Stack the per-batch predictions and cast both sides to integer labels.
y_pred_mbti_test = np.vstack(predicted_mbti_list).astype(int)
y_true_mbti_test = y_true_mbti_test.astype(int)

# Subset accuracy: a sample counts only when every trait column matches.
final_subset_accuracy = accuracy_score(y_true_mbti_test, y_pred_mbti_test)

# Accuracy of each trait column on its own, then their plain average.
trait_accuracies = [
    accuracy_score(y_true_mbti_test[:, col], y_pred_mbti_test[:, col])
    for col in range(y_true_mbti_test.shape[1])
]
final_avg_trait_accuracy = np.mean(trait_accuracies)

# Macro-averaged scores over the trait columns; undefined ratios count as 0.
macro_options = dict(average='macro', zero_division=0)
final_f1_macro = f1_score(y_true_mbti_test, y_pred_mbti_test, **macro_options)
final_precision = precision_score(y_true_mbti_test, y_pred_mbti_test, **macro_options)
final_recall = recall_score(y_true_mbti_test, y_pred_mbti_test, **macro_options)


print("\n================= FINAL END-TO-END MBTI RESULTS =================")
print(f"Total Test Samples: {len(y_true_mbti_test)}")
print(f"Accuracy (Total Label Match - Subset): {final_subset_accuracy:.4f}")
print(f"Accuracy (AVERAGE TRAIT-WISE): {final_avg_trait_accuracy:.4f}")
print(f"F1 Score (Macro, per trait): {final_f1_macro:.4f}")
print(f"Precision (Macro): {final_precision:.4f}")
print(f"Recall (Macro): {final_recall:.4f}")
print("===============================================================")

--- Loading Full Test Dataset (X and MBTI Y) ---

--- Loading Phase 1 (OCEAN) and Phase 2 (MBTI) Models ---
‚úÖ Loaded Phase 1 Model (BiGRU-T) from /content/drive/MyDrive/NNDL/Features/ocean_enc_bigru_trans_best.pth
‚úÖ Loaded Phase 2 Model (MLP) from /content/drive/MyDrive/NNDL/Features/best_mbti_mlp(1).pth

--- Running Two-Phase Inference on Test Set ---

Total Test Samples: 150
Accuracy (Total Label Match - Subset): 0.4000
Accuracy (AVERAGE TRAIT-WISE): 0.7367
F1 Score (Macro, per trait): 0.7810
Precision (Macro): 0.6934
Recall (Macro): 0.9100
