# **Step 1: Load All Extracted Feature Arrays**

In [9]:
import numpy as np

# Load the per-modality feature matrices from their exported .npy files.
# The dict keys double as the labels printed in the shape report below.
feature_arrays = {
    "BERT": np.load("/content/bert_features_fixed.npy"),
    "ViT Train": np.load("/content/vit_train_features (1).npy"),  # shape (28709, 768)
    "XGBoost": np.load("/content/xgboost_features.npy"),
    "Wav2Vec": np.load("/content/wav2vec_features.npy"),
}
bert_features, vit_train_features, xgb_features, wav_features = feature_arrays.values()

# One "<modality> shape: (...)" line per array, in load order.
for modality, array in feature_arrays.items():
    print(f"{modality} shape:", array.shape)



BERT shape: (5802, 768)
ViT Train shape: (28709, 768)
XGBoost shape: (27901, 17)
Wav2Vec shape: (1148, 768)


In [14]:
import numpy as np

# Load the label vector that accompanies each feature file.
# NOTE(review): the recorded run shows 53043 BERT labels but only 5802 BERT
# feature rows, so these two files do not line up one-to-one — confirm which
# rows the BERT features were extracted from before training on them.
label_arrays = {
    "bert_labels": np.load("/content/bert_labels.npy"),
    "vit_train_labels": np.load("/content/vit_train_labels.npy"),
    "vit_test_labels": np.load("/content/vit_test_labels.npy"),
    "wav2vec_labels": np.load("/content/wav2vec_labels.npy"),
    "xgboost_labels": np.load("/content/xgboost_labels.npy"),
}
(
    bert_labels,
    vit_train_labels,
    vit_test_labels,
    wav2vec_labels,
    xgboost_labels,
) = label_arrays.values()

# One "<variable name>: (...)" line per label vector, in load order.
for array_name, array in label_arrays.items():
    print(f"{array_name}:", array.shape)


bert_labels: (53043,)
vit_train_labels: (28709,)
vit_test_labels: (14356,)
wav2vec_labels: (1148,)
xgboost_labels: (27901,)


# **ViT + XGBoost Fusion**

In [16]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import StandardScaler

# 1. Load Features and Labels
# Each file is read once and the arrays are cut to the number of rows they all
# share, instead of repeating a hard-coded row count in every slice.
# NOTE(review): rows are paired purely by position. The recorded shapes show the
# ViT file has 28709 rows and the XGBoost files 27901, so the trailing ViT rows
# are dropped — confirm that row i of every file describes the same sample.
vit_all = np.load("/content/vit_train_features (1).npy")
xgb_all = np.load("/content/xgboost_features.npy")
labels_all = np.load("/content/xgboost_labels.npy")

n_samples = min(len(vit_all), len(xgb_all), len(labels_all))
vit = vit_all[:n_samples]
xgb = xgb_all[:n_samples]
labels = labels_all[:n_samples]

print("Loaded shapes:")
print("ViT:", vit.shape)
print("XGBoost:", xgb.shape)
print("Labels:", labels.shape)

# 2. Normalize features
# Each modality gets its own scaler so that one block's scale does not
# influence the other's standardisation.
# NOTE(review): the scalers are fitted on every row; if a held-out split is
# added later, fit them on the training portion only.
scaler_vit = StandardScaler()
scaler_xgb = StandardScaler()

vit_scaled = scaler_vit.fit_transform(vit)
xgb_scaled = scaler_xgb.fit_transform(xgb)

# 3. Fuse features
# Column-wise concatenation: ViT columns first, then the XGBoost columns.
fused_features = np.concatenate((vit_scaled, xgb_scaled), axis=1)
print("Fused feature shape:", fused_features.shape)

# 4. Convert to PyTorch tensors
# float32 inputs for the linear layers, int64 class ids for CrossEntropyLoss.
features_tensor = torch.tensor(fused_features, dtype=torch.float32)
labels_tensor = torch.tensor(labels, dtype=torch.long)

# 5. Dataset & Dataloader
class FusionDataset(Dataset):
    """Map-style dataset that pairs row ``i`` of ``features`` with ``labels[i]``.

    Args:
        features: Indexable container exposing ``shape``; ``shape[0]`` is the
            number of samples.
        labels: Indexable container holding one label per row of ``features``.

    Raises:
        ValueError: If ``labels`` has fewer entries than ``features`` has rows.
            Indexing such a pair would otherwise fail part-way through an epoch.

    Warns:
        UserWarning: If ``labels`` has more entries than ``features`` has rows.
            The surplus labels are never read, which usually means the two
            inputs were not built from the same samples.
    """

    def __init__(self, features, labels):
        import warnings  # local import keeps the class self-contained

        n_rows, n_labels = features.shape[0], len(labels)
        if n_labels < n_rows:
            raise ValueError(
                f"features has {n_rows} rows but labels has only {n_labels} entries"
            )
        if n_labels > n_rows:
            warnings.warn(
                f"labels has {n_labels} entries but features has only {n_rows} rows; "
                "the extra labels will never be used",
                stacklevel=2,
            )

        self.features = features
        self.labels = labels

    def __len__(self):
        # The sample count is defined by the feature rows.
        return self.features.shape[0]

    def __getitem__(self, idx):
        """Return the ``(features, label)`` pair stored at position ``idx``."""
        return self.features[idx], self.labels[idx]

# Wrap the fused tensors and serve them in reshuffled mini-batches of 64.
dataset = FusionDataset(features=features_tensor, labels=labels_tensor)
dataloader = DataLoader(dataset=dataset, shuffle=True, batch_size=64)

# 6. Define Classifier
class FusionClassifier(nn.Module):
    """Three-layer MLP head: ``input_dim -> 512 -> 256 -> num_classes``.

    ReLU followed by a shared ``Dropout(p=0.3)`` is applied after each of the
    two hidden layers; the last layer returns raw class scores (no softmax).
    """

    def __init__(self, input_dim, num_classes):
        super().__init__()
        hidden_wide, hidden_narrow = 512, 256
        self.fc1 = nn.Linear(input_dim, hidden_wide)
        self.dropout = nn.Dropout(0.3)
        self.fc2 = nn.Linear(hidden_wide, hidden_narrow)
        self.fc3 = nn.Linear(hidden_narrow, num_classes)

    def forward(self, x):
        """Return class logits for a batch ``x`` whose last dimension is ``input_dim``."""
        hidden = x
        # Both hidden layers share the same activation + dropout treatment.
        for layer in (self.fc1, self.fc2):
            hidden = self.dropout(F.relu(layer(hidden)))
        return self.fc3(hidden)

# 7. Setup for training
# Seed PyTorch's global RNG before the model is built so that weight
# initialisation, dropout masks and the default DataLoader shuffle order repeat
# from run to run (some GPU kernels may still be slightly non-deterministic).
SEED = 42
torch.manual_seed(SEED)

input_dim = fused_features.shape[1]
# Size the output layer from the largest class id rather than the number of
# distinct ids: CrossEntropyLoss needs every target to be < num_classes, which
# the distinct-count only guarantees when the ids are exactly 0..K-1.
num_classes = int(labels.max()) + 1

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = FusionClassifier(input_dim, num_classes).to(device)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
num_epochs = 10

# 8. Training Loop
# One full pass over `dataloader` per epoch. The printed loss is the
# sample-weighted mean over the epoch and the accuracy is measured on the same
# training batches (with dropout active), so neither is a held-out metric.
for epoch in range(num_epochs):
    model.train()
    total_loss, total_correct, total_samples = 0, 0, 0

    for batch_features, batch_labels in dataloader:
        batch_features, batch_labels = batch_features.to(device), batch_labels.to(device)
        batch_size = batch_features.size(0)

        # Standard step: clear gradients, forward, backward, update.
        optimizer.zero_grad()
        outputs = model(batch_features)
        loss = criterion(outputs, batch_labels)
        loss.backward()
        optimizer.step()

        # Weight the batch-mean loss by the batch size so a smaller final
        # batch does not skew the epoch average.
        preds = torch.max(outputs, dim=1).indices
        total_samples += batch_size
        total_loss += batch_size * loss.item()
        total_correct += int((preds == batch_labels).sum())

    epoch_loss = total_loss / total_samples
    epoch_accuracy = total_correct / total_samples
    print(f"Epoch {epoch+1}/{num_epochs} — Loss: {epoch_loss:.4f}, Accuracy: {epoch_accuracy:.4f}")


Loaded shapes:
ViT: (27901, 768)
XGBoost: (27901, 17)
Labels: (27901,)
Fused feature shape: (27901, 785)
Epoch 1/10 — Loss: 0.4619, Accuracy: 0.7803
Epoch 2/10 — Loss: 0.3799, Accuracy: 0.8352
Epoch 3/10 — Loss: 0.3627, Accuracy: 0.8465
Epoch 4/10 — Loss: 0.3533, Accuracy: 0.8486
Epoch 5/10 — Loss: 0.3477, Accuracy: 0.8507
Epoch 6/10 — Loss: 0.3350, Accuracy: 0.8551
Epoch 7/10 — Loss: 0.3280, Accuracy: 0.8588
Epoch 8/10 — Loss: 0.3249, Accuracy: 0.8605
Epoch 9/10 — Loss: 0.3114, Accuracy: 0.8625
Epoch 10/10 — Loss: 0.3060, Accuracy: 0.8643


In [17]:
# STEP-BY-STEP CLASSIFIER TRAINING FOR BERT AND WAV2VEC FEATURES

import numpy as np
import torch
from sklearn.preprocessing import StandardScaler
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.nn.functional as F

# DEVICE SETUP
# Use the GPU when one is visible to PyTorch, otherwise stay on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# LOAD FEATURES & LABELS
bert_features, bert_labels = (
    np.load("/content/bert_features_fixed.npy"),
    np.load("/content/bert_labels.npy"),
)
wav_features, wav_labels = (
    np.load("/content/wav2vec_features.npy"),
    np.load("/content/wav2vec_labels.npy"),
)

# NOTE(review): the recorded output of this cell reports BERT features of shape
# (5802, 768) against 53043 labels, so the BERT pair is not one-to-one —
# confirm which label rows belong to the extracted features.
print("Shapes:")
for modality, feats, targets in (
    ("BERT", bert_features, bert_labels),
    ("Wav2Vec", wav_features, wav_labels),
):
    print(f"{modality}:", feats.shape, targets.shape)

# STANDARD SCALING + TENSOR CONVERSION (one modality at a time)
# Each modality is standardised with its own scaler, then converted to
# float32 features and int64 class ids.
scaler_bert = StandardScaler()
bert_scaled = scaler_bert.fit_transform(bert_features)
bert_X = torch.tensor(bert_scaled, dtype=torch.float32)
bert_y = torch.tensor(bert_labels, dtype=torch.long)

scaler_wav = StandardScaler()
wav_scaled = scaler_wav.fit_transform(wav_features)
wav_X = torch.tensor(wav_scaled, dtype=torch.float32)
wav_y = torch.tensor(wav_labels, dtype=torch.long)

# DATASET CLASS
class SimpleDataset(Dataset):
    """Map-style dataset that pairs ``features[i]`` with ``labels[i]``.

    Args:
        features: Sized, indexable container of samples; its length is the
            dataset length.
        labels: Sized, indexable container holding one label per sample.

    Raises:
        ValueError: If ``labels`` has fewer entries than ``features``. Indexing
            such a pair would otherwise fail part-way through an epoch.

    Warns:
        UserWarning: If ``labels`` has more entries than ``features``. Only the
            first ``len(features)`` labels can ever be read, which usually means
            the two inputs were not built from the same samples.
    """

    def __init__(self, features, labels):
        import warnings  # local import keeps the class self-contained

        n_rows, n_labels = len(features), len(labels)
        if n_labels < n_rows:
            raise ValueError(
                f"features has {n_rows} rows but labels has only {n_labels} entries"
            )
        if n_labels > n_rows:
            warnings.warn(
                f"labels has {n_labels} entries but features has only {n_rows} rows; "
                "the extra labels will never be used",
                stacklevel=2,
            )

        self.X = features
        self.y = labels

    def __len__(self):
        # The sample count is defined by the features.
        return len(self.X)

    def __getitem__(self, idx):
        """Return the ``(sample, label)`` pair stored at position ``idx``."""
        return self.X[idx], self.y[idx]

# MODEL
class SimpleClassifier(nn.Module):
    """Single-hidden-layer MLP: ``input_dim -> 256 -> num_classes``.

    The hidden layer is followed by ReLU and ``Dropout(p=0.3)``; the output
    layer returns raw class scores (no softmax).
    """

    def __init__(self, input_dim, num_classes):
        super().__init__()
        hidden_units = 256
        self.fc1 = nn.Linear(input_dim, hidden_units)
        self.dropout = nn.Dropout(0.3)
        self.fc2 = nn.Linear(hidden_units, num_classes)

    def forward(self, x):
        """Return class logits for a batch ``x`` whose last dimension is ``input_dim``."""
        hidden = self.dropout(F.relu(self.fc1(x)))
        return self.fc2(hidden)

# TRAINING FUNCTION
def train_model(X, y, name, num_epochs=10, batch_size=64, lr=1e-3):
    """Train a fresh ``SimpleClassifier`` on ``(X, y)`` and print per-epoch metrics.

    The printed loss and accuracy are computed on the training batches
    themselves (with dropout active); no held-out data is involved.

    Args:
        X: Feature tensor with one row per sample; ``X.shape[1]`` is used as
            the classifier's input width.
        y: Tensor of integer class ids, one per row of ``X``.
        name: Label used in the "Training ... classifier:" header line.
        num_epochs: Number of passes over the data (default 10).
        batch_size: Mini-batch size for the shuffled DataLoader (default 64).
        lr: Adam learning rate (default 1e-3).

    Returns:
        The trained ``SimpleClassifier``, on ``device`` and switched to eval
        mode so dropout is disabled for later inference.

    Raises:
        ValueError: If ``X`` has no rows, or ``y`` has fewer entries than ``X``
            has rows.

    Warns:
        UserWarning: If ``y`` has more entries than ``X`` has rows. Only the
            first ``X.shape[0]`` labels are used, which is what indexing by
            feature row did implicitly before this check existed.
    """
    import warnings  # function-scope import: only needed for the mismatch notice

    n_samples, n_labels = X.shape[0], len(y)
    if n_samples == 0:
        raise ValueError(f"{name}: cannot train on an empty feature tensor")
    if n_labels < n_samples:
        raise ValueError(
            f"{name}: got {n_labels} labels for {n_samples} feature rows; "
            "every row needs a label"
        )

    # Size the output layer from the largest class id in the supplied labels.
    # Counting distinct ids is only safe when they are exactly 0..K-1;
    # CrossEntropyLoss requires every target to be < num_classes.
    num_classes = int(y.max().item()) + 1

    if n_labels > n_samples:
        warnings.warn(
            f"{name}: {n_labels} labels for {n_samples} feature rows; only the "
            f"first {n_samples} labels are used. Check that the features and "
            "labels were built from the same samples.",
            stacklevel=2,
        )
        y = y[:n_samples]

    dataset = SimpleDataset(X, y)
    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

    model = SimpleClassifier(X.shape[1], num_classes).to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    print(f"\nTraining {name} classifier:")
    for epoch in range(num_epochs):
        model.train()
        total_loss, correct, total = 0, 0, 0
        for features, labels in dataloader:
            features, labels = features.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(features)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # Weight the batch-mean loss by batch size so a short final batch
            # does not skew the epoch average.
            total_loss += loss.item() * features.size(0)
            _, preds = torch.max(outputs, 1)
            correct += (preds == labels).sum().item()
            total += features.size(0)

        acc = correct / total
        print(f"Epoch {epoch+1}/{num_epochs} - Loss: {total_loss/total:.4f}, Accuracy: {acc:.4f}")

    model.eval()
    return model

# RUN TRAINING
# Train one independent classifier per modality, BERT first and then Wav2Vec.
for modality_X, modality_y, modality_name in (
    (bert_X, bert_y, "BERT"),
    (wav_X, wav_y, "Wav2Vec"),
):
    train_model(modality_X, modality_y, modality_name)


Shapes:
BERT: (5802, 768) (53043,)
Wav2Vec: (1148, 768) (1148,)

Training BERT classifier:
Epoch 1/10 - Loss: 0.5560, Accuracy: 0.8626
Epoch 2/10 - Loss: 0.2693, Accuracy: 0.8912
Epoch 3/10 - Loss: 0.2236, Accuracy: 0.9062
Epoch 4/10 - Loss: 0.1908, Accuracy: 0.9221
Epoch 5/10 - Loss: 0.1649, Accuracy: 0.9354
Epoch 6/10 - Loss: 0.1484, Accuracy: 0.9404
Epoch 7/10 - Loss: 0.1322, Accuracy: 0.9511
Epoch 8/10 - Loss: 0.1157, Accuracy: 0.9583
Epoch 9/10 - Loss: 0.1050, Accuracy: 0.9616
Epoch 10/10 - Loss: 0.0909, Accuracy: 0.9657

Training Wav2Vec classifier:
Epoch 1/10 - Loss: 1.6760, Accuracy: 0.3685
Epoch 2/10 - Loss: 1.1485, Accuracy: 0.6002
Epoch 3/10 - Loss: 0.8855, Accuracy: 0.6943
Epoch 4/10 - Loss: 0.7406, Accuracy: 0.7448
Epoch 5/10 - Loss: 0.6182, Accuracy: 0.7944
Epoch 6/10 - Loss: 0.5109, Accuracy: 0.8537
Epoch 7/10 - Loss: 0.4393, Accuracy: 0.8720
Epoch 8/10 - Loss: 0.3570, Accuracy: 0.9007
Epoch 9/10 - Loss: 0.3228, Accuracy: 0.9103
Epoch 10/10 - Loss: 0.2691, Accuracy: 0.93