In [19]:
from collections import defaultdict

features_root = 'features_resnet'
label_map = {}
class_counts = defaultdict(int)

for idx, label in enumerate(sorted(os.listdir(features_root))):
    label_dir = os.path.join(features_root, label)
    if os.path.isdir(label_dir):
        label_map[label] = idx
        class_counts[idx] = len([f for f in os.listdir(label_dir) if f.endswith(".npy")])

print("Total classes:", len(label_map))


Total classes: 2000


In [21]:
import torch

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

total_samples = sum(class_counts.values())
num_classes = len(class_counts)

weights = [total_samples / (num_classes * class_counts[i]) for i in range(num_classes)]
weights_tensor = torch.tensor(weights, dtype=torch.float).to(device)

print("✅ Class weights ready:", weights_tensor.shape)


✅ Class weights ready: torch.Size([2000])


In [23]:
from torch.utils.data import Dataset, DataLoader
import numpy as np
import torch
from torch.nn.utils.rnn import pad_sequence

class SignLanguageDataset(Dataset):
    def __init__(self, features_root, label_map):
        self.samples = []
        for label in os.listdir(features_root):
            label_dir = os.path.join(features_root, label)
            if os.path.isdir(label_dir):
                for file in os.listdir(label_dir):
                    if file.endswith(".npy"):
                        self.samples.append((os.path.join(label_dir, file), label_map[label]))

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        feature_path, label_idx = self.samples[idx]
        feature = np.load(feature_path)
        return torch.tensor(feature, dtype=torch.float32), torch.tensor(label_idx, dtype=torch.long)

def collate_fn(batch):
    sequences, labels = zip(*batch)
    padded_sequences = pad_sequence(sequences, batch_first=True)  # (batch, max_seq_len, 512)
    labels = torch.stack(labels)
    return padded_sequences, labels

dataset = SignLanguageDataset(features_root, label_map)
dataloader = DataLoader(dataset, batch_size=32, shuffle=True, collate_fn=collate_fn)


In [25]:
import torch.nn as nn

class GRUClassifier(nn.Module):
    def __init__(self, input_dim=512, hidden_dim=256, num_layers=2, num_classes=2000, dropout=0.3):
        super(GRUClassifier, self).__init__()
        self.gru = nn.GRU(input_dim, hidden_dim, num_layers, batch_first=True, dropout=dropout, bidirectional=True)
        self.fc = nn.Linear(hidden_dim * 2, num_classes)

    def forward(self, x):
        gru_out, _ = self.gru(x)
        last_out = gru_out[:, -1, :]  # Use last time step
        logits = self.fc(last_out)
        return logits


In [29]:
from tqdm import tqdm
import torch.nn as nn
import torch
from sklearn.metrics import classification_report

model = GRUClassifier(num_classes=len(label_map)).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
loss_fn = nn.CrossEntropyLoss(weight=weights_tensor)

# ✅ Learning rate scheduler
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer, mode='min', factor=0.5, patience=2, verbose=True
)

num_epochs = 30  # ✅ Increased from 10 to 30

for epoch in range(num_epochs):
    model.train()
    total_loss, total_correct, total_samples = 0, 0, 0

    loop = tqdm(dataloader, desc=f"Epoch {epoch+1}/{num_epochs}", leave=False)

    for batch_x, batch_y in loop:
        batch_x, batch_y = batch_x.to(device), batch_y.to(device)
        optimizer.zero_grad()
        outputs = model(batch_x)
        loss = loss_fn(outputs, batch_y)
        loss.backward()
        optimizer.step()

        total_loss += loss.item() * batch_x.size(0)
        _, preds = torch.max(outputs, dim=1)
        total_correct += (preds == batch_y).sum().item()
        total_samples += batch_x.size(0)

        loop.set_postfix(loss=loss.item(), accuracy=f"{(total_correct / total_samples) * 100:.2f}%")

    avg_loss = total_loss / total_samples
    accuracy = total_correct / total_samples * 100
    scheduler.step(avg_loss)  # ✅ Reduce LR on plateau

    print(f"✅ Epoch {epoch+1} Completed — Loss: {avg_loss:.4f}, Accuracy: {accuracy:.2f}%")

# ✅ Evaluation after training
model.eval()
all_preds, all_labels = [], []

with torch.no_grad():
    for batch_x, batch_y in dataloader:
        batch_x, batch_y = batch_x.to(device), batch_y.to(device)
        outputs = model(batch_x)
        _, preds = torch.max(outputs, dim=1)
        all_preds.extend(preds.cpu().numpy())
        all_labels.extend(batch_y.cpu().numpy())

# 📊 Classification report to evaluate which classes model is getting wrong
print("\n📊 Classification Report:")
print(classification_report(all_labels, all_preds, target_names=list(label_map.keys())))


                                                                                                                       

✅ Epoch 1 Completed — Loss: 7.6648, Accuracy: 0.04%


                                                                                                                       

✅ Epoch 2 Completed — Loss: 7.6481, Accuracy: 0.04%


                                                                                                                       

✅ Epoch 3 Completed — Loss: 7.6412, Accuracy: 0.03%


                                                                                                                       

✅ Epoch 4 Completed — Loss: 7.6373, Accuracy: 0.02%


                                                                                                                       

✅ Epoch 5 Completed — Loss: 7.6328, Accuracy: 0.01%


                                                                                                                       

✅ Epoch 6 Completed — Loss: 7.6299, Accuracy: 0.08%


                                                                                                                       

✅ Epoch 7 Completed — Loss: 7.6253, Accuracy: 0.06%


                                                                                                                       

✅ Epoch 8 Completed — Loss: 7.6156, Accuracy: 0.02%


                                                                                                                       

✅ Epoch 9 Completed — Loss: 7.5947, Accuracy: 0.06%


                                                                                                                       

✅ Epoch 10 Completed — Loss: 7.5744, Accuracy: 0.07%


                                                                                                                       

✅ Epoch 11 Completed — Loss: 7.5370, Accuracy: 0.08%


                                                                                                                       

✅ Epoch 12 Completed — Loss: 7.4849, Accuracy: 0.15%


                                                                                                                       

✅ Epoch 13 Completed — Loss: 7.4102, Accuracy: 0.20%


                                                                                                                       

✅ Epoch 14 Completed — Loss: 7.2614, Accuracy: 0.22%


                                                                                                                       

✅ Epoch 15 Completed — Loss: 6.9944, Accuracy: 0.34%


                                                                                                                       

✅ Epoch 16 Completed — Loss: 6.6948, Accuracy: 0.53%


                                                                                                                       

✅ Epoch 17 Completed — Loss: 6.3799, Accuracy: 0.94%


                                                                                                                       

✅ Epoch 18 Completed — Loss: 6.0672, Accuracy: 1.24%


                                                                                                                       

✅ Epoch 19 Completed — Loss: 5.7282, Accuracy: 2.11%


                                                                                                                       

✅ Epoch 20 Completed — Loss: 5.3832, Accuracy: 2.94%


                                                                                                                       

✅ Epoch 21 Completed — Loss: 5.0344, Accuracy: 4.43%


                                                                                                                       

✅ Epoch 22 Completed — Loss: 4.6567, Accuracy: 6.43%


                                                                                                                       

✅ Epoch 23 Completed — Loss: 4.2920, Accuracy: 9.82%


                                                                                                                       

✅ Epoch 24 Completed — Loss: 3.9456, Accuracy: 13.46%


                                                                                                                       

✅ Epoch 25 Completed — Loss: 3.5995, Accuracy: 17.13%


                                                                                                                       

✅ Epoch 26 Completed — Loss: 3.2873, Accuracy: 22.09%


                                                                                                                       

✅ Epoch 27 Completed — Loss: 2.9537, Accuracy: 27.90%


                                                                                                                       

✅ Epoch 28 Completed — Loss: 2.6653, Accuracy: 33.06%


                                                                                                                       

✅ Epoch 29 Completed — Loss: 2.4029, Accuracy: 38.43%


                                                                                                                       

✅ Epoch 30 Completed — Loss: 2.1678, Accuracy: 43.05%

📊 Classification Report:
                  precision    recall  f1-score   support

               a       1.00      0.25      0.40         4
           a lot       0.50      0.29      0.36         7
         abdomen       0.45      1.00      0.62         5
            able       0.67      0.40      0.50         5
           about       1.00      0.71      0.83         7
           above       0.67      0.80      0.73         5
          accent       1.00      1.00      1.00         5
          accept       0.83      0.62      0.71         8
        accident       1.00      0.38      0.56        13
      accomplish       0.83      1.00      0.91         5
      accountant       0.50      1.00      0.67         5
          across       0.60      0.50      0.55         6
             act       0.60      0.60      0.60         5
          action       1.00      0.50      0.67         6
          active       0.42      1.00      0.59  

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [35]:
# Continue training from previous model state
start_epoch = 30
num_epochs = 50  # 30 done + 20 more

for epoch in range(start_epoch, num_epochs):
    model.train()
    total_loss, total_correct, total_samples = 0, 0, 0

    loop = tqdm(dataloader, desc=f"Epoch {epoch+1}/{num_epochs}", leave=False)

    for batch_x, batch_y in loop:
        batch_x, batch_y = batch_x.to(device), batch_y.to(device)
        optimizer.zero_grad()
        outputs = model(batch_x)
        loss = loss_fn(outputs, batch_y)
        loss.backward()
        optimizer.step()

        total_loss += loss.item() * batch_x.size(0)
        _, preds = torch.max(outputs, dim=1)
        total_correct += (preds == batch_y).sum().item()
        total_samples += batch_x.size(0)

        loop.set_postfix(loss=loss.item(), accuracy=f"{(total_correct / total_samples) * 100:.2f}%")

    avg_loss = total_loss / total_samples
    accuracy = total_correct / total_samples * 100
    scheduler.step(avg_loss)

    print(f"✅ Epoch {epoch+1} Completed — Loss: {avg_loss:.4f}, Accuracy: {accuracy:.2f}%")


                                                                                                                       

✅ Epoch 31 Completed — Loss: 1.9616, Accuracy: 48.31%


                                                                                                                       

✅ Epoch 32 Completed — Loss: 1.7702, Accuracy: 52.80%


                                                                                                                       

✅ Epoch 33 Completed — Loss: 1.6067, Accuracy: 57.65%


                                                                                                                       

✅ Epoch 34 Completed — Loss: 1.4832, Accuracy: 59.74%


                                                                                                                       

✅ Epoch 35 Completed — Loss: 1.3373, Accuracy: 63.87%


                                                                                                                       

✅ Epoch 36 Completed — Loss: 1.2477, Accuracy: 66.27%


                                                                                                                       

✅ Epoch 37 Completed — Loss: 1.1638, Accuracy: 68.70%


                                                                                                                       

✅ Epoch 38 Completed — Loss: 1.1032, Accuracy: 70.23%


                                                                                                                       

✅ Epoch 39 Completed — Loss: 0.9981, Accuracy: 73.00%


                                                                                                                       

✅ Epoch 40 Completed — Loss: 0.9534, Accuracy: 73.92%


                                                                                                                       

✅ Epoch 41 Completed — Loss: 0.8932, Accuracy: 75.47%


                                                                                                                       

✅ Epoch 42 Completed — Loss: 0.8376, Accuracy: 77.01%


                                                                                                                       

✅ Epoch 43 Completed — Loss: 0.7785, Accuracy: 78.41%


                                                                                                                       

✅ Epoch 44 Completed — Loss: 0.7653, Accuracy: 78.96%


                                                                                                                       

✅ Epoch 45 Completed — Loss: 0.7412, Accuracy: 79.45%


                                                                                                                       

✅ Epoch 46 Completed — Loss: 0.7105, Accuracy: 80.38%


                                                                                                                       

✅ Epoch 47 Completed — Loss: 0.7354, Accuracy: 80.26%


                                                                                                                       

✅ Epoch 48 Completed — Loss: 0.6633, Accuracy: 81.56%


                                                                                                                       

✅ Epoch 49 Completed — Loss: 0.6602, Accuracy: 81.72%


                                                                                                                       

✅ Epoch 50 Completed — Loss: 0.6348, Accuracy: 82.08%




In [37]:
checkpoint = {
    'epoch': num_epochs,
    'model_state_dict': model.state_dict(),
    'optimizer_state_dict': optimizer.state_dict(),
    'loss': avg_loss,
}

torch.save(checkpoint, 'gru_classifier_checkpoint.pth')
print("Checkpoint saved successfully.")


Checkpoint saved successfully.


In [39]:
# Save the entire model
torch.save(model.state_dict(), 'gru_classifier_model.pth')
print("Model saved successfully.")


Model saved successfully.


In [49]:
import os
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader, random_split
from torch.nn.utils.rnn import pad_sequence
from sklearn.metrics import classification_report

# Dataset class (same as before)
class SignLanguageDataset(Dataset):
    def __init__(self, features_root, label_map):
        self.samples = []
        for label in os.listdir(features_root):
            label_dir = os.path.join(features_root, label)
            if os.path.isdir(label_dir):
                for file in os.listdir(label_dir):
                    if file.endswith(".npy"):
                        self.samples.append((os.path.join(label_dir, file), label_map[label]))

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        feature_path, label_idx = self.samples[idx]
        feature = np.load(feature_path)
        return torch.tensor(feature, dtype=torch.float32), torch.tensor(label_idx, dtype=torch.long)

def collate_fn(batch):
    sequences, labels = zip(*batch)
    padded_sequences = pad_sequence(sequences, batch_first=True)
    labels = torch.stack(labels)
    return padded_sequences, labels

# Load your dataset
dataset = SignLanguageDataset(features_root, label_map)

# Split into train/val - we only use val set here
val_ratio = 0.2
val_size = int(len(dataset) * val_ratio)
train_size = len(dataset) - val_size
_, val_dataset = random_split(dataset, [train_size, val_size])

val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False, collate_fn=collate_fn)

# Load your model class and instantiate
model = GRUClassifier(num_classes=len(label_map)).to(device)
model.load_state_dict(torch.load('gru_classifier_model.pth', map_location=device))  # Update path if needed
model.eval()

all_preds, all_labels = [], []

with torch.no_grad():
    for batch_x, batch_y in val_loader:
        batch_x, batch_y = batch_x.to(device), batch_y.to(device)
        outputs = model(batch_x)
        _, preds = torch.max(outputs, dim=1)
        all_preds.extend(preds.cpu().numpy())
        all_labels.extend(batch_y.cpu().numpy())

# Compute accuracy
correct = sum(p == t for p, t in zip(all_preds, all_labels))
accuracy = 100 * correct / len(all_labels)
print(f"Validation Accuracy: {accuracy:.2f}%\n")

# Prepare classification report labels & names
import numpy as np
unique_labels = np.unique(all_labels)
index_to_label = {v: k for k, v in label_map.items()}
target_names = [index_to_label[i] for i in unique_labels]

print("Classification Report:")
print(classification_report(all_labels, all_preds, labels=unique_labels, target_names=target_names))


  model.load_state_dict(torch.load('gru_classifier_model.pth', map_location=device))  # Update path if needed


Validation Accuracy: 89.02%

Classification Report:
                  precision    recall  f1-score   support

           a lot       1.00      0.50      0.67         2
         abdomen       1.00      1.00      1.00         1
            able       0.00      0.00      0.00         1
           about       1.00      1.00      1.00         2
           above       1.00      1.00      1.00         1
          accent       1.00      1.00      1.00         3
          accept       1.00      1.00      1.00         1
        accident       0.75      1.00      0.86         3
      accomplish       1.00      1.00      1.00         2
      accountant       1.00      1.00      1.00         1
          across       1.00      1.00      1.00         1
             act       1.00      1.00      1.00         1
          action       0.00      0.00      0.00         1
        activity       1.00      1.00      1.00         1
           actor       1.00      1.00      1.00         2
           adapt   

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
