In [None]:
from torch.nn import TransformerEncoder, TransformerEncoderLayer
from torchvision.models.video import r3d_18
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

# Convert frames and labels to torch tensors.
# The dtypes are pinned explicitly instead of being inherited from the source
# arrays: the network's parameters are float32 and nn.CrossEntropyLoss takes
# int64 class indices, so e.g. uint8/float64 frames or int32 labels would fail
# at the first forward pass or loss computation.
# torch.tensor (rather than as_tensor/from_numpy) keeps the original copy
# semantics, so the tensors never alias the source arrays.
train_frames_tensor = torch.tensor(train_frames, dtype=torch.float32)
train_labels_tensor = torch.tensor(train_labels, dtype=torch.long)
test_frames_tensor = torch.tensor(test_frames, dtype=torch.float32)
test_labels_tensor = torch.tensor(test_labels, dtype=torch.long)

# Create custom Dataset class
class CustomDataset(Dataset):
    """Map-style dataset that pairs ``frames[i]`` with ``labels[i]``.

    Both containers are stored by reference; nothing is copied or validated.
    """

    def __init__(self, frames, labels):
        """Keep references to the indexable ``frames`` and ``labels`` containers."""
        self.labels = labels
        self.frames = frames

    def __getitem__(self, idx):
        """Return the ``(frames[idx], labels[idx])`` pair for one position."""
        sample = self.frames[idx]
        target = self.labels[idx]
        return sample, target

    def __len__(self):
        """Number of items, taken from the length of ``frames``."""
        return len(self.frames)

# Create train and test dataset objects
train_dataset = CustomDataset(train_frames_tensor, train_labels_tensor)
test_dataset = CustomDataset(test_frames_tensor, test_labels_tensor)

# Create train and test data loaders
batch_size = 32
# Training batches are reshuffled every epoch.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
# Evaluation visits every sample once and accuracy does not depend on the
# order, so the test loader is left unshuffled to keep batches reproducible.
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# Define the model architecture
class HybridModel(nn.Module):
    """R3D-18 per-frame feature extractor followed by a Transformer encoder.

    Each frame is pushed through the 3D CNN as a clip of temporal depth 1,
    giving one feature vector per frame. The per-frame vectors form a sequence
    that is processed by a Transformer encoder, averaged over time and
    classified by a linear layer.

    Args:
        num_classes: Size of the output logit vector.
        num_frames: Nominal clip length. Stored on the instance; ``forward``
            uses the actual length of its input, so other lengths also work.
        embed_dim: Transformer model width. When it differs from the CNN's
            feature width a linear projection is inserted between the two.
        hidden_dim: Width of the Transformer feed-forward sub-layer.
        num_heads: Number of attention heads (must divide ``embed_dim``).
        num_layers: Number of stacked Transformer encoder layers.

    Input to ``forward``:
        ``(batch, frames, channels, height, width)`` float tensor.
        NOTE(review): this assumes channels-first frames with 3 channels, as
        the pretrained R3D-18 stem requires -- confirm that the preprocessing
        stores frames in this layout.

    Output of ``forward``:
        ``(batch, num_classes)`` logits.
    """

    def __init__(self, num_classes, num_frames, embed_dim, hidden_dim, num_heads, num_layers):
        super(HybridModel, self).__init__()

        # 3D Convolutional Neural Network (3DCNN) with pretrained weights.
        # NOTE(review): newer torchvision releases prefer the `weights=`
        # argument over `pretrained=`; kept as-is for compatibility.
        self.cnn = r3d_18(pretrained=True)
        # Record the backbone's feature width before dropping its head so the
        # Transformer width does not silently have to match it.
        cnn_feature_dim = self.cnn.fc.in_features
        self.cnn.fc = nn.Identity()

        # Bridge CNN features to the Transformer width only when they differ;
        # nn.Identity adds no parameters, so the matching case is unchanged.
        if embed_dim == cnn_feature_dim:
            self.feature_proj = nn.Identity()
        else:
            self.feature_proj = nn.Linear(cnn_feature_dim, embed_dim)

        # Transformer Network (sequence-first layout: (frames, batch, embed))
        self.transformer_encoder = TransformerEncoder(
            TransformerEncoderLayer(embed_dim, num_heads, hidden_dim),
            num_layers
        )

        # Classification layer
        self.classifier = nn.Linear(embed_dim, num_classes)

        self.num_frames = num_frames
        self.embed_dim = embed_dim

    def forward(self, x):
        """Compute class logits for a batch of clips.

        Args:
            x: Tensor of shape ``(batch, frames, channels, height, width)``.

        Returns:
            Tensor of shape ``(batch, num_classes)``.

        Raises:
            ValueError: If ``x`` is not 5-dimensional.
        """
        if x.dim() != 5:
            raise ValueError(
                "expected input of shape (batch, frames, channels, height, width), "
                f"got a tensor with {x.dim()} dimensions"
            )

        batch_size, num_frames = x.size(0), x.size(1)

        # Fold time into the batch axis: (batch * frames, C, H, W).
        # reshape (not view) so non-contiguous inputs are accepted too.
        x = x.reshape(batch_size * num_frames, x.size(2), x.size(3), x.size(4))

        # The 3D CNN needs a 5-D (N, C, D, H, W) batch; every frame becomes a
        # clip of depth 1. Without this axis the 4-D tensor is not a valid
        # batched input for the 3D convolutions.
        x = x.unsqueeze(2)

        # Pass input through 3DCNN -> (batch * frames, cnn_feature_dim)
        features = self.cnn(x)
        features = self.feature_proj(features)

        # Restore the time axis: (batch, frames, embed_dim)
        features = features.view(batch_size, num_frames, -1)

        # Sequence-first layout for the encoder: (frames, batch, embed_dim)
        features = features.permute(1, 0, 2)

        # Pass features through Transformer Encoder
        features = self.transformer_encoder(features)

        # Average pooling over the temporal dimension -> (batch, embed_dim)
        features = torch.mean(features, dim=0)

        # Pass features through classification layer
        logits = self.classifier(features)

        return logits

# Hyper-parameters and model construction
num_classes = 10
num_frames = train_frames.shape[1]
embed_dim = 512
hidden_dim = 2048
num_heads = 8
num_layers = 6
model = HybridModel(
    num_classes=num_classes,
    num_frames=num_frames,
    embed_dim=embed_dim,
    hidden_dim=hidden_dim,
    num_heads=num_heads,
    num_layers=num_layers,
)

# Objective and optimiser
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)

# Training configuration: run on the GPU when one is available
num_epochs = 10
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
model = model.to(device)

for epoch in range(num_epochs):
    # ---- one optimisation pass over the training set ----
    model.train()
    running_loss = 0.0
    for frames, labels in train_loader:
        frames, labels = frames.to(device), labels.to(device)

        optimizer.zero_grad()

        logits = model(frames)
        loss = criterion(logits, labels)

        loss.backward()
        optimizer.step()

        # The criterion returns a batch mean; weight it by the batch size so
        # the epoch figure is a true per-sample average.
        running_loss += frames.size(0) * loss.item()

    epoch_loss = running_loss / len(train_dataset)

    # ---- accuracy on the test set after this epoch ----
    model.eval()
    correct, total = 0, 0

    with torch.no_grad():
        for frames, labels in test_loader:
            frames, labels = frames.to(device), labels.to(device)

            logits = model(frames)
            # Index of the largest logit in every row is the predicted class
            predicted = torch.max(logits.data, 1)[1]

            correct += (predicted == labels).sum().item()
            total += labels.size(0)

    accuracy = 100 * correct / total

    # Per-epoch report
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}, Accuracy: {accuracy:.2f}%")

# Persist the learned parameters
torch.save(model.state_dict(), "hybrid_model.pt")


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import numpy as np

# Load preprocessed data
train_frames = np.load("train_frames.npy")
train_labels = np.load("train_labels.npy")
test_frames = np.load("test_frames.npy")
test_labels = np.load("test_labels.npy")

# Convert data to torch tensors (float32 inputs, int64 class labels)
train_frames_tensor = torch.from_numpy(train_frames).float()
train_labels_tensor = torch.from_numpy(train_labels).long()
test_frames_tensor = torch.from_numpy(test_frames).float()
test_labels_tensor = torch.from_numpy(test_labels).long()

# Create dataset objects.
# torch.utils.data.Dataset is only an abstract base class and accepts no
# constructor arguments, so calling it with tensors raises TypeError.
# TensorDataset is the concrete class that pairs equally long tensors and
# yields (frames, labels) tuples per index.
train_dataset = torch.utils.data.TensorDataset(train_frames_tensor, train_labels_tensor)
test_dataset = torch.utils.data.TensorDataset(test_frames_tensor, test_labels_tensor)

# Create data loaders
batch_size = 32
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
# Evaluation order does not affect accuracy; keep it deterministic.
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# Define model architecture
class Model(nn.Module):
    """3D-convolutional feature extractor + Transformer encoders + linear classifier.

    Args:
        num_classes: Size of the output logit vector.

    Input to ``forward``:
        ``(batch, 3, frames, height, width)`` float tensor.

    Output of ``forward``:
        ``(batch, num_classes)`` logits.
    """

    def __init__(self, num_classes):
        super(Model, self).__init__()
        # Add 3D convolutional layers (padding 1 / stride 1 keep the
        # temporal and spatial sizes unchanged)
        self.conv1 = nn.Conv3d(in_channels=3, out_channels=16, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1))
        self.conv2 = nn.Conv3d(in_channels=16, out_channels=32, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1))
        self.conv3 = nn.Conv3d(in_channels=32, out_channels=64, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1))
        # Add Transformer layers.
        # Encoder-only stacks are used: a full nn.Transformer also needs a
        # target sequence in forward(), which a classifier does not have.
        self.transformer1 = nn.TransformerEncoder(nn.TransformerEncoderLayer(d_model=64, nhead=8), num_layers=2)
        self.transformer2 = nn.TransformerEncoder(nn.TransformerEncoderLayer(d_model=64, nhead=8), num_layers=2)
        # Add linear layer for classification
        self.linear = nn.Linear(in_features=64, out_features=num_classes)

    def forward(self, x):
        """Compute class logits for a batch of clips.

        Args:
            x: Tensor of shape ``(batch, 3, frames, height, width)``.

        Returns:
            Tensor of shape ``(batch, num_classes)``.

        Raises:
            ValueError: If ``x`` is not 5-dimensional.
        """
        if x.dim() != 5:
            raise ValueError(
                "expected input of shape (batch, channels, frames, height, width), "
                f"got a tensor with {x.dim()} dimensions"
            )

        # Forward pass through 3D convolutional layers. A non-linearity is
        # applied after each one; stacked convolutions without it collapse
        # into a single linear map.
        x = torch.relu(self.conv1(x))
        x = torch.relu(self.conv2(x))
        x = torch.relu(self.conv3(x))

        # Average away the spatial axes: (B, 64, T, H, W) -> (B, 64, T)
        x = x.mean(dim=(3, 4))
        # One 64-d token per frame, sequence-first: (T, B, 64)
        x = x.permute(2, 0, 1)

        # Forward pass through Transformer layers
        x = self.transformer1(x)
        x = self.transformer2(x)

        # Pool over time so the classifier always sees 64 features,
        # independent of clip length: (T, B, 64) -> (B, 64)
        x = x.mean(dim=0)
        x = self.linear(x)
        return x

# Instantiate the model.
# The class count is derived from the labels instead of being left as a
# placeholder (an Ellipsis cannot be used as a layer size).
# NOTE(review): assumes labels are 0-based contiguous class indices --
# replace with the known class count of your WLASL subset if that differs.
num_classes = max(int(train_labels_tensor.max()), int(test_labels_tensor.max())) + 1
model = Model(num_classes)

# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Train the model (on the GPU when one is available)
num_epochs = 10
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for frames, labels in train_loader:
        frames = frames.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        outputs = model(frames)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    # Print training loss for each epoch (mean of the per-batch losses)
    print("Epoch {} loss: {:.4f}".format(epoch+1, running_loss/len(train_loader)))

# Evaluate the model
model.eval()
with torch.no_grad():
    correct = 0
    total = 0
    for frames, labels in test_loader:
        frames = frames.to(device)
        labels = labels.to(device)

        outputs = model(frames)
        # Predicted class = index of the largest logit per row. Gradients are
        # already disabled here, so the legacy `.data` access is not needed.
        predicted = outputs.argmax(dim=1)

        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    # Print test accuracy
    print("Test accuracy: {:.2f}%".format(100 * correct / total))

# Save the trained model
torch.save(model.state_dict(), "trained_model.pth")



In [None]:
import torch.nn as nn
import torch.optim as optim

# Define model architecture
class Model(nn.Module):
    """Skeleton of a 3D-conv + Transformer + linear classification network.

    Every layer below is constructed with a literal ``...`` (Ellipsis) as its
    only argument. These are placeholders: real constructor arguments have to
    be filled in before this class can be instantiated.
    """

    def __init__(self):
        super(Model, self).__init__()
        # Add 3D convolutional layers
        # (placeholders -- replace `...` with real channel/kernel settings)
        self.conv1 = nn.Conv3d(...)
        self.conv2 = nn.Conv3d(...)
        self.conv3 = nn.Conv3d(...)
        # Add Transformer layers
        # (placeholders -- replace `...` with real model-width/head settings)
        self.transformer1 = nn.Transformer(...)
        self.transformer2 = nn.Transformer(...)
        # Add linear layer for classification
        # (placeholder -- replace `...` with the input and output feature counts)
        self.linear = nn.Linear(...)

    def forward(self, x):
        """Run ``x`` through the three convolutions, both transformers and the linear layer."""
        # Forward pass through 3D convolutional layers
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        # Forward pass through Transformer layers
        # NOTE(review): each transformer is called with a single argument;
        # nn.Transformer normally takes a source and a target sequence --
        # confirm this call is valid before relying on it.
        x = self.transformer1(x)
        x = self.transformer2(x)
        # Flatten and pass through linear layer for classification
        # (everything after the first axis is collapsed into one axis)
        x = x.view(x.size(0), -1)
        x = self.linear(x)
        return x

# Loss and optimiser.
# `model` is not created in this cell; it has to exist in the kernel
# namespace already when this cell runs.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)


In [None]:
import torch
from torch.utils.data import Dataset, DataLoader
import numpy as np

# Load preprocessed data.
# The directory is named once so it can be changed in a single place; the
# resulting file paths are identical to the previously hard-coded ones.
DATA_DIR = r"C:\Users\Jeffery.st\Desktop\st.Jeffery00\0.projectfiles\archive\processed_frames"
train_frames = np.load(DATA_DIR + r"\train_frames.npy")
train_labels = np.load(DATA_DIR + r"\train_labels.npy")
test_frames = np.load(DATA_DIR + r"\test_frames.npy")
test_labels = np.load(DATA_DIR + r"\test_labels.npy")

# Convert data to torch tensors (float32 inputs, int64 class labels)
train_frames_tensor = torch.from_numpy(train_frames).float()
train_labels_tensor = torch.from_numpy(train_labels).long()
test_frames_tensor = torch.from_numpy(test_frames).float()
test_labels_tensor = torch.from_numpy(test_labels).long()

# Create dataset objects.
# torch.utils.data.Dataset is only an abstract base class and accepts no
# constructor arguments, so calling it with `frames=`/`labels=` raises
# TypeError. TensorDataset is the concrete class that pairs equally long
# tensors and yields (frames, labels) tuples per index.
train_dataset = torch.utils.data.TensorDataset(train_frames_tensor, train_labels_tensor)
test_dataset = torch.utils.data.TensorDataset(test_frames_tensor, test_labels_tensor)

# Create data loaders
batch_size = 32
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
# Evaluation order does not affect accuracy; keep it deterministic.
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)


In [None]:
import cv2

# Function to extract frames from a video file
def extract_frames(video_path):
    """Decode every frame of a video file into a list.

    Args:
        video_path: Path handed to ``cv2.VideoCapture``.

    Returns:
        list: The frames in the order they were read, exactly as returned by
        ``cap.read()``. The list is empty when the capture never reports
        itself as opened or the very first read fails.
    """
    frames = []
    cap = cv2.VideoCapture(video_path)
    try:
        while cap.isOpened():
            ret, frame = cap.read()
            # A false flag means no further frame could be read.
            if not ret:
                break
            frames.append(frame)
    finally:
        # Always free the capture handle, even if decoding raises part-way.
        cap.release()
    return frames

# Iterate over the rows of features_df DataFrame.
# `features_df` and `main_path` are not defined in this cell; both must
# already exist in the kernel namespace.
# NOTE(review): iterrows() yields each row as a Series, which may change the
# dtype of `video_id` in an all-numeric frame -- confirm the ids format as
# expected in the file name.
for index, row in features_df.iterrows():
    video_id = row['video_id']
    # The path is built by plain concatenation, so `main_path` has to end
    # with a path separator.
    video_path = f'{main_path}videos/{video_id}.mp4'

    # Load and decode the video file
    # (`frames` is rebound on every iteration, so only the last video's
    # frames remain once the loop has finished)
    frames = extract_frames(video_path)


    # ...
