In [1]:
import os
import shutil
from collections import defaultdict
from sklearn.model_selection import train_test_split

# Path to the folder containing the 150 video files
SOURCE_DIR = 'C:/Users/wafab/OneDrive/Desktop/OSL_Wafa/videos'  
DEST_DIR = 'C:/Users/wafab/OneDrive/Desktop/OSL_Wafa'  # Destination folder to create train/test folders

# Create destination directory structure
os.makedirs(os.path.join(DEST_DIR, 'train'), exist_ok=True)
os.makedirs(os.path.join(DEST_DIR, 'test'), exist_ok=True)

# Group videos by prefix before underscore (e.g., "ajudar" from "ajudar_ne_1")
grouped_videos = defaultdict(list)
for file in os.listdir(SOURCE_DIR):
    if file.endswith(('.mp4', '.avi', '.mov')):  # Add other video extensions if needed
        prefix = file.split('_')[0]
        grouped_videos[prefix].append(file)

# For each class/prefix, split into train/test and copy files
for class_name, files in grouped_videos.items():
    # Sort to ensure consistent splitting
    files.sort()
    
    train_files, test_files = train_test_split(files, test_size=0.3, random_state=42)
    
    # Create class folders in train and test directories
    train_class_dir = os.path.join(DEST_DIR, 'train', class_name)
    test_class_dir = os.path.join(DEST_DIR, 'test', class_name)
    os.makedirs(train_class_dir, exist_ok=True)
    os.makedirs(test_class_dir, exist_ok=True)
    
    # Copy files to train folder
    for f in train_files:
        shutil.copy(os.path.join(SOURCE_DIR, f), os.path.join(train_class_dir, f))
    
    # Copy files to test folder
    for f in test_files:
        shutil.copy(os.path.join(SOURCE_DIR, f), os.path.join(test_class_dir, f))

print("Dataset structure created successfully.")

Dataset structure created successfully.


In [2]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt
import os

dataset_path = os.listdir('C:/Users/wafab/OneDrive/Desktop/OSL_Wafa/train')

label_types = os.listdir('C:/Users/wafab/OneDrive/Desktop/OSL_Wafa/train')
print (label_types)  

['ajudar', 'animal', 'aniversário', 'ano', 'banana', 'banheiro', 'bebê', 'cabeça', 'café', 'carne', 'casa', 'cebola', 'comer', 'cortar', 'crescer', 'família', 'filho', 'garganta', 'homem', 'jovem', 'ouvir', 'pai', 'sopa', 'sorvete', 'vagina']


In [3]:
rooms = []

for item in dataset_path:
 # Get all the file names
 all_rooms = os.listdir('C:/Users/wafab/OneDrive/Desktop/OSL_Wafa/train' + '/' +item)

 # Add them to the list
 for room in all_rooms:
    rooms.append((item, str('C:/Users/wafab/OneDrive/Desktop/OSL_Wafa/train' + '/' +item) + '/' + room))
    
# Build a dataframe        
train_df = pd.DataFrame(data=rooms, columns=['tag', 'video_name'])
print(train_df.head())
print(train_df.tail())

      tag                                         video_name
0  ajudar  C:/Users/wafab/OneDrive/Desktop/OSL_Wafa/train...
1  ajudar  C:/Users/wafab/OneDrive/Desktop/OSL_Wafa/train...
2  ajudar  C:/Users/wafab/OneDrive/Desktop/OSL_Wafa/train...
3  ajudar  C:/Users/wafab/OneDrive/Desktop/OSL_Wafa/train...
4  animal  C:/Users/wafab/OneDrive/Desktop/OSL_Wafa/train...
        tag                                         video_name
95  sorvete  C:/Users/wafab/OneDrive/Desktop/OSL_Wafa/train...
96   vagina  C:/Users/wafab/OneDrive/Desktop/OSL_Wafa/train...
97   vagina  C:/Users/wafab/OneDrive/Desktop/OSL_Wafa/train...
98   vagina  C:/Users/wafab/OneDrive/Desktop/OSL_Wafa/train...
99   vagina  C:/Users/wafab/OneDrive/Desktop/OSL_Wafa/train...


In [5]:
df = train_df.loc[:,['video_name','tag']]
df
df.to_csv('train.csv')

In [6]:
dataset_path = os.listdir('C:/Users/wafab/OneDrive/Desktop/OSL_Wafa/test')
print(dataset_path)

room_types = os.listdir('C:/Users/wafab/OneDrive/Desktop/OSL_Wafa/test')
print("Types of activities found: ", len(dataset_path))

rooms = []

for item in dataset_path:
 # Get all the file names
 all_rooms = os.listdir('C:/Users/wafab/OneDrive/Desktop/OSL_Wafa/test' + '/' +item)

 # Add them to the list
 for room in all_rooms:
    rooms.append((item, str('C:/Users/wafab/OneDrive/Desktop/OSL_Wafa/test' + '/' +item) + '/' + room))
    
# Build a dataframe        
test_df = pd.DataFrame(data=rooms, columns=['tag', 'video_name'])
print(test_df.head())
print(test_df.tail())

df = test_df.loc[:,['video_name','tag']]
df
df.to_csv('test.csv')

['ajudar', 'animal', 'aniversário', 'ano', 'banana', 'banheiro', 'bebê', 'cabeça', 'café', 'carne', 'casa', 'cebola', 'comer', 'cortar', 'crescer', 'família', 'filho', 'garganta', 'homem', 'jovem', 'ouvir', 'pai', 'sopa', 'sorvete', 'vagina']
Types of activities found:  25
           tag                                         video_name
0       ajudar  C:/Users/wafab/OneDrive/Desktop/OSL_Wafa/test/...
1       ajudar  C:/Users/wafab/OneDrive/Desktop/OSL_Wafa/test/...
2       animal  C:/Users/wafab/OneDrive/Desktop/OSL_Wafa/test/...
3       animal  C:/Users/wafab/OneDrive/Desktop/OSL_Wafa/test/...
4  aniversário  C:/Users/wafab/OneDrive/Desktop/OSL_Wafa/test/...
        tag                                         video_name
45     sopa  C:/Users/wafab/OneDrive/Desktop/OSL_Wafa/test/...
46  sorvete  C:/Users/wafab/OneDrive/Desktop/OSL_Wafa/test/...
47  sorvete  C:/Users/wafab/OneDrive/Desktop/OSL_Wafa/test/...
48   vagina  C:/Users/wafab/OneDrive/Desktop/OSL_Wafa/test/...
49   vagina  C:

In [11]:
import os
import cv2
import torch
import imageio
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os

In [12]:
# Check if CUDA is available and limit GPU memory usage
if torch.cuda.is_available():
    # Set device
    device = torch.device("cuda:0")
    print(f"Using GPU: {torch.cuda.get_device_name(0)}")

    # Limit GPU memory usage (PyTorch doesn't support hard limits like TensorFlow, but we can reserve less memory manually)
    # One way to simulate limiting memory usage is to set a max batch size or pre-allocate a smaller chunk
    torch.cuda.empty_cache()  # Clears unused memory
    # NOTE: PyTorch dynamically allocates memory. Manual limiting needs custom allocator changes or use of smaller batch sizes.
else:
    device = torch.device("cpu")
    print("Using CPU")

Using CPU


In [13]:
train_df = pd.read_csv("train.csv")
test_df = pd.read_csv("test.csv")

print(f"Total videos for training: {len(train_df)}")
print(f"Total videos for testing: {len(test_df)}")


train_df.sample(10)

Total videos for training: 100
Total videos for testing: 50


Unnamed: 0.1,Unnamed: 0,video_name,tag
59,59,C:/Users/wafab/OneDrive/Desktop/OSL_Wafa/train...,crescer
78,78,C:/Users/wafab/OneDrive/Desktop/OSL_Wafa/train...,jovem
85,85,C:/Users/wafab/OneDrive/Desktop/OSL_Wafa/train...,pai
66,66,C:/Users/wafab/OneDrive/Desktop/OSL_Wafa/train...,filho
72,72,C:/Users/wafab/OneDrive/Desktop/OSL_Wafa/train...,homem
47,47,C:/Users/wafab/OneDrive/Desktop/OSL_Wafa/train...,cebola
40,40,C:/Users/wafab/OneDrive/Desktop/OSL_Wafa/train...,casa
18,18,C:/Users/wafab/OneDrive/Desktop/OSL_Wafa/train...,banana
25,25,C:/Users/wafab/OneDrive/Desktop/OSL_Wafa/train...,bebê
61,61,C:/Users/wafab/OneDrive/Desktop/OSL_Wafa/train...,família


In [14]:
import cv2
import torch
import numpy as np
from torchvision import transforms

IMG_SIZE = 224

def crop_center_square(frame):
    y, x = frame.shape[:2]
    min_dim = min(y, x)
    start_x = (x - min_dim) // 2
    start_y = (y - min_dim) // 2
    return frame[start_y:start_y + min_dim, start_x:start_x + min_dim]

def load_video(path, max_frames=0, resize=(IMG_SIZE, IMG_SIZE)):
    cap = cv2.VideoCapture(path)
    frames = []

    transform = transforms.Compose([
        transforms.ToTensor(),  # Converts to [0, 1] and moves channel to first dim (C, H, W)
        transforms.Resize(resize),  # Resizes image 
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ])

    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            frame = crop_center_square(frame)
            frame = cv2.resize(frame, resize)
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)  # Convert BGR to RGB

            # Apply transformation and add to list
            frame_tensor = transform(frame)
            frames.append(frame_tensor)

            if max_frames and len(frames) >= max_frames:
                break
    finally:
        cap.release()

    # Stack frames into a single tensor: shape (T, C, H, W)
    return torch.stack(frames) if frames else torch.empty(0)


In [18]:
import torch
import torch.nn as nn
from torchvision.models import inception_v3, Inception_V3_Weights
import torchvision.transforms as transforms

class FeatureExtractor(nn.Module):
    def __init__(self):
        super(FeatureExtractor, self).__init__()
        weights = Inception_V3_Weights.DEFAULT
        self.model = inception_v3(weights=weights)
        self.model.fc = nn.Identity()  # Output will be (B, 2048)

        for param in self.model.parameters():
            param.requires_grad = False


        self.preprocess = transforms.Compose([
            transforms.Resize((299, 299)),  # Inception V3 expects this size
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ])



    def forward(self, x):
        return self.model(x)

In [21]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder

# Assuming train_df is a pandas DataFrame with a "tag" column
label_encoder = LabelEncoder()

# Fit the label encoder on unique tags
label_encoder.fit(np.unique(train_df["tag"]))

# View the vocabulary (same as StringLookup.get_vocabulary())
print(list(label_encoder.classes_))

# Encode the labels
labels = train_df["tag"].values
encoded_labels = label_encoder.transform(labels)

# Optionally convert to torch tensor
import torch
encoded_labels = torch.tensor(encoded_labels, dtype=torch.long)

['ajudar', 'animal', 'aniversário', 'ano', 'banana', 'banheiro', 'bebê', 'cabeça', 'café', 'carne', 'casa', 'cebola', 'comer', 'cortar', 'crescer', 'família', 'filho', 'garganta', 'homem', 'jovem', 'ouvir', 'pai', 'sopa', 'sorvete', 'vagina']


In [23]:
#Define hyperparameters

IMG_SIZE = 299
BATCH_SIZE = 64
EPOCHS = 100

MAX_SEQ_LENGTH = 20
NUM_FEATURES = 2048

In [25]:
feature_extractor = FeatureExtractor().to(device)

In [27]:
import torch
import numpy as np
import os  

from PIL import Image  # Required if frames are PIL images

def prepare_all_videos(df, root_dir):
    num_samples = len(df)
    video_paths = df["video_name"].values.tolist()
    labels = df["tag"].values

    encoded_labels = label_encoder.transform(labels)
    encoded_labels = torch.tensor(encoded_labels, dtype=torch.long)

    frame_features = torch.zeros((num_samples, MAX_SEQ_LENGTH, NUM_FEATURES), dtype=torch.float32)
    frame_masks = torch.zeros((num_samples, MAX_SEQ_LENGTH), dtype=torch.bool)

    feature_extractor.eval()
    with torch.no_grad():
        for idx, path in enumerate(video_paths):
            full_path = os.path.join(root_dir, path)
            frames = load_video(full_path, max_frames=MAX_SEQ_LENGTH)  # List of PIL images or tensors

            video_length = len(frames)
            length = min(video_length, MAX_SEQ_LENGTH)
            frame_masks[idx, :length] = 1

            for j in range(length):
                frame = frames[j]
                
                # Convert to PIL if it's a tensor (in case load_video returns torch tensor)
                if isinstance(frame, torch.Tensor):
                    frame = transforms.ToPILImage()(frame)

                frame = feature_extractor.preprocess(frame).unsqueeze(0).to(device)
                features = feature_extractor(frame)
                frame_features[idx, j, :] = features.squeeze(0).cpu()

    return (frame_features, frame_masks), encoded_labels

In [29]:
train_data, train_labels = prepare_all_videos(train_df, "train")
test_data, test_labels = prepare_all_videos(test_df, "test")

print(f"Frame features in train set: {train_data[0].shape}")  # (num_train, 20, 2048)
print(f"Frame masks in train set: {train_data[1].shape}")     # (num_train, 20)

print(f"train_labels shape: {train_labels.shape}")            # (num_train,)
print(f"test_labels shape: {test_labels.shape}")              # (num_test,)


Frame features in train set: torch.Size([100, 20, 2048])
Frame masks in train set: torch.Size([100, 20])
train_labels shape: torch.Size([100])
test_labels shape: torch.Size([50])


In [30]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.metrics import accuracy_score

# Define the sequence model class
class VideoClassifier(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim):
        super(VideoClassifier, self).__init__()
        self.gru1 = nn.GRU(input_dim, 16, batch_first=True)
        self.gru2 = nn.GRU(16, 8, batch_first=True)
        self.dropout = nn.Dropout(0.4)
        self.fc1 = nn.Linear(8, 8)
        self.fc2 = nn.Linear(8, output_dim)
    
    def forward(self, x, mask):
        # Apply GRU layers
        x, _ = self.gru1(x)
        x, _ = self.gru2(x)

        # Apply dropout
        x = self.dropout(x)

        # Apply a fully connected layer
        x = self.fc1(x[:, -1, :])  # Use only the output of the last timestep
        x = self.fc2(x)

        return x

# Define the training and evaluation functions
def run_experiment(train_data, train_labels, test_data, test_labels, model, epochs=30, batch_size=32, lr=0.001):
    # Create DataLoader for train and test
    train_dataset = TensorDataset(train_data, train_labels, train_data)  # Adding mask as dummy for DataLoader
    test_dataset = TensorDataset(test_data, test_labels, test_data)

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    # Model, Loss, Optimizer
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = model.to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)

    best_acc = 0
    for epoch in range(epochs):
        model.train()
        running_loss = 0.0
        for batch_idx, (data, labels, mask) in enumerate(train_loader):
            data, labels, mask = data.to(device), labels.to(device), mask.to(device)

            optimizer.zero_grad()
            outputs = model(data, mask)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()

        # Evaluate the model after every epoch
        model.eval()
        all_preds = []
        all_labels = []
        with torch.no_grad():
            for data, labels, mask in test_loader:
                data, labels, mask = data.to(device), labels.to(device), mask.to(device)
                outputs = model(data, mask)
                _, preds = torch.max(outputs, 1)
                all_preds.append(preds.cpu().numpy())
                all_labels.append(labels.cpu().numpy())

        # Flatten the predictions and labels
        all_preds = np.concatenate(all_preds)
        all_labels = np.concatenate(all_labels)
        accuracy = accuracy_score(all_labels, all_preds)
        
        # Save the best model
        if accuracy > best_acc:
            best_acc = accuracy
            torch.save(model.state_dict(), './video_classifier_best.pth')

        print(f"Epoch {epoch+1}/{epochs}, Loss: {running_loss/len(train_loader):.4f}, Test Accuracy: {accuracy * 100:.2f}%")

    # Load the best model for final evaluation
    model.load_state_dict(torch.load('./video_classifier_best.pth'))
    model.eval()
    return model

# Example usage (assuming train_data, train_labels, test_data, test_labels are PyTorch tensors)
input_dim = NUM_FEATURES  # e.g., 2048 for InceptionV3
hidden_dim = 16  # This is the size of the hidden layer in GRU
output_dim = len(label_encoder.classes_)  # Number of class labels

model = VideoClassifier(input_dim, hidden_dim, output_dim)
trained_model = run_experiment(train_data[0], train_labels, test_data[0], test_labels, model, epochs=30)

Epoch 1/30, Loss: 3.3302, Test Accuracy: 2.00%
Epoch 2/30, Loss: 3.2461, Test Accuracy: 6.00%
Epoch 3/30, Loss: 3.2298, Test Accuracy: 2.00%
Epoch 4/30, Loss: 3.2001, Test Accuracy: 2.00%
Epoch 5/30, Loss: 3.1856, Test Accuracy: 4.00%
Epoch 6/30, Loss: 3.2281, Test Accuracy: 8.00%
Epoch 7/30, Loss: 3.1892, Test Accuracy: 4.00%
Epoch 8/30, Loss: 3.1758, Test Accuracy: 6.00%
Epoch 9/30, Loss: 3.2089, Test Accuracy: 4.00%
Epoch 10/30, Loss: 3.1802, Test Accuracy: 4.00%
Epoch 11/30, Loss: 3.1707, Test Accuracy: 2.00%
Epoch 12/30, Loss: 3.1670, Test Accuracy: 6.00%
Epoch 13/30, Loss: 3.1351, Test Accuracy: 2.00%
Epoch 14/30, Loss: 3.1267, Test Accuracy: 4.00%
Epoch 15/30, Loss: 3.1556, Test Accuracy: 4.00%
Epoch 16/30, Loss: 3.1777, Test Accuracy: 4.00%
Epoch 17/30, Loss: 3.0852, Test Accuracy: 4.00%
Epoch 18/30, Loss: 3.1349, Test Accuracy: 6.00%
Epoch 19/30, Loss: 3.1264, Test Accuracy: 8.00%
Epoch 20/30, Loss: 3.0955, Test Accuracy: 6.00%
Epoch 21/30, Loss: 3.0698, Test Accuracy: 6.00%
E

In [31]:
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

def evaluate_model(model, test_data, test_labels):
    model.eval()
    all_preds = []
    all_labels = []

    test_dataset = TensorDataset(test_data[0], test_labels, test_data[1])  # (features, labels, masks)
    test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

    with torch.no_grad():
        for data, labels, mask in test_loader:
            data, labels, mask = data.to(device), labels.to(device), mask.to(device)
            outputs = model(data, mask)
            _, preds = torch.max(outputs, 1)
            all_preds.append(preds.cpu().numpy())
            all_labels.append(labels.cpu().numpy())

    # Combine predictions and ground truths
    all_preds = np.concatenate(all_preds)
    all_labels = np.concatenate(all_labels)

    # Overall accuracy
    accuracy = accuracy_score(all_labels, all_preds)
    print(f"\n✅ Final Test Accuracy: {accuracy * 100:.2f}%")

    # Classification report (includes precision, recall, f1, support)
    report = classification_report(all_labels, all_preds, target_names=label_encoder.classes_, output_dict=True)
    
    print("\n📊 Per-Class Test Accuracy:")
    for class_name in label_encoder.classes_:
        correct = report[class_name]["recall"]  # Recall == accuracy for single-label classification
        print(f"  {class_name:<20}: {correct * 100:.2f}%")

    return accuracy

In [32]:
evaluate_model(model, test_data, test_labels)


✅ Final Test Accuracy: 8.00%

📊 Per-Class Test Accuracy:
  ajudar              : 0.00%
  animal              : 0.00%
  aniversário         : 100.00%
  ano                 : 0.00%
  banana              : 0.00%
  banheiro            : 0.00%
  bebê                : 0.00%
  cabeça              : 0.00%
  café                : 0.00%
  carne               : 0.00%
  casa                : 0.00%
  cebola              : 0.00%
  comer               : 0.00%
  cortar              : 0.00%
  crescer             : 0.00%
  família             : 0.00%
  filho               : 0.00%
  garganta            : 0.00%
  homem               : 0.00%
  jovem               : 100.00%
  ouvir               : 0.00%
  pai                 : 0.00%
  sopa                : 0.00%
  sorvete             : 0.00%
  vagina              : 0.00%


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


0.08

In [33]:
import torch
import torch.nn as nn
import numpy as np
import os

# Device configuration
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Define the GRU-based sequence model
class VideoClassifier(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim):
        super(VideoClassifier, self).__init__()
        self.gru1 = nn.GRU(input_dim, 16, batch_first=True)
        self.gru2 = nn.GRU(16, 8, batch_first=True)
        self.dropout = nn.Dropout(0.4)
        self.fc1 = nn.Linear(8, 8)
        self.fc2 = nn.Linear(8, output_dim)
    
    def forward(self, x, mask):
        x, _ = self.gru1(x)
        x, _ = self.gru2(x)
        x = self.dropout(x)
        x = self.fc1(x[:, -1, :])
        x = self.fc2(x)
        return x

# Function to prepare a single video for prediction
def prepare_single_video(frames, feature_extractor, max_seq_length=20):
    if not isinstance(frames, torch.Tensor):
        frames = torch.tensor(frames, dtype=torch.float32)

    frame_mask = torch.zeros((1, max_seq_length), dtype=torch.bool)
    frame_features = torch.zeros((1, max_seq_length, NUM_FEATURES), dtype=torch.float32)

    video_length = frames.shape[0]
    length = min(max_seq_length, video_length)

    with torch.no_grad():
        for j in range(length):
            frame = frames[j].unsqueeze(0).to(device)  # (1, 3, H, W)
            features = feature_extractor(frame)  # (1, 2048)
            frame_features[0, j, :] = features.squeeze(0).cpu()
        frame_mask[0, :length] = 1

    return frame_features, frame_mask

#Function to perform prediction on one video
def sequence_prediction(path, feature_extractor, sequence_model, label_encoder, max_seq_length=20):
    frames = load_video(os.path.join("test", path), max_frames=max_seq_length)  # (T, C, H, W)

    frame_features, frame_mask = prepare_single_video(frames, feature_extractor, max_seq_length)
    frame_features, frame_mask = frame_features.to(device), frame_mask.to(device)

    sequence_model.eval()
    with torch.no_grad():
        logits = sequence_model(frame_features, frame_mask)
        probabilities = torch.softmax(logits, dim=-1).cpu().numpy().flatten()

    class_vocab = label_encoder.classes_
    sorted_indices = np.argsort(probabilities)[::-1]

    print("\nPrediction Probabilities:")
    for idx in sorted_indices:
        print(f"  {class_vocab[idx]}: {probabilities[idx] * 100:5.2f}%")

    return frames  # Optional: use for visualization

# --- Model Setup and Prediction Example ---

# Define model parameters
input_dim = NUM_FEATURES
hidden_dim = 16
output_dim = len(label_encoder.classes_)

# Create and load the trained sequence model
sequence_model = VideoClassifier(input_dim, hidden_dim, output_dim)
sequence_model.load_state_dict(torch.load('./video_classifier_best.pth'))
sequence_model = sequence_model.to(device)

# Run prediction on a random test video
test_video = np.random.choice(test_df["video_name"].values.tolist())
print(f"\nTest video path: {test_video}")

test_frames = sequence_prediction(test_video, feature_extractor, sequence_model, label_encoder)


Test video path: C:/Users/wafab/OneDrive/Desktop/OSL_Wafa/test/ouvir/ouvir_sb_2.mp4

Prediction Probabilities:
  jovem:  6.39%
  cabeça:  6.10%
  aniversário:  5.16%
  banheiro:  4.95%
  família:  4.90%
  café:  4.78%
  casa:  4.67%
  pai:  4.50%
  animal:  4.29%
  cortar:  4.17%
  sopa:  4.07%
  ano:  4.07%
  filho:  3.92%
  ajudar:  3.70%
  crescer:  3.68%
  comer:  3.57%
  sorvete:  3.49%
  banana:  3.43%
  vagina:  3.18%
  carne:  3.12%
  cebola:  3.11%
  ouvir:  2.78%
  homem:  2.69%
  garganta:  2.68%
  bebê:  2.61%


In [59]:
from IPython.display import HTML

HTML("""
    <video width="520" height="440" controls>
        <source src="videos/ouvir_sb_2.mp4" type="video/mp4">
        Your browser does not support the video tag.
    </video>
""")