In [1]:
import numpy as np
import os

# Define the relative path from the notebook to the data
data_dir = os.path.join("..", "data", "preprocessed", "equal_distribution")

# Load the NumPy arrays
X_train = np.load(os.path.join(data_dir, "X_train.npy"))
y_train = np.load(os.path.join(data_dir, "y_train.npy"))
X_val = np.load(os.path.join(data_dir, "X_val.npy"))
y_val = np.load(os.path.join(data_dir, "y_val.npy"))

print("Loaded preprocessed data:")
print(f"X_train shape: {X_train.shape}")
print(f"y_train shape: {y_train.shape}")
print(f"X_val shape:   {X_val.shape}")
print(f"y_val shape:   {y_val.shape}")

Loaded preprocessed data:
X_train shape: (7744, 524288)
y_train shape: (7744,)
X_val shape:   (1936, 524288)
y_val shape:   (1936,)


In [2]:
from sklearn.ensemble import RandomForestClassifier
# Train the Random Forest Classifier
print("\nTraining Random Forest Classifier...")
rf_classifier = RandomForestClassifier(n_estimators=100, random_state=42, n_jobs=-1) # n_jobs=-1 uses all available cores
rf_classifier.fit(X_train, y_train)
print("Training complete.")



Training Random Forest Classifier...
Training complete.


In [3]:
from sklearn.metrics import accuracy_score, classification_report
from datasets import SignLangDataset

running_local = True if os.getenv('JUPYTERHUB_USER') is None else False
DATASET_PATH = "."

# Set the location of the dataset
if running_local:
    # If running on your local machine, the sign_lang_train folder's path should be specified here
    local_path = os.path.join('..', '..', 'sign_lang_train')
    if os.path.exists(local_path):
        DATASET_PATH = local_path
else:
    # If running on the Jupyter hub, this data folder is already available
    # You DO NOT need to upload the data!
    DATASET_PATH = "/data/mlproject22/sign_lang_train"

csv_filename = "labels.csv"  # This is your file inside sign_lang_train

# Create dataset
dataset = SignLangDataset(csv_file=csv_filename, root_dir=DATASET_PATH)


In [4]:
# Evaluate the model
print("\nEvaluating the model on the validation set...")
y_pred = rf_classifier.predict(X_val)

accuracy = accuracy_score(y_val, y_pred)
print(f"Validation Accuracy: {accuracy:.4f}")

# Optional: Print a more detailed classification report
print("\nClassification Report:")
# You'll need the original class names for a clearer report
# Assuming your dataset.class_names is available
# For example: class_names = list(map(str, list(range(10)))) + list(ascii_lowercase)
# from string import ascii_lowercase
# full_class_names = list(map(str, list(range(10)))) + list(ascii_lowercase)
print(classification_report(y_val, y_pred, target_names=dataset.class_names))

# You can also save your model if you wish
# import joblib
# joblib.dump(rf_classifier, 'random_forest_model.joblib')
# print("Random Forest model saved.")


Evaluating the model on the validation set...
Validation Accuracy: 0.7273

Classification Report:
              precision    recall  f1-score   support

           0       0.73      0.93      0.82       112
           1       0.88      0.32      0.47        22
           2       0.80      0.18      0.30        22
           3       1.00      0.23      0.37        22
           4       0.68      0.84      0.75       112
           5       0.62      0.22      0.32        23
           6       0.58      0.84      0.68       112
           7       1.00      0.18      0.31        22
           8       1.00      0.18      0.30        34
           9       0.70      0.92      0.79       112
           a       0.62      0.23      0.33        22
           b       0.89      0.88      0.88        56
           c       0.86      0.96      0.91       112
           d       1.00      0.32      0.49        34
           e       0.91      0.43      0.59        23
           f       1.00      0.35   

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [5]:
import joblib

# --- Saving the model ---
model_filename = 'random_forest_64x64_distributed_data.joblib'
joblib.dump(rf_classifier, model_filename) # Replace rf_model with rf_model_hog or rf_model_deep if you used those
print(f"Model saved to {model_filename}")

Model saved to random_forest_64x64_distributed_data.joblib


### different method, cnn pretrained that will extract the feautures from the images ###

In [6]:
import torch
import torchvision.models as models
import torchvision.transforms as transforms

# Load VGG16 pretrained on ImageNet
vgg = models.vgg16(pretrained=True)

# Remove the classifier head, keep only convolutional features
feature_extractor = torch.nn.Sequential(*list(vgg.features.children()))
feature_extractor.eval()  # Set to eval mode

Downloading: "https://download.pytorch.org/models/vgg16-397923af.pth" to C:\Users\frala/.cache\torch\hub\checkpoints\vgg16-397923af.pth
100%|██████████| 528M/528M [00:45<00:00, 12.3MB/s] 


Sequential(
  (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (1): ReLU(inplace=True)
  (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (3): ReLU(inplace=True)
  (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (6): ReLU(inplace=True)
  (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (8): ReLU(inplace=True)
  (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (11): ReLU(inplace=True)
  (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (13): ReLU(inplace=True)
  (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (15): ReLU(inplace=True)
  (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (17): Conv2d(256, 512, kernel_si

In [None]:
### Create a transformer with the vgg cnn model to create a new dataset instance
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])

transform_vgg = transforms.Compose([
    transforms.ToPILImage(),      # Convert numpy array to PIL
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    normalize
])

In [12]:
from torch.utils.data import DataLoader
# Create dataset
vgg_dataset  = SignLangDataset(csv_file=csv_filename, root_dir=DATASET_PATH, transform=transform_vgg)

# And a dataloader for batching
vgg_loader = DataLoader(vgg_dataset, batch_size=32, shuffle=False)

In [13]:
from tqdm import tqdm
# Use GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
feature_extractor.to(device)

X_features = []
y_labels = []

print("Extracting features using VGG...")
with torch.no_grad():
    for batch in tqdm(vgg_loader):
        images = batch['image'].to(device)  # [B, 3, 224, 224]
        labels = batch['label']

        # Forward pass through the CNN
        features = feature_extractor(images)  # [B, C, H, W]
        features = torch.flatten(features, start_dim=1)  # [B, C*H*W]

        X_features.append(features.cpu().numpy())
        y_labels.extend(labels.numpy())

# Combine all batches into full arrays
X_vgg = np.concatenate(X_features, axis=0)
y_vgg = np.array(y_labels)

Extracting features using VGG...


  0%|          | 0/303 [00:00<?, ?it/s]


RuntimeError: Input type (unsigned char) and bias type (float) should be the same