# Testing on the test set from Auslan Dataset
# Importing Required Libraries

In this section, we import the necessary libraries for:
- **Model building**: `torch`, `torch.nn`, `torchvision.models`
- **Data transformations**: `torchvision.transforms`
- **Image loading and preprocessing**: `PIL.Image`
- **Argument parsing** (though in Jupyter, we won't use command-line arguments directly)


In [8]:
import os
import torch
import torch.nn as nn
from torchvision import transforms, models
from PIL import Image
import wandb
from sklearn.metrics import classification_report, confusion_matrix
from tqdm import tqdm

In [9]:
# Initialize W&B for the testing phase
wandb.init(project="auslan-handsign-classification", entity="srk_2024-the-australian-national-university", job_type="test")


# Image Transformations

We define the same transformations used during validation/testing:
- **Grayscale conversion**: Convert the images to 1-channel grayscale format.
- **Resize**: Resize the images to 224x224 pixels to match the input size required by ResNet.
- **ToTensor**: Convert the image to a PyTorch tensor.
- **Normalization**: Normalize the pixel values to [0, 1] range using mean and standard deviation specific for grayscale images.


In [10]:
# Define the same transformations used for validation/testing
test_transforms = transforms.Compose([
    transforms.Grayscale(num_output_channels=1),  # Convert images to grayscale
    transforms.Resize((224, 224)),  # Resize to match the input size of ResNet
    transforms.ToTensor(),  # Convert to tensor
    transforms.Normalize([0.5], [0.5])  # Normalize for grayscale (1 channel)
])

# Helper Functions to Load and Preprocess Images

1. **load_image**: This function loads a single image from the file path, applies the necessary transformations, and prepares it for model inference.
2. **get_label_from_folder**: Maps folder names to the corresponding class labels. For example, digits (0-9) and letters (A-Z) are mapped to appropriate class indices (0-35).


In [11]:
# Function to load a single image and apply transformations
def load_image(image_path, transform):
    image = Image.open(image_path).convert('RGB')  # Convert to RGB first
    return transform(image).unsqueeze(0)  # Apply transformations and add batch dimension

# Function to map folder names to class labels
def get_label_from_folder(folder_name):
    if folder_name.isdigit():  # For digits (0-9)
        return int(folder_name)
    elif len(folder_name) == 1 and folder_name.isalpha():  # For letters (A-Z)
        return ord(folder_name.upper()) - ord('A') + 10  # Map A-Z to 10-35
    else:
        raise ValueError(f"Unexpected folder name format: {folder_name}")


# Testing the Model on the Test Dataset

This function tests the model on a directory of test images:
- **test_model_on_directory**: This function iterates over the test dataset, loading images, applying transformations, and predicting the class label for each image.
- The accuracy is calculated by comparing predicted labels with true labels from the folder names.


In [12]:
def test_model_on_directory(test_dir, model, device, transform):
    model.eval()  # Set the model to evaluation mode
    correct_predictions = 0
    total_images = 0
    y_true = []
    y_pred = []

    # Get the total number of images for the progress bar
    total_images_to_process = sum(len(files) for _, _, files in os.walk(test_dir) if files)
    
    # Initialize progress bar
    with tqdm(total=total_images_to_process, desc="Testing Progress") as pbar:
        # Iterate through each class folder in the test directory
        for class_folder in os.listdir(test_dir):
            class_folder_path = os.path.join(test_dir, class_folder)
            if not os.path.isdir(class_folder_path):
                continue  # Skip non-directory files

            true_label = get_label_from_folder(class_folder)  # Get the label from folder name

            # Iterate through images in the class folder
            for image_file in os.listdir(class_folder_path):
                image_path = os.path.join(class_folder_path, image_file)
                if image_file.lower().endswith(('.png', '.jpg', '.jpeg')):
                    # Load and preprocess the image
                    image_tensor = load_image(image_path, transform).to(device)

                    # Perform inference
                    with torch.no_grad():
                        outputs = model(image_tensor)
                        _, predicted_label = torch.max(outputs, 1)

                    # Collect predictions and true labels
                    y_pred.append(predicted_label.item())
                    y_true.append(true_label)

                    # Update the correct prediction count
                    if predicted_label.item() == true_label:
                        correct_predictions += 1
                    total_images += 1

                    # Update progress bar
                    pbar.update(1)

            # Log progress after each class folder is processed
            if total_images > 0:
                accuracy = (correct_predictions / total_images) * 100
                classification_report_dict = classification_report(y_true, y_pred, output_dict=True)
                
                # Log progress to W&B after each class folder
                wandb.log({
                    "test_accuracy_progress": accuracy,
                    "precision_progress": classification_report_dict["macro avg"]["precision"],
                    "recall_progress": classification_report_dict["macro avg"]["recall"],
                    "f1-score_progress": classification_report_dict["macro avg"]["f1-score"],
                })

    # Final overall accuracy calculation
    accuracy = (correct_predictions / total_images) * 100 if total_images > 0 else 0

    # Log final classification metrics
    classification_report_dict = classification_report(y_true, y_pred, output_dict=True)
    wandb.log({
        "final_test_accuracy": accuracy,
        "precision": classification_report_dict["macro avg"]["precision"],
        "recall": classification_report_dict["macro avg"]["recall"],
        "f1-score": classification_report_dict["macro avg"]["f1-score"],
        "test_accuracy_wandb": classification_report_dict["accuracy"]
    })

    # Log confusion matrix after all predictions
    wandb.log({"confusion_matrix": wandb.plot.confusion_matrix(probs=None,
                                                               y_true=y_true,
                                                               preds=y_pred,
                                                               class_names=[str(i) for i in range(36)])})

    # Print the final test accuracy
    print(f"Test Accuracy: {accuracy:.2f}% ({correct_predictions}/{total_images} correct predictions)")

    return accuracy


# Model Setup and Loading Weights

1. **Model Architecture**: We load a ResNet-18 model and modify it to accept 1-channel grayscale images by adjusting the `conv1` layer.
2. **Load Model Weights**: The pre-trained model weights are loaded from the specified path (`resnet18_handsign_final.pth`).


In [13]:
# Set the device (GPU or CPU)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load the ResNet-18 model without pretrained weights
model = models.resnet18(weights=None)  # Set pretrained=False to avoid loading any pre-trained weights
model.conv1 = nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3, bias=False)  # Adjust input layer for grayscale
num_classes = 36
model.fc = nn.Sequential(
    nn.Dropout(0.5),
    nn.Linear(model.fc.in_features, 256),
    nn.ReLU(),
    nn.Dropout(0.5),
    nn.Linear(256, num_classes)
)

# Load the trained model weights
model.load_state_dict(torch.load('resnet18_handsign_final.pth', map_location=device))  # Load weights
model = model.to(device)


  model.load_state_dict(torch.load('resnet18_handsign_final.pth', map_location=device))  # Load weights


# Testing the Model

Now that the model is loaded and the test dataset is ready, we can test the model's performance by running `test_model_on_directory`.
The function will print the test accuracy and the number of correct predictions.


In [14]:
# Test the model on the test dataset
test_dir = r"C:\Users\zed20\Documents\Auslan_dataset\dataset_split\test"
test_model_on_directory(test_dir, model, device, test_transforms)

wandb.finish()

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize

Test Accuracy: 96.15% (10282/10694 correct predictions)


0,1
f1-score,▁
f1-score_progress,█▁▁▃▄▄▁▁▂▃▂▂▂▂▃▃▃▄▄▄▄▄▄▅▅▅▆▆▇▆▆▇▇▇▇▇
final_test_accuracy,▁
precision,▁
precision_progress,█▁▁▃▄▄▁▁▂▃▂▂▂▂▃▃▃▄▄▄▄▄▄▅▅▅▆▆▇▆▆▇▇▇▇▇
recall,▁
recall_progress,█▁▁▃▄▄▁▁▂▃▂▂▂▂▃▃▃▄▄▄▄▄▄▅▅▅▆▆▇▆▆▇▇▇▇▇
test_accuracy_progress,█▇▅▄▂▃▃▁▁▂▂▂▂▃▃▃▄▄▄▄▄▄▃▃▃▃▄▄▄▄▃▃▃▃▃▃
test_accuracy_wandb,▁

0,1
f1-score,0.96157
f1-score_progress,0.96157
final_test_accuracy,96.14737
precision,0.96299
precision_progress,0.96299
recall,0.96175
recall_progress,0.96175
test_accuracy_progress,96.14737
test_accuracy_wandb,0.96147
