In [13]:
# Import necessary libraries for image and video processing
import torch  # PyTorch for deep learning
from torch.autograd import Variable  # For creating variable tensors
from PIL import Image  # Python Imaging Library for image processing
import glob  # For file path manipulation and searching
import cv2  # OpenCV for video processing

# Import user-defined modules for model utilities and components
import import_ipynb  # Import .ipynb notebooks as modules (if necessary)
from Model_Utils import classes, transform, transform_PIL, model  # Import model-related components

In [14]:
# Load the saved weights from 'Best_Checkpoint_Cross.model'.
# map_location='cpu' lets a checkpoint that was written on a GPU machine be
# deserialized on a CPU-only machine as well; without it torch.load tries to
# restore the tensors onto the device they were saved from.
# NOTE(review): torch.load unpickles the file, so only load checkpoints that
# come from a trusted source.
checkpoint = torch.load('Best_Checkpoint_Cross.model', map_location='cpu')

# Copy the stored weights into the model imported from Model_Utils.
# NOTE(review): this assumes the file holds a bare state_dict (not a dict that
# wraps one under a key) - confirm against the training notebook.
model.load_state_dict(checkpoint)

# Switch to evaluation mode: dropout is turned off and batch-norm layers use
# their stored running statistics instead of per-batch statistics.
# Kept as the last expression so the cell still displays the model summary.
model.eval()

ConvNet(
  (conv1): Conv2d(3, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn1): BatchNorm2d(12, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu1): ReLU()
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(12, 20, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (relu2): ReLU()
  (conv3): Conv2d(20, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn3): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu3): ReLU()
  (fc): Linear(in_features=819200, out_features=4, bias=True)
)

In [15]:
# Define a prediction function for single images
def Image_Prediction(image, transform):
    """Classify a single image with the module-level ``model``.

    Args:
        image: Any object accepted by ``transform``.
        transform: Callable that converts ``image`` into a tensor without a
            batch dimension; the batch dimension is added here.

    Returns:
        The entry of the module-level ``classes`` sequence at the index of the
        largest value in the model output.
    """
    # Preprocess, cast to float and add the leading batch dimension.
    batch = transform(image).float().unsqueeze(0)

    # Run on whatever device the model already lives on. Moving only the input
    # to CUDA (as before) either mismatched a CPU model or produced a CUDA
    # output that could not be converted to NumPy.
    first_param = next(model.parameters(), None)
    if first_param is not None:
        batch = batch.to(first_param.device)

    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        output = model(batch)

    # Flattened argmax, taken on the tensor so it works on any device.
    index = int(output.argmax())

    return classes[index]


In [16]:
# Define a function for making predictions on video frames using a pre-trained model
def Video_Prediction(frame, transform, model):
    """Classify one video frame with the given model.

    Args:
        frame: Any object accepted by ``transform``.
            NOTE(review): if frames come straight from OpenCV they are
            presumably BGR arrays - confirm ``transform`` handles that.
        transform: Callable that converts ``frame`` into a tensor without a
            batch dimension; the batch dimension is added here.
        model: ``torch.nn.Module`` used for the forward pass. When CUDA is
            available the module is moved to the GPU (in place, so the
            caller's object is affected too).

    Returns:
        The entry of the module-level ``classes`` sequence at the index of the
        largest value in the model output.
    """
    # Preprocess, cast to float and add the leading batch dimension.
    batch = transform(frame).float().unsqueeze(0)

    # Use the GPU when one is present; both input and model must share a device.
    if torch.cuda.is_available():
        batch = batch.cuda()
        model = model.cuda()

    # Inference only: without no_grad every frame would build an autograd
    # graph that is never used.
    with torch.no_grad():
        output = model(batch)

    # Flattened argmax, taken on the tensor so no explicit CPU copy is needed.
    index = int(output.argmax())

    return classes[index]