# `Setup`

In [20]:
import numpy as np
import os
import torch
import time
import torch
from torch import nn
import torchvision
torchvision.disable_beta_transforms_warning()
import torchvision.transforms as T
from torchvision.transforms import v2
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
import kornia
from IPython import display
import cv2 as cv

os.environ["COLAB"] = "False"
# Changing directory into aml_itu
if os.getcwd().split('/')[-1] != 'aml_itu': os.chdir(os.path.abspath('.').split('aml_itu/')[0]+'aml_itu')

from utils.helpers import *
from utils.StatefarmPytorchDataset import StateFarmDataset

# Setting up device
if torch.cuda.is_available():
    device = torch.device("cuda")
    print (f"GPU is available")
elif torch.backends.mps.is_available():
    device = torch.device("mps")
    print('MPS device found.')
else:
    print ("No GPU available, using CPU instead")
    
    
from utils.models.EfficientNet import EfficientNet
from utils.models.TinyVGG import TinyVGG
from utils.pipelines.image_transformation import image_transformer

MPS device found.


##### Helpers, utils, variables, etc.

In [21]:
# Load config
config = load_config(eval(os.environ["COLAB"]))

# IMG Transformations
augmentations =  v2.Compose([
    T.Resize((168, 224), antialias=True),
    v2.ToDtype(torch.float32, scale=True)])

# Initialize variables
frame_rate = 10  # frames per second
duration = 20  # duration of video in seconds
image_folder = f"{config['outputs']}/tmp_video_generator"  # Folder to save images
video_name = 'predictions.mp4'  # Output video name

# Ensure the folder for images exists
os.makedirs(image_folder, exist_ok=True)

# IMG Transformations
augmentations =  v2.Compose([
    T.Resize((168, 224), antialias=True),
    v2.ToDtype(torch.float32, scale=True)])

# Generate cam
cam = cv.VideoCapture(0)

# Extract frame from camera
def get_frame(cam):
    """Capture frame from webcam"""
    _, frame = cam.read() 
    frame = cv.cvtColor(frame, cv.COLOR_BGR2RGB)
    
    # Transform to tensor 
    frame = kornia.image_to_tensor(frame)
    return frame

def batch_image(frame):
    """Transform image to batch"""
    frame = frame.unsqueeze(0)
    return frame

In [4]:
class weighted_prediction:
    def __init__(self, config, n=10):
        """Class takes pytorch predictions as input and outputs a weighted prediction over the last n frames"""
        self.n = n
        self.config = config
        self.predictions = None
        self.weighted_predictions = []

    # Append prediction to list
    def __call__(self, prediction):
        """Performs the weighted average"""
        # Append prediction to list
        if self.predictions is None:
            self.predictions = prediction.detach().cpu().numpy()
        else:
            self.predictions = np.vstack((self.predictions, prediction.detach().cpu().numpy()))

        # If their arent enought predictions, return nothing
        if self.predictions.shape[0] < self.n:
            self.weighted_predictions.append(None)
            return None
        
        # Else return the weighted prediction over the last n frames
        else:
            self.weighted_predictions.append(np.argmax(self.predictions[-self.n:, :].mean(axis=0)))
            return self.weighted_predictions[-1]
            
    def map_labels(self, prediction):
        """Maps the prediction to the correct class"""
        if prediction is None:
            return 'Out of scope'
        return config['dataset']['class_mapping'][f'c{prediction}']

##### Load Model

EfficientNet

In [22]:
last_efficientnet_model

'EfficientNet_b0_AdamW_20231204_103512_epoch_10.pt'

: 

In [5]:
# Path and directory files
efficinetnet_path = os.path.join(config['outputs']['path'], 'EfficientNet_b0_AdamW')
efficientnet_models = os.listdir(os.path.join(config['outputs']['path'], 'EfficientNet_b0_AdamW'))
last_efficientnet_model = sorted(efficientnet_models)[-1]

efficient = EfficientNet()

# Load parameters from last model
efficient.load_state_dict(torch.load(os.path.join(efficinetnet_path, last_efficientnet_model)))

# Set model to eval
efficient = efficient.eval()

TinyVGG

In [7]:
# Path and directory files
TinyVGG_path = os.path.join(config['outputs']['path'], 'TinyVGG_500k')
TinyVGG_file = 'TinyVGG_500k_final.pt'

# initalize model
tinyvgg = TinyVGG()

# Load parameters from last model
tinyvgg.load_state_dict(torch.load(os.path.join(TinyVGG_path, TinyVGG_file)))

# Set model to eval
tinyvgg = tinyvgg.eval()

#### Mean Looper
This class will take in predictions, and return the mean predictions over the last x amount

##### Inference Loop

In [18]:
def inference_loop(model, 
                   image_size,
                   weighted_frames = 10,
                   total_seconds=10,
                   device=device):
    """Inference loop for a given model"""
    
    # Inference loop helpers
    model = model.to(device)
    predictions = weighted_prediction(config, n=weighted_frames)
    start_time = time.time()
    font = cv.FONT_HERSHEY_SIMPLEX
    font_scale = 1  # Font size
    thickness = 2  # Font thickness
    top_border_size = 20
    border_color = (255, 255, 255)  # White color in BGR format
    
    # Create a named window
    cv.namedWindow('Output', cv.WINDOW_NORMAL)

    # Resize the window
    window_width = 800
    window_height = 600
    cv.resizeWindow('Output', window_width, window_height)
    
    while True:
        # Get frame
        frame = get_frame(cam)
        
        # Transform image
        frame = image_transformer(frame, size=image_size)
       
        # Transform to batch
        frame = batch_image(frame)
        frame = frame.to(device)
       
        # Predict
        prediction = model(frame)

        # Append prediction and print time
        prediction_start_time = time.time()
        out = predictions(prediction)
        prediction_end_time = time.time()
        print(f'Prediction time: {prediction_end_time - prediction_start_time}')
        
        # Image to numpy
        frame = cv.cvtColor(frame.squeeze(0).detach().cpu().permute(1, 2, 0).numpy(), cv.COLOR_RGB2BGR)
        
        # Add the border on top
        frame = cv.copyMakeBorder(frame, top=top_border_size, bottom=0, left=0, right=0, 
                                              borderType=cv.BORDER_CONSTANT, value=border_color)
        # Extract prediction
        if out is not None:
            text = predictions.map_labels(out)
        else: 
            text = 'Out of scope'
        
        # Get text size
        textsize = 10

        # Position the text
        print(textsize)
        textX = (frame.shape[1] - textsize) // 2
        textY = 30  # Position the text 30 pixels from the top edge
        
        # Put the text on the image
        cv.putText(frame, text, (textX, textY), font, font_scale, (0,0,255), thickness)
        
        # Show image
        cv.imshow('Output', frame)
        
        
        # If total seconds have passed, break
        if time.time() - start_time > total_seconds:
            break
        
        # Wait for 25 ms and check if the user wants to exit (press 'q')
        if cv.waitKey(25) & 0xFF == ord('q'):
            break
    
    return predictions

In [19]:
inference_predictions = inference_loop(efficient, image_size='L_SQUARED', weighted_frames=10, total_seconds=10000)

Prediction time: 0.012494087219238281
10
Prediction time: 0.0035130977630615234
10
Prediction time: 0.00045418739318847656
10
Prediction time: 0.0008690357208251953
10
Prediction time: 0.000354766845703125
10
Prediction time: 0.00034308433532714844
10
Prediction time: 0.0003402233123779297
10
Prediction time: 0.00038909912109375
10
Prediction time: 0.00032782554626464844
10
Prediction time: 0.0004470348358154297
10
Prediction time: 0.0004019737243652344
10
Prediction time: 0.00045108795166015625
10
Prediction time: 0.00039315223693847656
10
Prediction time: 0.0004360675811767578
10
Prediction time: 0.0004069805145263672
10
Prediction time: 0.00040912628173828125
10
Prediction time: 0.00040340423583984375
10
Prediction time: 0.00043487548828125
10
Prediction time: 0.0004031658172607422
10
Prediction time: 0.0004780292510986328
10
Prediction time: 0.00044989585876464844
10
Prediction time: 0.00040602684020996094
10
Prediction time: 0.0004019737243652344
10
Prediction time: 0.000431299209

KeyboardInterrupt: 