In [7]:
import os
import torch
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
import seaborn as sns
import matplotlib.pyplot as plt
from PIL import Image
from torchvision import transforms

In [8]:


class ConvNet(nn.Module):
    """Four-stage convolutional classifier returning per-class log-probabilities.

    Every stage applies Conv2d (3x3, padding 1) -> BatchNorm2d -> leaky ReLU ->
    2x2 max-pool -> Dropout2d(0.25); the channel widths are 32, 64, 128 and 256.
    The flattened feature map then passes through a 512-unit hidden layer and a
    final linear layer with ``num_classes`` outputs.

    Args:
        input_chanels: number of channels in the input images.
        image_size: side length of the square input images; each of the four
            pooling steps halves it, which fixes the input width of ``fc1``.
        num_classes: number of output classes.
    """

    def __init__(self, input_chanels, image_size, num_classes):
        super().__init__()

        # Stage 1
        self.conv1 = nn.Conv2d(input_chanels, 32, 3, stride=1, padding=1)
        self.bn1 = nn.BatchNorm2d(32)
        self.dropout1 = nn.Dropout2d(0.25)

        # Stage 2
        self.conv2 = nn.Conv2d(32, 64, 3, stride=1, padding=1)
        self.bn2 = nn.BatchNorm2d(64)
        self.dropout2 = nn.Dropout2d(0.25)

        # Stage 3
        self.conv3 = nn.Conv2d(64, 128, 3, stride=1, padding=1)
        self.bn3 = nn.BatchNorm2d(128)
        self.dropout3 = nn.Dropout2d(0.25)

        # Stage 4
        self.conv4 = nn.Conv2d(128, 256, 3, stride=1, padding=1)
        self.bn4 = nn.BatchNorm2d(256)
        self.dropout4 = nn.Dropout2d(0.25)

        # One pooling layer is shared by all four stages.
        self.pool = nn.MaxPool2d(2, 2)

        # Four halvings shrink the spatial side by a factor of 16.
        side = image_size // 16
        self.fc1 = nn.Linear(256 * side * side, 512)
        self.fc2 = nn.Linear(512, num_classes)

    def forward(self, x):
        """Run a batch of images through the network and return log-softmax scores."""
        stages = (
            (self.conv1, self.bn1, self.dropout1),
            (self.conv2, self.bn2, self.dropout2),
            (self.conv3, self.bn3, self.dropout3),
            (self.conv4, self.bn4, self.dropout4),
        )
        for conv, norm, drop in stages:
            activated = F.leaky_relu(norm(conv(x)))
            x = drop(self.pool(activated))

        # Collapse everything except the batch dimension.
        features = x.view(x.size(0), -1)
        hidden = F.leaky_relu(self.fc1(features))
        logits = self.fc2(hidden)

        return F.log_softmax(logits, dim=1)



In [9]:
# Prefer the GPU when CUDA is usable, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

print("Using {} device".format(device))

In [10]:
# Instantiate the network for 3-channel 128x128 inputs with 6 output classes
# and place it on the selected device.
model = ConvNet(input_chanels=3, image_size=128, num_classes=6).to(device)

# map_location remaps the stored tensors onto the active device, so a
# checkpoint saved from a CUDA session still loads on a CPU-only machine.
# NOTE(review): torch.load unpickles the file - only load checkpoints from a
# trusted source (recent PyTorch versions offer weights_only=True for this).
state_dict = torch.load("hand_torch_modelv3.pth", map_location=device)
model.load_state_dict(state_dict)

# Switch to evaluation mode: without this the Dropout2d layers keep dropping
# feature maps and BatchNorm normalises with per-batch statistics, which makes
# single-image predictions noisy and unreliable.
model.eval()

# Preprocessing applied to every PIL image before it is fed to the model.
transform = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),  # Normalize pixel values to range -1 to 1
])


In [11]:
import cv2
import numpy as np
# Live demo: classify every webcam frame and overlay the predicted class index
# on the video until 'q' is pressed.
cap = cv2.VideoCapture(0)

if not cap.isOpened():
    cap.release()
    # Raise instead of calling exit(): inside a notebook exit() asks the kernel
    # to shut down (discarding the loaded model) rather than aborting this cell.
    raise RuntimeError("Error: Could not open webcam.")

# Inference must run with dropout disabled and batch-norm using its running
# statistics; calling eval() again is harmless if it was already set.
model.eval()

print("Press 'q' to quit.")
try:
    while True:
        ret, frame = cap.read()

        if not ret:
            print("Failed to grab frame.")
            break

        # Show the raw frame (before the prediction text is drawn on it)
        cv2.imshow('Webcam', frame)

        # Convert OpenCV's BGR frame into an RGB PIL image, then apply the
        # preprocessing pipeline and add the batch dimension.
        img = cv2.resize(frame, (128, 128))
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = transform(Image.fromarray(img))
        img = img.unsqueeze(0).to(device)

        with torch.no_grad():
            output = model(img)
        # Index of the highest log-probability is the predicted class
        predicted = output.argmax(dim=1)

        cv2.putText(frame, f'Predicted class: {predicted.item()}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2, cv2.LINE_AA)

        # Show the annotated video
        cv2.imshow('Video', frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always free the camera and close the windows, even if the loop raised
    cap.release()
    cv2.destroyAllWindows()

In [12]:
# Capture-on-demand demo: stream the webcam, classify the current frame when
# 'c' is pressed, and stop when 'q' is pressed.
cap = cv2.VideoCapture(0)

if not cap.isOpened():
    cap.release()
    # Raise instead of calling exit(): inside a notebook exit() asks the kernel
    # to shut down (discarding the loaded model) rather than aborting this cell.
    raise RuntimeError("Error: Could not open webcam.")

# Inference must run with dropout disabled and batch-norm using its running
# statistics; calling eval() again is harmless if it was already set.
model.eval()

print("Press 'q' to quit.")
try:
    while True:
        ret, frame = cap.read()

        if not ret:
            print("Failed to grab frame.")
            break

        # Display the resulting frame
        cv2.imshow('Webcam', frame)

        # Poll the keyboard exactly once per frame. Calling waitKey twice lets
        # one call swallow the key press the other check is waiting for, so
        # 'q' and 'c' would be missed intermittently.
        key = cv2.waitKey(1) & 0xFF

        # Press 'q' on the keyboard to exit the loop
        if key == ord('q'):
            break

        # Press 'c' to capture the image and make a prediction
        if key == ord('c'):
            # Convert OpenCV's BGR frame into an RGB PIL image
            image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            image = Image.fromarray(image)

            # Apply the transformations, add the batch dimension and move the
            # tensor to the model's device
            image = transform(image).unsqueeze(0).to(device)

            # Make the prediction
            with torch.no_grad():
                output = model(image)
            predicted = output.argmax(dim=1)
            print(f'Predicted class: {predicted.item()}')
finally:
    # Always free the camera and close the windows, even if the loop raised
    cap.release()
    cv2.destroyAllWindows()