In [1]:
import cv2
import torch
import numpy as np
from collections import deque
import torchvision.transforms as transforms
import torch.nn as nn

In [None]:
# Report the installed PyTorch build and what CUDA hardware it can see.
print(
    f'PyTorch version: {torch.__version__}',
    f'CUDA available: {torch.cuda.is_available()}',
    f'GPU devices: {torch.cuda.device_count()}',
    sep='\n',
)
# The device name is only queried when a CUDA device is actually usable.
if torch.cuda.is_available():
    print(f'Current GPU: {torch.cuda.get_device_name(0)}')

PyTorch version: 2.7.1+cpu
CUDA available: False
GPU devices: 0


: 

In [1]:
import cv2
import numpy as np
import mediapipe as mp
import torch
import torch.nn as nn
import torchvision.transforms as transforms
from collections import deque

In [2]:

# Configuration shared by the model definition and the live-prediction loop.
CENTER_IDX = 0  # face-mesh landmark index the crop is centred on (presumably a lip landmark -- confirm)
CROP_SIZE = 120  # side length, in pixels, of the square cropped around that landmark
SEQUENCE_LENGTH = 80  # number of consecutive frames buffered before a prediction is made
NUM_CLASSES = 2
IMG_SIZE = 120  # side length every crop is resized to before entering the CNN
actions = ['Anuman', 'Badhi']  # display names for the two classes


class CNNLSTM(nn.Module):
    """Per-frame CNN feature extractor followed by an LSTM sequence classifier.

    Each frame goes through two conv/ReLU/max-pool stages; the flattened
    feature maps of a clip are fed to a single-layer LSTM and the hidden state
    of the last time step is projected to ``num_classes`` logits.

    Args:
        num_classes: Number of output logits.
        img_size: Side length of the square, single-channel input frames. The
            LSTM input width is derived from it, so the default reproduces the
            original ``64 * 30 * 30`` layout and stays compatible with weights
            saved from the original model.

    Raises:
        ValueError: If ``img_size`` is too small to survive the two 2x2 pools.
    """

    def __init__(self, num_classes=NUM_CLASSES, img_size=IMG_SIZE):
        super().__init__()
        # Each MaxPool2d(2) halves the side length (flooring), so two of them
        # leave img_size // 4 pixels per side.
        feat_side = img_size // 2 // 2
        if feat_side < 1:
            raise ValueError(f"img_size must be at least 4, got {img_size}")

        self.cnn = nn.Sequential(
            nn.Conv2d(1, 32, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Conv2d(32, 64, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),
        )
        self.lstm = nn.LSTM(input_size=64 * feat_side * feat_side, hidden_size=128, batch_first=True)
        self.fc = nn.Linear(128, num_classes)

    def forward(self, x):
        """Classify a batch of clips.

        Args:
            x: Tensor of shape ``(B, T, C, H, W)`` (unpacked as such below).

        Returns:
            Tensor of shape ``(B, num_classes)`` with the logits computed from
            the last time step of the LSTM output.
        """
        B, T, C, H, W = x.shape
        # reshape (not view) so non-contiguous inputs are accepted as well.
        x = x.reshape(B * T, C, H, W)  # fold time into the batch for the CNN
        x = self.cnn(x)
        x = x.reshape(B, T, -1)  # unfold time, flatten each frame's feature maps
        x, _ = self.lstm(x)
        return self.fc(x[:, -1, :])

# Load model
# Use the GPU when one is available and fall back to the CPU otherwise, so the
# notebook also runs on CPU-only PyTorch builds.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = CNNLSTM().to(device)
# map_location places the stored tensors on the chosen device;
# weights_only=True restricts unpickling to tensor data.
model.load_state_dict(torch.load("model_weights.pth", map_location=device, weights_only=True))
model.eval()  # inference mode

# Preprocess: array -> PIL image -> IMG_SIZE x IMG_SIZE -> grayscale -> tensor
transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize((IMG_SIZE, IMG_SIZE)),
    transforms.Grayscale(),
    transforms.ToTensor()
])

# Live
def _crop_bounds(cx, cy, width, height, size=CROP_SIZE):
    """Return (x1, y1, x2, y2) of a ``size``-wide box centred on (cx, cy).

    Every coordinate is clamped to the frame, i.e. to [0, width] and
    [0, height]. Clamping the lower-right corner at 0 as well keeps the slice
    indices non-negative when the centre lies outside the frame; a negative
    index would otherwise wrap around and select the wrong region.
    """
    half = size // 2
    x1 = min(max(cx - half, 0), width)
    y1 = min(max(cy - half, 0), height)
    x2 = min(max(cx + half, 0), width)
    y2 = min(max(cy + half, 0), height)
    return x1, y1, x2, y2


cap = cv2.VideoCapture(0)
mp_face_mesh = mp.solutions.face_mesh
sequence = deque(maxlen=SEQUENCE_LENGTH)  # rolling window of preprocessed frames
pred = None  # most recent predicted class index; None until the window first fills

try:
    with mp_face_mesh.FaceMesh(static_image_mode=False, max_num_faces=1,
                               min_detection_confidence=0.5, min_tracking_confidence=0.5) as face_mesh:

        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            # The face mesh is given an RGB copy; `frame` itself stays BGR.
            image_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            results = face_mesh.process(image_rgb)
            display_frame = frame.copy()

            if results.multi_face_landmarks:
                landmarks = results.multi_face_landmarks[0]
                h, w, _ = frame.shape
                center_lm = landmarks.landmark[CENTER_IDX]
                # Landmark coordinates are scaled by the frame size to get pixels.
                cx, cy = int(center_lm.x * w), int(center_lm.y * h)

                x1, y1, x2, y2 = _crop_bounds(cx, cy, w, h)
                crop = frame[y1:y2, x1:x2]

                # An empty crop cannot be preprocessed. Skip only the model
                # update for this frame; the window below is still refreshed
                # and the quit key still handled.
                if crop.size > 0:
                    # NOTE(review): `crop` is BGR while Grayscale weights
                    # channels as RGB -- confirm this matches how the training
                    # data was preprocessed.
                    preprocessed = transform(crop).unsqueeze(0).to(device)
                    sequence.append(preprocessed)

                    cv2.circle(display_frame, (cx, cy), 3, (0, 255, 0), -1)

                    # Make prediction if sequence is full
                    if len(sequence) == SEQUENCE_LENGTH:
                        # (T, 1, H, W) -> (1, T, 1, H, W); frames already live on `device`.
                        seq_tensor = torch.cat(list(sequence), dim=0).unsqueeze(0)
                        with torch.no_grad():
                            output = model(seq_tensor)
                            pred = torch.argmax(output, dim=1).item()

            if pred is not None:
                # NOTE(review): the label index is inverted (1 - pred), so class 0
                # is displayed as actions[1]. Kept as in the original -- confirm it
                # matches the label order used during training.
                label = f"Prediction: {actions[1-pred]}"
                cv2.putText(display_frame, label, (10, 30), cv2.FONT_HERSHEY_SIMPLEX,
                            1, (0, 0, 255), 2)

            cv2.imshow("Live Prediction", display_frame)
            key = cv2.waitKey(1) & 0xFF
            if key == ord('q'):
                break
finally:
    # Always free the camera and close the window, even if the loop raised
    # or the kernel was interrupted.
    cap.release()
    cv2.destroyAllWindows()



In [12]:
# Manual cleanup cell: release the capture object `cap` and close all OpenCV windows.
# NOTE(review): presumably kept so the camera can be freed by hand after the
# live-prediction cell is interrupted -- confirm whether it is still needed.
cap.release()
cv2.destroyAllWindows()