In [46]:
import torch.nn as nn
import torch.optim as optim
import torch
from PIL import Image
import cv2
from torchvision import transforms
import time
import torch.nn.functional as F


In [47]:
import torchvision.models as models

class ResNet50(nn.Module):
    """ResNet-50 backbone followed by a small classification head.

    The torchvision ResNet-50 is truncated by dropping its last child module,
    and a new head (Linear -> ReLU -> BatchNorm1d -> Linear) is attached.

    The attribute names ``resnet50`` and ``fc`` and the layer order inside
    ``fc`` determine the ``state_dict`` keys, so they must stay as they are
    for existing checkpoints to keep loading.

    Args:
        num_classes: Number of output logits produced by the head.
        pretrained: Forwarded to ``torchvision.models.resnet50``. Defaults to
            ``True`` (the previously hard-coded behaviour). Pass ``False``
            when the weights are about to be overwritten by
            ``load_state_dict`` to avoid fetching the ImageNet weights.
    """

    def __init__(self, num_classes=2, pretrained=True):
        super().__init__()
        backbone = models.resnet50(pretrained=pretrained)
        # Keep every child except the last one (in torchvision's ResNet this
        # is the stock `fc` classifier, whose input width is reused below).
        self.resnet50 = nn.Sequential(*list(backbone.children())[:-1])
        self.fc = nn.Sequential(
            nn.Linear(backbone.fc.in_features, 1024),
            nn.ReLU(),
            nn.BatchNorm1d(1024),
            nn.Linear(1024, num_classes)
        )

    def forward(self, x):
        """Return raw (un-normalised) class logits for a batch.

        Args:
            x: Input batch; assumed to be image tensors shaped (N, 3, H, W)
                -- TODO confirm against the training code.

        Returns:
            Tensor with ``num_classes`` logits per sample.
        """
        x = self.resnet50(x)
        # Average over dims 2 and 3 so the 4-D feature map becomes 2-D for the head.
        x = x.mean([2, 3])
        x = self.fc(x)
        return x


In [48]:
model = ResNet50(num_classes=2)
# NOTE(security): torch.load unpickles the file, so only load checkpoints
# that come from a trusted source.
# map_location='cpu' lets a checkpoint that was saved from a GPU run load on a
# CPU-only machine; the model is not moved to another device in the cells shown.
model.load_state_dict(torch.load('model.pth', map_location='cpu'))

<All keys matched successfully>

In [49]:
# Deterministic preprocessing for inference.
# The previous pipeline applied training-time augmentations (random crop, flip,
# rotation, colour jitter), so the same image could yield different predictions
# from one call to the next. Only the steps needed to produce a normalised
# 64x64 tensor are kept.
# NOTE(review): Resize((64, 64)) is assumed to match the evaluation transform
# used during training -- confirm against the training notebook.
transform = transforms.Compose([
    transforms.Resize((64, 64)),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

In [50]:
def img_process(img):
    """Classify a single image with the module-level ``model``.

    Args:
        img: Image array in BGR channel order (it is converted to RGB before
            being handed to the ``transform`` pipeline).

    Returns:
        A tuple ``(probs, label)``: ``probs`` is the list of softmax
        probabilities for the image and ``label`` is the index of the
        highest-scoring class.
    """
    rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    # The transform pipeline receives a PIL image; unsqueeze(0) adds the batch dimension.
    batch = transform(Image.fromarray(rgb)).unsqueeze(0)
    with torch.no_grad():
        model.eval()
        logits = model(batch)
        probs = F.softmax(logits, dim=1)
        # Element [1] of the (values, indices) pair is the arg-max per row.
        best = logits.max(dim=1)[1]
    return probs[0].tolist(), best[0].item()

In [51]:
# Load one sample image from the dataset folder (Windows-style relative path).
sample_path = r"data\human detection dataset\0\144.png"
img = cv2.imread(sample_path)
# cv2.imread returns None instead of raising when the file is missing or
# cannot be decoded; fail here with a clear message rather than later inside
# img_process.
if img is None:
    raise FileNotFoundError(f"Could not read image: {sample_path}")

In [52]:
# Element [1] of the tuple returned by img_process is the predicted class index
# for the sample image loaded above.
img_process(img)[1]

0

In [53]:

# Open the camera
cap = cv2.VideoCapture(0)  # index 0 usually corresponds to the computer's default camera

# Minimum softmax probability required before a label is drawn on the frame.
CONFIDENCE_THRESHOLD = 0.6
# Class index -> overlay text.
LABELS = {1: "HUMAN", 0: "NOHUMAN"}

try:
    if not cap.isOpened():
        raise RuntimeError("Could not open camera 0")

    while True:
        # Read a frame from the camera
        ret, frame = cap.read()
        if not ret:
            # No frame was delivered (camera unplugged / stream ended):
            # stop instead of passing None into img_process.
            break

        prob, ishuman = img_process(frame)

        # Recompute the label on every frame so a low-confidence frame never
        # reuses the previous label (or hits an undefined name on the very
        # first frame). Low-confidence frames are shown without a label.
        text = LABELS.get(ishuman) if prob[ishuman] > CONFIDENCE_THRESHOLD else None
        if text is not None:
            cv2.putText(frame, text, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

        # Display the frame
        cv2.imshow('Camera', frame)
        # Leave the loop when the 'q' key is pressed
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release the camera and close the window, even if an error occurred above
    cap.release()
    cv2.destroyAllWindows()
