In [3]:
import torch
from torchvision import transforms
from torchvision import models
from PIL import Image
import cv2

In [7]:
import torch.nn as nn

# 1. Build the ResNet50 backbone without pretrained weights; the trained
#    parameters come from the checkpoint loaded in step 3.
model = models.resnet50(weights=None)

# 2. Replace the stock classifier with the custom fully connected head.
#    The layer layout has to match the checkpoint, because load_state_dict()
#    below runs in its default strict mode.
model.fc = nn.Sequential(
    nn.Linear(model.fc.in_features, 1024),  # wider first hidden layer
    nn.BatchNorm1d(1024),
    nn.ReLU(),
    nn.Dropout(0.4),
    nn.Linear(1024, 512),
    nn.BatchNorm1d(512),
    nn.ReLU(),
    nn.Dropout(0.4),
    nn.Linear(512, 34),  # one output per food class
)

# 3. Load the trained weights onto the CPU.
#    weights_only=True restricts unpickling to tensors and plain containers,
#    so a tampered checkpoint file cannot run arbitrary code on load.
state_dict = torch.load(
    'model_food/RESNET50.pth',
    map_location=torch.device('cpu'),
    weights_only=True,
)
model.load_state_dict(state_dict)

# 4. Switch to evaluation mode: Dropout is disabled and BatchNorm uses its
#    stored running statistics.
model.eval()

  state_dict = torch.load('model_food/RESNET50.pth', map_location=torch.device('cpu'))


ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [8]:
# Target (height, width) every frame is resized to before inference.
org_size = (256, 256)

# Inference-time preprocessing pipeline. It must replicate the preprocessing
# that was used while training the model.
test_transform = transforms.Compose(
    [
        transforms.Resize(size=org_size),
        transforms.ToTensor(),
        # (x - 0.5) / 0.5 per channel.
        transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
    ]
)

In [12]:
# Lookup table from predicted class index to food name (34 entries).
# NOTE(review): despite its name, this maps index -> class name.
# The position of each name in the list is its index; presumably this is the
# class order used at training time -- verify against the training dataset.
class_to_idx = dict(enumerate([
    'apple_pie',
    'Baked Potato',
    'burger',
    'butter_naan',
    'chai',
    'chapati',
    'cheesecake',
    'chicken_curry',
    'chole_bhature',
    'Crispy Chicken',
    'dal_makhani',
    'dhokla',
    'Donut',
    'fried_rice',
    'Fries',
    'Hot Dog',
    'ice_cream',
    'idli',
    'jalebi',
    'kaathi_rolls',
    'kadai_paneer',
    'kulfi',
    'masala_dosa',
    'momos',
    'omelette',
    'paani_puri',
    'pakode',
    'pav_bhaji',
    'pizza',
    'samosa',
    'Sandwich',
    'sushi',
    'Taco',
    'Taquito',
]))

# Open the default webcam (device index 0).
cap = cv2.VideoCapture(0)

# The try/finally guarantees that the capture handle and the preview window
# are released even when the loop exits through an exception or a kernel
# interrupt; without it the cleanup calls would be skipped in those cases.
try:
    while True:
        ret, frame = cap.read()
        if not ret:
            # No frame was returned, so stop the loop.
            break

        # Convert the OpenCV frame from BGR to RGB and wrap it as a PIL image
        # for the torchvision transform pipeline.
        img = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))

        # Preprocess the image and add the leading batch dimension.
        img_tensor = test_transform(img).unsqueeze(0)

        # Run the prediction without tracking gradients.
        with torch.no_grad():
            output = model(img_tensor)
            _, predicted = torch.max(output, 1)
            label = predicted.item()
            class_name = class_to_idx[label]  # index -> class name

        # Draw the predicted index and class name onto the frame.
        cv2.putText(frame, f"Predicted Label: {label}, {class_name}", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)

        # Show the annotated webcam frame.
        cv2.imshow("Webcam", frame)

        # Quit when 'q' is pressed.
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    cap.release()
    cv2.destroyAllWindows()