In [1]:
import cv2
import torch
from transformers import AutoImageProcessor, SwinForImageClassification
from PIL import Image
import numpy as np

# Load the image processor and the Swin classifier once, outside the capture
# loop, so the per-frame work is only preprocessing plus one forward pass.
processor = AutoImageProcessor.from_pretrained("./model")
model = SwinForImageClassification.from_pretrained("./model")
id2label = model.config.id2label  # class index -> human-readable label

# Use IP camera stream URL
ip_camera_url = "http://192.168.0.169:8080/video"  # change this
cap = cv2.VideoCapture(ip_camera_url)

# Distinguish "the stream never opened" (wrong URL, camera offline) from
# "the stream opened but a frame could not be read", so the user knows which
# one to debug.
stream_opened = cap.isOpened()
if not stream_opened:
    print(f"Could not open IP camera stream: {ip_camera_url}")

try:
    while stream_opened:
        ret, frame = cap.read()
        if not ret:
            print("Failed to get frame from IP camera.")
            break

        # OpenCV delivers frames in BGR channel order; convert to RGB and wrap
        # the array in a PIL image before handing it to the processor.
        img_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        pil_image = Image.fromarray(img_rgb)

        # Preprocess and predict. Gradients are not needed for inference, so
        # only the forward pass runs under no_grad.
        inputs = processor(images=pil_image, return_tensors="pt")
        with torch.no_grad():
            logits = model(**inputs).logits
        predicted_class_idx = logits.argmax(-1).item()
        label = id2label[predicted_class_idx]

        # Draw the prediction on the original (BGR) frame and show it.
        cv2.putText(frame, f"Prediction: {label}", (10, 30),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
        cv2.imshow("Real-Time Classification", frame)

        # Press 'q' in the preview window to stop.
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always release the stream and close the preview window, including when
    # the loop is left through an exception or a kernel interrupt; otherwise
    # the capture handle and the window outlive the cell.
    cap.release()
    cv2.destroyAllWindows()


  from .autonotebook import tqdm as notebook_tqdm
  torch.utils._pytree._register_pytree_node(


Failed to get frame from IP camera.
