In [2]:
import cv2
from ultralytics import YOLO
import torch
import torch.nn as nn
from torchvision import transforms, models
from PIL import Image

In [3]:
# Load YOLO models
model_cow = YOLO('cowModel.pt')

In [4]:
# Open video file
video_path = 'cowvideo.mp4'  # Replace with your video path
cap = cv2.VideoCapture(video_path)

In [5]:
# Define transformations (same as used during training)
transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Load MobileNetV2 model
model = models.mobilenet_v2(weights=None)
model.classifier = nn.Sequential(
    nn.Dropout(0.3),
    nn.Linear(model.last_channel, 2)
)

# Load trained model weights
MODEL_PATH = "D:\\aswin\\Study\\Class\\s8\\bc\\backend\\best_model_epoch16_f10.926.pth"
checkpoint = torch.load(MODEL_PATH, map_location=torch.device('cpu'))

# Load state dict into the model
if isinstance(checkpoint, dict) and 'state_dict' in checkpoint:
    model.load_state_dict(checkpoint['state_dict'], strict=True)
else:
    model.load_state_dict(checkpoint, strict=True)

model.eval()

# Move model to GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

In [6]:
def predict(image_input):
    # Check if the input is a PIL.Image object
    if isinstance(image_input, Image.Image):
        image = image_input.convert('RGB')  # Ensure image is in RGB format
    else:
        # Assume it's a file path
        image = Image.open(image_input).convert('RGB')

    # Preprocess the image
    image = transform(image).unsqueeze(0)  # Add batch dimension
    image = image.to(device)  # Move image to the same device as the model

    # Perform prediction
    with torch.no_grad():
        outputs = model(image)
        _, predicted = outputs.max(1)

    return predicted.item() == 0  # Return True if the image has lumps

In [7]:
# Get video properties
frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = int(cap.get(cv2.CAP_PROP_FPS))
if fps <= 0:  # Handle invalid FPS values
    fps = 30

# Initialize video writer
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out = cv2.VideoWriter('output_video.mp4', fourcc, fps, (frame_width, frame_height))

In [8]:
while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break

    # Detect cows (using YOLO or your detection model)
    results_cow = model_cow.predict(frame)
    
    if len(results_cow) > 0 and hasattr(results_cow[0], "boxes"):
        cow_boxes = results_cow[0].boxes.data.cpu().numpy()
    else:
        cow_boxes = []

    for box in cow_boxes:
        x1, y1, x2, y2, conf, cls = box.astype(int)
        cow_region = frame[y1:y2, x1:x2]

        # Convert OpenCV BGR to PIL RGB
        cow_region_pil = Image.fromarray(cv2.cvtColor(cow_region, cv2.COLOR_BGR2RGB))

        # Detect lumps using MobileNet (now works with PIL Image)
        has_lumps = predict(cow_region_pil)  # Directly pass the PIL Image

        # Draw bounding box
        color = (0, 0, 255) if has_lumps else (0, 255, 0)
        cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
        label = "Infected" if has_lumps else "Healthy"
        cv2.putText(frame, f'Cow {conf:.2f} - {label}', (x1, y1 - 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)

    # Write the processed frame to output video
    out.write(frame)

    # Show frame
    cv2.imshow('Detection', frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
out.release()
cv2.destroyAllWindows()


0: 640x384 2 cows, 200.1ms
Speed: 11.5ms preprocess, 200.1ms inference, 13.5ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 2 cows, 87.9ms
Speed: 4.5ms preprocess, 87.9ms inference, 1.5ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 2 cows, 86.6ms
Speed: 2.0ms preprocess, 86.6ms inference, 1.6ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 2 cows, 88.0ms
Speed: 4.3ms preprocess, 88.0ms inference, 1.8ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 4 cows, 86.2ms
Speed: 2.3ms preprocess, 86.2ms inference, 1.4ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 5 cows, 81.8ms
Speed: 1.8ms preprocess, 81.8ms inference, 1.4ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 5 cows, 79.2ms
Speed: 1.7ms preprocess, 79.2ms inference, 1.6ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 4 cows, 75.9ms
Speed: 1.7ms preprocess, 75.9ms inference, 1.3ms postprocess per image at shape (1, 3, 640, 384)

0: 