# Pet Tracking Project

### Importing Libraries

In [None]:
import cv2
import os
import numpy as np
import xml.etree.ElementTree as ET
import torch
import subprocess
from ultralytics import YOLO

# Reaching this line means every import above resolved without raising.
print("All libraries installed successfully!")


### Video Cropping

This script crops the video to 1280x736.

In [None]:
def crop_video(input_path, output_path, crop_width=1280, crop_height=736, crop_x=0, crop_y=100):
    """Crop a rectangular region out of a video by invoking ffmpeg.

    Args:
        input_path: Path of the video to read.
        output_path: Path the cropped video is written to.
        crop_width: Width of the kept region, in pixels.
        crop_height: Height of the kept region, in pixels.
        crop_x: Horizontal offset of the region's left edge.
        crop_y: Vertical offset of the region's top edge.

    Raises:
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
    """
    # The crop filter receives its arguments as width:height:x:y.
    crop_filter = f'crop={crop_width}:{crop_height}:{crop_x}:{crop_y}'

    ffmpeg_args = ['ffmpeg', '-i', input_path, '-vf', crop_filter]
    # Copy the audio stream as-is rather than re-encoding it.
    ffmpeg_args += ['-c:a', 'copy', output_path]

    subprocess.run(ffmpeg_args, check=True)

    print(f'Video has been cropped: {output_path}')

# Forward slashes keep these literals free of invalid escape sequences
# (backslash followed by "O" or "c") and work on every platform.
input_video = 'Videos/OriginalVideos/cat22.mp4'
output_video = 'Videos/CroppedVideos/cat22_cropped.mp4'

crop_video(input_video, output_video, crop_width=1280, crop_height=736, crop_x=180, crop_y=130)

### Frames Extracting

This script extracts frames from the video. Change `frame_interval` to control how many frames are saved (every N-th frame is kept).

In [None]:
video_path = 'Videos/CroppedVideos/cat22_cropped.mp4'
output_dir = 'frames_cat/'
os.makedirs(output_dir, exist_ok=True)

# Collect the frames saved by earlier runs so new files continue the numbering
# instead of overwriting them.
existing_frames = []
for entry in os.listdir(output_dir):
    if entry.startswith('frame_') and entry.endswith('.jpg'):
        existing_frames.append(entry)

# File names look like "frame_<number>.jpg"; -1 means "nothing saved yet".
max_frame_number = max(
    (int(entry.split('_')[1].split('.')[0]) for entry in existing_frames),
    default=-1,
)

cap = cv2.VideoCapture(video_path)
frame_count = 0

# Only every frame_interval-th frame is written to disk.
frame_interval = 10

while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        # End of stream (or a read failure): stop extracting.
        break
    if frame_count % frame_interval == 0:
        frame_number = max_frame_number + frame_count + 1
        frame_path = os.path.join(output_dir, f'frame_{frame_number:05d}.jpg')
        cv2.imwrite(frame_path, frame)
    frame_count += 1

cap.release()


### Converting Labels Format

This script converts the label files from .xml (Pascal VOC format) to .txt (YOLO format).

In [None]:
def convert(size, box):
    """Turn an absolute (xmin, xmax, ymin, ymax) box into normalised YOLO form.

    Args:
        size: Image dimensions as (width, height).
        box: Box edges as (xmin, xmax, ymin, ymax).

    Returns:
        Tuple (x_center, y_center, width, height), each scaled by the
        reciprocal of the matching image dimension.
    """
    inv_width = 1. / size[0]
    inv_height = 1. / size[1]

    # Box midpoint, shifted by one as in the original conversion.
    center_x = (box[0] + box[1]) / 2.0 - 1
    center_y = (box[2] + box[3]) / 2.0 - 1
    box_width = box[1] - box[0]
    box_height = box[3] - box[2]

    return (
        center_x * inv_width,
        center_y * inv_height,
        box_width * inv_width,
        box_height * inv_height,
    )

def convert_annotation(image_id):
    """Convert one Pascal VOC annotation file into a YOLO label file.

    Reads ``OldLabels/<image_id>.xml`` and writes ``NewLabels/<image_id>.txt``
    with one ``class x_center y_center width height`` line per kept object.
    Only objects named "dog" that are not flagged as difficult are kept, and
    each is written with class id 0. The output file is created even when no
    object is kept.

    Args:
        image_id: Annotation file name without its extension.
    """
    # Parse before creating the output so a malformed XML file does not leave
    # an empty label file behind. ET.parse opens and closes the file itself.
    root = ET.parse(f'OldLabels/{image_id}.xml').getroot()
    size = root.find('size')
    w = int(size.find('width').text)
    h = int(size.find('height').text)

    # The context manager guarantees the label file is flushed and closed.
    with open(f'NewLabels/{image_id}.txt', 'w') as out_file:
        for obj in root.iter('object'):
            # Treat a missing <difficult> tag as "not difficult" instead of crashing.
            difficult = obj.findtext('difficult', default='0')
            cls = obj.findtext('name')
            if cls != "dog" or int(difficult) == 1:
                continue
            cls_id = 0
            xmlbox = obj.find('bndbox')
            # convert() expects the edges ordered as (xmin, xmax, ymin, ymax).
            b = tuple(float(xmlbox.find(tag).text) for tag in ('xmin', 'xmax', 'ymin', 'ymax'))
            bb = convert((w, h), b)
            out_file.write(str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n')

if __name__ == '__main__':
    # Class list for the dataset; the conversion below does not read it.
    classes = ["dog"]

    # Create the output folder on the first run.
    labels_dir_missing = not os.path.exists('NewLabels/')
    if labels_dir_missing:
        os.makedirs('NewLabels/')

    # An annotation's id is its file name without the ".xml" extension.
    image_ids = []
    for entry in os.listdir('OldLabels'):
        if entry.endswith('.xml'):
            image_ids.append(os.path.splitext(entry)[0])

    for image_id in image_ids:
        convert_annotation(image_id)


### Resizing Frames + Adjusting Labels

This script resizes and pads images to a target size while adjusting the corresponding labels to maintain the correct bounding box coordinates

In [None]:
def preprocess_image_and_labels(image_path, label_path, output_image_path, output_label_path, target_size=(1280, 1280)):
    """Pad a 1280x736 frame to ``target_size`` and rewrite its YOLO labels.

    The frame is pasted onto a grey (value 128) canvas, aligned to the left
    edge and centred vertically. Every label line is converted back to pixel
    coordinates, shifted by the vertical padding, and normalised against the
    canvas size.

    Args:
        image_path: Path of the source frame.
        label_path: Path of the YOLO label file belonging to the frame.
        output_image_path: Where the padded frame is written.
        output_label_path: Where the adjusted label file is written.
        target_size: Canvas size as (width, height).

    Raises:
        OSError: If the image cannot be read.
        ValueError: If the image is not exactly 1280x736.
    """
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread reports a missing or undecodable file by returning None.
        raise OSError(f"Could not read image: {image_path}")
    h, w = image.shape[:2]

    if w != 1280 or h != 736:
        raise ValueError(f"Expected image size 1280x736, but got {w}x{h}")

    padded_image = np.full((target_size[1], target_size[0], 3), 128, dtype=np.uint8)
    # Split the spare vertical space evenly above and below the frame.
    pad_y = (target_size[1] - h) // 2
    padded_image[pad_y:pad_y + h, 0:w] = image

    cv2.imwrite(output_image_path, padded_image)

    with open(label_path, 'r') as file:
        lines = file.readlines()

    with open(output_label_path, 'w') as file:
        for line in lines:
            parts = line.strip().split()
            if not parts:
                # Skip blank lines (e.g. a trailing newline) instead of failing.
                continue
            class_id = parts[0]
            x_center = float(parts[1])
            y_center = float(parts[2])
            width = float(parts[3])
            height = float(parts[4])

            # Normalised -> pixels in the original frame.
            x_center_abs = x_center * w
            y_center_abs = y_center * h
            width_abs = width * w
            height_abs = height * h

            # Only the vertical position moves: padding is added top and bottom.
            y_center_abs += pad_y

            # Pixels -> normalised against the padded canvas.
            x_center = x_center_abs / target_size[0]
            y_center = y_center_abs / target_size[1]
            width = width_abs / target_size[0]
            height = height_abs / target_size[1]

            file.write(f"{class_id} {x_center} {y_center} {width} {height}\n")


input_image_folder = 'frames_cat'
input_label_folder = 'frames_cat_labeled'
output_image_folder = 'NewCatFrames'
output_label_folder = 'NewCatLabels'

os.makedirs(output_image_folder, exist_ok=True)
os.makedirs(output_label_folder, exist_ok=True)

for filename in os.listdir(input_image_folder):
    if not filename.endswith(('.jpg', '.png')):
        continue

    # Swap only the extension: str.replace would also rewrite ".jpg"/".png"
    # occurring elsewhere in the file name.
    label_name = os.path.splitext(filename)[0] + '.txt'

    image_path = os.path.join(input_image_folder, filename)
    label_path = os.path.join(input_label_folder, label_name)
    output_image_path = os.path.join(output_image_folder, filename)
    output_label_path = os.path.join(output_label_folder, label_name)

    if not os.path.isfile(label_path):
        # A frame without a label file should not abort the whole batch.
        print(f'Skipping {filename}: no label file at {label_path}')
        continue

    preprocess_image_and_labels(image_path, label_path, output_image_path, output_label_path)


### Model Training

This script trains the model

In [None]:
# Train on the GPU when one is available, otherwise fall back to the CPU.
gpu_available = torch.cuda.is_available()
print("GPU is available" if gpu_available else "GPU is not available, using CPU")
device = torch.device('cuda' if gpu_available else 'cpu')

# Training starts from the 'yolov8n.pt' checkpoint.
model = YOLO('yolov8n.pt')

dataset_config = 'E:/PetTrackingProject/coco.yaml'

model.train(
    data=dataset_config,
    epochs=50,
    imgsz=1280,
    device=device,
)

### Dog Detection

This script detects the dog in a video.

In [None]:
model = YOLO('runs/detect/train/weights/best.pt')

name = 'Another_Angle_Test'

input_video_path = f'Videos/OriginalVideos/{name}.mp4'
output_video_path = f'Videos/{name}_output.mp4'
cap = cv2.VideoCapture(input_video_path)

# The output video copies the source's resolution and frame rate.
frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = cap.get(cv2.CAP_PROP_FPS)

fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out = cv2.VideoWriter(output_video_path, fourcc, fps, (frame_width, frame_height))

while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break

    results = model(frame)

    for result in results:
        for bbox in result.boxes:
            x1, y1, x2, y2 = map(int, bbox.xyxy[0])
            confidence = bbox.conf[0]
            class_id = int(bbox.cls[0])
            label = model.names[class_id]

            # Only dog detections are drawn; every other class is ignored.
            if label != 'dog':
                continue

            caption = f'{label} {confidence:.2f}'
            cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 0, 255), 2)
            # The caption sits 10 px above the box's top-left corner.
            cv2.putText(frame, caption, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 0, 255), 2)

    # Every frame is written, annotated or not.
    out.write(frame)

cap.release()
out.release()
cv2.destroyAllWindows()


### Marking the Doorway 

This script shows the first frame of the video so you can click on the corners of the doorway and get their coordinates.

In [None]:
# Pixel coordinates of every left-click, in click order.
points = []

def get_coordinates(event, x, y, flags, param):
    """Mouse callback that records and marks each left-click.

    Appends the clicked (x, y) to the module-level ``points`` list, prints it,
    and draws a filled green dot on the module-level ``frame`` before
    refreshing the 'Frame' window. Any other mouse event is ignored.
    """
    if event != cv2.EVENT_LBUTTONDOWN:
        return

    clicked = (x, y)
    points.append(clicked)
    print(f"Point selected: ({x}, {y})")
    # Thickness -1 draws the circle filled.
    cv2.circle(frame, clicked, 5, (0, 255, 0), -1)
    cv2.imshow('Frame', frame)

# Only the first frame is needed, so the capture is released right away.
cap = cv2.VideoCapture('Videos/OriginalVideos/Dog_Enters_Leaves_Test.mp4')
ret, frame = cap.read()
cap.release()

if ret:
    cv2.imshow('Frame', frame)
    cv2.setMouseCallback('Frame', get_coordinates)
    # Block until a key is pressed; clicks are handled by the callback meanwhile.
    cv2.waitKey(0)
    cv2.destroyAllWindows()
else:
    print("Failed to read the video")

print("Selected points:", points)


### Dog Movement Detection

This script detects whether the dog is inside, in the doorway, or outside.

In [None]:
model = YOLO('runs/detect/train3/weights/best.pt')

cap = cv2.VideoCapture('Videos/OriginalVideos/Dog_Enters_Leaves_Test.mp4')

# Corner points of the doorway region. The dtype is pinned to int32 because
# OpenCV's polygon drawing and point-in-polygon functions require 32-bit
# points, while np.array would otherwise pick the platform default integer
# (int64 on many systems).
entrance_exit_polygon = np.array([(431, 197), (507, 194), (512, 132), (436, 134)], dtype=np.int32)

# The state machine starts from the assumption that the dog is outside.
dog_state = 'outside'

def is_in_doorway(center, polygon):
    """Return True when ``center`` lies inside or on the edge of ``polygon``.

    Args:
        center: Point as an (x, y) pair.
        polygon: Polygon vertices handed to ``cv2.pointPolygonTest``.
    """
    # Convert the coordinates to plain Python ints before handing them to OpenCV.
    point = (int(center[0]), int(center[1]))
    # With measureDist=False the result is a sign only; negative means outside.
    position = cv2.pointPolygonTest(polygon, point, False)
    return position >= 0

fourcc = cv2.VideoWriter_fourcc(*'XVID')
# Reuse the source frame rate so the annotated video plays at the original
# speed; fall back to 20 fps when the capture cannot report one.
source_fps = cap.get(cv2.CAP_PROP_FPS)
output_fps = source_fps if source_fps > 0 else 20.0
frame_size = (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)), int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))
out = cv2.VideoWriter('Videos/Results/Dog_Enters_Leaves_output.avi', fourcc, output_fps, frame_size)

while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break

    results = model(frame)

    dog_detected = False
    dog_center = None

    for result in results:
        for box in result.boxes:
            x1, y1, x2, y2 = box.xyxy[0].cpu().numpy().astype(int)
            label = box.cls.item()

            # Class 0 is treated as the dog. If several class-0 boxes are
            # found in one frame, the last one determines dog_center.
            if label == 0:
                dog_center = ((x1 + x2) // 2, (y1 + y2) // 2)
                dog_detected = True

                cv2.putText(frame, 'Dog', (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 0, 255), 2)
                cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 0, 255), 2)

    cv2.polylines(frame, [entrance_exit_polygon], isClosed=True, color=(255, 0, 0), thickness=2)

    # State machine: outside <-> in_doorway <-> inside.
    if dog_state == 'outside':
        if dog_detected and is_in_doorway(dog_center, entrance_exit_polygon):
            dog_state = 'in_doorway'
    elif dog_state == 'in_doorway':
        if dog_detected and not is_in_doorway(dog_center, entrance_exit_polygon):
            # Still visible but no longer in the doorway: the dog is outside.
            dog_state = 'outside'
        elif not dog_detected:
            # Vanishing while in the doorway is read as having gone inside.
            dog_state = 'inside'
    elif dog_state == 'inside':
        if dog_detected and is_in_doorway(dog_center, entrance_exit_polygon):
            dog_state = 'in_doorway'

    cv2.putText(frame, f'Dog State: {dog_state}', (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 255, 0), 2)

    out.write(frame)

    cv2.imshow('CCTV Feed', frame)

    # Pressing "q" stops processing early.
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
out.release()
cv2.destroyAllWindows()

### Cat Frames/Labels Appending

This script merges the cat frames and labels with the dog frames and labels, prefixing file names to avoid name conflicts.

In [None]:
import os
import shutil

def rename_and_copy_files(src_folder, dest_folder, prefix):
    """Copy each regular file of ``src_folder`` to ``dest_folder`` as ``<prefix>_<name>``.

    The destination folder is created when it does not exist yet.
    Subdirectories of the source folder are skipped.

    Args:
        src_folder: Folder whose files are copied.
        dest_folder: Folder receiving the prefixed copies.
        prefix: Text put in front of every copied file name.
    """
    dest_missing = not os.path.exists(dest_folder)
    if dest_missing:
        os.makedirs(dest_folder)

    for entry in os.listdir(src_folder):
        source_path = os.path.join(src_folder, entry)
        if not os.path.isfile(source_path):
            continue
        target_path = os.path.join(dest_folder, "_".join((prefix, entry)))
        # copy2 also carries over the file's metadata.
        shutil.copy2(source_path, target_path)

def update_labels(src_folder, dest_folder, class_index, prefix):
    """Copy label files under a prefixed name, forcing every box to one class.

    Each regular file in ``src_folder`` is rewritten to
    ``dest_folder/<prefix>_<name>``. Only lines with exactly five
    whitespace-separated fields are kept, and their first field is replaced
    by ``class_index``. Subdirectories of ``src_folder`` are skipped.

    Args:
        src_folder: Folder containing the label files.
        dest_folder: Folder receiving the rewritten files (created if needed).
        class_index: Class id written as the first field of every kept line.
        prefix: Text put in front of every output file name.
    """
    os.makedirs(dest_folder, exist_ok=True)
    for filename in os.listdir(src_folder):
        src_file = os.path.join(src_folder, filename)
        if not os.path.isfile(src_file):
            # Opening a directory would raise; skip anything that is not a file.
            continue
        dest_file = os.path.join(dest_folder, prefix + "_" + filename)
        with open(src_file, 'r') as file:
            lines = file.readlines()
        with open(dest_file, 'w') as file:
            for line in lines:
                parts = line.strip().split()
                # Lines that do not have exactly five fields are dropped.
                if len(parts) == 5:
                    parts[0] = str(class_index)
                    file.write(' '.join(parts) + '\n')

dog_images_folder = 'NewNewFrames'
dog_labels_folder = 'NewNewLabels'
cat_images_folder = 'NewCatFrames'
cat_labels_folder = 'NewCatLabels'

merged_images_folder = 'MergedFrames'
merged_labels_folder = 'MergedLabels'

# (images folder, labels folder, class index, file-name prefix), dog first.
for images_folder, labels_folder, class_index, prefix in (
    (dog_images_folder, dog_labels_folder, 0, 'dog'),
    (cat_images_folder, cat_labels_folder, 1, 'cat'),
):
    rename_and_copy_files(images_folder, merged_images_folder, prefix)
    update_labels(labels_folder, merged_labels_folder, class_index, prefix)

print("Files renamed, merged, and labels updated successfully.")


### Model Fine-Tuning with Cat Data

This script fine-tunes the previously trained model so that it also detects the cat.

In [None]:
# Pick the training device: CUDA when present, CPU otherwise.
use_gpu = torch.cuda.is_available()
if not use_gpu:
    print("GPU is not available, using CPU")
    device = torch.device('cpu')
else:
    print("GPU is available")
    device = torch.device('cuda')

# Training continues from the weights saved under runs/detect/train.
model = YOLO('runs/detect/train/weights/best.pt')

dataset_config = 'E:/PetTrackingProject/coco.yaml'

results = model.train(
    data=dataset_config,
    epochs=50,
    imgsz=1280,
    device=device,
)

### Dog and Cat Detection

This script detects the Dog and Cat simultaneously 

In [None]:
import cv2
from ultralytics import YOLO

model = YOLO('runs/detect/train5/weights/best.pt')

input_video_path = 'Videos/OriginalVideos/DogCatTest.mp4'
output_video_path = 'Videos/Results/DogCatTest_output.mp4'

# Box colour per class label (BGR): dogs in red, cats in blue.
box_colors = {'dog': (0, 0, 255), 'cat': (255, 0, 0)}

cap = cv2.VideoCapture(input_video_path)

# The output video copies the source's resolution and frame rate.
frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = cap.get(cv2.CAP_PROP_FPS)

fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out = cv2.VideoWriter(output_video_path, fourcc, fps, (frame_width, frame_height))

while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break

    results = model(frame)

    for result in results:
        for bbox in result.boxes:
            x1, y1, x2, y2 = map(int, bbox.xyxy[0])
            confidence = bbox.conf[0]
            class_id = int(bbox.cls[0])
            label = model.names[class_id]

            # Labels without a colour entry are neither dog nor cat: skip them.
            color = box_colors.get(label)
            if color is None:
                continue

            caption = f'{label} {confidence:.2f}'
            cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
            cv2.putText(frame, caption, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, color, 2)

    # Every frame is written, annotated or not.
    out.write(frame)

cap.release()
out.release()
cv2.destroyAllWindows()
