In [3]:
import cv2
import os
from collections import defaultdict
import glob
from torch.utils.data import Dataset
import torchvision.transforms as T
from torch.utils.data import DataLoader

In [4]:
#structure of the dataset
import os

def print_tree(startpath, indent=''):
    for item in os.listdir(startpath):
        path = os.path.join(startpath, item)
        print(indent + "|-- " + item)
        if os.path.isdir(path):
            print_tree(path, indent + "    ")

print_tree("C:/Users/2955352g/Desktop/pig_data_edinburgh")

|-- annotated
    |-- 2019_11_05
        |-- 000002
            |-- background.png
            |-- background_depth.png
            |-- color.mp4
            |-- depth.mp4
            |-- depth_scale.npy
            |-- inverse_intrinsic.npy
            |-- mask.png
            |-- output.json
            |-- rot.npy
            |-- times.txt
        |-- 000009
            |-- background.png
            |-- background_depth.png
            |-- color.mp4
            |-- depth.mp4
            |-- depth_scale.npy
            |-- inverse_intrinsic.npy
            |-- mask.png
            |-- output.json
            |-- rot.npy
            |-- times.txt
    |-- 2019_11_11
        |-- 000016
            |-- background.png
            |-- background_depth.png
            |-- color.mp4
            |-- depth.mp4
            |-- depth_scale.npy
            |-- inverse_intrinsic.npy
            |-- mask.png
            |-- output.json
            |-- rot.npy
            |-- times.txt
        |-- 

In [5]:
#
def load_annotations(json_path):
    with open(json_path) as f:
        data = json.load(f)
    
    # Organize by frame number
    annotations = defaultdict(list)
    for obj in data['objects']:
        for frame in obj['frames']:
            frame_num = frame['frameNumber']
            annotations[frame_num].append({
                'bbox': [frame['bbox']['x'], frame['bbox']['y'], 
                         frame['bbox']['width'], frame['bbox']['height']],
                'behavior': frame['behaviour'],
                'visible': frame['visible']
            })
    return annotations

In [6]:
#extarcting frames and aligning them
def extract_frames(video_path, output_dir, annotations=None, fps=5):
    os.makedirs(output_dir, exist_ok=True)
    cap = cv2.VideoCapture(video_path)
    frame_rate = cap.get(cv2.CAP_PROP_FPS)
    frame_interval = int(frame_rate / fps)
    frame_count = 0
    
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
            
        if frame_count % frame_interval == 0:
            # Save frame
            frame_file = f"frame_{frame_count:04d}.jpg"
            cv2.imwrite(os.path.join(output_dir, frame_file), frame)
            
            # Save corresponding annotations if available
            if annotations and frame_count in annotations:
                with open(os.path.join(output_dir, f"frame_{frame_count:04d}.json"), 'w') as f:
                    json.dump(annotations[frame_count], f)
                    
        frame_count += 1
    cap.release()

In [7]:
base_dir = "pig_data_edinburgh/annotated"

for date_dir in glob.glob(f"{base_dir}/*"):
    for clip_dir in glob.glob(f"{date_dir}/*"):
        # Load annotations
        annotations = load_annotations(f"{clip_dir}/output.json")
        
        # Extract frames
        extract_frames(
            video_path=f"{clip_dir}/color.mp4",
            output_dir=f"processed/{os.path.basename(date_dir)}/{os.path.basename(clip_dir)}",
            annotations=annotations
        )

In [8]:
#converting to pytorch dataset
class PigBehaviorDataset(Dataset):
    def __init__(self, root_dir, transform=None):
        self.root_dir = root_dir
        self.transform = transform or T.Compose([
            T.Resize(256),
            T.ToTensor(),
            T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        ])
        
        # Collect all annotated frames
        self.samples = []
        for date_dir in glob.glob(f"{root_dir}/*"):
            for clip_dir in glob.glob(f"{date_dir}/*"):
                for frame in glob.glob(f"{clip_dir}/frame_*.jpg"):
                    json_file = frame.replace('.jpg', '.json')
                    if os.path.exists(json_file):
                        self.samples.append((frame, json_file))
    
    def __len__(self):
        return len(self.samples)
    
    def __getitem__(self, idx):
        img_path, ann_path = self.samples[idx]
        
        # Load image
        img = Image.open(img_path).convert('RGB')
        
        # Load annotations
        with open(ann_path) as f:
            annotations = json.load(f)
        
        # Convert to tensors
        boxes = []
        behaviors = []
        for ann in annotations:
            boxes.append(ann['bbox'])
            behaviors.append(ann['behavior'])
        
        target = {
            'boxes': torch.as_tensor(boxes, dtype=torch.float32),
            'labels': torch.as_tensor([behavior_to_idx[b] for b in behaviors], dtype=torch.int64)
        }
        
        if self.transform:
            img = self.transform(img)
            
        return img, target

In [9]:
#behaviour classification
# Behavior to index mapping
behavior_to_idx = {
    'lying': 0,
    'standing': 1,
    'walking': 2,
    'investigating': 3,
    'drinking': 4
}

# Model setup (example using Faster R-CNN)
import torchvision
from torchvision.models.detection import FasterRCNN
from torchvision.models.detection.rpn import AnchorGenerator

# Load a pretrained backbone
backbone = torchvision.models.mobilenet_v2(pretrained=True).features
backbone.out_channels = 1280

# Define anchor generator
anchor_generator = AnchorGenerator(
    sizes=((32, 64, 128, 256, 512),),
    aspect_ratios=((0.5, 1.0, 2.0),)
)

# Put it all together
model = FasterRCNN(
    backbone,
    num_classes=len(behavior_to_idx)+1,  # +1 for background
    rpn_anchor_generator=anchor_generator
)

In [10]:
#training
dataset = PigBehaviorDataset("processed")
dataloader = DataLoader(dataset, batch_size=4, shuffle=True, collate_fn=lambda x: x)

optimizer = torch.optim.SGD(model.parameters(), lr=0.005, momentum=0.9)

for epoch in range(10):
    for images, targets in dataloader:
        # Forward pass
        loss_dict = model(images, targets)
        losses = sum(loss for loss in loss_dict.values())
        
        # Backward pass
        optimizer.zero_grad()
        losses.backward()
        optimizer.step()

ValueError: num_samples should be a positive integer value, but got num_samples=0