## Conectando com o Drive

In [27]:
# Mount Google Drive at /content/drive so the notebook can read files stored under it
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


## Instalando as bibliotecas necessárias

In [28]:
# Install the required libraries.
# `%pip` (instead of `!pip`) installs into the environment of the running kernel.
# TODO: pin the versions (e.g. torch==X.Y.Z) once the working pair is known, for reproducibility.
%pip install -q torch torchvision



## Importando os dados

In [29]:
import cv2
import numpy as np
import matplotlib.pyplot as plt
import xml.etree.ElementTree as ET
import torch
from torch.utils.data import Dataset, DataLoader
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
import torch.optim as optim
from sklearn.model_selection import train_test_split
from tqdm import tqdm

# Paths to your XML annotations and video file
xml_file = '/content/drive/MyDrive/approach_nwot/xml/annotations_deteccao.xml'
video_file = '/content/drive/MyDrive/approach_nwot/videos/video_colorido_concatenado_10fps.mp4'

# Define crop intervals as (start, stop) slice bounds in pixels:
# frames are cropped with frame[rows[0]:rows[1], cols[0]:cols[1]], and the
# annotation boxes are shifted by rows[0] / cols[0] to match the cropped image.
color_crop_rows = (0, 720)
color_crop_cols = (250, 500)

def parse_annotations(xml_file):
    """Read per-frame bounding boxes from an XML annotation file.

    The file is expected to contain ``<track label=...>`` elements directly
    under the root, each holding ``<box>`` children with the attributes
    ``frame``, ``outside``, ``xtl``, ``ytl``, ``xbr`` and ``ybr``.

    Args:
        xml_file: Path (or file object) accepted by ``ET.parse``.

    Returns:
        dict mapping the integer frame id to
        ``{'boxes': [[xtl, ytl, xbr, ybr], ...], 'labels': [label, ...]}``,
        where coordinates are floats and both lists are index-aligned.
        Boxes whose ``outside`` attribute equals 1 are not included.
    """
    coord_keys = ('xtl', 'ytl', 'xbr', 'ybr')
    per_frame = {}

    for track in ET.parse(xml_file).getroot().findall('track'):
        track_label = track.get('label')

        for box_elem in track.findall('box'):
            frame_id = int(box_elem.get('frame'))

            # Boxes flagged as "outside" are ignored
            if int(box_elem.get('outside')) == 1:
                continue

            coords = [float(box_elem.get(key)) for key in coord_keys]

            # Create the entry for this frame on first use, then extend it
            entry = per_frame.setdefault(frame_id, {'boxes': [], 'labels': []})
            entry['boxes'].append(coords)
            entry['labels'].append(track_label)

    return per_frame

def adjust_annotations(annotations, crop_rows, crop_cols):
    """Translate boxes into the coordinate system of a cropped frame.

    Each box is shifted by the crop origin (``crop_cols[0]``, ``crop_rows[0]``).
    A box is kept only when it lies entirely inside the crop; boxes that
    extend past any crop edge are dropped (they are not clipped).

    Args:
        annotations: dict ``frame_id -> {'boxes': [...], 'labels': [...]}``.
        crop_rows: ``(start, stop)`` row bounds of the crop.
        crop_cols: ``(start, stop)`` column bounds of the crop.

    Returns:
        A new dict with the same layout containing only the frames that still
        have at least one box after filtering. The input is not modified.
    """
    x_offset = crop_cols[0]
    y_offset = crop_rows[0]
    crop_width = crop_cols[1] - crop_cols[0]
    crop_height = crop_rows[1] - crop_rows[0]

    result = {}
    for frame_id, data in annotations.items():
        kept_boxes = []
        kept_labels = []

        for (xtl, ytl, xbr, ybr), label in zip(data['boxes'], data['labels']):
            new_xtl = xtl - x_offset
            new_ytl = ytl - y_offset
            new_xbr = xbr - x_offset
            new_ybr = ybr - y_offset

            # Keep the box only when it is fully contained in the crop
            fully_inside = (
                new_xtl >= 0
                and new_ytl >= 0
                and new_xbr <= crop_width
                and new_ybr <= crop_height
            )
            if fully_inside:
                kept_boxes.append([new_xtl, new_ytl, new_xbr, new_ybr])
                kept_labels.append(label)

        # Frames left without any box are omitted from the result
        if kept_boxes:
            result[frame_id] = {'boxes': kept_boxes, 'labels': kept_labels}

    return result

# Parse the annotations and shift them into the cropped coordinate system
annotations = parse_annotations(xml_file)
adjusted_annotations = adjust_annotations(annotations, color_crop_rows, color_crop_cols)

# Prepare frames and targets
cap = cv2.VideoCapture(video_file)
if not cap.isOpened():
    # Fail here with a clear message instead of silently building an empty dataset
    raise IOError(f"Could not open video file: {video_file}")

frames = []
targets = []

try:
    frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    print("Total number of frames:", frame_count)

    # Only annotated frames are kept, so there is no point decoding the video
    # past the last annotated frame id (-1 when there are no annotations).
    last_annotated_frame = max(adjusted_annotations, default=-1)
    frames_to_read = min(frame_count, last_annotated_frame + 1)

    for frame_id in range(frames_to_read):
        ret, frame = cap.read()
        if not ret:
            break

        # Frames without annotations are skipped (they still have to be read
        # so that frame_id stays aligned with the video position).
        annots = adjusted_annotations.get(frame_id)
        if annots is None:
            continue

        # Crop the frame. `.copy()` detaches the crop from the full decoded
        # frame; a plain slice is a view that would keep the whole frame alive.
        cropped_frame = frame[color_crop_rows[0]:color_crop_rows[1],
                              color_crop_cols[0]:color_crop_cols[1]].copy()

        # Collect the annotations, truncating coordinates to integer pixels
        boxes = [[int(coord) for coord in box] for box in annots['boxes']]
        labels = list(annots['labels'])

        frames.append(cropped_frame)
        targets.append({'boxes': boxes, 'labels': labels})
finally:
    # Release the capture even if decoding fails part-way through
    cap.release()

# Map labels to integers
label_set = set()
for target in targets:
    label_set.update(target['labels'])
# Sorted for a deterministic mapping; ids start at 1 because 0 is background
label_map = {label: idx + 1 for idx, label in enumerate(sorted(label_set))}

# Update targets with integer labels
for target in targets:
    target['labels'] = [label_map[label] for label in target['labels']]

# Define the Dataset class
class CowHeadDataset(Dataset):
    """Detection dataset built from in-memory frames and their box annotations.

    Args:
        frames: Sequence of images; each one is converted with
            ``cv2.COLOR_BGR2RGB``, i.e. it is treated as a BGR array.
        targets: Sequence aligned with ``frames``; each item is a dict with
            ``'boxes'`` (list of ``[xtl, ytl, xbr, ybr]``) and ``'labels'``
            (list of integer class ids).
        transforms: Optional callable applied to the PIL image only (the
            boxes are not transformed).
    """

    def __init__(self, frames, targets, transforms=None):
        self.transforms = transforms
        self.targets = targets
        self.frames = frames

    def __len__(self):
        """Number of samples, i.e. the number of stored frames."""
        return len(self.frames)

    def __getitem__(self, idx):
        """Return ``(image, target_dict)`` for sample ``idx``.

        ``target_dict`` holds the tensors ``boxes`` (float32), ``labels``
        (int64), ``image_id``, ``area`` and ``iscrowd`` (all zeros).
        """
        frame_bgr = self.frames[idx]
        annotation = self.targets[idx]

        # BGR array -> RGB -> PIL image
        frame_rgb = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGB)
        img = torchvision.transforms.ToPILImage()(frame_rgb)

        # Boxes are [xtl, ytl, xbr, ybr]; area = height * width
        boxes = torch.as_tensor(annotation['boxes'], dtype=torch.float32)
        heights = boxes[:, 3] - boxes[:, 1]
        widths = boxes[:, 2] - boxes[:, 0]

        target_dict = {
            'boxes': boxes,
            'labels': torch.as_tensor(annotation['labels'], dtype=torch.int64),
            'image_id': torch.tensor([idx]),
            'area': heights * widths,
            'iscrowd': torch.zeros((boxes.shape[0],), dtype=torch.int64),
        }

        # Optional image-only transform
        if self.transforms:
            img = self.transforms(img)

        return img, target_dict

Total number of frames: 81992




Total parameters in the model: 41299161
Total trainable parameters: 41076761


## Criando dataset e definindo o modelo

In [None]:
def _collate_detection_batch(batch):
    """Turn a list of (image, target) samples into an (images, targets) pair of tuples."""
    return tuple(zip(*batch))


# Create the dataset and split its indices 80/20 into train and validation
dataset = CowHeadDataset(frames, targets, transforms=torchvision.transforms.ToTensor())
all_indices = range(len(dataset))
train_indices, val_indices = train_test_split(all_indices, test_size=0.2, random_state=42)
train_dataset = torch.utils.data.Subset(dataset, train_indices)
val_dataset = torch.utils.data.Subset(dataset, val_indices)

# Dataloaders: samples are grouped with the collate helper above instead of being stacked
train_loader = DataLoader(
    train_dataset,
    batch_size=4,
    shuffle=True,
    collate_fn=_collate_detection_batch,
)
val_loader = DataLoader(
    val_dataset,
    batch_size=4,
    shuffle=False,
    collate_fn=_collate_detection_batch,
)

# Define the model: pretrained Faster R-CNN with its box predictor replaced
# by a new head sized for this dataset's classes (+1 for background)
num_classes = len(label_map) + 1
# NOTE(review): `pretrained=True` is presumably the legacy spelling in recent
# torchvision releases (newer API uses `weights=`) — confirm against the installed version.
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

# Count the number of parameters in a single pass over the model
param_info = [(p.numel(), p.requires_grad) for p in model.parameters()]
total_params = sum(count for count, _ in param_info)
trainable_params = sum(count for count, needs_grad in param_info if needs_grad)

print(f"Total parameters in the model: {total_params}")
print(f"Total trainable parameters: {trainable_params}")

## Treinando o modelo

In [32]:
# Training setup
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
model.to(device)
params = [p for p in model.parameters() if p.requires_grad]
optimizer = optim.SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005)
num_epochs = 100  # Adjust the number of epochs as needed

# Training loop
for epoch in range(num_epochs):
    model.train()
    epoch_loss = 0.0
    # The batch variables are deliberately NOT called `targets`: that name is
    # the notebook-level list used to build the dataset, and rebinding it here
    # would break a later re-run of the dataset cell.
    for batch_images, batch_targets in tqdm(train_loader):
        batch_images = [img.to(device) for img in batch_images]
        batch_targets = [{k: v.to(device) for k, v in t.items()} for t in batch_targets]

        # In train mode the model is called with targets and returns a dict of
        # loss tensors; their sum is the quantity being optimised.
        loss_dict = model(batch_images, batch_targets)
        losses = sum(loss_dict.values())
        epoch_loss += losses.item()

        optimizer.zero_grad()
        losses.backward()
        optimizer.step()
    # Mean loss per batch over the epoch
    print(f"Epoch {epoch+1}, Loss: {epoch_loss/len(train_loader)}")


100%|██████████| 15/15 [00:10<00:00,  1.40it/s]


Epoch 1, Loss: 0.0500256617863973


100%|██████████| 15/15 [00:11<00:00,  1.34it/s]


Epoch 2, Loss: 0.04520623510082563


100%|██████████| 15/15 [00:11<00:00,  1.32it/s]


Epoch 3, Loss: 0.042717597509423895


100%|██████████| 15/15 [00:11<00:00,  1.34it/s]


Epoch 4, Loss: 0.038692340006430945


100%|██████████| 15/15 [00:11<00:00,  1.36it/s]


Epoch 5, Loss: 0.031075355658928553


100%|██████████| 15/15 [00:10<00:00,  1.38it/s]


Epoch 6, Loss: 0.02854727494219939


100%|██████████| 15/15 [00:10<00:00,  1.38it/s]


Epoch 7, Loss: 0.028588796158631642


100%|██████████| 15/15 [00:10<00:00,  1.38it/s]


Epoch 8, Loss: 0.022421305254101753


100%|██████████| 15/15 [00:10<00:00,  1.37it/s]


Epoch 9, Loss: 0.021941665187478067


100%|██████████| 15/15 [00:11<00:00,  1.36it/s]


Epoch 10, Loss: 0.021859027072787286


100%|██████████| 15/15 [00:11<00:00,  1.36it/s]


Epoch 11, Loss: 0.022481579209367435


100%|██████████| 15/15 [00:11<00:00,  1.36it/s]


Epoch 12, Loss: 0.022465495206415654


100%|██████████| 15/15 [00:11<00:00,  1.36it/s]


Epoch 13, Loss: 0.019998469079534214


100%|██████████| 15/15 [00:10<00:00,  1.37it/s]


Epoch 14, Loss: 0.01846988368779421


100%|██████████| 15/15 [00:10<00:00,  1.37it/s]


Epoch 15, Loss: 0.016955371821920077


100%|██████████| 15/15 [00:10<00:00,  1.37it/s]


Epoch 16, Loss: 0.015105853974819183


100%|██████████| 15/15 [00:10<00:00,  1.37it/s]


Epoch 17, Loss: 0.013982832493881384


100%|██████████| 15/15 [00:10<00:00,  1.37it/s]


Epoch 18, Loss: 0.013478715904057026


100%|██████████| 15/15 [00:10<00:00,  1.37it/s]


Epoch 19, Loss: 0.013251792391141255


100%|██████████| 15/15 [00:11<00:00,  1.36it/s]


Epoch 20, Loss: 0.012800544251998265


100%|██████████| 15/15 [00:11<00:00,  1.36it/s]


Epoch 21, Loss: 0.014947186720867952


100%|██████████| 15/15 [00:10<00:00,  1.36it/s]


Epoch 22, Loss: 0.014842576595644157


100%|██████████| 15/15 [00:10<00:00,  1.37it/s]


Epoch 23, Loss: 0.01767330088963111


100%|██████████| 15/15 [00:10<00:00,  1.37it/s]


Epoch 24, Loss: 0.015461222268640995


100%|██████████| 15/15 [00:10<00:00,  1.37it/s]


Epoch 25, Loss: 0.013507809241612751


100%|██████████| 15/15 [00:10<00:00,  1.37it/s]


Epoch 26, Loss: 0.012171666386226814


100%|██████████| 15/15 [00:10<00:00,  1.37it/s]


Epoch 27, Loss: 0.012531565502285957


100%|██████████| 15/15 [00:11<00:00,  1.36it/s]


Epoch 28, Loss: 0.01168876929829518


100%|██████████| 15/15 [00:11<00:00,  1.36it/s]


Epoch 29, Loss: 0.01177396128575007


100%|██████████| 15/15 [00:10<00:00,  1.37it/s]


Epoch 30, Loss: 0.010804286474982898


100%|██████████| 15/15 [00:10<00:00,  1.37it/s]


Epoch 31, Loss: 0.011885684759666523


100%|██████████| 15/15 [00:10<00:00,  1.37it/s]


Epoch 32, Loss: 0.014253593360384306


100%|██████████| 15/15 [00:10<00:00,  1.37it/s]


Epoch 33, Loss: 0.013040630860875051


100%|██████████| 15/15 [00:10<00:00,  1.37it/s]


Epoch 34, Loss: 0.013132496550679208


100%|██████████| 15/15 [00:10<00:00,  1.37it/s]


Epoch 35, Loss: 0.011943343126525481


100%|██████████| 15/15 [00:10<00:00,  1.37it/s]


Epoch 36, Loss: 0.010679795903464158


100%|██████████| 15/15 [00:10<00:00,  1.36it/s]


Epoch 37, Loss: 0.009280300854394833


100%|██████████| 15/15 [00:10<00:00,  1.36it/s]


Epoch 38, Loss: 0.009432066914935906


100%|██████████| 15/15 [00:10<00:00,  1.37it/s]


Epoch 39, Loss: 0.01061465973034501


100%|██████████| 15/15 [00:10<00:00,  1.37it/s]


Epoch 40, Loss: 0.009610622593512138


100%|██████████| 15/15 [00:10<00:00,  1.37it/s]


Epoch 41, Loss: 0.010710430517792701


100%|██████████| 15/15 [00:10<00:00,  1.37it/s]


Epoch 42, Loss: 0.011103912846495707


100%|██████████| 15/15 [00:10<00:00,  1.37it/s]


Epoch 43, Loss: 0.011249647134294112


100%|██████████| 15/15 [00:10<00:00,  1.36it/s]


Epoch 44, Loss: 0.010879618519296249


100%|██████████| 15/15 [00:11<00:00,  1.36it/s]


Epoch 45, Loss: 0.010541179000089567


100%|██████████| 15/15 [00:10<00:00,  1.37it/s]


Epoch 46, Loss: 0.008668610608826082


100%|██████████| 15/15 [00:10<00:00,  1.37it/s]


Epoch 47, Loss: 0.008191309062143166


100%|██████████| 15/15 [00:10<00:00,  1.37it/s]


Epoch 48, Loss: 0.007548483290399114


100%|██████████| 15/15 [00:10<00:00,  1.37it/s]


Epoch 49, Loss: 0.007964725140482187


100%|██████████| 15/15 [00:10<00:00,  1.37it/s]


Epoch 50, Loss: 0.007903222957005103


100%|██████████| 15/15 [00:10<00:00,  1.37it/s]


Epoch 51, Loss: 0.008357648675640424


100%|██████████| 15/15 [00:10<00:00,  1.37it/s]


Epoch 52, Loss: 0.008117486909031867


100%|██████████| 15/15 [00:10<00:00,  1.37it/s]


Epoch 53, Loss: 0.008514078675458829


100%|██████████| 15/15 [00:10<00:00,  1.37it/s]


Epoch 54, Loss: 0.008121336965511242


100%|██████████| 15/15 [00:10<00:00,  1.37it/s]


Epoch 55, Loss: 0.011088095139712095


100%|██████████| 15/15 [00:10<00:00,  1.37it/s]


Epoch 56, Loss: 0.010756445396691561


100%|██████████| 15/15 [00:10<00:00,  1.37it/s]


Epoch 57, Loss: 0.00960309775546193


100%|██████████| 15/15 [00:10<00:00,  1.37it/s]


Epoch 58, Loss: 0.00836073827619354


100%|██████████| 15/15 [00:10<00:00,  1.37it/s]


Epoch 59, Loss: 0.00894675636664033


100%|██████████| 15/15 [00:10<00:00,  1.37it/s]


Epoch 60, Loss: 0.007853489896903435


100%|██████████| 15/15 [00:10<00:00,  1.37it/s]


Epoch 61, Loss: 0.0072843298626442754


100%|██████████| 15/15 [00:10<00:00,  1.37it/s]


Epoch 62, Loss: 0.006918978784233332


100%|██████████| 15/15 [00:10<00:00,  1.37it/s]


Epoch 63, Loss: 0.007270065974444151


100%|██████████| 15/15 [00:11<00:00,  1.36it/s]


Epoch 64, Loss: 0.00667716107952098


100%|██████████| 15/15 [00:11<00:00,  1.36it/s]


Epoch 65, Loss: 0.0067976101612051325


100%|██████████| 15/15 [00:10<00:00,  1.37it/s]


Epoch 66, Loss: 0.007552044248829285


100%|██████████| 15/15 [00:10<00:00,  1.37it/s]


Epoch 67, Loss: 0.006534537393599748


100%|██████████| 15/15 [00:10<00:00,  1.37it/s]


Epoch 68, Loss: 0.006570879897723595


100%|██████████| 15/15 [00:10<00:00,  1.37it/s]


Epoch 69, Loss: 0.006268517641971508


100%|██████████| 15/15 [00:10<00:00,  1.37it/s]


Epoch 70, Loss: 0.007803242063770691


100%|██████████| 15/15 [00:10<00:00,  1.37it/s]


Epoch 71, Loss: 0.006541486922651529


100%|██████████| 15/15 [00:10<00:00,  1.37it/s]


Epoch 72, Loss: 0.0068905923515558245


100%|██████████| 15/15 [00:10<00:00,  1.37it/s]


Epoch 73, Loss: 0.006597171071916819


100%|██████████| 15/15 [00:10<00:00,  1.37it/s]


Epoch 74, Loss: 0.007120538658152024


100%|██████████| 15/15 [00:10<00:00,  1.37it/s]


Epoch 75, Loss: 0.006498626464356979


100%|██████████| 15/15 [00:10<00:00,  1.37it/s]


Epoch 76, Loss: 0.007892819090435902


100%|██████████| 15/15 [00:10<00:00,  1.37it/s]


Epoch 77, Loss: 0.006277887662872672


100%|██████████| 15/15 [00:10<00:00,  1.36it/s]


Epoch 78, Loss: 0.006227579743911822


100%|██████████| 15/15 [00:10<00:00,  1.37it/s]


Epoch 79, Loss: 0.0058018673832217855


100%|██████████| 15/15 [00:10<00:00,  1.37it/s]


Epoch 80, Loss: 0.006946148723363877


100%|██████████| 15/15 [00:10<00:00,  1.37it/s]


Epoch 81, Loss: 0.011352210802336534


100%|██████████| 15/15 [00:10<00:00,  1.37it/s]


Epoch 82, Loss: 0.012297073379158974


100%|██████████| 15/15 [00:11<00:00,  1.36it/s]


Epoch 83, Loss: 0.011249029201765855


100%|██████████| 15/15 [00:11<00:00,  1.36it/s]


Epoch 84, Loss: 0.008996288136889538


100%|██████████| 15/15 [00:10<00:00,  1.36it/s]


Epoch 85, Loss: 0.010124681983143091


100%|██████████| 15/15 [00:10<00:00,  1.37it/s]


Epoch 86, Loss: 0.008026736478010813


100%|██████████| 15/15 [00:10<00:00,  1.38it/s]


Epoch 87, Loss: 0.0068102001833419004


100%|██████████| 15/15 [00:10<00:00,  1.37it/s]


Epoch 88, Loss: 0.007953545513252417


100%|██████████| 15/15 [00:10<00:00,  1.37it/s]


Epoch 89, Loss: 0.006073700698713462


100%|██████████| 15/15 [00:10<00:00,  1.37it/s]


Epoch 90, Loss: 0.007386318780481816


100%|██████████| 15/15 [00:11<00:00,  1.36it/s]


Epoch 91, Loss: 0.005857235348472992


100%|██████████| 15/15 [00:11<00:00,  1.36it/s]


Epoch 92, Loss: 0.006863554411878189


100%|██████████| 15/15 [00:10<00:00,  1.37it/s]


Epoch 93, Loss: 0.0062397093201677


100%|██████████| 15/15 [00:10<00:00,  1.37it/s]


Epoch 94, Loss: 0.007042252365499735


100%|██████████| 15/15 [00:10<00:00,  1.37it/s]


Epoch 95, Loss: 0.006845310050994158


100%|██████████| 15/15 [00:10<00:00,  1.37it/s]


Epoch 96, Loss: 0.007892053915808599


100%|██████████| 15/15 [00:10<00:00,  1.37it/s]


Epoch 97, Loss: 0.007802837993949652


100%|██████████| 15/15 [00:10<00:00,  1.37it/s]


Epoch 98, Loss: 0.007488517463207245


100%|██████████| 15/15 [00:10<00:00,  1.37it/s]


Epoch 99, Loss: 0.007535279635339976


100%|██████████| 15/15 [00:10<00:00,  1.37it/s]

Epoch 100, Loss: 0.009284294582903386





## Analisando o resultado do modelo fazendo inferência em um vídeo importado

In [34]:
# Specify the range of frames you want to analyze
start_frame = 1500  # Start frame (inclusive)
end_frame = 1600    # End frame (exclusive)
score_threshold = 0.5  # Minimum confidence for a detection to be drawn

# Invert label_map once (class id -> label name) instead of searching it per box
id_to_label = {class_id: name for name, class_id in label_map.items()}

# Inference mode is set once, outside the frame loop
model.eval()

# Load the video
cap = cv2.VideoCapture(video_file)
try:
    # Seek once; every subsequent read() advances to the next frame, so
    # re-positioning the capture on each iteration is unnecessary.
    cap.set(cv2.CAP_PROP_POS_FRAMES, start_frame)

    for frame_id in range(start_frame, end_frame):
        ret, frame = cap.read()
        if not ret:
            break

        # Crop the frame with the same window used for the training data
        cropped_frame = frame[color_crop_rows[0]:color_crop_rows[1], color_crop_cols[0]:color_crop_cols[1]]

        # Convert to an RGB tensor on the model's device
        img_rgb = cv2.cvtColor(cropped_frame, cv2.COLOR_BGR2RGB)
        img_tensor = torchvision.transforms.ToTensor()(img_rgb).to(device)

        # Model evaluation without gradients
        with torch.no_grad():
            output = model([img_tensor])

        # Extract boxes, scores, and labels
        boxes = output[0]['boxes'].cpu().numpy()
        scores = output[0]['scores'].cpu().numpy()
        labels = output[0]['labels'].cpu().numpy()

        # Draw directly on a copy of the RGB image (the one matplotlib shows),
        # which avoids an RGB -> BGR -> RGB round trip.
        img = img_rgb.copy()
        for box, score, label in zip(boxes, scores, labels):
            if score > score_threshold:
                xtl, ytl, xbr, ybr = (int(coord) for coord in box)
                cv2.rectangle(img, (xtl, ytl), (xbr, ybr), (0, 255, 0), 2)
                label_name = id_to_label[int(label)]
                # (12, 255, 36) in RGB is the colour previously obtained by
                # drawing (36, 255, 12) on a BGR image and converting it.
                cv2.putText(img, f"{label_name}: {score:.2f}", (xtl, ytl-10),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (12, 255, 36), 2)

        plt.figure(figsize=(12, 8))
        plt.imshow(img)
        plt.axis('off')
        plt.show()
finally:
    # Release the video even if inference or plotting raises
    cap.release()


A seguir, salvamos tanto os pesos do modelo quanto o modelo completo.

In [33]:
# Save only the model weights (state_dict)
torch.save(model.state_dict(), 'modelo_deteccao_cabeca.pth')

# Save the complete model object (architecture included)
torch.save(model, 'modelo_deteccao_cabeca_completo.pth')

## Visualizando os resultados

In [None]:
# Visualization of results: ground truth (green) vs. predictions (red) on validation samples
import random

model.eval()

# random.sample raises ValueError when asked for more items than are
# available, so the request is capped at the size of the validation split.
num_images_to_show = min(40, len(val_dataset))
# Only plot predicted boxes with score above this threshold
threshold = 0.5

for idx in random.sample(range(len(val_dataset)), num_images_to_show):
    img, target = val_dataset[idx]
    # The dataset is built with the ToTensor() transform, which already yields
    # a float tensor scaled to [0, 1]; dividing by 255 again would feed the
    # model an almost black image, so the tensor is only moved to the device.
    img = img.to(device)

    # Run the model
    with torch.no_grad():
        prediction = model([img])

    # Convert image to CPU / numpy, CHW -> HWC, and back to 8-bit for display
    img_np = np.transpose(img.cpu().numpy(), (1, 2, 0))
    img_np = np.clip(np.rint(img_np * 255), 0, 255).astype(np.uint8)

    # Plot the image
    fig, ax = plt.subplots(1, figsize=(12, 9))
    ax.imshow(img_np)

    # Plot ground truth boxes in green
    boxes = target['boxes'].cpu().numpy()
    for box in boxes:
        rect = plt.Rectangle((box[0], box[1]), box[2]-box[0], box[3]-box[1],
                             linewidth=2, edgecolor='g', facecolor='none')
        ax.add_patch(rect)

    # Plot predicted boxes in red
    pred_boxes = prediction[0]['boxes'].cpu().numpy()
    pred_scores = prediction[0]['scores'].cpu().numpy()
    for box, score in zip(pred_boxes, pred_scores):
        if score > threshold:
            rect = plt.Rectangle((box[0], box[1]), box[2]-box[0], box[3]-box[1],
                                 linewidth=2, edgecolor='r', facecolor='none')
            ax.add_patch(rect)

    plt.axis('off')
    plt.show()


Output hidden; open in https://colab.research.google.com to view.