In [None]:
import os
import torch
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import xml.etree.ElementTree as ET
import torchvision.transforms as transforms
import multiprocessing
import sys

# Add YOLOv7 to the Python path
# NOTE: this is a machine-specific absolute path. It has to point at a local
# checkout of the YOLOv7 repository, otherwise the import below cannot resolve.
sys.path.append(r"C:\Users\NiravG\yolov7")  # Replace with your yolov7 repo path

# Import YOLOv7 specific loading function
# This import must stay below the sys.path change above; presumably the
# `models` package lives inside the YOLOv7 checkout -- verify against your repo.
from models.experimental import attempt_load

# Preprocess function for YOLOv7
def preprocess_image(image, img_size=640):
    """Resize an image to a square and convert it to a normalised tensor.

    Args:
        image: Input image accepted by the torchvision transforms below
            (the dataset in this file passes an RGB PIL image).
        img_size: Edge length, in pixels, of the square output.

    Returns:
        The result of applying resize -> ToTensor -> Normalize to ``image``.

    NOTE(review): the resize stretches to ``img_size x img_size`` without
    preserving aspect ratio, and ImageNet mean/std normalisation is applied.
    Confirm that this matches the preprocessing the loaded weights expect.
    """
    imagenet_mean = [0.485, 0.456, 0.406]
    imagenet_std = [0.229, 0.224, 0.225]

    steps = [
        transforms.Resize((img_size, img_size)),
        transforms.ToTensor(),
        transforms.Normalize(imagenet_mean, imagenet_std),
    ]
    pipeline = transforms.Compose(steps)
    return pipeline(image)

# Convert boxes to YOLO format (class, x_center, y_center, width, height)
def convert_boxes_to_yolo_format(boxes, labels, img_width, img_height):
    """Convert corner-format boxes into normalised YOLO rows.

    Args:
        boxes: Iterable of ``(xmin, ymin, xmax, ymax)`` boxes in pixels.
        labels: Iterable of class labels, paired positionally with ``boxes``.
        img_width: Width of the image the boxes refer to, in pixels.
        img_height: Height of the image the boxes refer to, in pixels.

    Returns:
        A list with one ``[label, x_center, y_center, width, height]`` entry
        per (box, label) pair; the four geometry values are divided by the
        image width/height. Pairing stops at the shorter of the two inputs.
    """
    return [
        [
            class_id,
            ((left + right) / 2.0) / img_width,
            ((top + bottom) / 2.0) / img_height,
            (right - left) / img_width,
            (bottom - top) / img_height,
        ]
        for (left, top, right, bottom), class_id in zip(boxes, labels)
    ]

# PascalVOCDataset class for YOLOv7
class PascalVOCDataset(Dataset):
    """Pascal VOC detection dataset yielding (image tensor, YOLO targets).

    Each item pairs an image from ``image_dir`` with the XML annotation of the
    same base name in ``annotations_dir``.

    Args:
        image_dir: Directory containing the image files.
        annotations_dir: Directory containing one ``<basename>.xml`` per image.
        img_size: Square size passed to ``preprocess_image``.
    """

    # Extensions (lower-case) treated as images when scanning ``image_dir``.
    # Anything else (e.g. Thumbs.db, desktop.ini, sub-directories) is skipped
    # instead of blowing up later inside ``Image.open``.
    IMAGE_EXTENSIONS = (".jpg", ".jpeg", ".png", ".bmp")

    def __init__(self, image_dir, annotations_dir, img_size=640):
        super().__init__()
        self.image_dir = image_dir
        self.annotations_dir = annotations_dir
        # os.listdir returns entries in arbitrary order; sort so that an index
        # always refers to the same image across runs and platforms.
        self.image_filenames = sorted(
            name
            for name in os.listdir(image_dir)
            if name.lower().endswith(self.IMAGE_EXTENSIONS)
        )
        self.img_size = img_size

    def __len__(self):
        """Return the number of image files found in ``image_dir``."""
        return len(self.image_filenames)

    def __getitem__(self, index):
        """Load one sample.

        Returns:
            A tuple ``(image, targets)`` where ``image`` is the output of
            ``preprocess_image`` and ``targets`` is a float32 tensor of shape
            ``(num_objects, 5)`` with rows
            ``[class, x_center, y_center, width, height]`` (normalised).
            An image without objects yields a ``(0, 5)`` tensor.

        Raises:
            FileNotFoundError: If the image or its XML annotation is missing.
            ValueError: If the annotation names a class not in VOC_CLASSES.
        """
        # Load image
        image_filename = self.image_filenames[index]
        image_path = os.path.join(self.image_dir, image_filename)
        # The context manager closes the underlying file handle; convert()
        # returns an independent in-memory copy, so it stays usable afterwards.
        with Image.open(image_path) as raw_image:
            rgb_image = raw_image.convert("RGB")
        # Original size is needed to normalise the pixel-space boxes.
        img_width, img_height = rgb_image.size

        # Preprocess image for YOLOv7
        image = preprocess_image(rgb_image, self.img_size)

        # Load corresponding annotation (XML file)
        annotation_filename = os.path.splitext(image_filename)[0] + '.xml'
        annotation_path = os.path.join(self.annotations_dir, annotation_filename)
        boxes, labels = self._parse_annotation(annotation_path)

        # Convert boxes to YOLO format
        yolo_rows = convert_boxes_to_yolo_format(boxes, labels, img_width, img_height)

        # Convert to tensor. The explicit reshape keeps the result 2-D even
        # when there are no objects: torch.tensor([]) alone would be shape (0,).
        yolo_boxes = torch.tensor(yolo_rows, dtype=torch.float32).reshape(len(yolo_rows), 5)

        return image, yolo_boxes

    def _parse_annotation(self, annotation_path):
        """Parse a Pascal VOC XML file.

        Args:
            annotation_path: Path to the XML annotation file.

        Returns:
            ``(boxes, labels)`` where ``boxes`` is a list of
            ``[xmin, ymin, xmax, ymax]`` floats and ``labels`` is the list of
            matching indices into ``VOC_CLASSES``.

        Raises:
            ValueError: If an object's class name is missing or is not one of
                ``VOC_CLASSES``.
        """
        root = ET.parse(annotation_path).getroot()

        boxes = []
        labels = []

        for obj in root.findall('object'):
            # Pretty-printed XML can carry whitespace around the class name.
            label = (obj.findtext('name') or '').strip()
            try:
                label_idx = VOC_CLASSES.index(label)
            except ValueError:
                raise ValueError(
                    f"Unknown class {label!r} in annotation {annotation_path!r}"
                ) from None

            bbox = obj.find('bndbox')
            boxes.append([
                float(bbox.find('xmin').text),
                float(bbox.find('ymin').text),
                float(bbox.find('xmax').text),
                float(bbox.find('ymax').text),
            ])
            labels.append(label_idx)

        return boxes, labels

# VOC classes
# The position of a name in this list is the class index used in the targets.
VOC_CLASSES = [
    "aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat",
    "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person",
    "pottedplant", "sheep", "sofa", "train", "tvmonitor"
]

# Load the YOLOv7 checkpoint from a local .pt file onto the chosen device
model_path = r'C:\Users\NiravG\yolov7.pt'
device = torch.device('cpu')  # Use 'cuda' if GPU is available
model = attempt_load(model_path, map_location=device)

# Keep only the parameters an optimizer can actually update: leaf tensors
# that have gradients enabled.
# NOTE(review): the captured cell output shows "Fusing layers..." while the
# checkpoint loads; fused layers may come back with gradients disabled, which
# would shrink this list -- confirm against attempt_load in your checkout.
leaf_parameters = [
    param for param in model.parameters()
    if param.is_leaf and param.requires_grad
]

# Surface the "nothing to train" situation early
if not leaf_parameters:
    print("Warning: No leaf parameters found that require gradients.")

# Switch the model to training mode
model.train()

# Paths to image and annotations directories
# NOTE(review): the "test" paths are identical to the training paths, so
# anything measured on test_loader is measured on the training images.
train_image_dir = r'C:\Users\NiravG\Downloads\archive (8)\VOC2012\JPEGImages'
train_annotations_dir = r'C:\Users\NiravG\Downloads\archive (8)\VOC2012\Annotations'
test_image_dir = r'C:\Users\NiravG\Downloads\archive (8)\VOC2012\JPEGImages'
test_annotations_dir = r'C:\Users\NiravG\Downloads\archive (8)\VOC2012\Annotations'


def collate_detection_batch(batch):
    """Collate (image, targets) samples whose target counts differ per image.

    The default DataLoader collate tries to stack every field, which fails as
    soon as two images in a batch contain a different number of boxes.

    Args:
        batch: List of ``(image_tensor, targets_tensor)`` samples.

    Returns:
        ``(images, targets)`` where ``images`` is the stacked image tensor and
        ``targets`` is a list holding each sample's own targets tensor.
    """
    images, targets = zip(*batch)
    return torch.stack(images, dim=0), list(targets)


# Load the dataset and prepare the data loader
# Worker processes are started with "spawn" on Windows, which requires the
# dataset and collate function to be importable from a module and the script
# to be guarded by `if __name__ == "__main__":`. Neither holds for code run
# from a notebook cell, so load in the main process there.
num_workers = 0 if os.name == "nt" else multiprocessing.cpu_count()
train_data = PascalVOCDataset(train_image_dir, train_annotations_dir, img_size=640)
test_data = PascalVOCDataset(test_image_dir, test_annotations_dir, img_size=640)

train_loader = DataLoader(
    train_data,
    batch_size=16,
    shuffle=True,
    num_workers=num_workers,
    collate_fn=collate_detection_batch,
)
test_loader = DataLoader(
    test_data,
    batch_size=16,
    shuffle=False,
    num_workers=num_workers,
    collate_fn=collate_detection_batch,
)

# Set up optimizer (SGD for YOLOv7 training)
# `momentum` is an SGD argument; torch.optim.Adam does not accept it and
# raises TypeError, so use the optimizer the comment above actually names.
optimizer = torch.optim.SGD(leaf_parameters, lr=0.001, momentum=0.9)

# Training loop for YOLOv7
# The model's forward pass does not compute a loss: calling
# `model(images, targets)` hands the targets to the forward's second positional
# parameter instead. The loss comes from the repo's ComputeLoss helper.
# NOTE(review): ComputeLoss, model.hyp and model.gr follow the YOLOv7 repo's
# train.py / utils/loss.py -- confirm against your checkout.
from utils.loss import ComputeLoss  # resolved via the YOLOv7 path added to sys.path

# Loss hyperparameters read by ComputeLoss. Only set when the checkpoint did
# not already carry them.
# NOTE(review): values presumably mirror data/hyp.scratch.p5.yaml -- verify.
if not hasattr(model, "hyp"):
    model.hyp = {
        "box": 0.05,
        "cls": 0.3,
        "cls_pw": 1.0,
        "obj": 0.7,
        "obj_pw": 1.0,
        "anchor_t": 4.0,
        "fl_gamma": 0.0,
    }
if not hasattr(model, "gr"):
    model.gr = 1.0

compute_loss = ComputeLoss(model)

for epoch in range(10):  # 10 epochs as an example
    running_loss = 0.0
    for i, (images, targets) in enumerate(train_loader):
        images = images.to(device)

        # Merge the per-image (n_i, 5) targets into one (N, 6) tensor whose
        # first column is the image's position in the batch, i.e. rows of
        # [image_idx, class, x_center, y_center, width, height].
        target_rows = []
        for image_idx, image_targets in enumerate(targets):
            image_targets = image_targets.to(device)
            if image_targets.numel() == 0:
                continue  # image without objects contributes no rows
            image_targets = image_targets.reshape(-1, 5)
            index_column = torch.full(
                (image_targets.shape[0], 1), float(image_idx), device=device
            )
            target_rows.append(torch.cat((index_column, image_targets), dim=1))
        if target_rows:
            yolo_targets = torch.cat(target_rows, dim=0)
        else:
            yolo_targets = torch.zeros((0, 6), device=device)

        # Zero the gradient
        optimizer.zero_grad()

        # Forward pass, then compute the loss from the raw predictions
        predictions = model(images)
        loss, loss_items = compute_loss(predictions, yolo_targets)

        # Ensure the loss is scalar before backward pass
        loss = loss.sum() if isinstance(loss, torch.Tensor) and loss.ndim > 0 else loss

        loss.backward()

        # Optimizer step
        optimizer.step()

        batch_loss = loss.item()
        running_loss += batch_loss
        if i % 10 == 0:
            print(f"Batch {i}, Loss: {batch_loss}")

    # Print average loss for the epoch
    print(f"Epoch {epoch+1} finished. Average Loss: {running_loss / len(train_loader)}")


  ckpt = torch.load(w, map_location=map_location)  # load


Fusing layers... 
RepConv.fuse_repvgg_block
RepConv.fuse_repvgg_block
RepConv.fuse_repvgg_block


  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]
