In [49]:
import os
import torch
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader, Subset
from torchvision.utils import draw_bounding_boxes
from PIL import Image, ImageDraw
from bs4 import BeautifulSoup
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from sklearn.model_selection import train_test_split


for dirname, _, filenames in os.walk("dataset"):
    for filename in filenames:
        print(os.path.join(dirname, filename))
        

dataset/images/62.PNG
dataset/images/121.PNG
dataset/images/149.PNG
dataset/images/24.PNG
dataset/images/143.PNG
dataset/images/227.png
dataset/images/43.PNG
dataset/images/106.PNG
dataset/images/172.PNG
dataset/images/266.png
dataset/images/71.PNG
dataset/images/199.PNG
dataset/images/49.PNG
dataset/images/36.PNG
dataset/images/170.PNG
dataset/images/110.PNG
dataset/images/139.PNG
dataset/images/2.PNG
dataset/images/191.PNG
dataset/images/85.PNG
dataset/images/130.PNG
dataset/images/216.png
dataset/images/46.PNG
dataset/images/161.PNG
dataset/images/184.PNG
dataset/images/182.PNG
dataset/images/190.PNG
dataset/images/134.PNG
dataset/images/140.PNG
dataset/images/30.PNG
dataset/images/76.PNG
dataset/images/153.PNG
dataset/images/154.PNG
dataset/images/219.png
dataset/images/192.PNG
dataset/images/18.PNG
dataset/images/138.PNG
dataset/images/14.PNG
dataset/images/73.PNG
dataset/images/162.PNG
dataset/images/10.PNG
dataset/images/25.PNG
dataset/images/96.PNG
dataset/images/174.PNG
datase

In [50]:
# define the directories paths
labels_dir = "dataset/labels"
images_dir = "dataset/images"
root_dir = "dataset"

In [51]:
def show_transformed_image(dataset, idx):
    """
    Display a transformed image from the dataset along with its bounding boxes.

    Args:
        dataset (pool_sat_Dataset): The dataset object.
        idx (int): The index of the image to display.

    Returns:
        None
    """
    image, target = dataset[idx]
    image = image.permute(1, 2, 0)  # Convert from CxHxW to HxWxC
    image = image.numpy()
    fig, ax = plt.subplots(1)
    ax.imshow(image)
    print(target)
    for bbox in target['boxes']:
        rect = patches.Rectangle((bbox[0], bbox[1]), bbox[2]-bbox[0], bbox[3]-bbox[1], linewidth=1, edgecolor='r', facecolor='none')
        ax.add_patch(rect)
    plt.show()

In [52]:
transforms = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),  # Match training normalization
    transforms.ToTensor()
])


In [53]:
def parse_xml(xml_file):
    """ Parse the xml file and return the image data """
    with open(xml_file) as f:
        data = f.read()
    soup = BeautifulSoup(data, "xml")
    
    
    image_data = {
        "filename": soup.find("filename").text,
        "width": int(soup.find("size").find("width").text),
        "height": int(soup.find("size").find("height").text),
        "depth": int(soup.find("size").find("depth").text),
        "xmin": [],
        "ymin": [],
        "xmax": [],
        "ymax": [],
    }
    
    for obj in soup.find_all("object"):
        image_data["xmin"].append(int(obj.find("bndbox").find("xmin").text))
        image_data["ymin"].append(int(obj.find("bndbox").find("ymin").text))
        image_data["xmax"].append(int(obj.find("bndbox").find("xmax").text))
        image_data["ymax"].append(int(obj.find("bndbox").find("ymax").text))
        
    return image_data

In [54]:
all_images = []

for label_file in os.listdir(labels_dir):
    if label_file.endswith('.xml'):
        xml_path = os.path.join(labels_dir, label_file)
        image_data = parse_xml(xml_path)
        all_images.append(image_data)
        
df = pd.DataFrame(all_images)

df.head()

Unnamed: 0,filename,width,height,depth,xmin,ymin,xmax,ymax
0,177.PNG,295,184,3,"[51, 139]","[24, 140]","[73, 162]","[69, 179]"
1,62.PNG,115,171,3,[32],[126],[51],[165]
2,128.PNG,227,185,3,"[17, 73, 98, 137, 163]","[21, 63, 90, 120, 144]","[49, 104, 129, 170, 196]","[55, 89, 113, 141, 168]"
3,218.png,379,302,3,[334],[132],[369],[167]
4,33.PNG,204,143,3,"[15, 143]","[26, 106]","[45, 172]","[45, 130]"


In [62]:
train_df, test_df = train_test_split(df, test_size=0.2)

train_df.head(), test_df.head()

(    filename  width  height  depth                  xmin  \
 85   122.PNG    156     188      3         [12, 51, 113]   
 53    24.PNG    179      91      3                  [87]   
 124  220.png    366     294      3                 [101]   
 119   40.PNG    170     117      3  [8, 22, 37, 61, 142]   
 28    31.PNG    106      85      3                  [32]   
 
                      ymin                   xmax                  ymax  
 85         [85, 105, 148]          [49, 83, 137]       [108, 135, 174]  
 53                   [20]                  [101]                  [47]  
 124                 [170]                  [114]                 [223]  
 119  [22, 33, 42, 65, 51]  [19, 29, 43, 71, 153]  [39, 48, 62, 83, 58]  
 28                   [29]                   [58]                  [62]  ,
     filename  width  height  depth                     xmin  \
 44   188.PNG    346     242      3  [65, 85, 218, 222, 269]   
 1     62.PNG    115     171      3                     [32