In [None]:
# !git clone https://github.com/HoanChan/TNCR

In [None]:
# List the XML annotation files in the TNCR folder

import os
import glob

# Collect the XML annotation files in the TNCR folder. glob makes no promise
# about ordering, so sort the result to keep files[0] and the preview below
# reproducible across machines and runs.
files = sorted(glob.glob('TNCR/*.xml'))

# Print how many files were found
print(len(files))

# Print the first 10 paths
print(files[:10])

In [None]:
# Print the contents of one annotation file (the first entry of `files`)

with open(files[0], encoding='utf-8') as xml_handle:
    xml_text = xml_handle.read()
print(xml_text)

In [None]:
import glob
import xml.etree.ElementTree as ET

class Anotation:
    """Plain container for the data read from one annotation XML file.

    The constructor stores its four arguments unchanged as the attributes
    ``path``, ``folder``, ``objects`` and ``size``.
    """

    def __init__(self, path, folder, objects, size):
        self.path, self.folder = path, folder
        self.objects, self.size = objects, size

class PVOC:
    """Plain container for one annotated object.

    ``name``, ``pose``, ``truncated`` and ``difficult`` are stored under the
    same attribute names; the ``bbox`` argument is stored as ``bndbox``.
    """

    def __init__(self, name, pose, truncated, difficult, bbox):
        self.name, self.pose = name, pose
        self.truncated, self.difficult = truncated, difficult
        # Note the attribute name differs from the parameter name.
        self.bndbox = bbox

def _parse_voc_object(object_):
    """Build a PVOC instance from one ``<object>`` element."""
    name = object_.find('name').text
    pose = object_.find('pose').text
    truncated = object_.find('truncated').text
    difficult = object_.find('difficult').text

    # Coordinates stay None when the object has no <bndbox>; if several are
    # present, the values of the last one are kept.
    xmin = ymin = xmax = ymax = None
    for box in object_.findall("bndbox"):
        ymin = float(box.find("ymin").text)
        xmin = float(box.find("xmin").text)
        ymax = float(box.find("ymax").text)
        xmax = float(box.find("xmax").text)

    bbox = [xmin, ymin, xmax, ymax]  # PASCAL VOC order
    return PVOC(name=name, pose=pose, truncated=truncated, difficult=difficult, bbox=bbox)


def read_xml_files(folder_path):
    """Parse every ``*.xml`` annotation file found directly in ``folder_path``.

    Args:
        folder_path: Directory holding the XML files. A trailing path
            separator is optional.

    Returns:
        A list with one ``Anotation`` per XML file, ordered by file name.
        ``path`` is ``folder_path`` joined with the ``<filename>`` text,
        ``folder`` is the ``<folder>`` text, ``size`` is
        ``[width, height, depth]`` as ints, and ``objects`` is a list of
        ``PVOC`` instances. Returns an empty list (after printing a message)
        when ``folder_path`` does not exist.

    Raises:
        AttributeError: If an expected element is missing from a file.
        xml.etree.ElementTree.ParseError: If a file is not well-formed XML.
    """
    # Bail out early when the folder does not exist
    if not os.path.exists(folder_path):
        print(f"Thư mục '{folder_path}' không tồn tại.")
        return []

    # os.path.join makes the pattern work with or without a trailing slash;
    # sorting keeps the order of the returned list reproducible.
    xml_files = sorted(glob.glob(os.path.join(folder_path, '*.xml')))

    anos = []  # One Anotation per XML file
    for xml_file in xml_files:
        root = ET.parse(xml_file).getroot()

        # Resolve the image path against the folder that was actually scanned
        # instead of a fixed location.
        path = os.path.join(folder_path, root.find('filename').text)
        folder = root.find('folder').text
        size = [int(root.find('size/' + dim).text) for dim in ('width', 'height', 'depth')]

        objects = [_parse_voc_object(object_) for object_ in root.iter('object')]

        anos.append(Anotation(path=path, folder=folder, objects=objects, size=size))

    return anos


# Point folder_path at the directory that holds the XML files
folder_path = "/content/TNCR/"
anos = read_xml_files(folder_path)

# Print the details of one parsed annotation
ano = anos[5]
for caption, value in (("Path:", ano.path), ("Folder:", ano.folder), ("Size:", ano.size)):
    print(caption, value)
print("bbox:")
for obj in ano.objects:
    object_fields = (
        ("  Name:", obj.name),
        ("  Pose:", obj.pose),
        ("  Truncated:", obj.truncated),
        ("  Difficult:", obj.difficult),
        ("  Bounding Box:", obj.bndbox),
    )
    for caption, value in object_fields:
        print(caption, value)
    print("  -----------------------------")

In [None]:
import cv2
import matplotlib.pyplot as plt

def draw_bounding_boxes(images, indices):
    """Show the selected images side by side with their boxes and labels drawn.

    Args:
        images: Sequence of annotation objects exposing ``path``, ``folder``
            and ``objects``; each object exposes ``name``, ``pose`` and
            ``bndbox`` as ``[xmin, ymin, xmax, ymax]``.
        indices: 0-based positions in ``images`` to draw, one subplot each.
            An empty sequence draws nothing.

    Raises:
        FileNotFoundError: If an image cannot be read from ``path``.
    """
    num_images = len(indices)
    if num_images == 0:
        # plt.subplots cannot build a grid with zero columns
        return

    # squeeze=False always yields a 2-D grid of axes, so a single image is
    # handled the same way as several.
    fig, axes = plt.subplots(1, num_images, figsize=(5*3/4*num_images, 5), squeeze=False)

    for ax, index in zip(axes[0], indices):
        # Use the index as given so the drawn image matches the requested one
        ano = images[index]
        image = cv2.imread(ano.path)
        if image is None:
            # cv2.imread signals failure by returning None rather than raising
            raise FileNotFoundError(f"Could not read image: {ano.path}")

        for obj in ano.objects:
            # Drawing needs integer pixel coordinates
            xmin, ymin, xmax, ymax = (int(value) for value in obj.bndbox)

            # Box outline
            cv2.rectangle(image, (xmin, ymin), (xmax, ymax), (0, 255, 0), 2)

            # Label slightly above the box; formatting tolerates a missing pose
            label = f"{obj.name} - {obj.pose}"
            cv2.putText(image, label, (xmin, ymin - 10), cv2.FONT_HERSHEY_SIMPLEX, 2, (0, 0, 255), 2)

        # OpenCV loads BGR while matplotlib expects RGB
        ax.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
        ax.axis('off')
        ax.set_title(ano.folder)

    plt.show()

In [None]:
import random

# Pick four annotation indices at random (random.choices samples with
# replacement, so the same index can appear more than once) and plot them.
candidate_indices = range(len(anos))
indices_to_draw = random.choices(candidate_indices, k=4)
print(indices_to_draw)
draw_bounding_boxes(anos, indices_to_draw)

In [None]:
# Collect every distinct object label, keeping the order of first appearance
# (dict keys are unique and preserve insertion order).
list_label = list(dict.fromkeys(obj.name for ano in anos for obj in ano.objects))

print(list_label)

In [None]:
import torch
import torch.nn as nn
from torchvision.models import resnet18
from tqdm import tqdm

from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from sklearn.model_selection import train_test_split

from PIL import Image

class CustomDataset(Dataset):
    """Dataset yielding ``(image, bbox)`` pairs for single-box regression.

    Only the first object of each annotation is used as the target.

    Args:
        annotations: Sequence of annotation objects exposing ``path``,
            ``objects`` (each with a ``bndbox`` of
            ``[xmin, ymin, xmax, ymax]``) and ``size`` as
            ``[width, height, depth]``.
        transform: Optional callable applied to the loaded RGB image.
        normalize_boxes: When True, divide the box by the annotation's image
            width/height so the target is expressed as fractions of the image
            and no longer depends on the original resolution. Defaults to
            False, which returns the coordinates exactly as annotated.
    """

    def __init__(self, annotations, transform=None, normalize_boxes=False):
        self.annotations = annotations
        self.transform = transform
        self.normalize_boxes = normalize_boxes

    def __len__(self):
        """Return the number of annotations."""
        return len(self.annotations)

    def __getitem__(self, idx):
        """Return ``(image, bboxes)`` for the annotation at ``idx``.

        ``bboxes`` is a float32 tensor of four values
        ``[xmin, ymin, xmax, ymax]`` taken from the first object.

        Raises:
            ValueError: If the annotation has no objects.
        """
        annotation = self.annotations[idx]

        # ValueError rather than IndexError: Python's fallback iteration
        # protocol would read an IndexError as "end of dataset".
        if not annotation.objects:
            raise ValueError(f"Annotation for '{annotation.path}' contains no objects")

        # Force three channels so grayscale/RGBA files batch together and
        # match the model input; the context manager releases the file handle.
        with Image.open(annotation.path) as source:
            image = source.convert("RGB")

        # Apply transformations if specified
        if self.transform:
            image = self.transform(image)

        # Convert bounding box coordinates to tensor
        bboxes = torch.tensor(annotation.objects[0].bndbox, dtype=torch.float32)
        if self.normalize_boxes:
            width, height = annotation.size[0], annotation.size[1]
            scale = torch.tensor([width, height, width, height], dtype=torch.float32)
            bboxes = bboxes / scale

        return image, bboxes

# Hold out 20% of the annotations for testing; the fixed random_state keeps
# the split repeatable for a given `anos` list.
train_annos, test_annos = train_test_split(anos, test_size=0.2, random_state=42)

# Preprocessing applied to every image (no augmentation): resize to 224x224,
# then convert to a tensor.
transform = transforms.Compose([transforms.Resize((224, 224)), transforms.ToTensor()])

# Training split: dataset plus a shuffling loader
train_dataset = CustomDataset(train_annos, transform=transform)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

# Test split: dataset plus a loader that keeps the original order
test_dataset = CustomDataset(test_annos, transform=transform)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# Define the model: a ResNet-18 built with pretrained=True whose final fully
# connected layer is replaced by a 4-output linear layer.
# NOTE(review): newer torchvision releases deprecate `pretrained=` in favour
# of `weights=` — confirm against the installed version.
model = resnet18(pretrained=True)
num_features = model.fc.in_features
model.fc = nn.Linear(num_features, 4)  # 4 for bounding box coordinates (xmin, ymin, xmax, ymax)

# Define the loss function and optimizer.
# The optimizer is built from model.parameters() before the model is moved to
# the device below; keep this statement order unless that is re-verified.
criterion = nn.SmoothL1Loss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# Move model to GPU if available, otherwise stay on the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

# Train for a fixed number of epochs, reporting the mean batch loss after each
epochs = 10
for epoch in range(epochs):
    model.train()
    running_loss = 0.0
    progress = tqdm(train_loader, desc=f"Epoch {epoch+1}/{epochs}")
    for images, bboxes in progress:
        # Batch tensors must be on the same device as the model
        images, bboxes = images.to(device), bboxes.to(device)

        # Clear old gradients, run the forward pass and score the predictions
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, bboxes)

        # Backpropagate and update the weights
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    # Average the accumulated loss over the number of batches
    epoch_loss = running_loss / len(train_loader)
    print(f"Epoch {epoch+1}/{epochs}, Loss: {epoch_loss}")

# Evaluate on the held-out split: eval mode, no gradient tracking
model.eval()
total_loss = 0.0
with torch.no_grad():
    for images, bboxes in tqdm(test_loader, desc="Evaluation"):
        images, bboxes = images.to(device), bboxes.to(device)

        outputs = model(images)
        loss = criterion(outputs, bboxes)
        total_loss += loss.item()

# Report the mean loss per test batch
evaluation_loss = total_loss / len(test_loader)
print(f"Evaluation Loss: {evaluation_loss}")