In [2]:
import torch
import torch.nn as nn
from train_utils import *
from torchsummary import summary
import torch.nn.functional as F


# Data

In [3]:
import torch
from torchvision import transforms
class MyDataset(torch.utils.data.Dataset):
    """
    In-memory dataset of 32x32 crops cut out of a single labelled image.

    Args:
        image_path (str): Path to the input image.
        label_path (str): Path to the label file (YOLO format).

    On construction the image and its label file are read once. For every
    labelled box the matching region is cropped, converted from OpenCV's BGR
    channel order to RGB, turned into a tensor and resized to 32x32. The
    resulting tensors and their labels are kept in `self.inputs` and
    `self.targets`, so indexing never touches the disk again.

    Raises:
        FileNotFoundError: If OpenCV cannot read `image_path`.
    """
    def __init__(self, image_path, label_path):
        super().__init__()

        self.image_path = image_path
        self.label_path = label_path
        self.inputs, self.targets = [], []
        self.transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Resize((32, 32))
        ])
        image = cv2.imread(self.image_path)
        # cv2.imread signals failure by returning None instead of raising, so
        # fail here with a clear message rather than deep inside a helper.
        if image is None:
            raise FileNotFoundError(f"Could not read image: {self.image_path}")
        labels, coords = read_file_to_tensors(self.label_path)
        for i, coord in enumerate(coords):
            square = find_yolov8_square(image, coord)
            cropped_image = get_box(image, square)
            # OpenCV loads images as BGR; the model is trained on RGB crops.
            cropped_image = cv2.cvtColor(cropped_image, cv2.COLOR_BGR2RGB)
            cropped_tensor = self.transform(cropped_image)
            self.inputs.append(cropped_tensor)
            self.targets.append(labels[i])

    def __getitem__(self, idx):
        """Return the `(crop_tensor, label)` pair stored at position `idx`."""
        image = self.inputs[idx]
        label = self.targets[idx]
        return image, label

    def __len__(self):
        """Return the number of crops extracted from the image."""
        return len(self.inputs)

In [4]:
import os
image_folder = "/Users/phamminhtuan/Downloads/Trainning_SET/Images"
label_folder = "/Users/phamminhtuan/Downloads/Trainning_SET/Labels"
train = []

# Training split: crops from every "...iter_<k>.jpg" image with 0 <= k <= 10.
for filename in os.listdir(image_folder):
    if not (filename.endswith('.jpg') and 'iter_' in filename):
        continue
    # Extract the iteration number that sits between "iter_" and ".jpg".
    # Only this parse is guarded: a ValueError raised while building the
    # dataset itself should surface instead of silently dropping the image.
    try:
        number = int(filename.split('iter_')[1].split('.jpg')[0])
    except ValueError:
        # Skip files whose iteration suffix is not a valid integer.
        continue
    if not 0 <= number <= 10:
        continue
    # The label file shares the image's base name, with a .txt extension.
    image_path = os.path.join(image_folder, filename)
    label_path = os.path.join(label_folder, os.path.splitext(filename)[0] + ".txt")
    # Build a temporary per-image dataset and move its samples into `train`.
    temp = MyDataset(image_path, label_path)
    train.extend(temp[i] for i in range(len(temp)))

In [5]:
# Total number of (crop, label) pairs collected for the training split.
len(train)

115623

In [6]:
import os
# Held-out split: crops from every image whose name ends in "iter_11.jpg".
image_folder = "/Users/phamminhtuan/Downloads/Trainning_SET/Images"
label_folder = "/Users/phamminhtuan/Downloads/Trainning_SET/Labels"
test = []
for filename in os.listdir(image_folder):
    if not filename.endswith('iter_11.jpg'):
        continue
    # The label file is assumed to share the image's base name (.txt extension).
    stem = os.path.splitext(filename)[0]
    image_path = os.path.join(image_folder, filename)
    label_path = os.path.join(label_folder, stem + ".txt")
    temp_test = MyDataset(image_path, label_path)
    test.extend(temp_test[i] for i in range(len(temp_test)))

In [13]:
# Shuffle only the training batches. Evaluation gains nothing from a random
# order, and a fixed order keeps validation runs comparable between epochs.
train_loader = torch.utils.data.DataLoader(train, batch_size=128, shuffle=True)
val_loader = torch.utils.data.DataLoader(test, batch_size=128, shuffle=False)
print(len(train_loader), len(val_loader))

904 89


# Model

In [17]:
# Pick the best available accelerator: Apple-Silicon MPS first (the original
# target of this notebook), then CUDA, and finally the CPU so the notebook
# still runs on machines without a GPU.
if torch.backends.mps.is_available():
    device = torch.device("mps")
elif torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [38]:
class EfficientCNN(nn.Module):
    """
    Small convolutional classifier with two output classes.

    `features` stacks three identical stages (3x3 convolution with padding 1,
    batch normalisation, ReLU, 2x2 max-pooling) whose output channels are
    32, 64 and 128. `classifier` is a two-layer fully connected head
    (2048 -> 256 -> 2) with a dropout rate of 0.5 between the layers.

    The first linear layer expects 128 * 4 * 4 features, which is what three
    2x poolings leave of a 32x32 input.
    """

    def __init__(self):
        super().__init__()

        # Build the stages in order so the Sequential indices (and therefore
        # the state_dict keys) stay features.0 ... features.11.
        stages = []
        for c_in, c_out in ((3, 32), (32, 64), (64, 128)):
            stages.extend([
                nn.Conv2d(c_in, c_out, kernel_size=3, padding=1),
                nn.BatchNorm2d(c_out),
                nn.ReLU(inplace=True),
                nn.MaxPool2d(kernel_size=2, stride=2),
            ])
        self.features = nn.Sequential(*stages)

        head = [
            nn.Linear(128 * 4 * 4, 256),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            nn.Linear(256, 2),
        ]
        self.classifier = nn.Sequential(*head)

    def forward(self, x):
        """Map a batch of images to a `(batch, 2)` tensor of class logits."""
        feature_maps = self.features(x)
        # Flatten everything except the batch dimension for the linear head.
        flat = feature_maps.view(feature_maps.size(0), -1)
        return self.classifier(flat)


# Train

In [39]:
import pytorch_lightning as pl
import torchmetrics

In [40]:
model = EfficientCNN().to(device)
class MyDatasetPL(pl.LightningModule):
    """
    Lightning wrapper that trains and evaluates the notebook-level `model`.

    Every instance stores a reference to the same global `model` object, so
    all instances share one underlying network. `num_classes` is accepted by
    the constructor but is not used by this class.
    """

    def __init__(self, num_classes):
        super().__init__()
        self.model = model
        self.train_accuracies = []

    def forward(self, x):
        """Return the wrapped network's logits for `x`."""
        return self.model(x)

    def _loss_and_accuracy(self, batch):
        """Run one forward pass and return `(cross-entropy loss, accuracy)`."""
        images, labels = batch
        logits = self(images)
        return F.cross_entropy(logits, labels), self.accuracy(logits, labels)

    def training_step(self, batch, batch_idx):
        """Log `train_loss` / `train_acc` and return the loss for backprop."""
        loss, acc = self._loss_and_accuracy(batch)
        self.log('train_loss', loss, prog_bar=True)
        self.log('train_acc', acc, prog_bar=True)
        return loss

    def validation_step(self, batch, batch_idx):
        """Log `val_loss` / `val_acc` for one validation batch."""
        loss, acc = self._loss_and_accuracy(batch)
        self.log('val_loss', loss, prog_bar=True)
        self.log('val_acc', acc, prog_bar=True)

    def configure_optimizers(self):
        """Optimise all parameters with Adam at a learning rate of 5e-4."""
        return torch.optim.Adam(self.parameters(), lr=5e-4)

    def accuracy(self, logits, labels):
        """Return the fraction of rows whose arg-max logit equals the label."""
        predicted = logits.detach().argmax(dim=1)
        n_correct = predicted.eq(labels).sum().item()
        return n_correct / labels.size(0)

    def test_step(self, batch, batch_idx):
        """Log `test_loss` / `test_acc` and return both in a dict."""
        loss, acc = self._loss_and_accuracy(batch)
        self.log('test_loss', loss, prog_bar=True)
        self.log('test_acc', acc, prog_bar=True)
        return {'test_loss': loss, 'test_acc': acc}

In [41]:
# Keep only the single checkpoint with the highest validation accuracy.
checkpoint_callback = pl.callbacks.ModelCheckpoint(
    monitor='val_acc',
    filename='best-checkpoint',
    save_top_k=1,
    mode='max',
    verbose=True
)
progress_bar = pl.callbacks.TQDMProgressBar()
PARAMS = {
    "benchmark": True,
    "enable_progress_bar": True,
    "logger": True,
    "callbacks": [progress_bar, checkpoint_callback],
    "log_every_n_steps": 1,
    "num_sanity_val_steps": 0,
    "max_epochs": 30,
    # "16-mixed" is the current spelling of 16-bit automatic mixed precision;
    # the bare integer 16 is deprecated and triggers a Lightning warning.
    "precision": "16-mixed"
}
trainer = pl.Trainer(**PARAMS)
Train = MyDatasetPL(num_classes=2)
trainer.fit(Train, train_loader, val_loader)


/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/lightning_fabric/connector.py:563: `precision=16` is supported for historical reasons but its usage is discouraged. Please set your precision to 16-mixed instead!
Using 16bit Automatic Mixed Precision (AMP)
/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/pytorch_lightning/plugins/precision/amp.py:55: `torch.cuda.amp.GradScaler(args...)` is deprecated. Please use `torch.amp.GradScaler('cuda', args...)` instead.
GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name  | Type         | Params
---------------------------------------
0 | model | EfficientCNN | 618 K 
---------------------------------------
618 K     Trainable params
0         Non-trainable params
618 K     Total params
2.475     Total estimated model params size (MB)
/Library/Frameworks/Python.framework/Version

Training: |          | 0/? [00:00<?, ?it/s]



Validation: |          | 0/? [00:00<?, ?it/s]

Epoch 0, global step 904: 'val_acc' reached 0.97008 (best 0.97008), saving model to '/Users/phamminhtuan/Desktop/AIChallenge/lightning_logs/version_4/checkpoints/best-checkpoint.ckpt' as top 1


Validation: |          | 0/? [00:00<?, ?it/s]

Epoch 1, global step 1808: 'val_acc' reached 0.97087 (best 0.97087), saving model to '/Users/phamminhtuan/Desktop/AIChallenge/lightning_logs/version_4/checkpoints/best-checkpoint.ckpt' as top 1


Validation: |          | 0/? [00:00<?, ?it/s]

Epoch 2, global step 2712: 'val_acc' reached 0.97122 (best 0.97122), saving model to '/Users/phamminhtuan/Desktop/AIChallenge/lightning_logs/version_4/checkpoints/best-checkpoint.ckpt' as top 1


Validation: |          | 0/? [00:00<?, ?it/s]

Epoch 3, global step 3616: 'val_acc' reached 0.97140 (best 0.97140), saving model to '/Users/phamminhtuan/Desktop/AIChallenge/lightning_logs/version_4/checkpoints/best-checkpoint.ckpt' as top 1


Validation: |          | 0/? [00:00<?, ?it/s]

Epoch 4, global step 4520: 'val_acc' was not in top 1


Validation: |          | 0/? [00:00<?, ?it/s]

Epoch 5, global step 5424: 'val_acc' was not in top 1


Validation: |          | 0/? [00:00<?, ?it/s]

Epoch 6, global step 6328: 'val_acc' was not in top 1
/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/pytorch_lightning/trainer/call.py:54: Detected KeyboardInterrupt, attempting graceful shutdown...


# Predict

In [46]:
# Rebuild the Lightning module from the checkpoint that scored the highest
# val_acc during training (tracked by `checkpoint_callback`).
best_ckpt_path = checkpoint_callback.best_model_path
best_model = MyDatasetPL.load_from_checkpoint(best_ckpt_path, num_classes=2)

In [47]:
# Sample image (iteration 12) and its label file used by the prediction cells below.
# NOTE(review): these paths live under Desktop/, whereas the training folders
# are under Downloads/ - confirm this is intentional.
image_path = "/Users/phamminhtuan/Desktop/Trainning_SET/Images/IMG_1587_iter_12.jpg"
label_path = "/Users/phamminhtuan/Desktop/Trainning_SET/Labels/IMG_1587_iter_12.txt"

In [48]:
def handle_predicted_zero(file_path, label, x1, y1, x2, y2):
    """
    Append one "label x1 y1 x2 y2" line to a text file.

    Intended to be called when the classifier predicts class 0.

    Args:
        file_path (str): Path of the file to append to; it is created if missing.
        label (int): Predicted label.
        x1, y1, x2, y2: Coordinates of the rectangle.
    """
    # str.format applies the same per-value formatting as an f-string would.
    record = "{} {} {} {} {}\n".format(label, x1, y1, x2, y2)
    # Append mode keeps the records written by earlier calls.
    with open(file_path, 'a') as out_file:
        out_file.write(record)

In [49]:
picked_output_path = "/Users/phamminhtuan/Desktop/AIChallenge/picked_output.txt"
image = cv2.imread(image_path)
# cv2.imread returns None (it does not raise) when the file cannot be read.
if image is None:
    raise FileNotFoundError(f"Could not read image: {image_path}")
labels, coords = read_file_to_tensors(label_path)

# Same preprocessing as MyDataset; built once instead of once per box.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Resize((32, 32))
])

# Draw on a copy so rectangles from earlier boxes never leak into later crops.
annotated = image.copy()

# Inference mode: disable dropout, use BatchNorm running statistics, skip autograd.
model.eval()
with torch.no_grad():
    for coord in coords:
        xy_coord = find_yolov8_square(image, coord)
        x1, y1, x2, y2 = xy_coord
        crop = get_box(image, xy_coord)
        # Training crops are converted BGR -> RGB, so inference must match.
        crop = cv2.cvtColor(crop, cv2.COLOR_BGR2RGB)

        batch = transform(crop).unsqueeze(0).to(device)
        predicted = int(model(batch).argmax(dim=1).item())

        if predicted == 0:
            # Mark the box on the output image.
            cv2.rectangle(annotated, (x1, y1), (x2, y2), (0, 255, 0), 2)

            # Record the original label-file coordinates in picked_output.txt.
            a1, b1, a2, b2 = coord
            handle_predicted_zero(picked_output_path, predicted, a1, b1, a2, b2)

cv2.imwrite("/Users/phamminhtuan/Desktop/AIChallenge/avg_coords.jpg", annotated)

True