# Cài đặt thư viện cần thiết

In [None]:
!pip install pytorch-metric-learning -q

# Library

In [None]:
import os
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset
import torchvision.transforms as transforms
import cv2
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt
from torch import optim
from tqdm import tqdm
from torchvision import models
import random
import albumentations as A
from albumentations.pytorch import ToTensorV2
from torch.cuda import amp
from torch.optim import lr_scheduler
import torch.nn.functional as F
from pytorch_metric_learning import losses

# Device

In [None]:
# Prefer the GPU when CUDA is usable; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(device)

# Transform

In [None]:
INPUT_SIZE = (224, 224)

# Training-time augmentation pipeline. The steps run in the order listed:
# resize, random flips, rotation (always applied, p=1.0), random cut-outs,
# mild brightness/contrast jitter, ImageNet mean/std normalisation, and
# finally conversion to a tensor.
data_transform = A.Compose(
    [
        A.Resize(*INPUT_SIZE),
        A.HorizontalFlip(p=0.5),
        A.VerticalFlip(p=0.5),
        A.Rotate(limit=45, p=1.0),
        A.CoarseDropout(
            max_holes=8,
            max_height=16,
            max_width=16,
            min_holes=1,
            min_height=8,
            min_width=8,
            fill_value=0,
            p=0.5,
        ),
        A.RandomBrightnessContrast(
            brightness_limit=(-0.1, 0.1),
            contrast_limit=(-0.1, 0.1),
            p=0.5,
        ),
        A.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
            max_pixel_value=255.0,
            p=1.0,
        ),
        ToTensorV2(),
    ],
    p=1.,
)

# DataLoader

In [None]:
class DATA(Dataset):
    """In-memory image dataset built from a folder-per-class directory.

    Every sub-directory of ``path`` is one class. All images are decoded with
    ``cv2.imread`` at construction time, converted from BGR to RGB and kept in
    memory, so ``__getitem__`` only has to apply the transform.

    Args:
        path: Root directory containing one sub-directory per class.
        transform: Optional callable invoked as ``transform(image=img)`` whose
            result is indexed with ``["image"]`` (the albumentations calling
            convention). When ``None`` the stored image is returned unchanged.
        phase: ``"train"`` returns the label as a float32 tensor; any other
            value returns it as an int64 tensor.

    Attributes:
        NOTE_LABEL: Mapping from class-folder name to integer label.
        IMAGE: List of loaded RGB images.
        LABEL: Integer label for each entry of ``IMAGE``.
        LEN: Number of images that were loaded.
    """

    def __init__(self, path, transform = None, phase = "train"):
        self.path = path
        self.phase = phase
        self.transform = transform

        # os.listdir() returns entries in arbitrary order, so sort to make the
        # class -> index mapping reproducible across runs and machines. Stray
        # files in the root are ignored instead of crashing the scan below.
        folders = sorted(
            name for name in os.listdir(path)
            if os.path.isdir(os.path.join(path, name))
        )

        NOTE_LABEL = {name: index for index, name in enumerate(folders)}
        print(NOTE_LABEL)
        self.NOTE_LABEL = NOTE_LABEL

        IMAGE = []
        LABEL = []
        for image_folder in folders:
            items_path = os.path.join(self.path, image_folder)
            for image_name in sorted(os.listdir(items_path)):
                image = cv2.imread(os.path.join(items_path, image_name))
                # cv2.imread returns None for files it cannot decode; such
                # files are skipped, so they never count towards the length.
                if image is None:
                    continue
                IMAGE.append(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
                LABEL.append(NOTE_LABEL[image_folder])

        self.LEN = len(IMAGE)
        self.IMAGE = IMAGE
        self.LABEL = LABEL

    def __len__(self):
        """Return the number of images that were loaded."""
        return self.LEN

    def __getitem__(self, idx):
        """Return ``(image, label)`` for ``idx`` with the transform applied."""
        anchor_img = self.IMAGE[idx]
        anchor_label = self.LABEL[idx]

        if self.transform:
            anchor_img = self.transform(image=anchor_img)["image"]

        # Label dtype depends on the phase: float32 for training, int64
        # otherwise (what the original ``dtype=int`` produced).
        label_dtype = torch.float32 if self.phase == "train" else torch.long
        return anchor_img, torch.tensor(anchor_label, dtype=label_dtype)

In [None]:
import time

# Build the training dataset and report how long the load took.
train_path = "/kaggle/input/model2-94logo/data_classification"
start = time.time()
train_data = DATA(path=train_path, transform=data_transform, phase="train")
end = time.time()
elapsed = end - start
print(f"Load time: {round(elapsed, 4)} s")
len(train_data)

### Sanity-check a sample image

In [None]:
# Preview one augmented training sample (index 100, or the last image when the
# dataset is smaller than that).
sample_index = min(100, len(train_data) - 1)
anchor_img, label = train_data[sample_index]
img_np = anchor_img.numpy()
img_np = np.transpose(img_np, (1, 2, 0))  # CHW -> HWC for matplotlib

# data_transform normalises with the ImageNet mean/std, so the tensor is no
# longer in [0, 1]. Undo that before plotting; otherwise imshow clips the
# values and the colours look wrong.
norm_mean = np.array([0.485, 0.456, 0.406])
norm_std = np.array([0.229, 0.224, 0.225])
img_np = np.clip(img_np * norm_std + norm_mean, 0.0, 1.0)

plt.imshow(img_np)
plt.show()

In [None]:
# 32 is the largest batch that fits when training on Colab/Kaggle; anything
# larger runs out of memory (only 16 GB of VRAM).
batch_size = 32
train_loader = DataLoader(
    train_data,
    batch_size=batch_size,
    shuffle=True,
    # os.cpu_count() may return None when the count cannot be determined;
    # fall back to loading in the main process in that case.
    num_workers=os.cpu_count() or 0,
)

# SupCon Loss

In [None]:
class SupervisedContrastiveLoss(nn.Module):
    """Supervised contrastive (SupCon) loss over a batch of embeddings.

    Thin wrapper around ``pytorch_metric_learning.losses.SupConLoss``. The
    previous implementation wrapped ``losses.ContrastiveLoss``, which is a
    margin-based pair loss and has no ``temperature`` argument.

    Args:
        temperature: Softmax temperature forwarded to ``SupConLoss``.
    """

    def __init__(self, temperature=0.1):
        super().__init__()
        self.temperature = temperature
        self.loss_fn = losses.SupConLoss(temperature=self.temperature)

    def forward(self, features, labels):
        """Return the loss for ``features`` and their ``labels``.

        Both arguments are passed straight to the wrapped loss.
        # assumes features is (batch, emb_dim) and labels is (batch,) -- TODO confirm
        """
        return self.loss_fn(features, labels)

In [None]:
# class SupervisedContrastiveLoss(nn.Module):
#     def __init__(self, temperature=0.2):
#         super(SupervisedContrastiveLoss, self).__init__()
#         self.temperature = temperature

#     def forward(self, feature_vectors, labels):
#         # Normalize feature vectors
#         feature_vectors_normalized = F.normalize(feature_vectors, p=2, dim=1)
#         # Compute logits
#         logits = torch.div(
#             torch.matmul(
#                 feature_vectors_normalized, torch.transpose(feature_vectors_normalized, 0, 1)
#             ),
#             self.temperature,
#         )
#         return losses.NTXentLoss(temperature=0.07)(logits, torch.squeeze(labels))

# BackBone - emb_dim = 256

### VGG16

##### Custom FC head

In [None]:

# Pretrained VGG16, loaded once at module level. ``weights=`` replaces the
# deprecated ``pretrained=True``; IMAGENET1K_V1 is the checkpoint that flag
# used to select.
model_ = models.vgg16(weights=models.VGG16_Weights.IMAGENET1K_V1)


class Network(nn.Module):
    """VGG16 convolutional features followed by a small FC embedding head.

    Args:
        emb_dim: Size of the output embedding vector.
    """

    def __init__(self, emb_dim=128):
        import copy

        super().__init__()
        # Copy the feature extractor so each Network owns its weights.
        # Assigning ``model_.features`` directly made every instance share
        # (and keep training) the same module object.
        self.conv = copy.deepcopy(model_.features)

        # The first Linear expects 512*7*7 inputs, i.e. a 512x7x7 feature map.
        # presumably this corresponds to 224x224 input images -- confirm
        self.fc = nn.Sequential(
            nn.Linear(512*7*7, 512),
            nn.PReLU(),
            nn.Linear(512, emb_dim)
        )

    def forward(self, x):
        """Map an image batch to embeddings of shape ``[B, emb_dim]``."""
        x = self.conv(x)
        x = torch.flatten(x, 1)
        return self.fc(x)

##### Full classifier head

In [None]:
# model_ = models.vgg16(pretrained=True)

# class Network(nn.Module):
#     def __init__(self, emb_dim=128):
#         super(Network, self).__init__()
#         self.conv = model_.features
        
#         # Full head của VGG16 gồm 3 Linear layers
#         self.base_classifier = model_.classifier  # Gồm cả 1000 lớp output

#         # Sau lớp 1000-d, thêm PReLU và chuyển về emb_dim
#         self.tail = nn.Sequential(
#             nn.PReLU(),
#             nn.Linear(1000, emb_dim)
#         )

#     def forward(self, x):
#         x = self.conv(x)
#         x = x.view(x.size(0), -1)
#         x = self.base_classifier(x)
#         x = self.tail(x)
#         return x

### VGG19

##### Custom FC head

In [None]:
# Pretrained VGG19, loaded once at module level. ``weights=`` replaces the
# deprecated ``pretrained=True``; IMAGENET1K_V1 is the checkpoint that flag
# used to select.
model_ = models.vgg19(weights=models.VGG19_Weights.IMAGENET1K_V1)


class Network(nn.Module):
    """VGG19 convolutional features followed by a small FC embedding head.

    Args:
        emb_dim: Size of the output embedding vector.
    """

    def __init__(self, emb_dim=128):
        import copy

        super().__init__()
        # Copy the feature extractor so each Network owns its weights.
        # Assigning ``model_.features`` directly made every instance share
        # (and keep training) the same module object.
        self.conv = copy.deepcopy(model_.features)

        # The first Linear expects 512*7*7 inputs, i.e. a 512x7x7 feature map.
        # presumably this corresponds to 224x224 input images -- confirm
        self.fc = nn.Sequential(
            nn.Linear(512*7*7, 512),
            nn.PReLU(),
            nn.Linear(512, emb_dim)
        )

    def forward(self, x):
        """Map an image batch to embeddings of shape ``[B, emb_dim]``."""
        # The leftover debug ``print(x.shape)`` was removed: it wrote one line
        # to stdout for every forward pass.
        x = self.conv(x)
        x = torch.flatten(x, 1)
        return self.fc(x)

##### Full classifier head

In [None]:
# model_ = models.vgg19(pretrained=True)

# class Network(nn.Module):
#     def __init__(self, emb_dim=128):
#         super(Network, self).__init__()
#         self.conv = model_.features
        
#         self.base_classifier = model_.classifier

#         self.tail = nn.Sequential(
#             nn.PReLU(),
#             nn.Linear(1000, emb_dim)
#         )

#     def forward(self, x):
#         x = self.conv(x)
#         x = x.view(x.size(0), -1)
#         x = self.base_classifier(x)
#         x = self.tail(x)
#         return x

### ResNet18

In [None]:
import torch
import torch.nn as nn
from torchvision import models

class Network(nn.Module):
    """ResNet18 backbone (final fc layer removed) plus an FC embedding head.

    Args:
        emb_dim: Size of the output embedding vector.
    """

    def __init__(self, emb_dim=128):
        super().__init__()

        # ``weights=`` replaces the deprecated ``pretrained=True``;
        # IMAGENET1K_V1 is the checkpoint that flag used to select.
        base_model = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)
        # Keep every child module except the last one (the classification fc).
        self.backbone = nn.Sequential(*list(base_model.children())[:-1])
        self.fc = nn.Sequential(
            nn.Linear(512, 256),
            nn.PReLU(),
            nn.Linear(256, emb_dim)
        )

    def forward(self, x):
        """Map an image batch to embeddings of shape ``[B, emb_dim]``."""
        x = self.backbone(x)           # [B, 512, 1, 1]
        x = torch.flatten(x, 1)        # [B, 512]
        x = self.fc(x)                 # [B, emb_dim]
        return x


### ResNet50

In [None]:
import torch
import torch.nn as nn
from torchvision import models

class Network(nn.Module):
    """ResNet50 backbone (final fc layer removed) plus an FC embedding head.

    Args:
        emb_dim: Size of the output embedding vector.
    """

    def __init__(self, emb_dim=128):
        super().__init__()

        # ``weights=`` replaces the deprecated ``pretrained=True``.
        # IMAGENET1K_V1 is pinned explicitly because it is the checkpoint that
        # flag selected; ``DEFAULT`` would load different (V2) weights.
        base_model = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V1)

        # Drop the last layer (fc).
        self.backbone = nn.Sequential(*list(base_model.children())[:-1])  # Output: [B, 2048, 1, 1]

        # FC head
        self.fc = nn.Sequential(
            nn.Linear(2048, 512),
            nn.PReLU(),
            nn.Linear(512, emb_dim)
        )

    def forward(self, x):
        """Map an image batch to embeddings of shape ``[B, emb_dim]``."""
        x = self.backbone(x)           # [B, 2048, 1, 1]
        x = torch.flatten(x, 1)        # [B, 2048]
        x = self.fc(x)                 # [B, emb_dim]
        return x

### MobileNetV2


In [None]:
import torch
import torch.nn as nn
from torchvision import models

class Network(nn.Module):
    """MobileNetV2 features, global average pooling and an FC embedding head.

    Args:
        emb_dim: Size of the output embedding vector.
    """

    def __init__(self, emb_dim=128):
        super().__init__()

        # ``weights=`` replaces the deprecated ``pretrained=True``;
        # IMAGENET1K_V1 is the checkpoint that flag used to select.
        base_model = models.mobilenet_v2(
            weights=models.MobileNet_V2_Weights.IMAGENET1K_V1
        )

        # Drop the classifier and keep only the feature extractor.
        self.backbone = base_model.features  # Output: [B, 1280, 7, 7] (for 224x224 input)

        # Global average pooling + FC head
        self.pool = nn.AdaptiveAvgPool2d((1, 1))  # Output: [B, 1280, 1, 1]

        self.fc = nn.Sequential(
            nn.Linear(1280, 512),
            nn.PReLU(),
            nn.Linear(512, emb_dim)
        )

    def forward(self, x):
        """Map an image batch to embeddings of shape ``[B, emb_dim]``."""
        x = self.backbone(x)           # [B, 1280, H', W']
        x = self.pool(x)               # [B, 1280, 1, 1]
        x = torch.flatten(x, 1)        # [B, 1280]
        x = self.fc(x)                 # [B, emb_dim]
        return x


### MobileNetV3 - Small

In [None]:
import torch
import torch.nn as nn
from torchvision import models

class Network(nn.Module):
    """MobileNetV3-Small features, global average pooling and an FC head.

    Args:
        emb_dim: Size of the output embedding vector.
    """

    def __init__(self, emb_dim=128):
        super().__init__()

        # ``weights=`` replaces the deprecated ``pretrained=True``;
        # IMAGENET1K_V1 is the checkpoint that flag used to select.
        base_model = models.mobilenet_v3_small(
            weights=models.MobileNet_V3_Small_Weights.IMAGENET1K_V1
        )

        # Use only the feature extractor (output: [B, 576, 7, 7] for 224x224 input).
        self.backbone = base_model.features

        # Global average pooling
        self.pool = nn.AdaptiveAvgPool2d((1, 1))  # Output: [B, 576, 1, 1]

        # FC head
        self.fc = nn.Sequential(
            nn.Linear(576, 256),
            nn.PReLU(),
            nn.Linear(256, emb_dim)
        )

    def forward(self, x):
        """Map an image batch to embeddings of shape ``[B, emb_dim]``."""
        x = self.backbone(x)     # [B, 576, H', W']
        x = self.pool(x)         # [B, 576, 1, 1]
        x = torch.flatten(x, 1)  # [B, 576]
        x = self.fc(x)           # [B, emb_dim]
        return x


### MobileNetV3 - Large

In [None]:
import torch
import torch.nn as nn
from torchvision import models

class Network(nn.Module):
    """MobileNetV3-Large features, global average pooling and an FC head.

    Args:
        emb_dim: Size of the output embedding vector.
    """

    def __init__(self, emb_dim=128):
        super().__init__()

        # ``weights=`` replaces the deprecated ``pretrained=True``;
        # IMAGENET1K_V1 is the checkpoint that flag used to select.
        base_model = models.mobilenet_v3_large(
            weights=models.MobileNet_V3_Large_Weights.IMAGENET1K_V1
        )

        # Take the feature extractor only.
        self.backbone = base_model.features  # Output: [B, 960, 7, 7] (for 224x224 input)

        # Global average pooling
        self.pool = nn.AdaptiveAvgPool2d((1, 1))  # [B, 960, 1, 1]

        # FC head
        self.fc = nn.Sequential(
            nn.Linear(960, 512),
            nn.PReLU(),
            nn.Linear(512, emb_dim)
        )

    def forward(self, x):
        """Map an image batch to embeddings of shape ``[B, emb_dim]``."""
        x = self.backbone(x)     # [B, 960, H', W']
        x = self.pool(x)         # [B, 960, 1, 1]
        x = torch.flatten(x, 1)  # [B, 960]
        x = self.fc(x)           # [B, emb_dim]
        return x
 
 

# Test model forward

In [None]:
# Smoke test: push one random batch through the network and check the shape.
model = Network(256)
model = model.to(device)
x = torch.rand([32, 3, 224, 224])  # random input batch
x = x.to(device)
output = model(x)
print(output.shape)  # expected: torch.Size([32, 256])

# Setup

In [None]:
# Training objects: model, loss, optimizer and learning-rate schedule.
embedding_dims = 256  # size of the embedding vector produced by the network

model = Network(emb_dim=embedding_dims)
model = model.to(device)

criterion = SupervisedContrastiveLoss(temperature=0.1)
criterion = criterion.to(device)

optimizer = optim.Adam(params=model.parameters(), lr=1e-5, weight_decay=1e-6)

# Cosine schedule with a half-period of T_max scheduler steps, annealing the
# learning rate down to eta_min.
scheduler = lr_scheduler.CosineAnnealingLR(optimizer=optimizer, T_max=10, eta_min=1e-6)

# Test Function

In [None]:
from sklearn.metrics import accuracy_score
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
def _embed_image(model, transforms, image_path, target_device):
    """Load one image and return its embedding as a ``(1, emb_dim)`` array."""
    # Use a context manager so the file handle is released straight away.
    with Image.open(image_path) as handle:
        rgb = np.array(handle.convert('RGB'))
    tensor = transforms(image=rgb)["image"]
    embedding = model(tensor.unsqueeze(0).to(target_device))
    return embedding.detach().cpu().numpy()


def TEST(folder_path, model, transforms, key):
    """Measure nearest-reference classification accuracy on a test folder.

    Every sub-directory of ``folder_path`` is one class. An image whose full
    path contains ``key`` is a reference image for its class; every other
    image is a query. Each query is assigned the class of the reference whose
    embedding has the highest cosine similarity to the query embedding.

    Args:
        folder_path: Root directory with one sub-directory per class.
        model: Embedding network. It is switched to eval mode and left there,
            so the caller must call ``model.train()`` again before training.
        transforms: Callable invoked as ``transforms(image=array)`` whose
            result is indexed with ``["image"]``.
        key: Substring that marks reference images (matched against the full
            image path, not only the file name).

    Returns:
        The accuracy computed by ``accuracy_score`` over all query images.

    Raises:
        ValueError: If there are query images but no reference image matched
            ``key``.
    """
    model.eval()

    # Run on whatever device the model lives on instead of hard-coding "cuda",
    # so evaluation also works on a CPU-only machine.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device("cpu")

    reference_embeddings = []
    reference_labels = []
    query_paths = []
    query_labels = []

    with torch.no_grad():
        # Sorted so that label indices and tie-breaking are reproducible;
        # stray files in the root are ignored.
        class_names = sorted(
            name for name in os.listdir(folder_path)
            if os.path.isdir(os.path.join(folder_path, name))
        )
        for label_index, subfolder_name in enumerate(class_names):
            subfolder_path = os.path.join(folder_path, subfolder_name)
            for image_file in sorted(os.listdir(subfolder_path)):
                image_path = os.path.join(subfolder_path, image_file)
                if key in image_path:
                    reference_embeddings.append(
                        _embed_image(model, transforms, image_path, target_device)
                    )
                    reference_labels.append(label_index)
                else:
                    query_paths.append(image_path)
                    query_labels.append(label_index)

        if query_paths and not reference_embeddings:
            raise ValueError(
                f"No reference image containing {key!r} was found under {folder_path!r}"
            )

        # Stack the references once so each query needs a single similarity
        # call instead of one call (and one device transfer) per reference.
        reference_matrix = (
            np.concatenate(reference_embeddings, axis=0)
            if reference_embeddings else None
        )

        predict_label = []
        for query_path in query_paths:
            query_embedding = _embed_image(model, transforms, query_path, target_device)
            similarities = cosine_similarity(query_embedding, reference_matrix)[0]
            best_reference = int(np.argmax(similarities))
            predict_label.append(reference_labels[best_reference])

    # accuracy_score expects (y_true, y_pred).
    accuracy = accuracy_score(query_labels, predict_label)

    print(f'----Accuracy: {accuracy:.4f}')
    print()

    return accuracy

# Deterministic evaluation pipeline: resize, ImageNet mean/std normalisation,
# then conversion to a tensor. No random augmentation is applied here.
preprocess = A.Compose(
    [
        A.Resize(224, 224),
        A.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
            max_pixel_value=255.0,
            p=1.0,
        ),
        ToTensorV2(),
    ],
    p=1.,
)

# Train Loop

In [None]:
# Mixed-precision training loop with gradient accumulation. Every 10th epoch
# the model is evaluated with TEST() and saved when it matches or beats the
# best accuracy seen so far.
epochs = 100
accumulation_steps = 4  # micro-batches accumulated per optimizer update
test_path = "/kaggle/input/test-set-seg-extract/TEST_SET"  # change this path for your environment

# Only enable AMP when actually running on CUDA, so the loop also works on a
# CPU-only machine.
use_amp = device.type == "cuda"
scaler = torch.amp.GradScaler(device.type, enabled=use_amp)

ACC = 0

model.train()
optimizer.zero_grad()
steps_per_epoch = len(train_loader)

for epoch in tqdm(range(epochs), desc="Epochs"):
    running_loss = []
    for step, (anchor_img, label) in enumerate(train_loader):
        anchor_img = anchor_img.to(device).float()
        label = label.to(device)
        with torch.amp.autocast(device.type, enabled=use_amp):
            outputs = model(anchor_img)
            loss = criterion(outputs, label)

        # Log the real batch loss; only the value that is back-propagated is
        # divided by the number of accumulated micro-batches.
        running_loss.append(loss.item())
        scaler.scale(loss / accumulation_steps).backward()

        # Also update on the last batch of the epoch, so gradients from a
        # trailing partial group do not leak into the next epoch.
        is_last_step = (step + 1) == steps_per_epoch
        if (step + 1) % accumulation_steps == 0 or is_last_step:
            scaler.step(optimizer)
            scaler.update()
            optimizer.zero_grad()
            # NOTE(review): the scheduler advances once per optimizer update,
            # so with T_max=10 the cosine cycle spans only a few dozen batches.
            # Confirm this is intended rather than one step per epoch.
            scheduler.step()

    if (epoch + 1) % 10 == 0:
        accuracy = TEST(test_path, model, preprocess, key = "000000")

        # TEST() leaves the model in eval mode; switch back before training.
        model.train()

        if accuracy >= ACC:
            pth = f"/kaggle/working/model_best{accuracy*1000}.pth"  # change this path for your environment
            # NOTE(review): this pickles the whole module; loading it later
            # requires the same Network class definition to be importable.
            torch.save(model, pth)
            ACC = accuracy

    print("Epoch: {}/{} - Loss: {:.4f}".format(epoch+1, epochs, np.mean(running_loss)))