In [None]:
# Install the third-party packages this notebook relies on (-q keeps pip's output quiet).
# NOTE(review): timm is installed here but never imported in the cells shown - confirm it is still needed.
!pip install pytorch-metric-learning -q
!pip install matplotlib -q
!pip install tqdm -q
!pip install albumentations -q
!pip install timm -q

In [None]:
import os
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset
import torchvision.transforms as transforms
import cv2
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt
from torch import optim
from tqdm import tqdm
from torchvision import models
import random
import albumentations as A
from albumentations.pytorch import ToTensorV2
from torch.cuda import amp
from torch.optim import lr_scheduler
import torch.nn.functional as F
from pytorch_metric_learning import losses

In [None]:
# Run on the GPU whenever CUDA is usable; otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(device)

In [None]:
INPUT_SIZE = (224, 224)

# Heaviest training-time pipeline: resize to INPUT_SIZE, random horizontal and
# vertical flips, an always-applied rotation within the 45-degree limit, coarse
# dropout of small patches, mild brightness/contrast jitter, then normalization
# with the mean/std listed below and conversion to a tensor.
data_transform = A.Compose(
    [
        A.Resize(*INPUT_SIZE),
        A.HorizontalFlip(p=0.5),
        A.VerticalFlip(p=0.5),
        A.Rotate(limit=45, p=1.0),
        A.CoarseDropout(
            max_holes=8, max_height=16, max_width=16,
            min_holes=1, min_height=8, min_width=8,
            fill_value=0,
            p=0.5,
        ),
        A.RandomBrightnessContrast(
            brightness_limit=(-0.1, 0.1),
            contrast_limit=(-0.1, 0.1),
            p=0.5,
        ),
        A.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
            max_pixel_value=255.0,
            p=1.0,
        ),
        ToTensorV2(),
    ],
    p=1.0,
)

In [None]:
# Simple variant: Resize + Normalize only (no random augmentation).
data_transform_simple = A.Compose(
    [
        A.Resize(*INPUT_SIZE),
        A.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
            max_pixel_value=255.0,
            p=1.0,
        ),
        ToTensorV2(),
    ],
    p=1.0,
)

# Moderate variant: a couple of basic augmentations (horizontal flip and a
# light brightness/contrast jitter) between the resize and the normalization.
data_transform_moderate = A.Compose(
    [
        A.Resize(*INPUT_SIZE),
        A.HorizontalFlip(p=0.5),
        A.RandomBrightnessContrast(
            brightness_limit=(-0.1, 0.1),
            contrast_limit=(-0.1, 0.1),
            p=0.3,
        ),
        A.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
            max_pixel_value=255.0,
            p=1.0,
        ),
        ToTensorV2(),
    ],
    p=1.0,
)

# Summary of the three augmentation presets. The messages were stored as
# mojibake (UTF-8 text decoded with the wrong codec); they are restored to the
# intended Vietnamese so the notebook prints readable output.
print("📊 So sánh các phiên bản data augmentation:")
print("1. data_transform (hiện tại): Nhiều augmentation - tốt cho generalization nhưng có thể làm chậm training")
print("2. data_transform_simple: Chỉ resize + normalize - training nhanh hơn, có thể overfit")
print("3. data_transform_moderate: Trung bình - balance giữa speed và generalization")

## 🔍 Phân tích: Tác động của việc chỉ sử dụng Resize

### ✅ **Ưu điểm khi chỉ dùng Resize + Normalize:**

1. **Training nhanh hơn**: Ít phép biến đổi → tốc độ training tăng đáng kể
2. **Ổn định hơn**: Ít nhiễu trong quá trình training → loss curve mượt hơn
3. **Phù hợp với dữ liệu đã augmented**: Nếu dataset đã được augment trước thì không cần thêm augmentation
4. **Memory usage thấp hơn**: Ít operations → ít GPU memory

### ⚠️ **Nhược điểm:**

1. **Dễ overfit**: Model có thể học thuộc lòng training data
2. **Generalization kém**: Kém khả năng xử lý dữ liệu mới với conditions khác
3. **Robustness thấp**: Kém khả năng chống nhiễu, thay đổi ánh sáng, góc quay

### 🎯 **Khuyến nghị:**

- **Nếu dataset lớn (>10k images/class)**: Có thể dùng simple transform
- **Nếu dataset nhỏ (<5k images/class)**: Nên giữ moderate augmentation
- **Cho Contrastive Learning**: Moderate augmentation thường tốt hơn vì giúp model học được invariant features

In [None]:
# 🧪 Benchmark to help choose a suitable transform
import time

def test_transform_speed(transform, num_samples=100):
    """Test t·ªëc ƒë·ªô c·ªßa transform"""
    # T·∫°o fake image
    fake_image = np.random.randint(0, 255, (224, 224, 3), dtype=np.uint8)
    
    start_time = time.time()
    for _ in range(num_samples):
        transformed = transform(image=fake_image)["image"]
    end_time = time.time()
    
    avg_time = (end_time - start_time) / num_samples * 1000  # ms
    return avg_time

# Benchmark each preset. The messages were stored as mojibake (UTF-8 decoded
# with the wrong codec); they are restored to the intended Vietnamese.
print("⏱️ So sánh tốc độ transform (ms/image):")
print(f"Original (nhiều augment): {test_transform_speed(data_transform):.2f} ms")
print(f"Simple (chỉ resize): {test_transform_speed(data_transform_simple):.2f} ms")
print(f"Moderate (vừa phải): {test_transform_speed(data_transform_moderate):.2f} ms")

# To try a preset for real, swap the transform that is passed to the DATA class:
print("\n💡 Để test:")
print("1. Thay 'data_transform' bằng 'data_transform_simple' trong DATA class")
print("2. Train một vài epoch và so sánh kết quả")
print("3. Quan sát validation accuracy và training speed")

In [None]:
def get_transform(mode="original"):
    """Build a fresh albumentations pipeline for the requested preset.

    Args:
        mode: "simple" (resize + normalize only), "moderate" (adds a horizontal
            flip and a light brightness/contrast jitter) or "original" (the
            full augmentation stack). Any other value is treated like
            "original".

    Returns:
        A new ``A.Compose`` that resizes to 224x224, applies the preset's
        augmentations, normalizes and converts the image to a tensor.
    """
    INPUT_SIZE = (224, 224)

    # Stages shared by every preset: the resize goes first, normalization and
    # tensor conversion go last.
    resize = A.Resize(INPUT_SIZE[0], INPUT_SIZE[1])
    normalize = A.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
        max_pixel_value=255.0,
        p=1.0,
    )

    if mode == "simple":
        augmentations = []
    elif mode == "moderate":
        augmentations = [
            A.HorizontalFlip(p=0.5),
            A.RandomBrightnessContrast(brightness_limit=(-0.1, 0.1), contrast_limit=(-0.1, 0.1), p=0.3),
        ]
    else:  # original
        augmentations = [
            A.HorizontalFlip(p=0.5),
            A.VerticalFlip(p=0.5),
            A.Rotate(limit=45, p=1.0),
            A.CoarseDropout(
                max_holes=8, max_height=16, max_width=16,
                min_holes=1, min_height=8, min_width=8,
                fill_value=0, p=0.5,
            ),
            A.RandomBrightnessContrast(brightness_limit=(-0.1, 0.1), contrast_limit=(-0.1, 0.1), p=0.5),
        ]

    return A.Compose([resize, *augmentations, normalize, ToTensorV2()], p=1.)

# Example usage:
# train_data = DATA(train_path, get_transform("simple"), phase="train")

# Usage hint for get_transform. The messages were stored as mojibake (UTF-8
# decoded with the wrong codec); they are restored to the intended Vietnamese.
print("🔧 Sử dụng: get_transform('simple'), get_transform('moderate'), hoặc get_transform('original')")
print("\n📈 Thống kê dự đoán:")
print("• Simple: Training nhanh nhất, có thể overfit nếu dataset nhỏ")
print("• Moderate: Balance tốt giữa speed và performance")
print("• Original: Generalization tốt nhất nhưng training chậm hơn")

In [None]:
class DATA(Dataset):
    """Folder-per-class image dataset that decodes images lazily.

    ``path`` must contain one sub-directory per class. Only file paths and
    integer labels are collected up front; pixels are read from disk in
    ``__getitem__``.

    Args:
        path: Root directory holding one sub-directory per class.
        transform: Optional callable invoked as ``transform(image=rgb_array)``
            whose result exposes the transformed image under ``"image"``.
        phase: Free-form tag stored on the instance; both phases load samples
            the same way.

    Attributes:
        label_dict: Mapping from class folder name to integer label.
        image_paths: Path of every sample.
        labels: Integer label of every sample, aligned with ``image_paths``.
    """

    def __init__(self, path, transform=None, phase="train"):
        self.path = path
        self.phase = phase
        self.transform = transform

        # os.listdir returns entries in arbitrary order, so sort to keep the
        # name -> label mapping reproducible across runs and machines. Stray
        # files in the root are skipped because they are not class folders.
        folders = sorted(
            name for name in os.listdir(path)
            if os.path.isdir(os.path.join(path, name))
        )
        self.label_dict = {name: index for index, name in enumerate(folders)}
        print(self.label_dict)

        # Only paths are stored here; images are decoded on demand.
        self.image_paths = []
        self.labels = []
        for image_folder in folders:
            items_path = os.path.join(self.path, image_folder)
            for image_name in sorted(os.listdir(items_path)):
                self.image_paths.append(os.path.join(items_path, image_name))
                self.labels.append(self.label_dict[image_folder])

    def __len__(self):
        """Return the number of samples."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Load sample ``idx`` from disk.

        Returns:
            ``(image, label)`` where ``image`` is the RGB image (after
            ``transform`` when one is set) and ``label`` is a ``torch.long``
            scalar tensor.

        Raises:
            OSError: If the file cannot be decoded as an image.
        """
        image_path = self.image_paths[idx]
        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread signals failure by returning None; fail here with the
            # offending path instead of deep inside the transform or collate.
            raise OSError(f"Could not read image: {image_path}")
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # OpenCV decodes to BGR

        if self.transform:
            image = self.transform(image=image)["image"]
        return image, torch.tensor(self.labels[idx], dtype=torch.long)

In [None]:
import time
# Build the training dataset and report how long the directory scan takes.
train_path = "/kaggle/input/data-augmented-model-2/augmented_data_model_2"
start = time.time()
train_data = DATA(train_path, transform=data_transform, phase="train")
end = time.time()
print("Load time: {} s".format(round(end - start, 4)))
len(train_data)

In [None]:
anchor_img, label = train_data[100] # image at index = 100
img_np = anchor_img.numpy()
img_np = np.transpose(img_np, (1,2,0))  # channels-first tensor -> channels-last array for matplotlib

# The sample was standardized by the A.Normalize step of data_transform, so its
# values fall outside [0, 1] and imshow would clip them into wrong colors.
# Undo the normalization with the same mean/std before plotting.
norm_mean = np.array([0.485, 0.456, 0.406])
norm_std = np.array([0.229, 0.224, 0.225])
img_np = np.clip(img_np * norm_std + norm_mean, 0.0, 1.0)

plt.imshow(img_np)
plt.show()

In [None]:
# Batch of 32 is the practical maximum on Colab/Kaggle: anything larger runs
# out of memory with only 16 GB of VRAM.
batch_size = 32
train_loader = DataLoader(train_data,
                          batch_size=batch_size,
                          shuffle=True,
                          # os.cpu_count() may return None when the count cannot be
                          # determined; fall back to loading in the main process.
                          num_workers=os.cpu_count() or 0
                          )

In [None]:
class SupervisedContrastiveLoss(nn.Module):
    """Supervised contrastive loss.

    Ref: https://arxiv.org/abs/2004.11362

    Every sample is an anchor; samples sharing its label are positives and all
    other samples in the batch are contrasted against. Anchors without any
    positive in the batch are excluded from the mean.

    Args:
        temperature: Divisor applied to the cosine similarities.
        base_temperature: The loss is scaled by ``temperature / base_temperature``.
    """

    def __init__(self, temperature=0.1, base_temperature=0.07):
        super().__init__()
        self.temperature = temperature
        self.base_temperature = base_temperature

    def forward(self, features, labels):
        """Compute the loss for one batch.

        Args:
            features: hidden vectors of shape [bsz, feature_dim] or
                [bsz, n_views, feature_dim].
            labels: ground truth of shape [bsz].

        Returns:
            A scalar loss tensor. When no anchor has a positive in the batch
            the result is a zero that is still attached to ``features``.

        Raises:
            ValueError: If the number of labels differs from the batch size.
        """
        device = features.device
        batch_size = features.shape[0]

        # Validate before doing any work so a shape mismatch fails fast.
        labels = labels.long().contiguous().view(-1, 1)
        if labels.shape[0] != batch_size:
            raise ValueError('Num of labels does not match num of features')

        if features.dim() < 3:
            features = features.unsqueeze(1)  # treat each sample as a single view

        # Do the loss arithmetic in float32 even when called inside an autocast
        # region: the exp/log terms and the 1e-8 guard are not representable in
        # half precision.
        with torch.amp.autocast(device.type, enabled=False):
            features = F.normalize(features.float(), p=2, dim=2)

            contrast_count = features.shape[1]
            # Stack all views along the batch axis: [bsz * n_views, feature_dim].
            contrast_feature = torch.cat(torch.unbind(features, dim=1), dim=0)
            anchor_feature = contrast_feature
            anchor_count = contrast_count

            # Temperature-scaled similarities, shifted by the row maximum for
            # numerical stability (the shift cancels out in log_prob).
            logits = torch.matmul(anchor_feature, contrast_feature.T) / self.temperature
            logits = logits - logits.max(dim=1, keepdim=True).values.detach()

            # mask[i, j] == 1 when i and j share a label; the diagonal
            # (self-contrast) is removed through logits_mask.
            mask = torch.eq(labels, labels.T).float().to(device)
            mask = mask.repeat(anchor_count, contrast_count)
            logits_mask = 1.0 - torch.eye(batch_size * anchor_count, device=device)
            mask = mask * logits_mask

            exp_logits = torch.exp(logits) * logits_mask
            log_prob = logits - torch.log(exp_logits.sum(1, keepdim=True) + 1e-8)

            positives_per_anchor = mask.sum(1)
            valid_samples = positives_per_anchor > 0
            if not valid_samples.any():
                # Keep the zero attached to the graph so backward() still
                # produces (zero) gradients; a fresh leaf tensor would leave
                # every parameter without a gradient for this step.
                return features.sum() * 0.0

            # Average log-likelihood over the positives of each valid anchor.
            mean_log_prob_pos = (
                (mask * log_prob).sum(1)[valid_samples] / positives_per_anchor[valid_samples]
            )
            loss = -(self.temperature / self.base_temperature) * mean_log_prob_pos
            return loss.mean()

In [None]:
import torch
import torch.nn as nn
from torchvision import models

class Network(nn.Module):
    """ResNet-50 feature extractor followed by a small embedding head.

    Args:
        emb_dim: Size of the embedding vector returned by ``forward``.
    """

    def __init__(self, emb_dim=128):
        super().__init__()

        # `pretrained=True` is deprecated in torchvision in favour of the
        # `weights=` enum. IMAGENET1K_V1 is the checkpoint the old flag loaded,
        # so the initial weights are unchanged. Older torchvision builds
        # without the enum fall back to the legacy flag.
        weights_enum = getattr(models, "ResNet50_Weights", None)
        if weights_enum is not None:
            base_model = models.resnet50(weights=weights_enum.IMAGENET1K_V1)
        else:
            base_model = models.resnet50(pretrained=True)

        # Read the width of the pooled features from the classifier being
        # removed instead of hard-coding it (2048 for ResNet-50).
        feature_dim = base_model.fc.in_features

        # Drop the final classification layer (fc).
        self.backbone = nn.Sequential(*list(base_model.children())[:-1])  # Output: [B, 2048, 1, 1]

        # FC head
        self.fc = nn.Sequential(
            nn.Linear(feature_dim, 512),
            nn.PReLU(),
            nn.Linear(512, emb_dim)
        )

    def forward(self, x):
        """Map a batch of images to embeddings of shape [B, emb_dim]."""
        x = self.backbone(x)           # [B, 2048, 1, 1]
        x = torch.flatten(x, 1)        # [B, 2048]
        x = self.fc(x)                 # [B, emb_dim]
        return x

In [None]:
# Smoke test: push a random batch through the network and check the output shape.
model = Network(emb_dim=256).to(device)
x = torch.rand(32, 3, 224, 224).to(device)  # random input batch
output = model(x)
print(output.shape)  # expected: torch.Size([32, 256])

In [None]:
# Model, loss, optimizer and learning-rate schedule used by the training loop.
embedding_dims = 256
model = Network(emb_dim=embedding_dims).to(device)

criterion = SupervisedContrastiveLoss(temperature=0.1)
criterion = criterion.to(device)

optimizer = optim.Adam(params=model.parameters(), lr=1e-5, weight_decay=1e-6)

# NOTE(review): the training loop calls scheduler.step() after every optimizer
# step rather than once per epoch, so T_max=10 is counted in optimizer steps -
# confirm that this short cosine period is intended.
scheduler = lr_scheduler.CosineAnnealingLR(optimizer=optimizer, T_max=10, eta_min=1e-6)

In [None]:
from sklearn.metrics import accuracy_score
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
def TEST(folder_path, model, transforms, key):
    """Evaluate nearest-reference classification accuracy on a folder-per-class set.

    Every image whose path contains ``key`` becomes a reference for its class;
    all remaining images are test images. Each test image is assigned the label
    of the reference whose embedding has the highest cosine similarity.

    Args:
        folder_path: Root directory with one sub-directory per class.
        model: Embedding network; it is switched to eval mode and left there.
        transforms: Callable invoked as ``transforms(image=rgb_array)`` whose
            result exposes the image tensor under ``"image"``.
        key: Substring that marks an image path as a reference image.

    Returns:
        The accuracy, or 0.0 when there are no reference images or no test
        images.
    """
    # Run on whatever device the model lives on instead of hard-coding "cuda",
    # so evaluation also works on a CPU-only machine.
    first_param = next(model.parameters(), None)
    model_device = first_param.device if first_param is not None else torch.device("cpu")

    def _embed(image_path):
        """Return the embedding of one image as a [1, emb_dim] NumPy array."""
        image = Image.open(image_path).convert('RGB')
        image = transforms(image=np.array(image))["image"]
        embedding = model(image.unsqueeze(0).to(model_device))
        return embedding.float().cpu().numpy()

    label_org = []             # class index of each reference image
    reference_embeddings = []  # one [1, emb_dim] array per reference image
    label_test = []            # class index of each test image
    dir_test_path = []         # path of each test image

    # Put the model in evaluation mode
    model.eval()

    # Disable gradient calculation
    with torch.no_grad():
        # Class indices follow the order in which os.listdir returns the sub-folders.
        for label_index, subfolder_name in enumerate(os.listdir(folder_path)):
            subfolder_path = os.path.join(folder_path, subfolder_name)
            for image_file in os.listdir(subfolder_path):
                image_path = os.path.join(subfolder_path, image_file)

                # NOTE(review): the key is matched against the whole path, not
                # only the file name - confirm no folder name can contain it.
                if key in image_path:
                    reference_embeddings.append(_embed(image_path))
                    label_org.append(label_index)
                else:
                    # Store the path and label for other images
                    dir_test_path.append(image_path)
                    label_test.append(label_index)

        if not reference_embeddings:  # no reference images were found
            print(f"Warning: No reference images found with key '{key}'")
            return 0.0

        # Stack the references once so every test image needs a single
        # similarity call instead of one call (and one device-to-host copy)
        # per reference.
        reference_matrix = np.concatenate(reference_embeddings, axis=0)

        predict_label = []
        for test_image_path in dir_test_path:
            similarities = cosine_similarity(reference_matrix, _embed(test_image_path))[:, 0]
            predict_label.append(label_org[int(np.argmax(similarities))])

        if not predict_label or not label_test:
            print("Warning: No predictions or labels to evaluate")
            return 0.0

        # accuracy_score expects (y_true, y_pred); the value is the same either way.
        accuracy = accuracy_score(label_test, predict_label)
        print(f'----Key: {key}')
        print(f'----Number of test images: {len(label_test)}')
        print(f'----Number of reference images: {len(label_org)}')
        print(f'----Accuracy: {accuracy:.4f}')
        print()

        return accuracy

# Deterministic evaluation pipeline: resize, normalize, convert to tensor.
preprocess = A.Compose(
    [
        A.Resize(224, 224),
        A.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
            max_pixel_value=255.0,
            p=1.0,
        ),
        ToTensorV2(),
    ],
    p=1.0,
)

In [None]:
import pandas as pd
import json
from datetime import datetime
import signal
import sys

# Per-epoch training log: every column starts out as its own empty list.
train_history = {
    column: []
    for column in ('epoch', 'loss', 'accuracy', 'learning_rate', 'timestamp')
}

In [None]:
def save_final_results():
    """Write the training history, a final checkpoint and a run summary to disk.

    Reads the notebook globals ``train_history``, ``epochs``, ``model``,
    ``optimizer``, ``scheduler``, ``ACC``, ``best_model_path``, ``train_data``,
    ``batch_size`` and ``embedding_dims``. Everything is written under
    ``/kaggle/working/outputs``.
    """
    os.makedirs('/kaggle/working/outputs', exist_ok=True)

    # Save training history
    df_history = pd.DataFrame(train_history)
    df_history.to_csv('/kaggle/working/outputs/training_history.csv', index=False)

    # This function also runs from the SIGINT handler, when fewer than `epochs`
    # epochs have finished. Label the checkpoint with the last epoch actually
    # logged; on a normal run this equals `epochs`.
    completed_epochs = train_history['epoch'][-1] if train_history['epoch'] else 0

    # Save final model
    final_model_path = f"/kaggle/working/outputs/final_model_epoch_{completed_epochs}.pth"
    torch.save({
        'epoch': completed_epochs,
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'scheduler_state_dict': scheduler.state_dict(),
        'final_accuracy': ACC,  # ACC holds the best accuracy measured so far
        'training_history': train_history
    }, final_model_path)

    # Save summary
    summary = {
        'total_epochs': epochs,
        'completed_epochs': completed_epochs,
        'best_accuracy': float(ACC),
        'final_loss': float(train_history['loss'][-1]) if train_history['loss'] else 0.0,
        'best_model_path': best_model_path,
        'final_model_path': final_model_path,
        'training_completed': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
        'total_samples': len(train_data),
        'batch_size': batch_size,
        'embedding_dims': embedding_dims
    }

    with open('/kaggle/working/outputs/training_summary.json', 'w') as f:
        json.dump(summary, f, indent=2)

    results_df = pd.DataFrame([summary])
    results_df.to_csv('/kaggle/working/outputs/final_results.csv', index=False)

    # The check-mark emoji was stored as mojibake; restored here.
    print(f"✅ Results saved! Best Accuracy: {ACC:.4f}")

# Signal handler
def signal_handler(sig, frame):
    """Handle an interrupt: save the current progress, then exit with status 0.

    Args:
        sig: Signal number supplied by the signal machinery (not used here).
        frame: Stack frame supplied by the signal machinery (not used here).
    """
    print('\nTraining interrupted! Saving current progress...')
    save_final_results()
    sys.exit(0)

# Route SIGINT to the handler above so an interrupt saves results before exiting.
signal.signal(signal.SIGINT, signal_handler)

In [None]:
epochs = 50
ACCUMULATION_STEPS = 4  # micro-batches whose gradients are summed per optimizer step
model.train()

# Mixed precision only applies on CUDA; on CPU both autocast and the scaler are
# turned off instead of requesting 'cuda' unconditionally.
use_amp = device.type == "cuda"
scaler = torch.amp.GradScaler(enabled=use_amp)

ACC = 0
best_model_path = None

# Make sure the output directory exists before anything is written to it.
os.makedirs('/kaggle/working/outputs', exist_ok=True)

num_batches = len(train_loader)
optimizer.zero_grad()  # start from clean gradients (e.g. when the cell is re-run)

for epoch in tqdm(range(epochs), desc="Epochs"):
    running_loss = []

    for step, (anchor_img, label) in enumerate(train_loader):
        anchor_img = anchor_img.to(device).float()
        label = label.to(device).long()

        with torch.amp.autocast(device.type, enabled=use_amp):
            outputs = model(anchor_img)
            loss = criterion(outputs, label)

        # Log the true batch loss; only the value used for backward is divided
        # by the accumulation factor.
        running_loss.append(loss.item())
        scaler.scale(loss / ACCUMULATION_STEPS).backward()

        # Also step on the last batch of the epoch so a partial accumulation
        # group is applied instead of leaking its gradients into the next epoch.
        is_last_batch = (step + 1) == num_batches
        if (step + 1) % ACCUMULATION_STEPS == 0 or is_last_batch:
            scaler.step(optimizer)
            scaler.update()
            optimizer.zero_grad()
            # NOTE(review): the scheduler advances once per optimizer step while
            # it was built with T_max=10 - confirm that per-step (rather than
            # per-epoch) scheduling is intended.
            scheduler.step()

            if (step + 1) % 20 == 0:
                torch.cuda.empty_cache()

    epoch_loss = np.mean(running_loss)
    current_lr = optimizer.param_groups[0]['lr']

    # Evaluate every 10 epochs and keep only the best checkpoint on disk.
    if (epoch + 1) % 10 == 0:
        test_path = "/kaggle/input/logo-verify-test/logo_verify_test"
        accuracy = TEST(test_path, model, preprocess, key="000000")
        model.train()  # TEST leaves the model in eval mode

        if accuracy >= ACC:
            if best_model_path and os.path.exists(best_model_path):
                os.remove(best_model_path)

            # Make sure the output directory exists
            os.makedirs('/kaggle/working/outputs', exist_ok=True)
            best_model_path = f"/kaggle/working/outputs/model_best_epoch_{epoch+1}_acc_{accuracy:.4f}.pth"
            torch.save({
                'epoch': epoch + 1,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'scheduler_state_dict': scheduler.state_dict(),
                'accuracy': accuracy,
                'loss': epoch_loss
            }, best_model_path)
            ACC = accuracy
    else:
        accuracy = None

    # Record the training history
    train_history['epoch'].append(epoch + 1)
    train_history['loss'].append(float(epoch_loss))
    train_history['accuracy'].append(float(accuracy) if accuracy is not None else None)
    train_history['learning_rate'].append(float(current_lr))
    train_history['timestamp'].append(datetime.now().strftime('%Y-%m-%d %H:%M:%S'))

    print("Epoch: {}/{} - Loss: {:.4f} - LR: {:.2e}{}".format(
        epoch+1, epochs, epoch_loss, current_lr,
        f" - Accuracy: {accuracy:.4f}" if accuracy is not None else ""
    ))

    # Persist the history every 5 epochs (the directory is re-created if missing)
    if (epoch + 1) % 5 == 0:
        os.makedirs('/kaggle/working/outputs', exist_ok=True)
        df_history = pd.DataFrame(train_history)
        df_history.to_csv('/kaggle/working/outputs/training_history.csv', index=False)
        print(f"Training history saved at epoch {epoch+1}")

print("\n" + "="*50)
print("TRAINING COMPLETED!")
print("="*50)
save_final_results()