# Sys check

In [1]:
import os
root = os.getcwd()
root

'/kaggle/working'

In [2]:
!nvidia-smi

Tue Apr 15 15:21:33 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 560.35.03              Driver Version: 560.35.03      CUDA Version: 12.6     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  Tesla P100-PCIE-16GB           Off |   00000000:00:04.0 Off |                    0 |
| N/A   33C    P0             27W /  250W |       0MiB /  16384MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

In [3]:
import torch 
print(torch.__version__)
print(torch.cuda.is_available())

2.5.1+cu124
True


# Build

In [4]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import models, transforms, datasets
from torch.utils.data import DataLoader
from sklearn.metrics import classification_report

import os
import matplotlib.pyplot as plt

In [5]:
def training(model, optimizer, criterion, train_loader, val_loader, num_epochs=10, device='cuda'):
    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0

        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()

        print(f"[Epoch {epoch+1}/{num_epochs}] Loss: {running_loss / len(train_loader):.4f}")
        
        # Evaluate on validation set
        print("Validation performance:")
        evaluate(model, val_loader)

In [100]:
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomVerticalFlip(),  
    transforms.RandomRotation(10),
    transforms.RandomAffine(degrees=0, 
                            translate=(0.1, 0.1)),  # (6) Dịch vị trí
    transforms.ColorJitter(brightness=0.1),     # (5) Điều chỉnh độ sáng 
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],  # ImageNet mean/std
                         std=[0.229, 0.224, 0.225])
])

In [142]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
num_classes = 4
batch_size = 8
num_epochs = 10
learning_rate = 1e-5
num_images_per_sample=4

In [8]:
# Hàm đánh giá
def evaluate(model, dataloader):
    model.eval()
    y_true, y_pred = [], []

    with torch.no_grad():
        for inputs, labels in dataloader:
            inputs = inputs.to(device)
            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)
            y_true.extend(labels.cpu().numpy())
            y_pred.extend(preds.cpu().numpy())

    report = classification_report(y_true, y_pred, target_names=["bào ngư xám + trắng", "Đùi gà Baby (cắt ngắn)", "nấm mỡ", "linh chi trắng"])
    print(report)


In [55]:
from torch.utils.data import Dataset
from PIL import Image
import glob
import os
import torch
import re

class MultiImageMushroomDataset(Dataset):
    def __init__(self, root_dir, transform=None, num_images_per_sample=4):
        """
        Dataset phân loại nấm sử dụng nhiều ảnh một mẫu.

        Args:
            root_dir (str): Thư mục gốc chứa các class folders.
            transform (callable, optional): Transform áp dụng lên ảnh.
            num_images_per_sample (int): Số ảnh muốn nhóm lại thành 1 sample (mặc định 4).
        """
        self.samples = []
        self.transform = transform
        self.num_images_per_sample = num_images_per_sample

        # Map từ prefix sang class label
        self.prefix2class = {
            'NM': 'nấm mỡ',
            'BN': 'bào ngư xám + trắng',
            'DG': 'Đùi gà Baby (cắt ngắn)',
            'LC': 'linh chi trắng'
        }
        self.class_names = list(self.prefix2class.values())
        self.class2idx = {name: idx for idx, name in enumerate(self.class_names)}

        # Duyệt từng class folder
        for class_name in self.class_names:
            class_path = os.path.join(root_dir, class_name)
            images = sorted(glob.glob(os.path.join(class_path, "*.jpg")))

            # Gom ảnh theo prefix (BM, AB, ...)
            prefix_groups = {}
            for img_path in images:
                filename = os.path.basename(img_path)
                match = re.match(r"([A-Z]{2})\d+", filename)
                if match:
                    prefix = match.group(1)
                    if prefix not in prefix_groups:
                        prefix_groups[prefix] = []
                    prefix_groups[prefix].append(img_path)

            # Tạo samples từ các nhóm ảnh
            for prefix, img_list in prefix_groups.items():
                img_list = sorted(img_list)
                label_idx = self.class2idx[self.prefix2class[prefix]]

                for i in range(0, len(img_list), self.num_images_per_sample):
                    selected = img_list[i:i+self.num_images_per_sample]
                    if len(selected) < self.num_images_per_sample:
                        selected = (selected + [selected[0]] * self.num_images_per_sample)[:self.num_images_per_sample]
                    self.samples.append((selected, label_idx))

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        img_paths, label = self.samples[idx]
        imgs = []

        for path in img_paths:
            image = Image.open(path).convert("RGB")
            if self.transform:
                image = self.transform(image)
            imgs.append(image)

        return torch.stack(imgs), torch.tensor(label)


In [56]:
from torch.utils.data import DataLoader, random_split
import torch
import os
from tqdm.notebook import tqdm  # For progress bars in notebooks

def create_dataloaders(dataset_root, transform, num_images_per_sample=2, 
                      batch_size=16, val_split=0.05, seed=42, 
                      num_workers=0, pin_memory=True):
    """
    Create optimized train and validation dataloaders with better error handling
    and performance settings.
    
    Args:
        dataset_root (str): Root directory for dataset
        transform: Data transformations to apply
        num_images_per_sample (int): Number of images per sample
        batch_size (int): Batch size for training
        val_split (float): Validation split ratio (0-1)
        seed (int): Random seed for reproducibility
        num_workers (int): Number of workers for data loading (0 for no multiprocessing)
        pin_memory (bool): Whether to pin memory for faster GPU transfer
        
    Returns:
        tuple: (train_loader, val_loader)
    """
    # Check if dataset directory exists
    if not os.path.exists(dataset_root):
        raise FileNotFoundError(f"Dataset directory not found: {dataset_root}")
    
    print(f"Loading dataset from {dataset_root}...")
    
    # Create dataset with progress reporting
    try:
        dataset = MultiImageMushroomDataset(
            root_dir=dataset_root,
            transform=transform,
            num_images_per_sample=num_images_per_sample
        )
    except Exception as e:
        print(f"Error creating dataset: {str(e)}")
        raise
    
    # Display dataset info
    print(f"Dataset loaded: {len(dataset)} samples")
    
    # Set random seed for reproducible splits
    generator = torch.Generator().manual_seed(seed)
    
    # Calculate split sizes
    val_size = int(len(dataset) * val_split)
    train_size = len(dataset) - val_size
    
    # Split dataset
    train_dataset, val_dataset = random_split(
        dataset, [train_size, val_size], generator=generator
    )
    
    print(f"Split: {train_size} training samples, {val_size} validation samples")
    
    # Create dataloaders with optimized settings
    train_loader = DataLoader(
        train_dataset, 
        batch_size=batch_size, 
        shuffle=True, 
        num_workers=num_workers,  # Set to 0 to avoid pickle errors
        pin_memory=pin_memory,    # Faster data transfer to GPU
        drop_last=False,          # Use all samples
        persistent_workers=(num_workers > 0),  # Keep workers alive between epochs
    )
    
    val_loader = DataLoader(
        val_dataset, 
        batch_size=batch_size, 
        shuffle=False,  # No need to shuffle validation data
        num_workers=num_workers, 
        pin_memory=pin_memory,
    )
    
    return train_loader, val_loader


In [123]:
# Create dataloaders with optimized settings
train_loader, val_loader = create_dataloaders(
    dataset_root="/kaggle/input/aio-hutech/train",
    transform=transform,
    num_images_per_sample=num_images_per_sample,
    batch_size=batch_size,
    num_workers=0,  # Fix pickle error by using 0 workers
    pin_memory=torch.cuda.is_available(),  # Only pin if CUDA is available
)

Loading dataset from /kaggle/input/aio-hutech/train...
Dataset loaded: 300 samples
Split: 285 training samples, 15 validation samples


In [124]:
len(train_loader)

36

In [125]:
for images, labels in val_loader:
    print("Label batch:", labels)
    print("Max label:", labels.max())
    print("Min label:", labels.min())
    break


Label batch: tensor([1, 1, 1, 0, 3, 2, 3, 1])
Max label: tensor(3)
Min label: tensor(0)


In [126]:
import timm
import torch
import torch.nn as nn

class ViT_MushroomClassifier(nn.Module):
    def __init__(self, vit_model_name='vit_base_patch16_224', num_classes=4):
        super(ViT_MushroomClassifier, self).__init__()
        self.vit = timm.create_model(vit_model_name, pretrained=True)
        self.vit.head = nn.Identity()  # Bỏ classification head của ViT

        self.embedding_dim = self.vit.num_features  # Thường là 768

        self.classifier = nn.Sequential(
            nn.Linear(self.embedding_dim, 256),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(256, num_classes)
        )

    def forward(self, x):
        # x: [B, 4, C, H, W]
        B, N, C, H, W = x.shape
        x = x.view(B * N, C, H, W)

        embeddings = self.vit(x)  # [B*4, D]
        embeddings = embeddings.view(B, N, -1)  # [B, 4, D]

        # Mean pooling over 4 embeddings
        pooled = embeddings.mean(dim=1)  # [B, D]
        out = self.classifier(pooled)
        return out


In [None]:
model = ViT_MushroomClassifier(vit_model_name='vit_base_patch16_224', num_classes=num_classes).to(device)

In [144]:
# loss và optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate)

In [145]:
training(model, optimizer, criterion, train_loader, val_loader, num_epochs=num_epochs, device='cuda')

[Epoch 1/10] Loss: 0.9397
Validation performance:
                        precision    recall  f1-score   support

   bào ngư xám + trắng       0.80      1.00      0.89         4
Đùi gà Baby (cắt ngắn)       1.00      1.00      1.00         6
                nấm mỡ       1.00      0.50      0.67         2
        linh chi trắng       1.00      1.00      1.00         3

              accuracy                           0.93        15
             macro avg       0.95      0.88      0.89        15
          weighted avg       0.95      0.93      0.93        15

[Epoch 2/10] Loss: 0.1488
Validation performance:
                        precision    recall  f1-score   support

   bào ngư xám + trắng       1.00      1.00      1.00         4
Đùi gà Baby (cắt ngắn)       1.00      1.00      1.00         6
                nấm mỡ       1.00      1.00      1.00         2
        linh chi trắng       1.00      1.00      1.00         3

              accuracy                           1.00        15

In [146]:
evaluate(model, val_loader)

                        precision    recall  f1-score   support

   bào ngư xám + trắng       1.00      1.00      1.00         4
Đùi gà Baby (cắt ngắn)       1.00      1.00      1.00         6
                nấm mỡ       1.00      1.00      1.00         2
        linh chi trắng       1.00      1.00      1.00         3

              accuracy                           1.00        15
             macro avg       1.00      1.00      1.00        15
          weighted avg       1.00      1.00      1.00        15



In [148]:
torch.save(model.state_dict(), "vit_mushroom_multi_4_best.pth")

In [None]:
# model.load_state_dict(torch.load("vit_mushroom_multi_3_best.pth"))

In [131]:
def predict_single_image(model, image_path, transform, num_images_per_sample, device):
    """
    Fixed function to handle single image prediction with a multi-image model.
    """
    # Load and transform the image
    image = Image.open(image_path).convert("RGB")
    image = transform(image)
    
    # Create a batch with 2 copies of the same image to match expected shape [B, N, C, H, W]
    # where N is num_images_per_sample (2 in your case)
    image = torch.stack([image] * num_images_per_sample).unsqueeze(0).to(device)  # Shape: [1, N, C, H, W]
    
    model.eval()
    with torch.no_grad():
        output = model(image)
        pred = torch.argmax(output, dim=1).item()
    
    # Access class names from the dataset
    # class_names = ["Mỡ", "Bào Ngư", "Đùi Gà", "Linh Chi Trắng"]
    # return class_names[pred]
    return pred

In [None]:
# predict_single_image(model=model,
#                      image_path="/kaggle/input/aio-hutech/test/001.jpg",
#                      transform=transform,
#                      num_images_per_sample=num_images_per_sample,
#                      device=device)

In [None]:
# def predict_folder(model, test_dir, transform, num_images_per_sample, device):
#     for filename in os.listdir(test_dir):
#         if filename.lower().endswith(('.jpg', '.png', '.jpeg')):
#             path = os.path.join(test_dir, filename)
#             predicted_class = predict_single_image(model, path, transform, num_images_per_sample, device)
#             print(f"{filename}: {predicted_class}")


In [None]:
# predict_folder(model,
#                test_dir="/kaggle/input/aio-hutech/test",
#                transform=transform,
#                num_images_per_sample=num_images_per_sample,
#                device=device)

In [48]:
import pandas as pd

def predict_folder_to_csv(model, test_dir, transform, num_images_per_sample, device, output_csv="predictions.csv"):
    results = []
    for filename in os.listdir(test_dir):
        if filename.lower().endswith(('.jpg', '.png', '.jpeg')):
            path = os.path.join(test_dir, filename)
            predicted_class = predict_single_image(model, path, transform, num_images_per_sample, device)
            file_id = os.path.splitext(filename)[0]  # Extract the prefix (ID) from the filename
            results.append({'id': file_id, 'type': predicted_class})

    df = pd.DataFrame(results)
    
    df.to_csv(output_csv, index=False)
    print(f"Predictions saved to {output_csv}")


In [147]:
predict_folder_to_csv(model=model,
                      test_dir="/kaggle/input/aio-hutech/test",
                      transform=transform,
                      num_images_per_sample=num_images_per_sample,
                      device=device,
                      output_csv="/kaggle/working/submission-model4_10.csv")

Predictions saved to /kaggle/working/submission-model4_10.csv


In [149]:
from collections import Counter
import torch.nn.functional as F
import re

def extract_num_images_from_model_name(model):
    """
    Hàm phụ để trích xuất num_images_per_sample từ model.name (ví dụ: "model_3" → 3).
    """
    match = re.search(r'(\d+)', getattr(model, "name", "1"))
    return int(match.group(1)) if match else 1

def ensemble_predict_single_image(models, image_path, transform, device, method="soft"):
    """
    Dự đoán nhãn của một ảnh bằng cách ensemble nhiều mô hình, tự động lấy num_images_per_sample từ tên model.
    """
    predictions = []
    prob_sum = None

    for model in models:
        model.eval()
        num_images_per_sample = extract_num_images_from_model_name(model)

        image = Image.open(image_path).convert("RGB")
        image_tensor = transform(image)
        image_tensor = torch.stack([image_tensor] * num_images_per_sample).unsqueeze(0).to(device)  # [1, N, C, H, W]

        with torch.no_grad():
            output = model(image_tensor)
            probs = F.softmax(output, dim=1)
            pred = torch.argmax(probs, dim=1).item()
            predictions.append(pred)

            if prob_sum is None:
                prob_sum = probs
            else:
                prob_sum += probs

    if method == "majority":
        return Counter(predictions).most_common(1)[0][0]
    elif method == "soft":
        avg_probs = prob_sum / len(models)
        return torch.argmax(avg_probs, dim=1).item()
    else:
        raise ValueError("Unknown ensemble method. Use 'soft' or 'majority'.")


In [150]:
import pandas as pd

def ensemble_predict_folder_to_csv(models, test_dir, transform, device, method="soft", output_csv="submission_ensemble.csv"):
    results = []
    for filename in sorted(os.listdir(test_dir)):
        if filename.lower().endswith(('.jpg', '.png', '.jpeg')):
            path = os.path.join(test_dir, filename)
            predicted_class = ensemble_predict_single_image(
                models=models,
                image_path=path,
                transform=transform,
                device=device,
                method=method
            )
            file_id = os.path.splitext(filename)[0]
            results.append({'id': file_id, 'type': predicted_class})

    df = pd.DataFrame(results)
    df.to_csv(output_csv, index=False)
    print(f"Ensemble predictions saved to {output_csv}")


In [152]:
model_1 = ViT_MushroomClassifier(vit_model_name='vit_base_patch16_224', num_classes=num_classes).to(device)
model_2 = ViT_MushroomClassifier(vit_model_name='vit_base_patch16_224', num_classes=num_classes).to(device)
model_3 = ViT_MushroomClassifier(vit_model_name='vit_base_patch16_224', num_classes=num_classes).to(device)
model_4 = ViT_MushroomClassifier(vit_model_name='vit_base_patch16_224', num_classes=num_classes).to(device)

In [153]:
model_1.load_state_dict(torch.load("vit_mushroom_multi_1_best.pth"))
model_2.load_state_dict(torch.load("vit_mushroom_multi_2_best.pth"))
model_3.load_state_dict(torch.load("vit_mushroom_multi_3_best.pth"))
model_4.load_state_dict(torch.load("vit_mushroom_multi_4_best.pth"))

  model_1.load_state_dict(torch.load("vit_mushroom_multi_1_best.pth"))
  model_2.load_state_dict(torch.load("vit_mushroom_multi_2_best.pth"))
  model_3.load_state_dict(torch.load("vit_mushroom_multi_3_best.pth"))
  model_4.load_state_dict(torch.load("vit_mushroom_multi_4_best.pth"))


<All keys matched successfully>

In [155]:
models = [model_1, model_2, model_3, model_4]  # các mô hình đã được load lên device

ensemble_predict_folder_to_csv(
    models=models,
    test_dir="/kaggle/input/aio-hutech/test",
    transform=transform,
    device=device,
    method="majority",  #"soft" hoặc "majority"
    output_csv="/kaggle/working/submission_ensemble_best_majority.csv"
)

Ensemble predictions saved to /kaggle/working/submission_ensemble_best_majority.csv


In [None]:
# from collections import defaultdict
# import torch.nn.functional as F

# def pseudo_label_test_set_group_by_class(model, test_folder, transform, device, num_images_per_sample=2, threshold=0.7):
#     model.eval()
#     image_paths = sorted(glob.glob(os.path.join(test_folder, "*.jpg")))
#     pseudo_labels = []

#     for path in image_paths:
#         image = Image.open(path).convert("RGB")
#         image_tensor = transform(image).unsqueeze(0).to(device)  # [1, C, H, W]

#         # Nhân bản ảnh này num_images_per_sample lần
#         duplicated = image_tensor.repeat(num_images_per_sample, 1, 1, 1)  # [N, C, H, W]
#         duplicated = duplicated.unsqueeze(0)  # [1, N, C, H, W]

#         with torch.no_grad():
#             output = model(duplicated)
#             probs = F.softmax(output, dim=1)
#             confidence, pred_class = torch.max(probs, dim=1)

#         if confidence.item() >= threshold:
#             pseudo_labels.append({
#                 'image_paths': [path] * num_images_per_sample,  # nhân bản đúng theo format
#                 'label': pred_class.item(),
#                 'confidence': confidence.item()
#             })

#     return pseudo_labels


In [None]:
# class MultiImagePseudoLabeledDataset(Dataset):
#     def __init__(self, pseudo_labels, transform, num_images_per_sample=4):
#         self.samples = pseudo_labels
#         self.transform = transform
#         self.num_images_per_sample = num_images_per_sample

#     def __len__(self):
#         return len(self.samples)

#     def __getitem__(self, idx):
#         sample = self.samples[idx]
#         imgs = []
#         for path in sample['image_paths']:
#             image = Image.open(path).convert("RGB")
#             if self.transform:
#                 image = self.transform(image)
#             imgs.append(image)
#         return torch.stack(imgs), torch.tensor(sample['label'])


In [None]:
# pseudo_labels = pseudo_label_test_set_group_by_class(
#     model=model,
#     test_folder="/kaggle/input/aio-hutech/test",
#     transform=transform,
#     device=device,
#     num_images_per_sample=num_images_per_sample,
#     threshold=0.7
# )

# pseudo_dataset = MultiImagePseudoLabeledDataset(
#     pseudo_labels=pseudo_labels,
#     transform=transform,
#     num_images_per_sample=num_images_per_sample
# )

# pseudo_loader = DataLoader(pseudo_dataset, batch_size=8, shuffle=True)


In [None]:
# import torch
# import torch.nn as nn
# import torch.optim as optim
# from tqdm import tqdm

# def fine_tune_with_pseudo_loader(model, pseudo_loader, device, epochs=5, lr=1e-4):
#     model.to(device)
#     model.train()

#     criterion = nn.CrossEntropyLoss()
#     optimizer = optim.Adam(model.parameters(), lr=lr)

#     for epoch in range(epochs):
#         running_loss = 0.0
#         correct = 0
#         total = 0

#         pbar = tqdm(pseudo_loader, desc=f"Epoch {epoch+1}/{epochs}")
#         for images, labels in pbar:
#             # images: [B, N, C, H, W]
#             images, labels = images.to(device), labels.to(device)

#             outputs = model(images)  # model đã nhận được [B, N, C, H, W]
#             loss = criterion(outputs, labels)

#             optimizer.zero_grad()
#             loss.backward()
#             optimizer.step()

#             running_loss += loss.item()
#             _, preds = outputs.max(1)
#             correct += (preds == labels).sum().item()
#             total += labels.size(0)

#             pbar.set_postfix(loss=loss.item(), acc=100 * correct / total)

#         epoch_loss = running_loss / len(pseudo_loader)
#         epoch_acc = 100. * correct / total
#         print(f"✅ Epoch {epoch+1}: Loss = {epoch_loss:.4f}, Accuracy = {epoch_acc:.2f}%")

#     return model


In [None]:
# finetuned_model = fine_tune_with_pseudo_loader(
#     model=model,
#     pseudo_loader=pseudo_loader,
#     device=device,
#     epochs=5,
#     lr=1e-4
# )


In [None]:
# import os

# # Specify the path to the file you want to remove
# file_path = "/kaggle/working/submission-model_1.csv"

# # Check if the file exists, then remove it
# if os.path.exists(file_path):
#     os.remove(file_path)
#     print(f"File '{file_path}' has been removed.")
# else:
#     print(f"File '{file_path}' does not exist.")