# Sys check

In [1]:
import os
root = os.getcwd()
root

'd:\\IT\\GITHUB\\Hutech-AI-Challenge\\PatternFinding'

In [2]:
!nvidia-smi

Mon Apr 14 22:33:22 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 560.94                 Driver Version: 560.94         CUDA Version: 12.6     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                  Driver-Model | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA GeForce GTX 1650 Ti   WDDM  |   00000000:01:00.0  On |                  N/A |
| N/A   52C    P8              5W /   50W |     663MiB /   4096MiB |     25%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

In [3]:
import torch 
print(torch.__version__)
print(torch.cuda.is_available())

2.6.0+cu118
True


# Build

In [3]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import models, transforms, datasets
from torch.utils.data import DataLoader
from sklearn.metrics import classification_report

import os
import matplotlib.pyplot as plt

In [4]:
def training(model, optimizer, criterion, train_loader, val_loader, num_epochs=10, device='cuda'):
    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0

        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()

        print(f"[Epoch {epoch+1}/{num_epochs}] Loss: {running_loss / len(train_loader):.4f}")
        
        # Evaluate on validation set
        print("Validation performance:")
        evaluate(model, val_loader)

In [5]:
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],  # ImageNet mean/std
                         std=[0.229, 0.224, 0.225])
])

In [72]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
num_classes = 4
batch_size = 8
num_epochs = 10
learning_rate = 1e-5
num_images_per_sample=4

In [73]:
# Hàm đánh giá
def evaluate(model, dataloader):
    model.eval()
    y_true, y_pred = [], []

    with torch.no_grad():
        for inputs, labels in dataloader:
            inputs = inputs.to(device)
            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)
            y_true.extend(labels.cpu().numpy())
            y_pred.extend(preds.cpu().numpy())

    report = classification_report(y_true, y_pred, target_names=["Abalone", "Baby Drumstick", "Button Mushroom", "White Lingzhi"])
    print(report)

In [74]:
from torch.utils.data import Dataset
from PIL import Image
import glob
import os
import torch
import re

class MultiImageMushroomDataset(Dataset):
    def __init__(self, root_dir, transform=None, num_images_per_sample=4):
        """
        Dataset phân loại nấm sử dụng nhiều ảnh một mẫu.

        Args:
            root_dir (str): Thư mục gốc chứa các class folders.
            transform (callable, optional): Transform áp dụng lên ảnh.
            num_images_per_sample (int): Số ảnh muốn nhóm lại thành 1 sample (mặc định 4).
        """
        self.samples = []
        self.transform = transform
        self.num_images_per_sample = num_images_per_sample

        # Map từ prefix sang class label
        self.prefix2class = {
            'NM': 'Button Mushroom',
            'BN': 'Abalone',
            'DG': 'Baby Drumstick',
            'LC': 'White Lingzhi'
        }
        self.class_names = list(self.prefix2class.values())
        self.class2idx = {name: idx for idx, name in enumerate(self.class_names)}

        # Duyệt từng class folder
        for class_name in self.class_names:
            class_path = os.path.join(root_dir, class_name)
            images = sorted(glob.glob(os.path.join(class_path, "*.jpg")))

            # Gom ảnh theo prefix (BM, AB, ...)
            prefix_groups = {}
            for img_path in images:
                filename = os.path.basename(img_path)
                match = re.match(r"([A-Z]{2})\d+", filename)
                if match:
                    prefix = match.group(1)
                    if prefix not in prefix_groups:
                        prefix_groups[prefix] = []
                    prefix_groups[prefix].append(img_path)

            # Tạo samples từ các nhóm ảnh
            for prefix, img_list in prefix_groups.items():
                img_list = sorted(img_list)
                label_idx = self.class2idx[self.prefix2class[prefix]]

                for i in range(0, len(img_list), self.num_images_per_sample):
                    selected = img_list[i:i+self.num_images_per_sample]
                    if len(selected) < self.num_images_per_sample:
                        selected = (selected + [selected[0]] * self.num_images_per_sample)[:self.num_images_per_sample]
                    self.samples.append((selected, label_idx))

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        img_paths, label = self.samples[idx]
        imgs = []

        for path in img_paths:
            image = Image.open(path).convert("RGB")
            if self.transform:
                image = self.transform(image)
            imgs.append(image)

        return torch.stack(imgs), torch.tensor(label)


In [75]:
from torch.utils.data import DataLoader, random_split
import torch
import os
from tqdm.notebook import tqdm  # For progress bars in notebooks

def create_dataloaders(dataset_root, transform, num_images_per_sample=2, 
                      batch_size=16, val_split=0.2, seed=42, 
                      num_workers=0, pin_memory=True):
    """
    Create optimized train and validation dataloaders with better error handling
    and performance settings.
    
    Args:
        dataset_root (str): Root directory for dataset
        transform: Data transformations to apply
        num_images_per_sample (int): Number of images per sample
        batch_size (int): Batch size for training
        val_split (float): Validation split ratio (0-1)
        seed (int): Random seed for reproducibility
        num_workers (int): Number of workers for data loading (0 for no multiprocessing)
        pin_memory (bool): Whether to pin memory for faster GPU transfer
        
    Returns:
        tuple: (train_loader, val_loader)
    """
    # Check if dataset directory exists
    if not os.path.exists(dataset_root):
        raise FileNotFoundError(f"Dataset directory not found: {dataset_root}")
    
    print(f"Loading dataset from {dataset_root}...")
    
    # Create dataset with progress reporting
    try:
        dataset = MultiImageMushroomDataset(
            root_dir=dataset_root,
            transform=transform,
            num_images_per_sample=num_images_per_sample
        )
    except Exception as e:
        print(f"Error creating dataset: {str(e)}")
        raise
    
    # Display dataset info
    print(f"Dataset loaded: {len(dataset)} samples")
    
    # Set random seed for reproducible splits
    generator = torch.Generator().manual_seed(seed)
    
    # Calculate split sizes
    val_size = int(len(dataset) * val_split)
    train_size = len(dataset) - val_size
    
    # Split dataset
    train_dataset, val_dataset = random_split(
        dataset, [train_size, val_size], generator=generator
    )
    
    print(f"Split: {train_size} training samples, {val_size} validation samples")
    
    # Create dataloaders with optimized settings
    train_loader = DataLoader(
        train_dataset, 
        batch_size=batch_size, 
        shuffle=True, 
        num_workers=num_workers,  # Set to 0 to avoid pickle errors
        pin_memory=pin_memory,    # Faster data transfer to GPU
        drop_last=False,          # Use all samples
        persistent_workers=(num_workers > 0),  # Keep workers alive between epochs
    )
    
    val_loader = DataLoader(
        val_dataset, 
        batch_size=batch_size, 
        shuffle=False,  # No need to shuffle validation data
        num_workers=num_workers, 
        pin_memory=pin_memory,
    )
    
    return train_loader, val_loader

# Create dataloaders with optimized settings
train_loader, val_loader = create_dataloaders(
    dataset_root="D:\\IT\\GITHUB\\Hutech-AI-Challenge\\data\\train",
    transform=transform,
    num_images_per_sample=num_images_per_sample,
    batch_size=8,
    num_workers=0,  # Fix pickle error by using 0 workers
    pin_memory=torch.cuda.is_available(),  # Only pin if CUDA is available
)

Loading dataset from D:\IT\GITHUB\Hutech-AI-Challenge\data\train...
Dataset loaded: 300 samples
Split: 240 training samples, 60 validation samples


In [76]:
val_loader

<torch.utils.data.dataloader.DataLoader at 0x27753b314f0>

In [77]:
for images, labels in train_loader:
    print("Label batch:", labels)
    print("Max label:", labels.max())
    print("Min label:", labels.min())
    break


Label batch: tensor([1, 0, 2, 2, 3, 1, 1, 0])
Max label: tensor(3)
Min label: tensor(0)


In [78]:
import timm
import torch
import torch.nn as nn

class ViT_MushroomClassifier(nn.Module):
    def __init__(self, vit_model_name='vit_base_patch16_224', num_classes=4):
        super(ViT_MushroomClassifier, self).__init__()
        self.vit = timm.create_model(vit_model_name, pretrained=True)
        self.vit.head = nn.Identity()  # Bỏ classification head của ViT

        self.embedding_dim = self.vit.num_features  # Thường là 768

        self.classifier = nn.Sequential(
            nn.Linear(self.embedding_dim, 256),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(256, num_classes)
        )

    def forward(self, x):
        # x: [B, 4, C, H, W]
        B, N, C, H, W = x.shape
        x = x.view(B * N, C, H, W)

        embeddings = self.vit(x)  # [B*4, D]
        embeddings = embeddings.view(B, N, -1)  # [B, 4, D]

        # Mean pooling over 4 embeddings
        pooled = embeddings.mean(dim=1)  # [B, D]
        out = self.classifier(pooled)
        return out


In [79]:
model = ViT_MushroomClassifier(vit_model_name='vit_base_patch16_224', num_classes=4).to(device)

In [80]:
# loss và optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate)

In [None]:
training(model, optimizer, criterion, train_loader, val_loader, num_epochs=2, device='cuda')

In [None]:
evaluate(model, val_loader)

                 precision    recall  f1-score   support

        Abalone       1.00      1.00      1.00        32
 Baby Drumstick       1.00      1.00      1.00        31
Button Mushroom       1.00      1.00      1.00        31
  White Lingzhi       1.00      1.00      1.00        26

       accuracy                           1.00       120
      macro avg       1.00      1.00      1.00       120
   weighted avg       1.00      1.00      1.00       120



In [None]:
# Save the model's state dictionary
torch.save(model.state_dict(), "vit_mushroom_multi.pth")

In [None]:
# model.load_state_dict(torch.load("vit_liveness_multi.pth"))

In [None]:
def predict_single_image(model, image_path, transform, num_images_per_sample, device):
    """
    Fixed function to handle single image prediction with a multi-image model.
    """
    # Load and transform the image
    image = Image.open(image_path).convert("RGB")
    image = transform(image)
    
    # Create a batch with 2 copies of the same image to match expected shape [B, N, C, H, W]
    # where N is num_images_per_sample (2 in your case)
    image = torch.stack([image] * num_images_per_sample).unsqueeze(0).to(device)  # Shape: [1, N, C, H, W]
    
    model.eval()
    with torch.no_grad():
        output = model(image)
        pred = torch.argmax(output, dim=1).item()
    
    # Access class names from the dataset
    # class_names = ["Mỡ", "Bào Ngư", "Đùi Gà", "Linh Chi Trắng"]
    # return class_names[pred]
    return pred

In [None]:
predict_single_image(model=model,
                     image_path="D:\\IT\\GITHUB\\Hutech-AI-Challenge\\data\\test\\165.jpg",
                     transform=transform,
                     num_images_per_sample=num_images_per_sample,
                     device=device)

0

In [None]:
def predict_folder(model, test_dir, transform, num_images_per_sample, device):
    for filename in os.listdir(test_dir):
        if filename.lower().endswith(('.jpg', '.png', '.jpeg')):
            path = os.path.join(test_dir, filename)
            predicted_class = predict_single_image(model, path, transform, num_images_per_sample, device)
            print(f"{filename}: {predicted_class}")


In [None]:
# model là mô hình ViT đã load từ checkpoint và đã .to(device)
predict_folder(model,
               test_dir="D:\\IT\\GITHUB\\Hutech-AI-Challenge\\data\\test",
               transform=transform,
               num_images_per_sample=num_images_per_sample,
               device=device)

001.jpg: 1
002.jpg: 1
003.jpg: 1
004.jpg: 1
005.jpg: 1
006.jpg: 1
007.jpg: 1
008.jpg: 1
009.jpg: 1
010.jpg: 1
011.jpg: 1
012.jpg: 1
013.jpg: 1
014.jpg: 1
015.jpg: 1
016.jpg: 1
017.jpg: 1
018.jpg: 1
019.jpg: 1
020.jpg: 1
021.jpg: 1
022.jpg: 1
023.jpg: 1
024.jpg: 1
025.jpg: 1
026.jpg: 1
027.jpg: 1
028.jpg: 1
029.jpg: 1
030.jpg: 1
031.jpg: 1
032.jpg: 1
033.jpg: 1
034.jpg: 1
035.jpg: 1
036.jpg: 1
037.jpg: 1
038.jpg: 1
039.jpg: 1
040.jpg: 1
041.jpg: 1
042.jpg: 1
043.jpg: 1
044.jpg: 1
045.jpg: 1
046.jpg: 1
047.jpg: 1
048.jpg: 1
049.jpg: 1
050.jpg: 1
051.jpg: 2
052.jpg: 2
053.jpg: 2
054.jpg: 2
055.jpg: 2
056.jpg: 1
057.jpg: 1
058.jpg: 1
059.jpg: 2
060.jpg: 2
061.jpg: 2
062.jpg: 2
063.jpg: 3
064.jpg: 2
065.jpg: 2
066.jpg: 3
067.jpg: 2
068.jpg: 2
069.jpg: 3
070.jpg: 2
071.jpg: 2
072.jpg: 2
073.jpg: 3
074.jpg: 2
075.jpg: 3
076.jpg: 3
077.jpg: 3
078.jpg: 3
079.jpg: 3
080.jpg: 2
081.jpg: 2
082.jpg: 2
083.jpg: 2
084.jpg: 2
085.jpg: 2
086.jpg: 1
087.jpg: 1
088.jpg: 1
089.jpg: 1
090.jpg: 3
091.jpg: 1

In [None]:
import pandas as pd

def predict_folder_to_csv(model, test_dir, transform, num_images_per_sample, device, output_csv="predictions.csv"):
    results = []
    for filename in os.listdir(test_dir):
        if filename.lower().endswith(('.jpg', '.png', '.jpeg')):
            path = os.path.join(test_dir, filename)
            predicted_class = predict_single_image(model, path, transform, num_images_per_sample, device)
            file_id = os.path.splitext(filename)[0]  # Extract the prefix (ID) from the filename
            results.append({'id': file_id, 'type': predicted_class})

    df = pd.DataFrame(results)
    df.to_csv(output_csv, index=False)
    print(f"Predictions saved to {output_csv}")


In [None]:
predict_folder_to_csv(model=model,
                      test_dir="D:\\IT\\GITHUB\\Hutech-AI-Challenge\\data\\test",
                      transform=transform,
                      num_images_per_sample=num_images_per_sample,
                      device=device,
                      output_csv="submission-3.csv")

Predictions saved to submission-2.csv
