In [1]:
import os
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, models
from PIL import Image
from tqdm import tqdm

In [2]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Device:", device)

Device: cpu


In [3]:
class CustomImageDataset(Dataset):
    def __init__(self, csv_file, root_dir, transform=None):
        self.annotations = pd.read_csv(csv_file)
        self.root_dir = root_dir
        self.transform = transform

    def __len__(self):
        return len(self.annotations)

    def __getitem__(self, idx):
        img_name = self.annotations.iloc[idx]['id']
        label = int(self.annotations.iloc[idx]['label'])

        # Cari file di subfolder (karena ada folder "Bacterial Pneumonia", dst.)
        for subfolder in os.listdir(self.root_dir):
            path = os.path.join(self.root_dir, subfolder, img_name)
            if os.path.exists(path):
                image = Image.open(path).convert("RGB")
                break

        if self.transform:
            image = self.transform(image)
        return image, label


class CustomTestDataset(Dataset):
    def __init__(self, root_dir, transform=None):
        self.root_dir = root_dir
        self.transform = transform
        # kumpulin semua path test
        self.files = []
        for subfolder in os.listdir(root_dir):
            folder_path = os.path.join(root_dir, subfolder)
            if os.path.isdir(folder_path):
                self.files.extend([os.path.join(folder_path, f) for f in os.listdir(folder_path)])
            else:
                self.files.append(folder_path)

    def __len__(self):
        return len(self.files)

    def __getitem__(self, idx):
        img_path = self.files[idx]
        image = Image.open(img_path).convert("RGB")
        if self.transform:
            image = self.transform(image)
        filename = os.path.basename(img_path)
        return image, filename

In [4]:
transform_train = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406],
                         [0.229, 0.224, 0.225])
])

transform_val = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406],
                         [0.229, 0.224, 0.225])
])


In [5]:
train_dataset = CustomImageDataset(
    r"D:\Semester 5\Competition\Srifiton-nama-timnya-apa-ya\srifoton-25-machine-learning-competition\train.csv",
    r"D:\Semester 5\Competition\Srifiton-nama-timnya-apa-ya\srifoton-25-machine-learning-competition\train",
    transform=transform_train
)

val_dataset = CustomImageDataset(
    r"D:\Semester 5\Competition\Srifiton-nama-timnya-apa-ya\srifoton-25-machine-learning-competition\val.csv",
    r"D:\Semester 5\Competition\Srifiton-nama-timnya-apa-ya\srifoton-25-machine-learning-competition\val",
    transform=transform_val
)

test_dataset = CustomTestDataset(
    r"D:\Semester 5\Competition\Srifiton-nama-timnya-apa-ya\srifoton-25-machine-learning-competition\test",
    transform=transform_val
)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader   = DataLoader(val_dataset, batch_size=32, shuffle=False)
test_loader  = DataLoader(test_dataset, batch_size=32, shuffle=False)

print("Train samples:", len(train_dataset))
print("Val samples:", len(val_dataset))
print("Test samples:", len(test_dataset))

Train samples: 6054
Val samples: 2016
Test samples: 2025


In [6]:
num_classes = len(set(train_dataset.annotations['label']))
model = models.resnet18(weights="IMAGENET1K_V1")
model.fc = nn.Linear(model.fc.in_features, num_classes)
model = model.to(device)

In [7]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-4)

In [8]:
num_epochs = 10
best_val_acc = 0.0

for epoch in range(num_epochs):
    # ---- Train ----
    model.train()
    running_loss, correct, total = 0.0, 0, 0
    for images, labels in tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs}"):
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    train_acc = 100 * correct / total
    print(f"Train Loss: {running_loss/len(train_loader):.4f}, Train Acc: {train_acc:.2f}%")

    # ---- Validation ----
    model.eval()
    val_correct, val_total = 0, 0
    with torch.no_grad():
        for images, labels in val_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs, 1)
            val_total += labels.size(0)
            val_correct += (predicted == labels).sum().item()
    val_acc = 100 * val_correct / val_total
    print(f"Validation Acc: {val_acc:.2f}%")

    # Simpan model terbaik
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        torch.save(model.state_dict(), "best_model.pth")
        print("Model disimpan (best).")

Epoch 1/10: 100%|██████████| 190/190 [05:55<00:00,  1.87s/it]


Train Loss: 0.4471, Train Acc: 81.80%


UnboundLocalError: cannot access local variable 'image' where it is not associated with a value

In [9]:
model.load_state_dict(torch.load("best_model.pth"))
model.eval()
predictions, filenames = [], []

with torch.no_grad():
    for images, paths in test_loader:
        images = images.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs, 1)
        predictions.extend(predicted.cpu().numpy())
        filenames.extend(paths)
        

In [None]:
df_sub = pd.DataFrame({"id": filenames, "Predicted": predictions})
df_sub.to_csv("submission.csv", index=False)
print("Submission file saved: submission.csv")

Submission file saved: submission.csv
