In [37]:
pip install timm torchvision torch torchvision torchaudio tqdm scikit-learn matplotlib pandas


Note: you may need to restart the kernel to use updated packages.


## SETUP

In [38]:
import os
import time
import math
import random

import numpy as np
import pandas as pd
from tqdm import tqdm

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

from torchvision import transforms
from PIL import Image

import timm

from sklearn.metrics import classification_report, confusion_matrix
import matplotlib.pyplot as plt

## KONFIGURASI 

In [39]:
DATA_DIR = "dataset"
TRAIN_CSV = "train.csv"
VAL_CSV = "val.csv"
TEST_CSV = "test.csv"

# Ganti ini kalau mau model lain
MODEL_NAME = "vit_tiny_patch16_224"    # opsi 1
# MODEL_NAME = "deit_tiny_patch16_224" # opsi 2, jalankan di run terpisah

MODEL_TAG = MODEL_NAME.replace("_patch16_224", "").replace("_tiny", "_tiny")

NUM_CLASSES = 5
IMAGE_SIZE = 224
BATCH_SIZE = 16
NUM_EPOCHS = 15
LR = 1e-4
WEIGHT_DECAY = 1e-4
NUM_WORKERS = 0
SEED = 42

DEVICE = "cpu"
print(f"Device: {DEVICE}")

Device: cpu


## SEEDING

In [40]:
def set_seed(seed=42):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

    torch.backends.cudnn.deterministic = False
    torch.backends.cudnn.benchmark = True

set_seed(SEED)

## KELAS DAN MAPPING LABEL

In [41]:
CLASSES = ["bakso", "gado_gado", "nasi_goreng", "rendang", "soto_ayam"]
CLASS_TO_IDX = {c: i for i, c in enumerate(CLASSES)}
IDX_TO_CLASS = {i: c for c, i in CLASS_TO_IDX.items()}

print("Class to idx mapping:", CLASS_TO_IDX)

Class to idx mapping: {'bakso': 0, 'gado_gado': 1, 'nasi_goreng': 2, 'rendang': 3, 'soto_ayam': 4}


## DATASET

In [42]:
class FoodDataset(Dataset):
    def __init__(self, csv_path, img_dir, transform=None):
        self.df = pd.read_csv(csv_path)
        self.img_dir = img_dir
        self.transform = transform

        self.filenames = self.df["filename"].values
        self.labels_str = self.df["label"].values
        self.labels = [CLASS_TO_IDX[label] for label in self.labels_str]

    def __len__(self):
        return len(self.filenames)

    def __getitem__(self, idx):
        filename = self.filenames[idx]
        label = self.labels[idx]
    
        img_path = os.path.join(self.img_dir, filename)
    
        try:
            image = Image.open(img_path).convert("RGB")
        except Exception as e:
            raise RuntimeError(f"Gagal membuka gambar: {img_path} | Error: {e}")
    
        if self.transform is not None:
            image = self.transform(image)
    
        return image, label


## TRANSFORMS

In [43]:
train_transform = transforms.Compose([
    transforms.Resize((IMAGE_SIZE, IMAGE_SIZE)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=(0.485, 0.456, 0.406),
        std=(0.229, 0.224, 0.225),
    ),
])

eval_transform = transforms.Compose([
    transforms.Resize((IMAGE_SIZE, IMAGE_SIZE)),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=(0.485, 0.456, 0.406),
        std=(0.229, 0.224, 0.225),
    ),
])

## DATALOADER

In [44]:
train_dataset = FoodDataset(TRAIN_CSV, DATA_DIR, transform=train_transform)
val_dataset   = FoodDataset(VAL_CSV, DATA_DIR, transform=eval_transform)
test_dataset  = FoodDataset(TEST_CSV, DATA_DIR, transform=eval_transform)

train_loader = DataLoader(
    train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=0
)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False,
                        num_workers=NUM_WORKERS, pin_memory=False)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False,
                         num_workers=NUM_WORKERS, pin_memory=False)

print(f"Train: {len(train_dataset)} | Val: {len(val_dataset)} | Test: {len(test_dataset)}")

Train: 2228 | Val: 278 | Test: 279


## MODEL

In [45]:
def create_model(model_name, num_classes):
    model = timm.create_model(
        model_name,
        pretrained=True,
        num_classes=num_classes
    )
    return model

model = create_model(MODEL_NAME, NUM_CLASSES)
model.to(DEVICE)

# Info parameter
total_params = sum(p.numel() for p in model.parameters())
trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
non_trainable_params = total_params - trainable_params
model_size_mb = total_params * 4 / (1024 ** 2)  # float32 → 4 byte

print("===== INFO PARAMETER MODEL =====")
print(f"Model name          : {MODEL_NAME}")
print(f"Total params        : {total_params:,}")
print(f"Trainable params    : {trainable_params:,}")
print(f"Non-trainable params: {non_trainable_params:,}")
print(f"Approx. size        : {model_size_mb:.2f} MB")

===== INFO PARAMETER MODEL =====
Model name          : vit_tiny_patch16_224
Total params        : 5,525,381
Trainable params    : 5,525,381
Non-trainable params: 0
Approx. size        : 21.08 MB


## LOSS, OPTIMIZER, SCHEDULER

In [46]:
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.AdamW(model.parameters(), lr=LR, weight_decay=WEIGHT_DECAY)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=NUM_EPOCHS)


## FUNGSI TRAINING & VALIDASI

In [47]:
def train_one_epoch(model, loader, optimizer, criterion, device):
    model.train()
    running_loss = 0.0
    running_correct = 0
    running_total = 0

    pbar = tqdm(loader, desc="Train", leave=False)
    for images, labels in pbar:
        images = images.to(device, non_blocking=True)
        labels = labels.to(device, non_blocking=True)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)

        loss.backward()
        optimizer.step()

        preds = outputs.argmax(dim=1)
        correct = (preds == labels).sum().item()
        total = labels.size(0)

        running_loss += loss.item() * total
        running_correct += correct
        running_total += total

        pbar.set_postfix({
            "loss": f"{loss.item():.4f}",
            "acc": f"{correct / total:.4f}"
        })

    epoch_loss = running_loss / running_total
    epoch_acc = running_correct / running_total
    return epoch_loss, epoch_acc


def evaluate(model, loader, criterion, device):
    model.eval()
    running_loss = 0.0
    running_correct = 0
    running_total = 0

    all_labels = []
    all_preds = []

    with torch.no_grad():
        pbar = tqdm(loader, desc="Eval", leave=False)
        for images, labels in pbar:
            images = images.to(device, non_blocking=True)
            labels = labels.to(device, non_blocking=True)

            outputs = model(images)
            loss = criterion(outputs, labels)

            preds = outputs.argmax(dim=1)

            running_loss += loss.item() * labels.size(0)
            running_correct += (preds == labels).sum().item()
            running_total += labels.size(0)

            all_labels.append(labels.cpu().numpy())
            all_preds.append(preds.cpu().numpy())

    epoch_loss = running_loss / running_total
    epoch_acc = running_correct / running_total

    all_labels = np.concatenate(all_labels)
    all_preds = np.concatenate(all_preds)

    return epoch_loss, epoch_acc, all_labels, all_preds

## TRAINING LOOP

In [48]:
history = {
    "train_loss": [],
    "train_acc": [],
    "val_loss": [],
    "val_acc": []
}

best_val_acc = 0.0
best_state_dict = None

for epoch in range(NUM_EPOCHS):
    print(f"\n===== Epoch {epoch + 1}/{NUM_EPOCHS} =====")

    train_loss, train_acc = train_one_epoch(model, train_loader, optimizer, criterion, DEVICE)
    val_loss, val_acc, val_labels, val_preds = evaluate(model, val_loader, criterion, DEVICE)

    scheduler.step()

    history["train_loss"].append(train_loss)
    history["train_acc"].append(train_acc)
    history["val_loss"].append(val_loss)
    history["val_acc"].append(val_acc)

    print(f"Train Loss: {train_loss:.4f} | Train Acc: {train_acc:.4f}")
    print(f"Val   Loss: {val_loss:.4f} | Val   Acc: {val_acc:.4f}")

    if val_acc > best_val_acc:
        best_val_acc = val_acc
        best_state_dict = model.state_dict()
        print(">> Best model updated!")

# Simpan model terbaik
os.makedirs("checkpoints", exist_ok=True)
model_path = os.path.join("checkpoints", f"{MODEL_TAG}_best.pth")

if best_state_dict is not None:
    torch.save(best_state_dict, model_path)
    print(f"Best model saved to: {model_path}")
else:
    print("WARNING: best_state_dict is None, model not saved.")

# Simpan history
history_path = f"{MODEL_TAG}_history.npy"
np.save(history_path, history)
print(f"Training history saved to: {history_path}")


===== Epoch 1/15 =====


                                                                                 

Train Loss: 0.6294 | Train Acc: 0.7630
Val   Loss: 0.1371 | Val   Acc: 0.9460
>> Best model updated!

===== Epoch 2/15 =====


                                                                                 

Train Loss: 0.1786 | Train Acc: 0.9381
Val   Loss: 0.1685 | Val   Acc: 0.9281

===== Epoch 3/15 =====


                                                                                 

Train Loss: 0.1091 | Train Acc: 0.9560
Val   Loss: 0.1098 | Val   Acc: 0.9748
>> Best model updated!

===== Epoch 4/15 =====


                                                                                 

Train Loss: 0.0626 | Train Acc: 0.9758
Val   Loss: 0.0803 | Val   Acc: 0.9712

===== Epoch 5/15 =====


                                                                                 

Train Loss: 0.0445 | Train Acc: 0.9829
Val   Loss: 0.0653 | Val   Acc: 0.9784
>> Best model updated!

===== Epoch 6/15 =====


                                                                                 

Train Loss: 0.0340 | Train Acc: 0.9897
Val   Loss: 0.0649 | Val   Acc: 0.9820
>> Best model updated!

===== Epoch 7/15 =====


                                                                                 

Train Loss: 0.0218 | Train Acc: 0.9928
Val   Loss: 0.0653 | Val   Acc: 0.9784

===== Epoch 8/15 =====


                                                                                 

Train Loss: 0.0154 | Train Acc: 0.9951
Val   Loss: 0.0834 | Val   Acc: 0.9676

===== Epoch 9/15 =====


                                                                                 

Train Loss: 0.0043 | Train Acc: 0.9991
Val   Loss: 0.0546 | Val   Acc: 0.9784

===== Epoch 10/15 =====


                                                                                 

Train Loss: 0.0064 | Train Acc: 0.9973
Val   Loss: 0.0603 | Val   Acc: 0.9748

===== Epoch 11/15 =====


                                                                                 

Train Loss: 0.0036 | Train Acc: 0.9987
Val   Loss: 0.0483 | Val   Acc: 0.9820

===== Epoch 12/15 =====


                                                                                 

Train Loss: 0.0036 | Train Acc: 0.9987
Val   Loss: 0.0507 | Val   Acc: 0.9820

===== Epoch 13/15 =====


                                                                                 

Train Loss: 0.0027 | Train Acc: 0.9996
Val   Loss: 0.0602 | Val   Acc: 0.9820

===== Epoch 14/15 =====


                                                                                 

Train Loss: 0.0020 | Train Acc: 0.9996
Val   Loss: 0.0561 | Val   Acc: 0.9820

===== Epoch 15/15 =====


                                                                                 

Train Loss: 0.0009 | Train Acc: 1.0000
Val   Loss: 0.0558 | Val   Acc: 0.9820
Best model saved to: checkpoints\vit_tiny_best.pth
Training history saved to: vit_tiny_history.npy


## EVALUASI DI TEST SET

In [49]:
print("\n===== EVALUASI DI TEST SET =====")

if best_state_dict is not None:
    model.load_state_dict(best_state_dict)

test_loss, test_acc, test_labels, test_preds = evaluate(model, test_loader, criterion, DEVICE)
print(f"Test Loss: {test_loss:.4f} | Test Acc: {test_acc:.4f}")

target_names = CLASSES
report = classification_report(test_labels, test_preds, target_names=target_names, digits=4)
print("\nClassification Report:")
print(report)

# Simpan classification report ke file
with open(f"{MODEL_TAG}_classification_report.txt", "w") as f:
    f.write(report)

# Confusion matrix
cm = confusion_matrix(test_labels, test_preds)
fig, ax = plt.subplots(figsize=(6, 5))
im = ax.imshow(cm, interpolation="nearest")
ax.figure.colorbar(im, ax=ax)
ax.set(
    xticks=np.arange(cm.shape[1]),
    yticks=np.arange(cm.shape[0]),
    xticklabels=target_names,
    yticklabels=target_names,
    ylabel="True label",
    xlabel="Predicted label",
    title=f"Confusion Matrix - {MODEL_TAG}"
)
plt.setp(ax.get_xticklabels(), rotation=45, ha="right", rotation_mode="anchor")

for i in range(cm.shape[0]):
    for j in range(cm.shape[1]):
        ax.text(j, i, cm[i, j], ha="center", va="center", color="white" if cm[i, j] > cm.max()/2 else "black")

plt.tight_layout()
cm_path = f"{MODEL_TAG}_confusion_matrix.png"
plt.savefig(cm_path)
plt.close()
print(f"Confusion matrix saved to: {cm_path}")


===== EVALUASI DI TEST SET =====


                                                     

Test Loss: 0.0821 | Test Acc: 0.9857

Classification Report:
              precision    recall  f1-score   support

       bakso     0.9583    1.0000    0.9787        46
   gado_gado     0.9839    1.0000    0.9919        61
 nasi_goreng     0.9844    0.9844    0.9844        64
     rendang     1.0000    0.9841    0.9920        63
   soto_ayam     1.0000    0.9556    0.9773        45

    accuracy                         0.9857       279
   macro avg     0.9853    0.9848    0.9848       279
weighted avg     0.9860    0.9857    0.9857       279

Confusion matrix saved to: vit_tiny_confusion_matrix.png
Confusion matrix saved to: vit_tiny_confusion_matrix.png


## PENGUKURAN WAKTU INFERENSI

In [50]:
def measure_inference_time(model, loader, device, num_warmup=20, max_images=300):
    model.eval()
    total_images = 0
    total_time = 0.0

    # Warm-up
    with torch.no_grad():
        for i, (images, _) in enumerate(loader):
            images = images.to(device, non_blocking=True)
            _ = model(images)
            if (i + 1) * images.size(0) >= num_warmup:
                break

    # Timing
    with torch.no_grad():
        start = time.perf_counter()
        for images, _ in loader:
            images = images.to(device, non_blocking=True)
            _ = model(images)
            bs = images.size(0)
            total_images += bs
            if total_images >= max_images:
                break
        end = time.perf_counter()

    total_time = end - start
    avg_time_per_image = (total_time / total_images) * 1000.0  # ms
    throughput = total_images / total_time  # img/s
    return avg_time_per_image, throughput, total_images, total_time

print("\n===== MENGUKUR WAKTU INFERENSI (TEST SET, SUBSET) =====")
avg_ms, tpt, n_img, total_t = measure_inference_time(model, test_loader, DEVICE)
print(f"Images tested     : {n_img}")
print(f"Total time        : {total_t:.4f} s")
print(f"Avg time / image  : {avg_ms:.4f} ms")
print(f"Throughput        : {tpt:.2f} images/s")

# Simpan ringkasan hasil ke CSV / TXT
summary = {
    "model_name": MODEL_NAME,
    "total_params": total_params,
    "trainable_params": trainable_params,
    "non_trainable_params": non_trainable_params,
    "model_size_mb": model_size_mb,
    "best_val_acc": best_val_acc,
    "test_loss": test_loss,
    "test_acc": test_acc,
    "avg_inference_ms": avg_ms,
    "throughput_img_per_s": tpt,
    "num_test_inference": n_img,
    "total_inference_time_s": total_t,
}

summary_df = pd.DataFrame([summary])
summary_path = f"{MODEL_TAG}_summary.csv"
summary_df.to_csv(summary_path, index=False)
print(f"\nSummary saved to: {summary_path}")
print("Selesai.")


===== MENGUKUR WAKTU INFERENSI (TEST SET, SUBSET) =====
Images tested     : 279
Total time        : 13.6402 s
Avg time / image  : 48.8897 ms
Throughput        : 20.45 images/s

Summary saved to: vit_tiny_summary.csv
Selesai.
Images tested     : 279
Total time        : 13.6402 s
Avg time / image  : 48.8897 ms
Throughput        : 20.45 images/s

Summary saved to: vit_tiny_summary.csv
Selesai.
