In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torchvision import models
from torchvision.models.vision_transformer import ViT_L_16_Weights # Added import
from torch.utils.data import DataLoader
from torchvision.datasets import ImageFolder

# --- Import tambahan untuk metrik, plotting, dan progress bar ---
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import classification_report, confusion_matrix
from tqdm.auto import tqdm # <-- IMPORT PENTING UNTUK PROGRESS BAR

In [2]:
DEVICE = torch.device("cuda")
print(f"Menggunakan device: {DEVICE}")

Menggunakan device: cuda


In [3]:
!nvidia-smi

Mon Nov  3 05:10:48 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 577.09                 Driver Version: 577.09         CUDA Version: 12.9     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                  Driver-Model | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA GeForce RTX 4050 ...  WDDM  |   00000000:01:00.0 Off |                  N/A |
| N/A   52C    P0             11W /   95W |       0MiB /   6141MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

In [None]:
IMAGE_SIZE = 512 # Changed from 224 to 512 to match ViT-Large input size
BATCH_SIZE = 4 # Sesuaikan jika perlu
LR_FEATURE_EXTRACTION = 1e-3
LR_FINE_TUNING = 1e-5

EPOCHS_FEATURE_EXTRACTION = 3 # Epoch pemanasan untuk head
MAX_EPOCHS_FINE_TUNING = 100  # Epoch maksimal untuk fine-tuning
EARLY_STOPPING_PATIENCE = 10  # Jumlah epoch untuk menunggu jika tidak ada peningkatan

In [6]:
DATA_DIR = r"C:\Users\taqiy\Downloads\data-mining-action-2025\train_with_label"

In [7]:
transform = transforms.Compose([
    transforms.Resize((IMAGE_SIZE, IMAGE_SIZE)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

In [8]:
full_dataset = ImageFolder(root=DATA_DIR, transform=transform)
print(f"Dataset dimuat. Ditemukan {len(full_dataset)} gambar.")

Dataset dimuat. Ditemukan 3952 gambar.


In [9]:
class_names = full_dataset.classes
NUM_CLASSES = len(class_names)
print(f"Ditemukan {NUM_CLASSES} kelas: {class_names}")

Ditemukan 15 kelas: ['Ayam Bakar', 'Ayam Betutu', 'Ayam Goreng', 'Ayam Pop', 'Bakso', 'Coto Makassar', 'Gado Gado', 'Gudeg', 'Nasi Goreng', 'Pempek', 'Rawon', 'Rendang', 'Sate Madura', 'Sate Padang', 'Soto']


In [10]:
train_size = int(0.8 * len(full_dataset))
val_size = len(full_dataset) - train_size
train_dataset, val_dataset = torch.utils.data.random_split(full_dataset, [train_size, val_size])
print(f"Data latih: {len(train_dataset)}")
print(f"Data validasi: {len(val_dataset)}")

Data latih: 3161
Data validasi: 791


In [11]:
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=0)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=0)

In [15]:
from torchvision import models
num_classes = state["heads.head.bias"].shape[0]         # 15
grid_tokens = state["encoder.pos_embedding"].shape[1]-1 # 1025-1 = 1024
grid_size = int(grid_tokens ** 0.5)                     # 32
image_size = grid_size * 16                             # 512

# 2) Bangun model dengan arsitektur yang sama
model = models.vit_l_16(weights=None,
                        num_classes=num_classes,
                        image_size=image_size)

# 3) Load state_dict
missing_unexpected = model.load_state_dict(state, strict=True)
print("Loaded OK.", missing_unexpected)

Loaded OK. <All keys matched successfully>


In [None]:
# torch.save(model.state_dict(), "vit_l_16_swag_e2e_v1_state.pth")
# print("Saved: vit_l_16_swag_e2e_v1_state.pth")

Saved: vit_l_16_swag_e2e_v1_state.pth


## Bekukan semua parameter di base model

In [17]:
for param in model.parameters():
    param.requires_grad = False

In [None]:
# !conda install pytorch torchvision pytorch-cuda=12.4 -c pytorch -c nvidia

^C


In [19]:
in_features = model.heads.head.in_features
model.heads.head = nn.Linear(in_features, NUM_CLASSES)
model = model.to(DEVICE)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.heads.head.parameters(), lr=LR_FEATURE_EXTRACTION)

AssertionError: Torch not compiled with CUDA enabled

In [None]:
for epoch in range(EPOCHS_FEATURE_EXTRACTION):
    model.train()
    running_loss = 0.0
    running_correct = 0
    running_total = 0

    # Bungkus train_loader dengan tqdm
    loop = tqdm(train_loader, desc=f"Epoch {epoch + 1}/{EPOCHS_FEATURE_EXTRACTION}")

    for inputs, labels in loop:
        inputs, labels = inputs.to(DEVICE), labels.to(DEVICE)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

        # Hitung akurasi
        _, predicted = torch.max(outputs.data, 1)
        running_total += labels.size(0)
        running_correct += (predicted == labels).sum().item()

        # Update progress bar
        loop.set_postfix(
            loss=(running_loss / (loop.n + 1)),
            accuracy=(running_correct / running_total)
        )

print('Tahap 1 Selesai.')

Epoch 1/3:   0%|          | 0/633 [00:00<?, ?it/s]

Epoch 2/3:   0%|          | 0/633 [00:00<?, ?it/s]

Epoch 3/3:   0%|          | 0/633 [00:00<?, ?it/s]

Tahap 1 Selesai.


In [None]:
print("\n--- Memulai Pelatihan Tahap 2 (Fine-Tuning) ---")

for param in model.parameters():
    param.requires_grad = True
optimizer = optim.Adam(model.parameters(), lr=LR_FINE_TUNING)

best_val_loss = float('inf')
patience_counter = 0
best_epoch = 0

for epoch in range(MAX_EPOCHS_FINE_TUNING):
    # --- Training Phase ---
    model.train()
    running_train_loss = 0.0
    running_train_correct = 0
    running_train_total = 0

    # Bungkus train_loader dengan tqdm
    train_loop = tqdm(train_loader, desc=f"Epoch {epoch + 1}/{MAX_EPOCHS_FINE_TUNING} [Train]", leave=False)

    for inputs, labels in train_loop:
        inputs, labels = inputs.to(DEVICE), labels.to(DEVICE)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_train_loss += loss.item()
        _, predicted = torch.max(outputs.data, 1)
        running_train_total += labels.size(0)
        running_train_correct += (predicted == labels).sum().item()

        # Update progress bar training
        train_loop.set_postfix(
            loss=(running_train_loss / (train_loop.n + 1)),
            accuracy=(running_train_correct / running_train_total)
        )

    avg_train_loss = running_train_loss / len(train_loader)
    avg_train_acc = running_train_correct / running_train_total

    # --- Validation Phase ---
    model.eval()
    running_val_loss = 0.0
    running_val_correct = 0
    running_val_total = 0

    # Bungkus val_loader dengan tqdm
    val_loop = tqdm(val_loader, desc=f"Epoch {epoch + 1}/{MAX_EPOCHS_FINE_TUNING} [Val]", leave=False)

    with torch.no_grad():
        for inputs, labels in val_loop:
            inputs, labels = inputs.to(DEVICE), labels.to(DEVICE)
            outputs = model(inputs)
            loss = criterion(outputs, labels)

            running_val_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            running_val_total += labels.size(0)
            running_val_correct += (predicted == labels).sum().item()

            # Update progress bar validasi
            val_loop.set_postfix(
                val_loss=(running_val_loss / (val_loop.n + 1)),
                val_accuracy=(running_val_correct / running_val_total)
            )

    avg_val_loss = running_val_loss / len(val_loader)
    avg_val_acc = running_val_correct / running_val_total

    # Cetak ringkasan epoch (mirip Keras)
    # Anda bisa mengatur format string ini sesuai keinginan
    print(f"Epoch {epoch + 1}/{MAX_EPOCHS_FINE_TUNING} - "
          f"loss: {avg_train_loss:.4f} - accuracy: {avg_train_acc:.4f} - "
          f"val_loss: {avg_val_loss:.4f} - val_accuracy: {avg_val_acc:.4f}")

    # --- Logika Early Stopping ---
    if avg_val_loss < best_val_loss:
        print(f'   Val loss membaik ({best_val_loss:.4f} --> {avg_val_loss:.4f}). Menyimpan model...')
        best_val_loss = avg_val_loss
        patience_counter = 0
        torch.save(model.state_dict(), 'best_model.pth')
        best_epoch = epoch + 1
    else:
        patience_counter += 1
        print(f'   Val loss tidak membaik. Patience: {patience_counter}/{EARLY_STOPPING_PATIENCE}')

    if patience_counter >= EARLY_STOPPING_PATIENCE:
        print(f'Early stopping dipicu pada epoch {epoch + 1}!')
        break

print(f'Tahap 2 Selesai. Model terbaik disimpan dari epoch {best_epoch}.')


--- Memulai Pelatihan Tahap 2 (Fine-Tuning) ---


Epoch 1/100 [Train]:   0%|          | 0/633 [00:00<?, ?it/s]

Epoch 1/100 [Val]:   0%|          | 0/159 [00:00<?, ?it/s]

Epoch 1/100 - loss: 0.5890 - accuracy: 0.8311 - val_loss: 0.5104 - val_accuracy: 0.8470
   Val loss membaik (inf --> 0.5104). Menyimpan model...


Epoch 2/100 [Train]:   0%|          | 0/633 [00:00<?, ?it/s]

In [None]:
# --- 6. Evaluasi Model (Laporan & Confusion Matrix) ---
# (Bagian ini tetap sama, tidak perlu diubah)
print("\n--- Evaluasi Model Final (dari best_model.pth) ---")

model.load_state_dict(torch.load('best_model.pth'))
model.eval()

all_labels = []
all_preds = []
correct = 0
total = 0

with torch.no_grad():
    for data in val_loader: # Menggunakan val_loader untuk evaluasi
        images, labels = data
        images, labels = images.to(DEVICE), labels.to(DEVICE)

        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)

        total += labels.size(0)
        correct += (predicted == labels).sum().item()

        all_labels.extend(labels.cpu().numpy())
        all_preds.extend(predicted.cpu().numpy())

# 1. Cetak Akurasi Keseluruhan
accuracy = 100 * correct / total
print(f'Akurasi pada {total} gambar validasi: {accuracy:.2f} %')
print("-" * 70)

# 2. Cetak Classification Report
print("Classification Report:")
print(classification_report(all_labels, all_preds, target_names=class_names))
print("-" * 70)

# 3. Buat dan Tampilkan Confusion Matrix
print("Confusion Matrix:")
cm = confusion_matrix(all_labels, all_preds)
plt.figure(figsize=(12, 10))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=class_names, yticklabels=class_names)
plt.xlabel('Prediksi')
plt.ylabel('Aktual (True Label)')
plt.title('Confusion Matrix untuk Validasi Set')
plt.show()

In [None]:
import os
import glob
from PIL import Image
import pandas as pd
from tqdm.auto import tqdm

TEST_DIR = '/content/drive/MyDrive/test/test'
CSV_OUTPUT_PATH = '/content/drive/MyDrive/submission.csv'

# --- 2. Transformasi untuk Validasi/Tes ---
# (Tanpa augmentasi, hanya resize dan normalisasi)
transform_val = transforms.Compose([
    transforms.Resize((IMAGE_SIZE, IMAGE_SIZE)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# --- 3. Custom Dataset untuk Folder Tes ---
# Dataset ini memuat gambar dari folder datar (tanpa sub-folder label)

from torch.utils.data import Dataset # Import Dataset class
class CustomTestDataset(Dataset):
    def __init__(self, img_dir, transform=None):
        # Cari semua file gambar (jpg, jpeg, png)
        self.img_paths = glob.glob(os.path.join(img_dir, '*.jpg')) + \
                         glob.glob(os.path.join(img_dir, '*.jpeg')) + \
                         glob.glob(os.path.join(img_dir, '*.png'))
        self.transform = transform

        if not self.img_paths:
            print(f"Peringatan: Tidak ada gambar .jpg, .jpeg, atau .png yang ditemukan di {img_dir}")

    def __len__(self):
        return len(self.img_paths)

    def __getitem__(self, idx):
        img_path = self.img_paths[idx]

        # Buka gambar
        image = Image.open(img_path).convert('RGB')

        # Terapkan transformasi
        if self.transform:
            image = self.transform(image)

        # Ekstrak ID (nama file tanpa ekstensi)
        # misal: '/content/drive/MyDrive/test/1.jpg' -> '1'
        file_id = os.path.splitext(os.path.basename(img_path))[0]

        return image, file_id

# --- 4. Memuat Data Tes ---
print(f"Memuat data tes dari: {TEST_DIR}")
test_dataset = CustomTestDataset(img_dir=TEST_DIR, transform=transform_val)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=0) # Set num_workers to 0 to avoid multiprocessing issues

print(f"Ditemukan {len(test_dataset)} gambar untuk diprediksi.")

# --- 5. Memuat Model yang Sudah Dilatih ---
print("Memuat model 'best_model.pth'...")

# Asumsi: Variabel NUM_CLASSES dan class_names ADA dari sel sebelumnya
# Jika tidak, Anda harus mendefinisikannya secara manual di sini, contoh:
# NUM_CLASSES = 16
# class_names = ['Ayam Bakar', 'Ayam Betutu', ...]

if 'NUM_CLASSES' not in locals():
    print("Error: Variabel 'NUM_CLASSES' tidak ditemukan. Jalankan sel pelatihan dulu.")
else:
    # 1. Buat ulang arsitektur model (MATCHING THE SAVED MODEL)
    model = models.vit_b_16(weights=None) # Tidak perlu weights pre-trained, kita akan load
    in_features = model.heads.head.in_features

    # 2. Buat ulang head klasifikasi (MATCHING THE SAVED MODEL - WITHOUT nn.Sequential)
    # Based on the error, it seems the saved model did NOT have the nn.Sequential wrapper.
    model.heads.head = nn.Linear(in_features, NUM_CLASSES)


    # 3. Muat bobot (weights) terbaik yang disimpan
    try:
        model.load_state_dict(torch.load('best_model.pth'))
        print("Model berhasil dimuat.")
    except RuntimeError as e:
        print(f"Error loading state_dict: {e}")
        print("Attempting to load with strict=False to handle potential key mismatches.")
        # This might happen if the saved state_dict has slight variations
        model.load_state_dict(torch.load('best_model.pth'), strict=False)
        print("Model loaded with strict=False.")

    model = model.to(DEVICE)
    model.eval() # PENTING: Set model ke mode evaluasi


    # --- 6. Melakukan Prediksi ---
    results = []

    with torch.no_grad(): # Matikan gradien untuk inferensi
        for images, file_ids in tqdm(test_loader, desc="Melakukan Prediksi"):
            images = images.to(DEVICE)

            # Dapatkan output model
            outputs = model(images)

            # Dapatkan prediksi (indeks dengan nilai tertinggi)
            _, predicted_indices = torch.max(outputs.data, 1)

            # Ubah indeks ke nama kelas
            predicted_labels = [class_names[i] for i in predicted_indices]

            # Simpan hasil (ID file dan label prediksi)
            for i in range(len(file_ids)):
                results.append([file_ids[i], predicted_labels[i]])

    # --- 7. Menyimpan Hasil ke CSV ---
    print("Prediksi selesai. Menyimpan ke CSV...")

    # Buat DataFrame Pandas dari hasil
    df = pd.DataFrame(results, columns=['ID', 'label'])

    # Urutkan berdasarkan ID (jika ID-nya adalah angka)
    # Ini membantu jika ID-nya '1', '10', '2' agar menjadi '1', '2', '10'
    try:
        df['ID_num'] = df['ID'].astype(int)
        df = df.sort_values(by='ID_num').drop(columns=['ID_num'])
    except ValueError:
        # Jika ID bukan angka (misal: 'img_a', 'img_b'), urutkan berdasarkan alfabet
        df = df.sort_values(by='ID')

    # Simpan ke file CSV
    df.to_csv(CSV_OUTPUT_PATH, index=False)

    print(f"Berhasil! Hasil prediksi disimpan di: {CSV_OUTPUT_PATH}")

    # Tampilkan 5 baris pertama dari hasil
    print("\nContoh 5 baris pertama dari hasil:")
    print(df.head())

In [None]:
from google.colab import files

try:
    files.download('best_model.pth')
    print("File 'best_model.pth' berhasil didownload.")
except FileNotFoundError:
    print("Error: File 'best_model.pth' tidak ditemukan. Pastikan model sudah disimpan sebelumnya.")