In [None]:
!pip install torch torchvision numpy matplotlib scikit-learn tqdm

import os
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
from tqdm import tqdm

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import models, transforms

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Menggunakan device: {device}")

In [None]:
from google.colab import drive
import os

drive.mount('/content/drive')

zip_train_path = '/content/drive/MyDrive/dataset/train.zip'
zip_val_path   = '/content/drive/MyDrive/dataset/val.zip'

base_dir  = '/content/temp_dataset'
train_dir = os.path.join(base_dir, 'train')
val_dir   = os.path.join(base_dir, 'val')

def extract_zip(zip_path, dest_path):
    if not os.path.exists(dest_path):
        print(f"Membuat folder {dest_path}...")
        os.makedirs(dest_path, exist_ok=True)

        print(f"Mengekstrak {zip_path} ke {dest_path}...")
        !unzip -q "$zip_path" -d "$dest_path"
        print(f"Selesai mengekstrak ke {dest_path}")
    else:
        print(f"Folder {dest_path} sudah ada. Melewati ekstraksi.")

if os.path.exists(zip_train_path):
    extract_zip(zip_train_path, train_dir)
else:
    print(f"PERINGATAN: File {zip_train_path} tidak ditemukan di Drive!")

if os.path.exists(zip_val_path):
    extract_zip(zip_val_path, val_dir)
else:
    print(f"PERINGATAN: File {zip_val_path} tidak ditemukan di Drive!")

DATA_DIR = base_dir
TRAIN_DIR = train_dir
VAL_DIR = val_dir

print("\n--- Pengecekan Struktur Folder ---")
if os.path.exists(TRAIN_DIR):
    print(f"Isi {TRAIN_DIR}: {os.listdir(TRAIN_DIR)}")

    subfolders = os.listdir(TRAIN_DIR)
    if len(subfolders) > 0:
        first_sub = os.path.join(TRAIN_DIR, subfolders[0])
        if os.path.isdir(first_sub):
             print(f"Contoh isi dalam '{subfolders[0]}': {os.listdir(first_sub)[:5]}")

if os.path.exists(VAL_DIR):
    print(f"Isi {VAL_DIR}: {os.listdir(VAL_DIR)}")

In [6]:
class FFTTransform:
    def __call__(self, img):
        #Grayscale
        img_gray = img.convert('L')
        img_array = np.array(img_gray)

        #Fast Fourier Transform
        f = np.fft.fft2(img_array)
        fshift = np.fft.fftshift(f) #frekuensi rendah ke tengah

        #Hitung Magnitude Spectrum
        magnitude_spectrum = 20 * np.log(np.abs(fshift) + 1e-8)

        #Normalisasi ke range 0-255
        magnitude_spectrum = np.nan_to_num(magnitude_spectrum)
        ms_min = np.min(magnitude_spectrum)
        ms_max = np.max(magnitude_spectrum)

        # Scaling min-max
        if ms_max - ms_min > 0:
            img_fft = 255 * (magnitude_spectrum - ms_min) / (ms_max - ms_min)
        else:
            img_fft = np.zeros_like(magnitude_spectrum)

        img_fft = img_fft.astype(np.uint8)

        #Konversi PIL Image
        img_fft_pil = Image.fromarray(img_fft).convert("RGB")

        return img_fft_pil

# def visualize_fft_sample(image_path):
#     img = Image.open(image_path)
#     transformer = FFTTransform()
#     fft_img = transformer(img)

#     fig, ax = plt.subplots(1, 2, figsize=(10, 5))
#     ax[0].imshow(img)
#     ax[0].set_title("Original Image")
#     ax[1].imshow(fft_img)
#     ax[1].set_title("FFT Spectrum Input")
#     plt.show()

# visualize_fft_sample('/content/drive/MyDrive/dataset/train/train/ai/imgAI602.jpg')

In [11]:
import os
import shutil

def remove_macos_artifacts(path):
    print(f"Membersihkan artefak MacOS di: {path}")
    count_files = 0
    count_folders = 0

    for root, dirs, files in os.walk(path):
        # 1. Hapus folder __MACOSX
        if '__MACOSX' in dirs:
            rm_path = os.path.join(root, '__MACOSX')
            try:
                shutil.rmtree(rm_path)
                count_folders += 1
            except Exception as e:
                print(f"Gagal hapus {rm_path}: {e}")
            # Hapus dari list dirs agar tidak ditelusuri
            dirs.remove('__MACOSX')

        # 2. Hapus file diawali ._
        for file in files:
            if file.startswith('._'):
                file_path = os.path.join(root, file)
                try:
                    os.remove(file_path)
                    count_files += 1
                except Exception as e:
                    print(f"Gagal hapus {file_path}: {e}")

    print(f"Selesai! Dihapus: {count_folders} folder __MACOSX dan {count_files} file metadata.")

# Jalankan pembersihan di folder data kita
# Pastikan DATA_DIR mengarah ke '/content/temp_dataset' atau lokasi unzip Anda
remove_macos_artifacts(DATA_DIR)

Membersihkan artefak MacOS di: /content/temp_dataset
Selesai! Dihapus: 2 folder __MACOSX dan 0 file metadata.


In [12]:
# --- JALANKAN ULANG BAGIAN INI SETELAH PEMBERSIHAN ---

image_datasets = {
    'train': torchvision.datasets.ImageFolder(TRAIN_DIR, data_transforms['train']),
    'val': torchvision.datasets.ImageFolder(VAL_DIR, data_transforms['val'])
}

dataloaders = {
    'train': DataLoader(image_datasets['train'], batch_size=32, shuffle=True, num_workers=2),
    'val': DataLoader(image_datasets['val'], batch_size=32, shuffle=False, num_workers=2)
}

dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'val']}
print("DataLoader berhasil diperbarui tanpa file sampah MacOS.")
print(f"Total Train: {dataset_sizes['train']}")

DataLoader berhasil diperbarui tanpa file sampah MacOS.
Total Train: 17410


In [13]:
# --- JALANKAN ULANG BAGIAN INI SETELAH PEMBERSIHAN ---

image_datasets = {
    'train': torchvision.datasets.ImageFolder(TRAIN_DIR, data_transforms['train']),
    'val': torchvision.datasets.ImageFolder(VAL_DIR, data_transforms['val'])
}

dataloaders = {
    'train': DataLoader(image_datasets['train'], batch_size=32, shuffle=True, num_workers=2),
    'val': DataLoader(image_datasets['val'], batch_size=32, shuffle=False, num_workers=2)
}

dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'val']}
print("DataLoader berhasil diperbarui tanpa file sampah MacOS.")
print(f"Total Train: {dataset_sizes['train']}")

DataLoader berhasil diperbarui tanpa file sampah MacOS.
Total Train: 17410


In [None]:
from google.colab import drive
drive.mount('/content/drive')

In [14]:
model = models.efficientnet_b0(weights=models.EfficientNet_B0_Weights.IMAGENET1K_V1)

num_ftrs = model.classifier[1].in_features

model.classifier = nn.Sequential(
    nn.Dropout(p=0.2, inplace=True),
    nn.Linear(num_ftrs, 2)
)

model = model.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.0001)

In [17]:
import os
from PIL import Image

def validate_and_clean_images(root_dir):
    print(f"üîç Memulai pengecekan integritas gambar di: {root_dir}")
    print("Proses ini mungkin memakan waktu 1-2 menit...")

    bad_files = 0
    checked_files = 0

    # Gunakan os.walk untuk menelusuri semua subfolder sedalam apapun
    for root, dirs, files in os.walk(root_dir):
        for file in files:
            # Cek ekstensi file gambar
            if file.lower().endswith(('.jpg', '.jpeg', '.png', '.bmp', '.tiff')):
                file_path = os.path.join(root, file)
                checked_files += 1

                try:
                    # Coba buka gambar
                    with Image.open(file_path) as img:
                        # verify() cek header file rusak/tidaknya
                        img.verify()
                except (IOError, SyntaxError, Image.UnidentifiedImageError) as e:
                    print(f"‚ùå File rusak ditemukan: {file_path}")
                    try:
                        os.remove(file_path)
                        print(f"   üóëÔ∏è Berhasil dihapus.")
                        bad_files += 1
                    except Exception as del_err:
                        print(f"   ‚ö†Ô∏è Gagal menghapus: {del_err}")

    print("-" * 30)
    print(f"‚úÖ Selesai! Total diperiksa: {checked_files}")
    print(f"üóëÔ∏è Total file rusak dihapus: {bad_files}")

# Jalankan di folder utama dataset
# Pastikan DATA_DIR mengarah ke '/content/temp_dataset' (tempat unzip tadi)
validate_and_clean_images(DATA_DIR)

üîç Memulai pengecekan integritas gambar di: /content/temp_dataset
Proses ini mungkin memakan waktu 1-2 menit...
‚ùå File rusak ditemukan: /content/temp_dataset/train/train/train/ai/imgAI5336.jpg
   üóëÔ∏è Berhasil dihapus.
------------------------------
‚úÖ Selesai! Total diperiksa: 18610
üóëÔ∏è Total file rusak dihapus: 1


In [18]:
# Setup ulang datasets & dataloaders
image_datasets = {
    'train': torchvision.datasets.ImageFolder(TRAIN_DIR, data_transforms['train']),
    'val': torchvision.datasets.ImageFolder(VAL_DIR, data_transforms['val'])
}

dataloaders = {
    'train': DataLoader(image_datasets['train'], batch_size=32, shuffle=True, num_workers=2),
    'val': DataLoader(image_datasets['val'], batch_size=32, shuffle=False, num_workers=2)
}

dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'val']}
print(f"Dataset di-refresh. Total Train: {dataset_sizes['train']}, Val: {dataset_sizes['val']}")

Dataset di-refresh. Total Train: 17409, Val: 1200


In [None]:
model_trained, history = train_model(model, criterion, optimizer, num_epochs=10)

Epoch 1/10
----------


train: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 545/545 [01:59<00:00,  4.57it/s]


train Loss: 0.0005 Acc: 1.0000


val: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 38/38 [00:05<00:00,  6.68it/s]


val Loss: 0.0004 Acc: 1.0000
Epoch 2/10
----------


train: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 545/545 [01:59<00:00,  4.56it/s]


train Loss: 0.0002 Acc: 1.0000


val: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 38/38 [00:05<00:00,  6.74it/s]


val Loss: 0.0009 Acc: 0.9992
Epoch 3/10
----------


train: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 545/545 [01:58<00:00,  4.60it/s]


train Loss: 0.0001 Acc: 1.0000


val: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 38/38 [00:06<00:00,  5.53it/s]


val Loss: 0.0023 Acc: 0.9983
Epoch 4/10
----------


train: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 545/545 [01:59<00:00,  4.57it/s]


train Loss: 0.0000 Acc: 1.0000


val: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 38/38 [00:05<00:00,  6.92it/s]


val Loss: 0.0021 Acc: 0.9983
Epoch 5/10
----------


train: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 545/545 [01:58<00:00,  4.58it/s]


train Loss: 0.0000 Acc: 1.0000


val: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 38/38 [00:06<00:00,  6.25it/s]


val Loss: 0.0023 Acc: 0.9983
Epoch 6/10
----------


train: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 545/545 [01:59<00:00,  4.57it/s]


train Loss: 0.0000 Acc: 1.0000


val: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 38/38 [00:07<00:00,  5.43it/s]


val Loss: 0.0024 Acc: 0.9983
Epoch 7/10
----------


train:  74%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç  | 405/545 [01:29<00:25,  5.47it/s]

In [None]:
plt.figure(figsize=(12, 4))
plt.subplot(1, 2, 1)
plt.plot(history['train_acc'], label='Train Acc')
plt.plot(history['val_acc'], label='Val Acc')
plt.title('Accuracy')
plt.legend()

plt.subplot(1, 2, 2)
plt.plot(history['train_loss'], label='Train Loss')
plt.plot(history['val_loss'], label='Val Loss')
plt.title('Loss')
plt.legend()
plt.show()

from google.colab import files
files.download('bestModelV1.pth')