In [2]:
import torch
import torch.nn as nn
from torchvision import models, transforms
from torch.utils.data import DataLoader, Dataset
from PIL import Image
import os
import numpy as np

# Select the compute device: the GPU when CUDA is available, otherwise the CPU.
_device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(_device_name)
print(f"Menggunakan device: {device}")

# 1. Define the preprocessing pipeline.
# ImageNet-pretrained models expect 224x224 inputs that are normalized
# with a specific per-channel mean and standard deviation.
IMAGENET_MEAN = [0.485, 0.456, 0.406]
IMAGENET_STD = [0.229, 0.224, 0.225]

_preprocess_steps = [
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
]
preprocess = transforms.Compose(_preprocess_steps)

# 2. Custom dataset
class ImageFolderDataset(Dataset):
    """Dataset over every image file found (recursively) under a folder.

    Files are selected by extension (.png, .jpg, .jpeg; case-insensitive).
    The collected paths are sorted so that a given index refers to the same
    file on every run, independent of the order the filesystem lists them in.

    Args:
        folder_path: Root directory that is walked recursively.
        transform: Optional callable applied to the RGB PIL image before it
            is returned. ``None`` returns the PIL image unchanged.

    Each item is a tuple ``(image, img_path)``.
    """

    # Extensions (lower-case) that are treated as images.
    IMAGE_EXTENSIONS = ('.png', '.jpg', '.jpeg')

    def __init__(self, folder_path, transform=None):
        self.folder_path = folder_path
        self.transform = transform
        found_paths = [
            os.path.join(root, file)
            for root, _, files in os.walk(folder_path)
            for file in files
            if file.lower().endswith(self.IMAGE_EXTENSIONS)
        ]
        # os.walk gives no ordering guarantee; sort for a reproducible
        # index -> file mapping.
        self.image_paths = sorted(found_paths)

    def __len__(self):
        """Return the number of image files that were found."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Load image ``idx`` as RGB, apply the transform, and return it with its path."""
        img_path = self.image_paths[idx]
        # The context manager guarantees the underlying file is closed even
        # if decoding fails; convert() returns an independent in-memory image.
        with Image.open(img_path) as img:
            image = img.convert('RGB')

        # Compare against None so a valid callable that happens to be falsy
        # is still applied.
        if self.transform is not None:
            image = self.transform(image)

        return image, img_path

# --- Usage ---
# Replace this path with the folder that contains your images.
DATA_DIR = r'C:\Users\taqiy\Downloads\data-mining-action-2025\train\train'

# os.walk ignores a missing directory and simply yields nothing, so check
# the path up front instead of ending up with a silently empty dataset.
if not os.path.isdir(DATA_DIR):
    raise FileNotFoundError(f"Folder data tidak ditemukan: {DATA_DIR}")

dataset = ImageFolderDataset(folder_path=DATA_DIR, transform=preprocess)

# Fail here with a clear message rather than later, when the per-batch
# feature arrays are concatenated.
if len(dataset) == 0:
    raise RuntimeError(f"Tidak ada gambar (.png/.jpg/.jpeg) ditemukan di: {DATA_DIR}")

# A larger batch_size (e.g. 64) can be used if the GPU is powerful enough.
# shuffle=False keeps the batches in dataset order.
dataloader = DataLoader(dataset, batch_size=32, shuffle=False)

print(f"Total gambar ditemukan: {len(dataset)}")

Menggunakan device: cuda
Total gambar ditemukan: 4257


In [3]:
# Load the ImageNet-pretrained ResNet50.
# Newer torchvision releases deprecate `pretrained=True` in favour of the
# explicit `weights=` argument; IMAGENET1K_V1 is the checkpoint that
# `pretrained=True` resolves to. Fall back to the legacy argument when the
# installed torchvision does not provide the weights enum.
if hasattr(models, "ResNet50_Weights"):
    model = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V1)
else:
    model = models.resnet50(pretrained=True)

# "Cut off" the final classification layer (fc).
# With fc replaced by Identity, the model returns the 2048-dimensional
# vector produced by the 'avgpool' layer that precedes it.
model.fc = nn.Identity()

# Move the model to the selected device (GPU/CPU) and switch to evaluation mode.
model.to(device)
model.eval()

print("Model ResNet50 (tanpa fc layer) berhasil dimuat.")

Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to C:\Users\taqiy/.cache\torch\hub\checkpoints\resnet50-0676ba61.pth
100%|██████████| 97.8M/97.8M [00:42<00:00, 2.40MB/s]


Model ResNet50 (tanpa fc layer) berhasil dimuat.


In [4]:
# Per-batch feature arrays are collected in their own list so that
# `all_features` is only ever bound to the final NumPy array.
feature_batches = []
all_image_paths = []

print("Memulai ekstraksi fitur...")

# Gradients are not needed here (this is inference, not training).
with torch.no_grad():
    for inputs, paths in dataloader:
        # Move the batch to the same device as the model.
        inputs = inputs.to(device)

        # Forward pass: the model output is the feature vector of each image.
        features = model(inputs)

        # Bring the features back to the CPU as NumPy and remember the
        # image paths of this batch in the same order.
        feature_batches.append(features.cpu().numpy())
        all_image_paths.extend(paths)

# np.concatenate raises an unhelpful error on an empty list; report the
# actual problem instead.
if not feature_batches:
    raise RuntimeError("Tidak ada fitur yang diekstrak: dataloader kosong.")

# Stack all batches into one large array (one row per image).
all_features = np.concatenate(feature_batches, axis=0)

# Every feature row must have a matching path, otherwise later cluster
# assignments would be attributed to the wrong images.
assert all_features.shape[0] == len(all_image_paths), "feature/path count mismatch"

print("Ekstraksi selesai.")
print(f"Bentuk array fitur: {all_features.shape}")  # expected: (number of images, 2048)

Memulai ekstraksi fitur...
Ekstraksi selesai.
Bentuk array fitur: (4257, 2048)


In [5]:
from sklearn.cluster import KMeans
import pandas as pd

# Number of clusters (k); set it to the number of food categories you expect.
N_CLUSTERS = 15

print(f"Memulai K-Means clustering dengan k={N_CLUSTERS}...")

# Configure and fit K-Means on the extracted features.
# n_init='auto' is for newer scikit-learn versions; use n_init=10 on older ones.
kmeans_params = dict(n_clusters=N_CLUSTERS, random_state=42, n_init='auto')
kmeans = KMeans(**kmeans_params).fit(all_features)

# Cluster label assigned to each image, in the same order as the features.
cluster_labels = kmeans.labels_

# Pair every image path with its cluster id.
results_df = pd.DataFrame(
    dict(image_path=all_image_paths, cluster_id=cluster_labels)
)

print("Clustering selesai.")
print(results_df.head())

Memulai K-Means clustering dengan k=15...
Clustering selesai.
                                          image_path  cluster_id
0  C:\Users\taqiy\Downloads\data-mining-action-20...           5
1  C:\Users\taqiy\Downloads\data-mining-action-20...           8
2  C:\Users\taqiy\Downloads\data-mining-action-20...          13
3  C:\Users\taqiy\Downloads\data-mining-action-20...           1
4  C:\Users\taqiy\Downloads\data-mining-action-20...          13


In [6]:
# Inspect the contents of every cluster by printing a few example paths.
SAMPLES_PER_CLUSTER = 5

for i in range(N_CLUSTERS):
    print(f"\n--- CONTOH GAMBAR DARI CLUSTER {i} ---")

    cluster_paths = results_df.loc[results_df['cluster_id'] == i, 'image_path']

    # Sample without replacement so the same image is never listed twice;
    # cap the sample size at the cluster size so small clusters do not
    # raise. A fixed random_state makes the listing reproducible.
    n_samples = min(SAMPLES_PER_CLUSTER, len(cluster_paths))
    sample_paths = cluster_paths.sample(n=n_samples, random_state=42).tolist()

    for path in sample_paths:
        print(path)

# --- Your manual step ---
# You will see output such as:
# --- CONTOH GAMBAR DARI CLUSTER 0 ---
# /path/ke/folder/gambar_makanan/rawon_123.jpg
# /path/ke/folder/gambar_makanan/rawon_456.jpg
# /path/ke/folder/gambar_makanan/rawon_789.jpg
# ...
#
# From this you conclude: Cluster 0 = Rawon
#
# --- CONTOH GAMBAR DARI CLUSTER 1 ---
# /path/ke/folder/gambar_makanan/sate_padang_001.jpg
# /path/ke/folder/gambar_makanan/sate_padang_002.jpg
# ...
#
# From this you conclude: Cluster 1 = Sate Padang


--- CONTOH GAMBAR DARI CLUSTER 0 ---
C:\Users\taqiy\Downloads\data-mining-action-2025\train\train\z3kury.jpg
C:\Users\taqiy\Downloads\data-mining-action-2025\train\train\l99o0v.jpg
C:\Users\taqiy\Downloads\data-mining-action-2025\train\train\v3wg7q.jpg
C:\Users\taqiy\Downloads\data-mining-action-2025\train\train\7fnhtp.jpg
C:\Users\taqiy\Downloads\data-mining-action-2025\train\train\btsqo8.jpg

--- CONTOH GAMBAR DARI CLUSTER 1 ---
C:\Users\taqiy\Downloads\data-mining-action-2025\train\train\4ezkeo.jpg
C:\Users\taqiy\Downloads\data-mining-action-2025\train\train\n7ob1y.jpg
C:\Users\taqiy\Downloads\data-mining-action-2025\train\train\l6tabn.jpg
C:\Users\taqiy\Downloads\data-mining-action-2025\train\train\4427yo.jpg
C:\Users\taqiy\Downloads\data-mining-action-2025\train\train\tzhr8f.jpg

--- CONTOH GAMBAR DARI CLUSTER 2 ---
C:\Users\taqiy\Downloads\data-mining-action-2025\train\train\6ktpj1.jpg
C:\Users\taqiy\Downloads\data-mining-action-2025\train\train\gpec3p.jpg
C:\Users\taqiy\Downloa