In [1]:
import os
import pickle
import numpy as np
import pandas as pd
import pickle as pkl
import tensorflow as tf
from tensorflow.keras.applications.resnet50 import ResNet50, preprocess_input
from tensorflow.keras.preprocessing import image
from tensorflow.keras.layers import GlobalMaxPool2D
from sklearn.neighbors import NearestNeighbors
from numpy.linalg import norm

In [2]:
# 1. Membaca file styles.csv
styles = pd.read_csv('styles.csv')  # Pastikan nama file sesuai
styles['image_name'] = styles['id'].astype(str) + ".jpg"  
styles['category'] = styles['articleType']  

print(styles.head())

       id  gender masterCategory subCategory articleType baseColour  season  \
0  1163.0     Men        Apparel     Topwear     Tshirts       Blue  Summer   
1  1164.0     Men        Apparel     Topwear     Tshirts       Blue  Winter   
2  1165.0     Men        Apparel     Topwear     Tshirts       Blue  Summer   
3  1525.0  Unisex    Accessories        Bags   Backpacks  Navy Blue    Fall   
4  1526.0  Unisex    Accessories        Bags   Backpacks      Black    Fall   

     year   usage                                productDisplayName  \
0  2011.0  Sports  Nike Sahara Team India Fanwear Round Neck Jersey   
1  2015.0  Sports           Nike Men Blue T20 Indian Cricket Jersey   
2  2013.0  Sports               Nike Mean Team India Cricket Jersey   
3  2010.0  Casual                      Puma Deck Navy Blue Backpack   
4  2010.0  Sports                       Puma Big Cat Backpack Black   

  Unnamed: 10  image_name   category  
0         NaN  1163.0.jpg    Tshirts  
1         NaN  1164.

In [3]:
# 2. Membuat dictionary untuk menghubungkan file dengan kategori
image_labels = dict(zip(styles['image_name'], styles['category']))

In [4]:
# 3. Mengambil semua nama file gambar dari folder 'images'
filenames = []
for file in os.listdir('images'):
    if file.endswith('.jpg'):  
        filenames.append(os.path.join('images', file))

print(f"Total images: {len(filenames)}")

Total images: 11961


In [5]:
# 4. Mengonfigurasi model ResNet50
model = ResNet50(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
model.trainable = False
model = tf.keras.models.Sequential([model, GlobalMaxPool2D()])
model.summary()

In [6]:
# 5. Fungsi untuk ekstraksi fitur
def extract_features_from_images(image_path, model):
    img = image.load_img(image_path, target_size=(224, 224))
    img_array = image.img_to_array(img)
    img_expand_dim = np.expand_dims(img_array, axis=0)
    img_preprocess = preprocess_input(img_expand_dim)
    result = model.predict(img_preprocess).flatten()
    norm_result = result / norm(result)
    return norm_result

In [None]:
# 6. Ekstraksi fitur dan menghubungkannya dengan label
image_features = []
labels = []

for file in filenames:
    # Ekstraksi fitur gambar
    features = extract_features_from_images(file, model)
    image_features.append(features)

    # Ambil label berdasarkan nama file
    file_name = os.path.basename(file)  # Ambil nama file tanpa folder
    label = image_labels.get(file_name, 'Unknown')  # Default 'Unknown' jika tidak ada label
    labels.append(label)

print(f"Extracted features for {len(image_features)} images.")


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 6s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 332ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 338ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 457ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 382ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 309ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 327ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 294ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 297ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 290ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 319ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 313ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 297ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1

In [4]:
# 7. Menyimpan fitur dan label ke file pickle
with open('features_with_labels.pkl', 'wb') as f:
    pkl.dump((image_features, labels, filenames), f)
print("Features and labels saved to 'features_with_labels.pkl'.")

NameError: name 'image_features' is not defined

In [None]:
# 8. Memuat kembali data dari file pickle
with open('features_with_labels.pkl', 'rb') as f:
    loaded_features, loaded_labels, loaded_filenames = pkl.load(f)

print(f"Loaded {len(loaded_features)} features and labels.")
print(f"Example label: {loaded_labels[0]}, Filename: {loaded_filenames[0]}")

In [None]:
# 9. Melatih model Nearest Neighbors
knn = NearestNeighbors(n_neighbors=5, metric='euclidean')
knn.fit(loaded_features)

In [None]:
# 10. Fungsi untuk menemukan gambar serupa
def find_similar_images(query_image_path, knn_model, feature_model):
    query_features = extract_features_from_images(query_image_path, feature_model)
    distances, indices = knn_model.kneighbors([query_features])
    return distances, indices

In [None]:
# 11. Contoh mencari gambar serupa
query_image_path = 'images/123.jpg'  # Ganti dengan path gambar yang ingin dicari
distances, indices = find_similar_images(query_image_path, knn, model)

In [None]:
# 12. Menampilkan hasil
print("Gambar serupa ditemukan:")
for i in indices[0]:
    print(f"- {loaded_filenames[i]} (Label: {loaded_labels[i]})")