## Extracting Image Feature using MobileNetV1
Preprocessing gambar juga dilakukan di notebook ini.

### Import Library

In [11]:
import os
import re
import warnings

# Silence TensorFlow's less relevant INFO/WARNING log messages.
# This is set BEFORE `import tensorflow` so the native backend sees it when it loads;
# setting it after the import does not reliably take effect.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.applications import mobilenet
from tensorflow.keras.preprocessing import image
from tqdm import tqdm



### Load Dataset Train & Test
Memuat data train dan test dari hasil `split_data`.

In [12]:
def load_split_data(split_dir, train_filename, test_filename):
    """Load the train and test CSV splits and detect the split ratio from the file name.

    Args:
        split_dir (str): Directory that contains both CSV files.
        train_filename (str): File name of the training split (e.g. 'train8020.csv').
        test_filename (str): File name of the test split.

    Returns:
        tuple: ``(train_df, test_df, split_ratio_str)``.
        ``split_ratio_str`` is the first run of digits found in ``train_filename``
        (e.g. '8020'), or ``None`` when the name contains no digits.
        When either CSV file does not exist, ``(None, None, None)`` is returned
        after printing an error message.
    """
    train_path = os.path.join(split_dir, train_filename)
    test_path = os.path.join(split_dir, test_filename)

    try:
        train_df = pd.read_csv(train_path)
        test_df = pd.read_csv(test_path)
    except FileNotFoundError as e:
        print(f"Error: Salah satu file tidak ditemukan. {e}")
        return None, None, None

    # re.search() returns None when the file name has no digits; calling
    # .group() on it unguarded would raise AttributeError, so check first.
    ratio_match = re.search(r'(\d+)', train_filename)
    split_ratio_str = ratio_match.group(1) if ratio_match else None

    print(f"Data latih berhasil dimuat: {train_df.shape[0]} baris.")
    print(f"Data uji berhasil dimuat: {test_df.shape[0]} baris.")
    if split_ratio_str is None:
        print(f"Peringatan: rasio split tidak dapat dideteksi dari nama file '{train_filename}'.")
    else:
        print(f"Rasio split terdeteksi: {split_ratio_str}")

    return train_df, test_df, split_ratio_str

# --- Configuration and execution (CUSTOM: adjust these values to the local setup) ---
SPLIT_DATA_DIR = r'E:\$7th\TA\Eksploring_TF-IDF\DATA\split_data'
TRAIN_FILENAME = 'train8020.csv'
TEST_FILENAME = 'test8020.csv'

# Load both splits; the split ratio is parsed from the digits in TRAIN_FILENAME.
train_df, test_df, split_ratio = load_split_data(
    split_dir=SPLIT_DATA_DIR,
    train_filename=TRAIN_FILENAME,
    test_filename=TEST_FILENAME,
)

Data latih berhasil dimuat: 13044 baris.
Data uji berhasil dimuat: 3261 baris.
Rasio split terdeteksi: 8020


### Setup Model MobileNetV1
Konfigurasi model untuk ekstraksi

In [4]:
def load_mobilenet_model():
    """Build the pre-trained MobileNetV1 used as an image feature extractor.

    Returns:
        The model object returned by ``mobilenet.MobileNet``, configured with
        ImageNet weights, no classification head, a 224x224x3 input and
        global average pooling on the output.
    """
    extractor_config = {
        'weights': 'imagenet',         # pre-trained ImageNet weights
        'include_top': False,          # drop the final fully connected classification layers
        'input_shape': (224, 224, 3),
        'pooling': 'avg',              # global average pooling -> one feature vector per image
    }
    extractor = mobilenet.MobileNet(**extractor_config)
    print("Model MobileNetV1 berhasil dimuat.")
    return extractor

# --- Execution ---
feature_extractor_model = load_mobilenet_model()

# Print a summary of the model architecture
feature_extractor_model.summary()

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/mobilenet/mobilenet_1_0_224_tf_no_top.h5
[1m17225924/17225924[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 0us/step
Model MobileNetV1 berhasil dimuat.


### Preprocessing Image 
Setup preprocessing image

In [13]:
def preprocess_and_extract_features(img_path, model):
    """
    Preprocess a single image (resize, normalise) and extract its feature
    vector with the given model.

    Args:
        img_path (str): Path to the image file.
        model (tf.keras.Model): Feature-extraction model (e.g. MobileNetV1).
            It is invoked directly as ``model(batch, training=False)``.

    Returns:
        np.ndarray: 1-D feature vector of the image, or None if the image could
        not be processed. In the failure case a ``RuntimeWarning`` naming the
        file and the underlying error is emitted, so failures are not silent.
    """
    try:
        # 1. Load the image and resize it to 224x224 pixels
        img = image.load_img(img_path, target_size=(224, 224))

        # 2. Convert the image to a numpy array
        img_array = image.img_to_array(img)

        # 3. Add the batch dimension (Keras models expect batched input)
        img_expanded = np.expand_dims(img_array, axis=0)

        # 4. Normalise pixels the way MobileNet expects (-1 to 1)
        img_preprocessed = mobilenet.preprocess_input(img_expanded)

        # 5. Extract features. The model is called directly instead of through
        #    model.predict(): Keras recommends __call__ for small inputs that fit
        #    in one batch, because predict() adds per-call overhead that adds up
        #    when it runs once per image. training=False keeps layers such as
        #    BatchNormalization in inference mode.
        features = model(img_preprocessed, training=False)

        # Flatten the output into a 1-D vector
        return np.asarray(features).flatten()

    except Exception as e:
        # Best effort: a corrupt or unreadable image must not stop the whole run,
        # but it is reported so that systematic failures are visible.
        warnings.warn(f"Gagal memproses {img_path}: {e}", RuntimeWarning)
        return None

print("Fungsi 'preprocess_and_extract_features' siap digunakan.")

Fungsi 'preprocess_and_extract_features' siap digunakan.


### Melakukan Ekstraksi Fitur
ekstrak fitur gambar

In [8]:
def run_feature_extraction(df, model, description):
    """Extract image features for every row of a DataFrame.

    Args:
        df (pd.DataFrame): Must contain an 'image_path' column.
        model: Feature-extraction model passed through to
            ``preprocess_and_extract_features``.
        description (str): Label shown on the tqdm progress bar.

    Returns:
        np.ndarray: Feature matrix with one row per row of ``df``, in the same
        order. Rows whose image could not be processed are filled with a zero
        vector so the matrix stays rectangular and aligned with ``df``.

    Raises:
        RuntimeError: If ``df`` is non-empty and no image could be processed.
    """
    # Register tqdm's progress_apply on pandas objects
    tqdm.pandas(desc=description)

    # Apply the extractor to the 'image_path' column with a progress bar
    extracted = df['image_path'].progress_apply(
        lambda path: preprocess_and_extract_features(path, model)
    ).tolist()

    # The extractor returns None for unreadable images. Leaving those entries in
    # the list makes np.array() build a ragged array (object dtype on older NumPy,
    # ValueError on NumPy >= 1.24), so they are handled explicitly here.
    failed_positions = [
        pos for pos, vec in enumerate(extracted) if not isinstance(vec, np.ndarray)
    ]

    if failed_positions:
        if len(failed_positions) == len(extracted):
            raise RuntimeError(
                "Tidak ada gambar yang berhasil diproses; "
                "periksa kolom 'image_path' dan lokasi file gambar."
            )

        # Use any successful vector as the template for the placeholder's shape/dtype
        template = next(vec for vec in extracted if isinstance(vec, np.ndarray))
        placeholder = np.zeros_like(template)
        sample_paths = df['image_path'].iloc[failed_positions[:5]].tolist()
        print(
            f"Peringatan: {len(failed_positions)} dari {len(extracted)} gambar gagal diproses "
            f"dan diisi dengan vektor nol (contoh: {sample_paths})."
        )
        for pos in failed_positions:
            extracted[pos] = placeholder

    return np.array(extracted)

# --- Run the extraction for the train and test data ---
print("Memulai ekstraksi fitur untuk data train...")
X_train_image = run_feature_extraction(
    df=train_df,
    model=feature_extractor_model,
    description="Extracting Train Features",
)

print("\nMemulai ekstraksi fitur untuk data uji...")
X_test_image = run_feature_extraction(
    df=test_df,
    model=feature_extractor_model,
    description="Extracting Test Features",
)

# Report the shape of both feature matrices
print("\nProses ekstraksi fitur selesai.")
print(f"Ukuran matriks fitur gambar train: {X_train_image.shape}")
print(f"Ukuran matriks fitur gambar test: {X_test_image.shape}")

Memulai ekstraksi fitur untuk data train...


Extracting Train Features:   5%|▍         | 641/13044 [01:09<22:15,  9.28it/s]


KeyboardInterrupt: 

### Save File Ekstraksi
Simpan hasil ekstraksi fitur gambar dari MobileNetV1.

In [None]:
def save_features(features, output_path):
    """Save a feature matrix to a .npy file, creating the parent directory if needed.

    Args:
        features (np.ndarray): Feature matrix to store.
        output_path (str): Destination file path. It may be a bare file name,
            in which case the file is written to the current working directory.

    Returns:
        None. Success or failure is reported with a printed message; errors are
        caught and printed rather than raised.
    """
    try:
        parent_dir = os.path.dirname(output_path)
        # os.path.dirname() is '' for a bare file name and os.makedirs('') raises,
        # which previously aborted the save; only create a directory when there is one.
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)
        np.save(output_path, features)
        print(f"Fitur berhasil disimpan di: {output_path}")
    except Exception as e:
        print(f"Gagal menyimpan fitur. Error: {e}")

# --- Configuration and execution ---
IMAGE_ARTIFACTS_DIR = r'E:\$7th\TA\Eksploring_TF-IDF\DATA\image_artifacts'
# Sub-folder name carries the split ratio so different splits do not overwrite each other
dynamic_folder_name = f'mobilenetv1_{split_ratio}'

# Save the train features
save_features(
    features=X_train_image,
    output_path=os.path.join(IMAGE_ARTIFACTS_DIR, dynamic_folder_name, 'X_train_image.npy'),
)

# Save the test features
save_features(
    features=X_test_image,
    output_path=os.path.join(IMAGE_ARTIFACTS_DIR, dynamic_folder_name, 'X_test_image.npy'),
)