In [None]:
from google.colab import drive
import os
import pandas as pd
import tensorflow as tf
from tensorflow.keras import layers, models, callbacks
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Locations of the metadata CSV and of the image folder on the mounted Drive.
image_dir = "/content/drive/MyDrive/BANGKIT/dataset-an/output_images"
csv_path = "/content/CSV BANGKIT - styles.csv"

# Mount Google Drive so that image_dir becomes reachable from the Colab VM.
drive.mount('/content/drive')

In [None]:
def load_dataframe(csv_path, image_dir, nrows=None):
    """Load the metadata CSV and keep only rows whose image file exists.

    Args:
        csv_path: Path of the CSV file; it must contain an ``id`` column.
        image_dir: Directory that is searched for ``<id>.jpg`` files.
        nrows: Optional cap on the number of CSV rows to read
            (``None`` reads the whole file).

    Returns:
        A new DataFrame (an independent copy, original index preserved) with
        an added ``image`` column holding the file name ``<id>.jpg``,
        restricted to rows for which that file exists in ``image_dir``.
    """
    # Malformed CSV lines are skipped rather than aborting the load.
    df = pd.read_csv(csv_path, nrows=nrows, on_bad_lines='skip')

    # Build the file name from the column itself. A row-wise apply(axis=1)
    # would upcast an all-numeric row to float and turn id 1 into "1.0.jpg".
    df['image'] = df['id'].astype(str) + ".jpg"

    # Explicit boolean mask so an empty frame is still filtered, not
    # misinterpreted as a column selection.
    has_file = df['image'].map(
        lambda name: os.path.exists(os.path.join(image_dir, name))
    ).astype(bool)

    # .copy() detaches the result from the unfiltered frame so callers can
    # assign columns without SettingWithCopyWarning.
    return df.loc[has_file].copy()

# Only the first 5000 CSV rows are read; rows without an image file are dropped.
df = load_dataframe(csv_path=csv_path, image_dir=image_dir, nrows=5000)

In [None]:
# Fold 'Flip Flops' into 'Sandal' and discard 'Innerwear' rows.
# assign() builds a new frame instead of writing into a filtered slice,
# which avoids pandas' SettingWithCopyWarning.
df = df.assign(Category=df['Category'].replace('Flip Flops', 'Sandal'))
df = df[df['Category'] != 'Innerwear']

# Cap every category at target_samples rows. Categories at or below the cap
# are kept whole; larger ones are randomly down-sampled.
target_samples = 2000
sample_seed = 42  # same seed value the train/test split uses, for reproducible runs

# Collect the per-category pieces and concatenate once; growing a DataFrame
# with concat inside the loop is quadratic and warns on the empty seed frame.
balanced_parts = []
for category in df['Category'].dropna().unique():
    category_data = df[df['Category'] == category]
    if len(category_data) > target_samples:
        category_data = category_data.sample(
            target_samples, replace=False, random_state=sample_seed
        )
    balanced_parts.append(category_data)

# pd.concat([]) raises, so fall back to an empty frame with the same columns.
balanced_data = pd.concat(balanced_parts) if balanced_parts else df.iloc[0:0]
df = balanced_data.reset_index(drop=True)

In [None]:
# Hold out 20% of the rows for evaluation; stratifying on Category keeps the
# category proportions the same in both parts, and the fixed seed makes the
# split repeatable.
train_df, test_df = train_test_split(
    df,
    test_size=0.2,
    stratify=df['Category'],
    random_state=42,
)

In [None]:
def load_and_preprocess_image(image_path, label):
    """Read one JPEG file and turn it into a ResNet50-ready tensor.

    Args:
        image_path: Path of the JPEG file, handed to ``tf.io.read_file``.
        label: Passed through unchanged.

    Returns:
        ``(image, label)`` where ``image`` has been decoded to 3 channels,
        resized to 224x224 and run through the ResNet50 ``preprocess_input``.
    """
    raw_bytes = tf.io.read_file(image_path)
    pixels = tf.image.decode_jpeg(raw_bytes, channels=3)
    resized = tf.image.resize(pixels, [224, 224])
    prepared = tf.keras.applications.resnet50.preprocess_input(resized)
    return prepared, label

# tf.data view of the two splits. Note: the fit call further down trains from
# the Keras generators, not from these datasets.
#
# The 'image' column holds bare file names ("<id>.jpg"); the files themselves
# live in image_dir, so the full path must be built before tf.io.read_file
# can open them.
train_paths = [os.path.join(image_dir, name) for name in train_df['image']]
test_paths = [os.path.join(image_dir, name) for name in test_df['image']]

train_dataset = tf.data.Dataset.from_tensor_slices((train_paths, train_df['Category'].values))
test_dataset = tf.data.Dataset.from_tensor_slices((test_paths, test_df['Category'].values))

# Lazily decode, resize and preprocess each image when the dataset is iterated.
train_dataset = train_dataset.map(load_and_preprocess_image)
test_dataset = test_dataset.map(load_and_preprocess_image)

# ImageNet-pretrained ResNet50 without its classification head, used as a
# fixed feature extractor (its weights are not updated during training).
base_model = tf.keras.applications.ResNet50(
    include_top=False,
    weights='imagenet',
    input_shape=(224, 224, 3),
)
base_model.trainable = False

# One output unit per distinct category present in the training split.
num_classes = len(train_df['Category'].unique())

# Classification head: pooled backbone features -> dense -> dropout -> softmax.
model = models.Sequential()
model.add(base_model)
model.add(layers.GlobalAveragePooling2D())
model.add(layers.Dense(128, activation='relu'))
model.add(layers.Dropout(0.5))
model.add(layers.Dense(num_classes, activation='softmax'))

# Sparse loss: labels are integer class indices rather than one-hot vectors.
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# The frozen ResNet50 backbone expects inputs run through its own
# preprocess_input; without it the generators would feed raw 0-255 RGB pixels.
resnet_preprocess = tf.keras.applications.resnet50.preprocess_input

# Training images: random augmentation, then ResNet50 preprocessing.
datagen = ImageDataGenerator(
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
    preprocessing_function=resnet_preprocess
)

# Evaluation images: preprocessing only. Augmenting validation data would
# make the reported val_accuracy noisy and not comparable between epochs.
eval_datagen = ImageDataGenerator(preprocessing_function=resnet_preprocess)

# Pin one label -> index mapping for both generators. Left to infer it on its
# own, each generator derives the mapping from its own frame, so a category
# missing from test_df would shift the validation label indices.
class_names = sorted(train_df['Category'].unique())

batch_size = 32
train_generator = datagen.flow_from_dataframe(
    train_df,
    directory=image_dir,
    x_col='image',
    y_col='Category',
    target_size=(224, 224),
    batch_size=batch_size,
    class_mode='sparse',
    classes=class_names,
    seed=42  # reproducible shuffling and augmentation
)

test_generator = eval_datagen.flow_from_dataframe(
    test_df,
    directory=image_dir,
    x_col='image',
    y_col='Category',
    target_size=(224, 224),
    batch_size=batch_size,
    class_mode='sparse',
    classes=class_names,
    shuffle=False  # keep evaluation order aligned with test_df
)

class _StopAtValAccuracy(callbacks.Callback):
    """Stop training as soon as validation accuracy reaches a threshold."""

    def __init__(self, threshold):
        super().__init__()
        self.threshold = threshold

    def on_epoch_end(self, epoch, logs=None):
        val_accuracy = (logs or {}).get('val_accuracy')
        if val_accuracy is not None and val_accuracy >= self.threshold:
            self.model.stop_training = True


# NOTE(review): the previous EarlyStopping(baseline=0.98, min_delta=0.01,
# patience=5) does not stop once 98% is reached. Keras only resets the
# patience counter when val_accuracy beats baseline + min_delta, so training
# was aborted after 5 epochs unless val_accuracy exceeded 0.99. Going by the
# callback's name, the intent appears to be "stop once 98% is reached" -
# confirm. The two concerns are now handled separately.
accuracy_threshold_callback = _StopAtValAccuracy(threshold=0.98)

# Plateau guard: stop if val_accuracy has not improved by at least 0.01 for
# 5 consecutive epochs, and roll back to the best weights seen.
early_stopping_callback = callbacks.EarlyStopping(monitor='val_accuracy',
                                                  patience=5,
                                                  min_delta=0.01,
                                                  mode='max',
                                                  restore_best_weights=True)

history = model.fit(
    train_generator,
    epochs=20,
    validation_data=test_generator,
    callbacks=[accuracy_threshold_callback, early_stopping_callback]
)