In [2]:
import os

# Root folder (relative to the working directory) that is expected to hold the
# "Training" and "Testing" split directories scanned below.
root_path = "dataset"

def count_files(path):
    """Count the regular files directly inside ``path`` (non-recursive).

    Sub-directories are neither counted nor descended into.

    Args:
        path: Directory to inspect.

    Returns:
        Number of regular files in ``path``; 0 when ``path`` does not exist
        or is not a directory.
    """
    # isdir (not exists): a path that points at a regular file must yield 0
    # instead of making the directory listing raise NotADirectoryError.
    if not os.path.isdir(path):
        return 0
    # scandir avoids building the full name list and a separate stat per join.
    with os.scandir(path) as entries:
        return sum(1 for entry in entries if entry.is_file())

# Collect one report line per category folder found under each split.
summary = []

for mode in ["Training", "Testing"]:
    mode_path = os.path.join(root_path, mode)
    if not os.path.isdir(mode_path):
        # Split folder missing: nothing to report for it.
        continue
    for category in os.listdir(mode_path):
        category_path = os.path.join(mode_path, category)
        if not os.path.isdir(category_path):
            # Stray files next to the category folders are ignored.
            continue
        summary.append(f"{mode}/{category} -> {count_files(category_path)} files")

# Print the results
for entry in summary:
    print(entry)

Training/Fresh_Gills -> 613 files
Training/Nonfresh_Eyes -> 1028 files
Training/Fresh_Eyes -> 233 files
Training/Nonfresh_Gills -> 1265 files
Testing/Fresh_Gills -> 80 files
Testing/Nonfresh_Eyes -> 257 files
Testing/Fresh_Eyes -> 134 files
Testing/Nonfresh_Gills -> 316 files


In [None]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.utils.class_weight import compute_class_weight
from tensorflow.keras import layers, models
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [None]:
# Dataset path (change to match your environment)
sdir = '/kaggle/input/fish-classification-dataset/'

# Build the DataFrame: one row per image file, tagged with the split
# ('Training' / 'Testing') it was found under.
filepaths = []
labels = []
splits = []

for data_type in ['Training', 'Testing']:
    data_path = os.path.join(sdir, data_type)
    # sorted(): os.listdir returns entries in arbitrary order, and the row
    # order determines the outcome of the seeded train/validation split.
    for klass in sorted(os.listdir(data_path)):
        class_path = os.path.join(data_path, klass)
        if not os.path.isdir(class_path):
            continue
        for file in sorted(os.listdir(class_path)):
            file_path = os.path.join(class_path, file)
            # Skip nested folders and other non-file entries.
            if not os.path.isfile(file_path):
                continue
            filepaths.append(file_path)
            labels.append(klass)
            splits.append(data_type)

df = pd.DataFrame({'filepaths': filepaths, 'labels': labels, 'split': splits})

# Select by the recorded split rather than by substring-matching the path,
# which depends on the OS path separator and on the dataset root's own name.
# The derived frames keep the original two-column layout.
split_columns = ['filepaths', 'labels']
training_df = df.loc[df['split'] == 'Training', split_columns].copy()
testing_df = df.loc[df['split'] == 'Testing', split_columns].copy()

In [None]:
# Split the training rows into training and validation subsets
# (15% held out, stratified by label, fixed seed).
train_df, valid_df = train_test_split(
    training_df,
    test_size=0.15,
    stratify=training_df['labels'],
    random_state=42,
)

In [None]:
# ================================================
# 2. DATA GENERATOR
# ================================================
height, width, channels = 224, 224, 3
img_size = (height, width)
batch_size = 64
class_mode = 'categorical'

# Training-data augmentation: rescale to [0, 1] plus random geometric jitter.
train_aug = ImageDataGenerator(
    rescale=1./255,
    rotation_range=30,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

# Validation and testing: rescale only, no augmentation.
val_test_aug = ImageDataGenerator(rescale=1./255)

# Pin one label -> index mapping for all three generators. Left to infer it,
# each generator derives the mapping from its own DataFrame, so a split that
# lacks a class would silently get different indices.
class_names = sorted(train_df['labels'].unique())

# Arguments shared by every generator.
flow_kwargs = dict(x_col='filepaths', y_col='labels',
                   target_size=img_size, class_mode=class_mode,
                   batch_size=batch_size, classes=class_names)

# Generators
train_gen = train_aug.flow_from_dataframe(train_df, shuffle=True, **flow_kwargs)

# Validation is not shuffled: order does not affect the metrics, and a fixed
# order keeps epoch-to-epoch comparisons on the same samples.
valid_gen = val_test_aug.flow_from_dataframe(valid_df, shuffle=False, **flow_kwargs)

# shuffle=False so predictions line up with testing_gen.classes.
testing_gen = val_test_aug.flow_from_dataframe(testing_df, shuffle=False, **flow_kwargs)

classes = list(train_gen.class_indices.keys())
class_count = len(classes)

# Balanced class weights computed from the training labels only.
labels = train_gen.classes  # integer class index per training sample
present_class_ids = np.unique(labels)
balanced_weights = compute_class_weight('balanced', classes=present_class_ids, y=labels)
# Key by the actual class index (not by position) so the mapping stays
# correct even if some index is absent from the training subset.
class_weights = {int(class_id): float(weight)
                 for class_id, weight in zip(present_class_ids, balanced_weights)}

In [None]:
# ================================================
# 3. MODEL: EfficientNetV2 + Fine-Tuning
# ================================================
# Load the saved classifier, then re-wire it so its output is the activation
# of its fourth-from-last layer.
# NOTE(review): the pooling layer below needs that activation to be a 4-D
# feature map — confirm against the loaded model's summary.
pretrained_classifier = tf.keras.models.load_model('/kaggle/input/efficientnetv2/keras/efficientnetv2_s_imagenet_classifier/2')
base_model = tf.keras.Model(inputs=pretrained_classifier.input,
                            outputs=pretrained_classifier.layers[-4].output)

# Fine-tune only the last 20 layers; every earlier layer is frozen.
backbone_depth = len(base_model.layers)
for position, layer in enumerate(base_model.layers):
    layer.trainable = position >= backbone_depth - 20

# Classification head on top of the backbone. training=False keeps the
# backbone in inference mode when it is called inside this graph.
# NOTE(review): the generators feed pixels rescaled to [0, 1] — confirm this
# is the input range the loaded backbone expects.
inputs = layers.Input(shape=(height, width, channels))
features = base_model(inputs, training=False)
pooled = layers.GlobalAveragePooling2D()(features)
hidden = layers.Dense(1024, activation='relu')(pooled)
hidden = layers.Dropout(0.5)(hidden)
outputs = layers.Dense(class_count, activation='softmax')(hidden)

model = models.Model(inputs, outputs)

optimizer = tf.keras.optimizers.Adam(learning_rate=1e-4)
model.compile(optimizer=optimizer,
              loss='categorical_crossentropy',
              metrics=['accuracy'])

In [None]:
# Report how many samples each generator serves.
for caption, generator in (
    ("Jumlah data latih:", train_gen),
    ("Jumlah data validasi:", valid_gen),
    ("Jumlah data testing:", testing_gen),
):
    print(caption, generator.samples)


In [None]:
# ================================================
# 4. TRAINING THE MODEL WITH CLASS WEIGHTS
# ================================================
callbacks = [
    # Stop after 5 epochs without a val_loss improvement and restore the
    # weights from the best val_loss epoch.
    EarlyStopping(patience=5, restore_best_weights=True, monitor='val_loss'),
    # Independently keep on disk the model with the best val_accuracy.
    ModelCheckpoint('best_model.keras', save_best_only=True, monitor='val_accuracy')
]

# len(generator) is the number of batches including the final partial one.
# Floor division (n // batch_size) would drop that last batch every epoch and
# would give 0 steps whenever a split holds fewer samples than one batch.
steps_per_epoch = len(train_gen)
validation_steps = len(valid_gen)

history = model.fit(
    train_gen,
    validation_data=valid_gen,
    epochs=30,
    steps_per_epoch=steps_per_epoch,
    validation_steps=validation_steps,
    callbacks=callbacks,
    class_weight=class_weights
)


In [None]:
# ================================================
# 5. MODEL EVALUATION
# ================================================
test_loss, test_acc = model.evaluate(testing_gen)
print(f"Test Accuracy: {test_acc:.4f}")

# testing_gen is built with shuffle=False, so prediction rows follow the same
# order as testing_gen.classes.
y_pred = model.predict(testing_gen)
y_pred_classes = np.argmax(y_pred, axis=1)
y_true = testing_gen.classes

# Pin the label set to every known class index. Left to infer it from the
# data, scikit-learn drops any class absent from both y_true and the
# predictions, which misaligns the tick labels / target_names below.
class_ids = list(range(len(classes)))

# Confusion Matrix
cm = confusion_matrix(y_true, y_pred_classes, labels=class_ids)
fig, ax = plt.subplots(figsize=(10, 8))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=classes, yticklabels=classes, ax=ax)
ax.set_title('Confusion Matrix')
ax.set_xlabel('Predicted')
ax.set_ylabel('True')
plt.show()

# Classification Report
print("Classification Report:\n",
      classification_report(y_true, y_pred_classes,
                            labels=class_ids, target_names=classes))

In [None]:
# ================================================
# 6. HISTORY VISUALIZATION
# ================================================
def plot_history(history):
    """Plot training curves side by side: accuracy (left) and loss (right).

    Args:
        history: Object whose ``history`` attribute maps metric names
            ('accuracy', 'val_accuracy', 'loss', 'val_loss') to per-epoch
            value lists, such as the value returned by ``model.fit``.

    A series missing from ``history.history`` (for example the ``val_*``
    ones when training ran without validation data) is skipped instead of
    raising ``KeyError``.
    """
    recorded = history.history
    fig, axes = plt.subplots(1, 2, figsize=(14, 5))

    for ax, (metric, title) in zip(axes, (('accuracy', 'Accuracy'), ('loss', 'Loss'))):
        for key, prefix in ((metric, 'Train'), (f'val_{metric}', 'Val')):
            if key in recorded:
                ax.plot(recorded[key], label=f'{prefix} {title}')
        ax.set_title(title)
        ax.set_xlabel('Epoch')
        ax.set_ylabel(title)
        ax.legend()

    plt.show()

plot_history(history)