In [2]:
!pip install kagglehub --quiet

import kagglehub
import os
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras import layers, models
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
import pandas as pd

path = kagglehub.dataset_download("phucthaiv02/butterfly-image-classification")
print("Dataset downloaded to:", path)

!ls "$path"

train_dir = os.path.join(path, "train")
test_dir = os.path.join(path, "test")
train_csv = os.path.join(path, "Training_set.csv")
test_csv = os.path.join(path, "Testing_set.csv")

train_df = pd.read_csv(train_csv)
test_df = pd.read_csv(test_csv)

print("Train Dir:", train_dir)
print("Test Dir:", test_dir)
print("Classes in train.csv:", train_df['label'].nunique())

# Target size handed to every generator, and the number of images per batch.
img_size = (128, 128)
batch_size = 32

# Generator for the labelled data: rescales pixels by 1/255 and reserves
# 20% of the rows of train_df for the "validation" subset.
train_datagen = ImageDataGenerator(rescale=1./255, validation_split=0.2)

# Keyword arguments shared by the training and validation iterators; the two
# differ only in which subset of train_df they draw from.
labelled_flow_args = dict(
    dataframe=train_df,
    directory=train_dir,
    x_col="filename",
    y_col="label",
    target_size=img_size,
    batch_size=batch_size,
    class_mode="categorical",
)

train_gen = train_datagen.flow_from_dataframe(subset="training", **labelled_flow_args)
val_gen = train_datagen.flow_from_dataframe(subset="validation", **labelled_flow_args)

# Test iterator: no y_col and class_mode=None, so it yields images only;
# shuffle=False keeps the batches in the row order of test_df.
test_datagen = ImageDataGenerator(rescale=1./255)
test_gen = test_datagen.flow_from_dataframe(
    dataframe=test_df,
    directory=test_dir,
    x_col="filename",
    target_size=img_size,
    batch_size=batch_size,
    class_mode=None,
    shuffle=False,
)

# Number of output units for the classifier, taken from the label mapping
# built by the training iterator.
num_classes = len(train_gen.class_indices)

# Small convolutional classifier: three Conv2D + MaxPooling2D stages followed
# by a dense head with dropout and a softmax over `num_classes` outputs.
#
# The input shape is derived from `img_size` (the target size used by the
# generators) instead of repeating the literal 128x128, so changing the image
# size in one place cannot leave the model and the data pipeline out of sync.
# The trailing 3 is the channel count (the generators are created without a
# color_mode argument, i.e. with Keras' default RGB mode).
model = models.Sequential([
    layers.Input(shape=(*img_size, 3)),
    layers.Conv2D(32, (3,3), activation="relu"),
    layers.MaxPooling2D(2,2),

    layers.Conv2D(64, (3,3), activation="relu"),
    layers.MaxPooling2D(2,2),

    layers.Conv2D(128, (3,3), activation="relu"),
    layers.MaxPooling2D(2,2),

    layers.Flatten(),
    layers.Dense(128, activation="relu"),
    layers.Dropout(0.5),
    layers.Dense(num_classes, activation="softmax")
])

# categorical_crossentropy matches the one-hot labels produced by the
# class_mode="categorical" iterators.
model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])
model.summary()
# Stop training once val_loss has failed to improve for 30 consecutive epochs
# and roll the model back to the best weights seen.
early_stopping = EarlyStopping(monitor="val_loss", patience=30, restore_best_weights=True)

# Write the model to disk only when val_accuracy improves.
checkpoint = ModelCheckpoint("best_butterfly_model.keras", monitor="val_accuracy", save_best_only=True)

callbacks = [early_stopping, checkpoint]

# Train for at most 100 epochs; `history` holds the per-epoch metrics.
history = model.fit(train_gen, validation_data=val_gen, epochs=100, callbacks=callbacks)

# Score the trained model on the validation subset.
val_loss, val_acc = model.evaluate(val_gen)
print(f"Validation Accuracy: {val_acc*100:.2f}%")

# Class probabilities for the test images (one row per image).
test_preds = model.predict(test_gen, verbose=1)
print("Predictions shape:", test_preds.shape)

# Persist the final model alongside the checkpointed best one.
model.save("final_butterfly_model.keras")


Using Colab cache for faster access to the 'butterfly-image-classification' dataset.
Dataset downloaded to: /kaggle/input/butterfly-image-classification
test  Testing_set.csv  train  Training_set.csv
Train Dir: /kaggle/input/butterfly-image-classification/train
Test Dir: /kaggle/input/butterfly-image-classification/test
Classes in train.csv: 75
Found 5200 validated image filenames belonging to 75 classes.
Found 1299 validated image filenames belonging to 75 classes.
Found 2786 validated image filenames.


  self._warn_if_super_not_called()


Epoch 1/100
[1m163/163[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m52s[0m 281ms/step - accuracy: 0.0265 - loss: 4.3049 - val_accuracy: 0.0816 - val_loss: 3.7729
Epoch 2/100
[1m163/163[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 74ms/step - accuracy: 0.1184 - loss: 3.6952 - val_accuracy: 0.3010 - val_loss: 2.9093
Epoch 3/100
[1m 38/163[0m [32m━━━━[0m[37m━━━━━━━━━━━━━━━━[0m [1m7s[0m 60ms/step - accuracy: 0.2629 - loss: 2.9419

KeyboardInterrupt: 

In [1]:
# Summarise the training run. This cell reads `history`, `model`, `val_gen`
# and `test_gen`, so the training cell must have run to completion first.
best_train_acc = max(history.history['accuracy'])
print(f"Best Training Accuracy: {best_train_acc*100:.2f}%")

best_val_acc = max(history.history['val_accuracy'])
print(f"Best Validation Accuracy: {best_val_acc*100:.2f}%")

val_loss, val_acc = model.evaluate(val_gen, verbose=0)
print(f"Final Validation Accuracy: {val_acc*100:.2f}%")

# An iterator created with class_mode=None yields images without labels, so
# accuracy cannot be computed for it. Check that explicitly instead of relying
# on a bare `except:` that would also hide unrelated failures (and swallow
# KeyboardInterrupt). If the attribute is missing, fall through and try anyway.
if getattr(test_gen, "class_mode", "unknown") is None:
    print("Test set has no labels, cannot compute accuracy.")
else:
    try:
        test_loss, test_acc = model.evaluate(test_gen, verbose=0)
        print(f"Test Accuracy: {test_acc*100:.2f}%")
    except Exception as err:
        # Keep the cell best-effort, but surface the real cause.
        print(f"Could not evaluate on the test set: {err!r}")


NameError: name 'history' is not defined