In [None]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import json
import os

In [None]:
import zipfile
import os

# Source archive and the directory its contents are unpacked into.
zip_path = "dataset.zip"  # or adjust if filename differs
extract_dir = "dataset"

# Open the archive read-only; the context manager closes it once
# every member has been written below extract_dir.
with zipfile.ZipFile(file=zip_path, mode="r") as archive:
    archive.extractall(path=extract_dir)

print("✅ Dataset unzipped to 'dataset/'")


✅ Dataset unzipped to 'dataset/'


In [None]:
import os
import shutil
import pandas as pd

# Folder holding the training images, and the CSV that lists them.
# Both live under the nested "dataset/dataset" directory.
train_dir = "dataset/dataset/train"
csv_path = "dataset/dataset/Training_set.csv"

# Read the CSV into a DataFrame; later code reads its 'filename' and
# 'label' columns.
df = pd.read_csv(filepath_or_buffer=csv_path)

# Normalize label formatting
def normalize(label: str) -> str:
    """Return a canonical, directory-safe form of a class label.

    Leading/trailing whitespace is dropped, every internal run of
    whitespace (one or more spaces, tabs, newlines) becomes a single
    underscore, and the result is upper-cased. Collapsing runs ensures
    that "Paper Kite" and "Paper   Kite" map to the same class folder
    instead of "PAPER_KITE" and "PAPER___KITE".

    Args:
        label: Raw label text.

    Returns:
        The normalized label, e.g. "  paper kite " -> "PAPER_KITE".
    """
    # str.split() with no argument both trims the ends and splits on
    # any whitespace run, so no separate strip() is needed.
    return "_".join(label.split()).upper()

# Organize files: move every image listed in the CSV from train_dir into a
# sub-folder of train_dir named after its normalized class label.
#
# Outcomes are counted and summarized at the end instead of printing one
# line per image (thousands of lines, which the notebook truncates).
moved_count = 0      # files moved during this run
already_count = 0    # files already sitting in their class folder
skipped_rows = 0     # rows with an empty filename or label
missing_files = []   # listed in the CSV but found in neither location

# Iterate the two columns directly; iterrows() would build a Series per row.
for filename, raw_label in zip(df['filename'], df['label']):
    # An empty cell is read as NaN, which cannot be normalized or joined
    # into a path; skip the row and report it in the summary.
    if pd.isna(filename) or pd.isna(raw_label):
        skipped_rows += 1
        continue

    label = normalize(raw_label)
    src = os.path.join(train_dir, filename)
    label_dir = os.path.join(train_dir, label)
    dst = os.path.join(label_dir, filename)

    if not os.path.isfile(src):
        # Re-running this cell after a successful run finds the file in its
        # class folder already; that is not an error.
        if os.path.isfile(dst):
            already_count += 1
        else:
            missing_files.append(filename)
        continue

    os.makedirs(label_dir, exist_ok=True)
    shutil.move(src, dst)
    moved_count += 1

# Show only the first few missing files so the warning list stays readable.
MAX_REPORTED = 10
for missing_name in missing_files[:MAX_REPORTED]:
    print(f"⚠️ File not found: {missing_name}")
if len(missing_files) > MAX_REPORTED:
    print(f"⚠️ ... and {len(missing_files) - MAX_REPORTED} more files not found")

print(
    f"✅ Moved {moved_count} images "
    f"({already_count} already in place, {len(missing_files)} missing, "
    f"{skipped_rows} incomplete rows skipped)."
)
# Only claim full success when nothing was missing or skipped.
if not missing_files and not skipped_rows:
    print("🎉 All images have been organized by class.")

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
✅ Moved Image_1501.jpg → COPPER_TAIL/
✅ Moved Image_1502.jpg → DANAID_EGGFLY/
✅ Moved Image_1503.jpg → PAPER_KITE/
✅ Moved Image_1504.jpg → EASTERN_DAPPLE_WHITE/
✅ Moved Image_1505.jpg → PIPEVINE_SWALLOW/
✅ Moved Image_1506.jpg → PURPLE_HAIRSTREAK/
✅ Moved Image_1507.jpg → PIPEVINE_SWALLOW/
✅ Moved Image_1508.jpg → SCARCE_SWALLOW/
✅ Moved Image_1509.jpg → DANAID_EGGFLY/
✅ Moved Image_1510.jpg → GREY_HAIRSTREAK/
✅ Moved Image_1511.jpg → AMERICAN_SNOOT/
✅ Moved Image_1512.jpg → PIPEVINE_SWALLOW/
✅ Moved Image_1513.jpg → QUESTION_MARK/
✅ Moved Image_1514.jpg → AMERICAN_SNOOT/
✅ Moved Image_1515.jpg → BANDED_PEACOCK/
✅ Moved Image_1516.jpg → ELBOWED_PIERROT/
✅ Moved Image_1517.jpg → INDRA_SWALLOW/
✅ Moved Image_1518.jpg → BANDED_ORANGE_HELICONIAN/
✅ Moved Image_1519.jpg → ORCHARD_SWALLOW/
✅ Moved Image_1520.jpg → BECKERS_WHITE/
✅ Moved Image_1521.jpg → BECKERS_WHITE/
✅ Moved Image_1522.jpg → BANDED_PEACOCK/
✅ Moved Image_1523

In [None]:
from tensorflow.keras.applications import VGG16
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Flatten, Dense, Dropout
from tensorflow.keras.optimizers import Adam
import os
import json

# VGG16's ImageNet weights expect inputs produced by its own preprocess_input
# (RGB -> BGR, per-channel ImageNet mean subtraction, no scaling to [0, 1]).
from tensorflow.keras.applications.vgg16 import preprocess_input

# === Configuration ===
TRAIN_DIR = 'dataset/dataset/train'   # one sub-folder per class
IMAGE_SIZE = (224, 224)               # spatial size passed to VGG16 below
BATCH_SIZE = 32
VALIDATION_SPLIT = 0.2                # fraction of images held out for validation
LEARNING_RATE = 0.0001
EPOCHS = 10

# === Setup Image Generators ===
# preprocess_input replaces the former rescale=1./255: feeding [0, 1] images
# to the frozen convolutional base does not match what its weights expect.
# NOTE: any inference code that loads 'vgg16_model.h5' must apply the same
# preprocess_input to its images (not a division by 255).
datagen = ImageDataGenerator(
    preprocessing_function=preprocess_input,
    validation_split=VALIDATION_SPLIT,
)


def _make_generator(subset):
    """Build a directory iterator over TRAIN_DIR for one split.

    Args:
        subset: 'training' or 'validation', as defined by the
            validation_split configured on ``datagen``.

    Returns:
        The iterator returned by ``datagen.flow_from_directory``, yielding
        batches of resized images with one-hot ('categorical') labels.
    """
    return datagen.flow_from_directory(
        TRAIN_DIR,
        target_size=IMAGE_SIZE,
        batch_size=BATCH_SIZE,
        class_mode='categorical',
        subset=subset,
    )


# Both splits share one configuration so they cannot drift apart.
train_gen = _make_generator('training')
val_gen = _make_generator('validation')

# === Load base model ===
# Convolutional base only (include_top=False), initialized with ImageNet weights.
base_model = VGG16(include_top=False, weights='imagenet', input_shape=IMAGE_SIZE + (3,))
# Freeze the pretrained layers so only the new head below is trained.
for layer in base_model.layers:
    layer.trainable = False

# Classification head: flatten -> 128-unit ReLU -> dropout -> softmax over classes.
x = Flatten()(base_model.output)
x = Dense(128, activation='relu')(x)
x = Dropout(0.5)(x)
x = Dense(train_gen.num_classes, activation='softmax')(x)

model = Model(inputs=base_model.input, outputs=x)
model.compile(optimizer=Adam(learning_rate=LEARNING_RATE),
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# === Train ===
# Keep the returned History object so the per-epoch metrics can be inspected later.
history = model.fit(train_gen, validation_data=val_gen, epochs=EPOCHS)

# === Save model and class names ===
model.save('vgg16_model.h5')
# class_indices maps each class folder name to the integer index used by the
# softmax output; it is needed to turn predictions back into class names.
with open('class_indices.json', 'w') as f:
    json.dump(train_gen.class_indices, f)

print("✅ Model and class_indices.json saved.")


Found 5225 images belonging to 75 classes.
Found 1274 images belonging to 75 classes.
Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m58889256/58889256[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Epoch 1/10


  self._warn_if_super_not_called()


[1m164/164[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m57s[0m 274ms/step - accuracy: 0.0298 - loss: 4.3215 - val_accuracy: 0.1507 - val_loss: 3.9691
Epoch 2/10
[1m164/164[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 191ms/step - accuracy: 0.1018 - loss: 3.8813 - val_accuracy: 0.2959 - val_loss: 3.4161
Epoch 3/10
[1m164/164[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 197ms/step - accuracy: 0.2052 - loss: 3.3958 - val_accuracy: 0.4160 - val_loss: 3.0370
Epoch 4/10
[1m164/164[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m33s[0m 201ms/step - accuracy: 0.3006 - loss: 2.9523 - val_accuracy: 0.5047 - val_loss: 2.6578
Epoch 5/10
[1m164/164[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m33s[0m 199ms/step - accuracy: 0.3667 - loss: 2.6645 - val_accuracy: 0.5973 - val_loss: 2.3167
Epoch 6/10
[1m164/164[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 224ms/step - accuracy: 0.4128 - loss: 2.4165 - val_accuracy: 0.6546 - val_loss: 2.1061
Epoch 7/10
[1m164/16



✅ Model and class_indices.json saved.


In [None]:
import shutil

from google.colab import files

# Pack everything under "dataset" into organized_dataset.zip
# (make_archive appends the ".zip" extension to the base name).
shutil.make_archive(base_name="organized_dataset", format='zip', root_dir="dataset")

# Send the archive through Colab's download helper.
files.download("organized_dataset.zip")


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
# Download the trained model file that the training cell wrote with
# model.save('vgg16_model.h5'), using Colab's files helper.
from google.colab import files
files.download("vgg16_model.h5")


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
# Download the class-name -> index mapping written by the training cell.
# Relies on `files` having been imported from google.colab in an earlier cell.
files.download("class_indices.json")


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>