In [None]:
# ✅ Install dependencies if needed
# !pip install tensorflow pandas matplotlib --quiet

# -------------------------------
# STEP 1: IMPORT LIBRARIES
# -------------------------------
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import VGG16
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Dropout
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import os
import random

# -------------------------------
# STEP 2: DEFINE DATA PATHS
# -------------------------------
# Root folder of the dataset; the path is specific to this machine.
base_dir = r"C:\Users\HP\Downloads\datasets_dog_breed_classification"

# The image folders and the label CSV are all direct children of the root.
train_dir, test_dir, labels_path = (
    os.path.join(base_dir, entry) for entry in ("train", "test", "labels.csv")
)

# -------------------------------
# STEP 3: LOAD AND FIX LABELS
# -------------------------------
# Read the label table; the 'id' column holds image names and is turned
# into real filenames below.
df = pd.read_csv(labels_path)
print("Before fixing:")
print(df.head())

# Add '.jpg' to filenames only when it is missing. Appending it
# unconditionally would turn "abc.jpg" into "abc.jpg.jpg", and no file
# would then match on disk. Ids are cast to str first so purely numeric
# ids (parsed as integers by pandas) do not break the concatenation.
image_ids = df['id'].astype(str)
already_has_ext = image_ids.str.lower().str.endswith(".jpg")
df['id'] = image_ids.where(already_has_ext, image_ids + ".jpg")

print("\nAfter fixing filenames:")
print(df.head())

# -------------------------------
# STEP 4: VERIFY FILE EXISTENCE
# -------------------------------
# Spot-check the first labelled image: build its full path inside the
# training folder and report whether that file is present on disk.
first_image_name = df['id'].iloc[0]
sample_path = os.path.join(train_dir, first_image_name)
sample_found = os.path.exists(sample_path)
print("\nSample path check:", sample_path)
print("Exists?", sample_found)

# -------------------------------
# STEP 5: IMAGE DATA GENERATORS
# -------------------------------
# Training images are rescaled to [0, 1] and randomly augmented.
train_datagen = ImageDataGenerator(
    rescale=1./255,
    validation_split=0.2,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True
)

# Validation images are only rescaled. Reusing the augmenting generator
# would score the model on randomly distorted images and make the reported
# validation accuracy noisy. The same validation_split is used so that both
# generators partition the dataframe rows at the same boundary.
val_datagen = ImageDataGenerator(
    rescale=1./255,
    validation_split=0.2
)

# Arguments shared by both subsets, kept in one place so they cannot drift.
flow_kwargs = dict(
    dataframe=df,
    directory=train_dir,
    x_col='id',
    y_col='breed',
    target_size=(224, 224),
    batch_size=32,
    class_mode='categorical'
)

train_generator = train_datagen.flow_from_dataframe(
    subset='training',
    **flow_kwargs
)

# shuffle=False keeps evaluation batches in a fixed order.
val_generator = val_datagen.flow_from_dataframe(
    subset='validation',
    shuffle=False,
    **flow_kwargs
)

# Fail fast with an actionable message. Without this check an empty
# generator only surfaces later, inside model.fit, as
# "ValueError: The PyDataset has length 0".
if train_generator.samples == 0 or val_generator.samples == 0:
    raise FileNotFoundError(
        f"No usable images found in {train_dir!r} for the filenames listed "
        f"in the labels file (training: {train_generator.samples}, "
        f"validation: {val_generator.samples}). Check that the dataset path "
        "is correct and that the 'id' column matches the image filenames."
    )

print(f"\n✅ Found {train_generator.samples} training and {val_generator.samples} validation images.")

# -------------------------------
# STEP 6: LOAD PRETRAINED MODEL (VGG16)
# -------------------------------
# VGG16 with ImageNet weights and without its classifier top, sized for
# 224x224 RGB inputs.
base_model = VGG16(
    include_top=False,
    weights='imagenet',
    input_shape=(224, 224, 3),
)

# Mark every layer of the base as non-trainable so that only the layers
# added on top of it are updated during training.
for conv_layer in base_model.layers:
    conv_layer.trainable = False

# -------------------------------
# STEP 7: BUILD THE MODEL
# -------------------------------
# One output unit per breed discovered by the training generator.
num_breeds = len(train_generator.class_indices)

# Classification head stacked on top of the VGG16 base.
classifier_head = [
    Flatten(),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(num_breeds, activation='softmax'),
]

model = Sequential([base_model, *classifier_head])

# -------------------------------
# STEP 8: COMPILE MODEL
# -------------------------------
# Categorical cross-entropy matches the one-hot labels produced by
# class_mode='categorical'; accuracy is tracked as the reported metric.
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# -------------------------------
# STEP 9: TRAIN THE MODEL
# -------------------------------
# Train for 10 epochs on the training generator, with the validation
# generator supplied as validation data. The returned History object is
# kept for plotting.
history = model.fit(x=train_generator, validation_data=val_generator, epochs=10)

# -------------------------------
# STEP 10: EVALUATE MODEL
# -------------------------------
# evaluate() returns the loss followed by the compiled metrics (accuracy).
val_metrics = model.evaluate(val_generator)
val_loss, val_acc = val_metrics
print(f"\n✅ Validation Accuracy: {val_acc*100:.2f}%")

# -------------------------------
# STEP 11: PLOT TRAINING HISTORY
# -------------------------------
plt.figure(figsize=(12, 5))

# One side-by-side panel per metric: (key in history.history, display name).
panels = [('accuracy', 'Accuracy'), ('loss', 'Loss')]

for position, (metric_key, display_name) in enumerate(panels, start=1):
    plt.subplot(1, 2, position)
    # Training curve, then the matching validation curve ('val_' prefix).
    plt.plot(history.history[metric_key], label=f'Train {display_name}')
    plt.plot(history.history[f'val_{metric_key}'], label=f'Validation {display_name}')
    plt.title(f'Model {display_name}')
    plt.xlabel('Epochs')
    plt.ylabel(display_name)
    plt.legend()

plt.show()

# -------------------------------
# STEP 12: RANDOM PREDICTIONS
# -------------------------------
# Order class names by their numeric index so that labels[i] is the breed
# for output unit i, without relying on the dict's insertion order.
class_to_index = train_generator.class_indices
labels = sorted(class_to_index, key=class_to_index.get)

# Keep only image files: the folder may contain other entries (e.g. OS
# thumbnail caches or sub-folders) that load_img cannot open.
image_extensions = ('.jpg', '.jpeg', '.png', '.bmp')
test_images = [
    name for name in os.listdir(test_dir)
    if name.lower().endswith(image_extensions)
]

if not test_images:
    print(f"No test images found in {test_dir!r}; skipping predictions.")

# Sample without replacement so the same image is not shown twice, and cap
# the count at the number of available images (zero images -> no iterations).
for random_img in random.sample(test_images, k=min(3, len(test_images))):
    img_path = os.path.join(test_dir, random_img)
    img = tf.keras.utils.load_img(img_path, target_size=(224,224))

    # Apply the same 1/255 rescale used by the data generators, then add a
    # leading batch axis because predict() expects a batch of images.
    img_array = tf.keras.utils.img_to_array(img) / 255.0
    img_array = np.expand_dims(img_array, axis=0)
    prediction = model.predict(img_array)

    plt.imshow(img)
    plt.axis('off')
    plt.title(f"Predicted: {labels[np.argmax(prediction[0])]}")
    plt.show()


                                 id             breed
0  000bec180eb18c7604dcecc8fe0dba07       boston_bull
1  001513dfcb2ffafc82cccf4d8bbaba97             dingo
2  001cdf01b096e06d78e9e5112d419397          pekinese
3  00214f311d5d2247d5dfe4fe24b2303d          bluetick
4  0021f9ceb3235effd7fcde7f7538ed62  golden_retriever
Found 0 validated image filenames belonging to 0 classes.
Found 0 validated image filenames belonging to 0 classes.




ValueError: The PyDataset has length 0