In [15]:
import tensorflow as tf
from tensorflow.keras import layers, models, applications, callbacks
from tensorflow.keras.preprocessing.image import ImageDataGenerator, load_img, img_to_array
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import tensorflow as tf
import pandas as pd
import numpy as np
import shutil
import os
import h5py
import random

# Root folder for everything this notebook reads and writes; the class
# sub-folders are expected under `<dataset_dir>/raw-img`.
dataset_dir = "dataset"
raw_img_dir = os.path.join(dataset_dir, "raw-img")

# `isdir` (rather than `exists`) so that a stray *file* named raw-img does not
# make the following `os.listdir` call raise.
if os.path.isdir(raw_img_dir) and os.listdir(raw_img_dir):
    print(f"Dataset already exists at {raw_img_dir}. Skipping download.")
else:
    print("Dataset not found. Attempting to download...")
    try:
        import kagglehub
    except ImportError:
        print(
            "Error: the 'kagglehub' package is not installed, so the dataset "
            "cannot be downloaded. Install it (pip install kagglehub) or place "
            f"the images under {raw_img_dir} manually."
        )
    else:
        # kagglehub's `path` argument selects a single file *inside* the
        # dataset, it is not a destination directory. Download the whole
        # dataset into kagglehub's cache and copy it to where the rest of the
        # notebook looks for it.
        # NOTE(review): assumes the archive has `raw-img/` at its top level -
        # confirm against the downloaded folder.
        cache_path = kagglehub.dataset_download("alessiocorrado99/animals10")
        shutil.copytree(cache_path, dataset_dir, dirs_exist_ok=True)
        dataset_path = dataset_dir
        print(f"Dataset downloaded to: {dataset_path}")

Dataset already exists at dataset\raw-img. Skipping download.


# Data Preprocessing and Cleaning

In [16]:
def generate_annotations(root_dir=None):
    """Write `<root>/_annotations.csv` listing every image file and its class.

    The class label of a file is the name of the sub-folder of
    `<root>/raw-img` that contains it.

    Args:
        root_dir: Dataset root to scan. Defaults to the notebook-level
            `dataset_dir` when omitted, which is what the original
            zero-argument call did.

    Returns:
        None. The result is the CSV written to disk, with the columns
        `filename` and `class`.
    """
    root = dataset_dir if root_dir is None else root_dir
    images_root = f"{root}/raw-img"

    records = []
    # os.listdir returns entries in arbitrary order; sorting keeps the CSV (and
    # therefore the seeded train/valid/test split built from it) reproducible.
    for class_name in sorted(os.listdir(images_root)):
        class_dir = f"{images_root}/{class_name}"
        if not os.path.isdir(class_dir):
            # Stray files next to the class folders are not classes.
            continue
        for filename in sorted(os.listdir(class_dir)):
            file_path = f"{class_dir}/{filename}"
            if os.path.isfile(file_path):
                records.append({"filename": file_path, "class": class_name})

    # Explicit columns so that an empty dataset still yields a CSV with a header.
    df = pd.DataFrame(records, columns=["filename", "class"])
    df.to_csv(f"{root}/_annotations.csv", index=False)

# Build <dataset_dir>/_annotations.csv now; create_dataset_split() below reads it.
generate_annotations()

def create_dataset_split(dataset_dir, use_full_paths=True):
    """Split the annotated images 70/15/15 into train/valid/test folders.

    Reads `<dataset_dir>/_annotations.csv`, performs a class-stratified split
    with a fixed seed, copies every image into `<dataset_dir>/<split>/` and
    writes `<dataset_dir>/<split>_annotations.csv` for each split.

    Args:
        dataset_dir: Dataset root containing `_annotations.csv`.
        use_full_paths: When True the per-split CSVs store the path of the
            copied file; when False they store only its file name.

    Returns:
        A `(train_df, valid_df, test_df)` tuple of DataFrames with the columns
        `filename` and `class`, matching the CSVs that were written.
    """
    df = pd.read_csv(os.path.join(dataset_dir, "_annotations.csv"))

    for split in ('train', 'test', 'valid'):
        os.makedirs(os.path.join(dataset_dir, split), exist_ok=True)

    # 70% train, then the remaining 30% is halved into validation and test.
    train_df, test_valid_df = train_test_split(
        df, test_size=0.3, stratify=df['class'], random_state=42
    )
    valid_df, test_df = train_test_split(
        test_valid_df, test_size=0.5, stratify=test_valid_df['class'], random_state=42
    )

    def _unique_name(src, class_name, taken):
        """Return a file name for `src` that is not yet used in this split."""
        base = os.path.basename(src)
        candidate = base
        # normcase: on case-insensitive filesystems "A.jpeg" and "a.jpeg" clash.
        if os.path.normcase(candidate) in taken:
            candidate = f"{class_name}_{base}"
        counter = 1
        while os.path.normcase(candidate) in taken:
            candidate = f"{class_name}_{counter}_{base}"
            counter += 1
        taken.add(os.path.normcase(candidate))
        return candidate

    def process_split(split_df, split_name):
        """Copy one split's images into its folder and write its annotation CSV."""
        split_dir = os.path.join(dataset_dir, split_name)
        new_annotations = []
        # All class folders are flattened into one directory, so two classes
        # that both contain e.g. "1.jpeg" would overwrite each other and leave
        # one row pointing at an image of the wrong class. Track used names and
        # disambiguate on collision; names without a collision are unchanged.
        taken = set()

        for src, class_name in zip(split_df['filename'], split_df['class']):
            name = _unique_name(src, class_name, taken)
            dst = os.path.join(split_dir, name)
            shutil.copy(src, dst)

            filename = dst if use_full_paths else name
            new_annotations.append({'filename': filename, 'class': class_name})

        new_df = pd.DataFrame(new_annotations, columns=['filename', 'class'])
        new_df.to_csv(os.path.join(dataset_dir, f'{split_name}_annotations.csv'), index=False)
        return new_df

    train_df = process_split(train_df, 'train')
    valid_df = process_split(valid_df, 'valid')
    test_df = process_split(test_df, 'test')

    return train_df, valid_df, test_df

# Materialise the train/valid/test folders together with their annotation CSVs.
train_df, valid_df, test_df = create_dataset_split(dataset_dir, use_full_paths=True)

batch_size = 64

# Training images are rescaled to [0, 1] and randomly augmented; validation
# images are only rescaled.
train_datagen = ImageDataGenerator(
    rescale=1./255,
    horizontal_flip=True,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
)
valid_datagen = ImageDataGenerator(rescale=1./255)

# Options shared by both generators; only the source CSV and `shuffle` differ.
flow_options = dict(
    x_col='filename',
    y_col='class',
    target_size=(224, 224),
    batch_size=batch_size,
    class_mode='categorical',
)

train_annotations = pd.read_csv(os.path.join(dataset_dir, 'train_annotations.csv'))
train_generator = train_datagen.flow_from_dataframe(
    dataframe=train_annotations, shuffle=True, **flow_options
)

valid_annotations = pd.read_csv(os.path.join(dataset_dir, 'valid_annotations.csv'))
validation_generator = valid_datagen.flow_from_dataframe(
    dataframe=valid_annotations, shuffle=False, **flow_options
)

print(f"Found {len(train_generator.filenames)} images belonging to {len(train_generator.class_indices)} classes in the training set.")
print(f"Found {len(validation_generator.filenames)} images belonging to {len(validation_generator.class_indices)} classes in the validation set.")

Found 18325 validated image filenames belonging to 10 classes.
Found 3927 validated image filenames belonging to 10 classes.
Found 18325 images belonging to 10 classes in the training set.
Found 3927 images belonging to 10 classes in the validation set.


# Model Architecture

In [17]:
input_shape = (224, 224, 3)
num_classes = len(train_generator.class_indices)

# ImageNet-pretrained ResNet50V2 without its classification head, frozen so
# that only the layers stacked on top of it are trained at first.
base_model = applications.ResNet50V2(
    include_top=False,
    weights='imagenet',
    input_shape=input_shape,
)
base_model.trainable = False

# Classification head: pooled features -> two dense layers with dropout ->
# softmax over the classes found by the training generator.
head_layers = [
    layers.GlobalAveragePooling2D(),
    layers.BatchNormalization(),
    layers.Dense(512, activation='relu'),
    layers.Dropout(0.5),
    layers.Dense(256, activation='relu'),
    layers.Dropout(0.3),
    layers.Dense(num_classes, activation='softmax'),
]
model = models.Sequential([base_model, *head_layers])

model.compile(
    loss='categorical_crossentropy',
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001),
    metrics=['accuracy'],
)

# Stop after 5 epochs without a better validation loss and restore the best weights.
early_stop = callbacks.EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True,
)

# Multiply the learning rate by 0.2 after 3 stagnant epochs, never below 1e-6.
reduce_lr = callbacks.ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.2,
    patience=3,
    min_lr=1e-6,
)

# Model Training

In [18]:
# Phase 1: train only the classification head on top of the frozen backbone.
# No explicit steps_per_epoch / validation_steps: the generators know their own
# length, and `samples // batch_size` silently dropped the trailing partial
# batch, so validation metrics were computed on only part of the validation set.
history = model.fit(
    train_generator,
    validation_data=validation_generator,
    epochs=20,
    callbacks=[early_stop, reduce_lr]
)

# Phase 2: fine-tune the last 30 backbone layers with a lower learning rate.
base_model = model.layers[0]
base_model.trainable = True
for layer in base_model.layers[:-30]:
    layer.trainable = False
# Keep BatchNormalization layers frozen while fine-tuning. The Keras
# transfer-learning guide recommends this so the pretrained moving statistics
# are not overwritten by updates from this dataset.
for layer in base_model.layers[-30:]:
    if isinstance(layer, layers.BatchNormalization):
        layer.trainable = False

# Changes to `trainable` only take effect after the model is compiled again.
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-5),
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

history2 = model.fit(
    train_generator,
    validation_data=validation_generator,
    epochs=30,
    callbacks=[early_stop, reduce_lr]
)

Epoch 1/20
  2/286 [..............................] - ETA: 4:34 - loss: 3.2551 - accuracy: 0.0859 

KeyboardInterrupt: 

# Model Evaluation

In [None]:
# Training ran in two phases (`history`: head only, `history2`: fine-tuning);
# join them so the curves cover the whole run rather than only the first phase.
training_runs = [history, history2]
curves = {
    metric: [value for run in training_runs for value in run.history.get(metric, [])]
    for metric in ('accuracy', 'val_accuracy', 'loss', 'val_loss')
}
fine_tune_start = len(history.history.get('loss', []))

fig, (ax_acc, ax_loss) = plt.subplots(1, 2, figsize=(12, 4))

ax_acc.plot(curves['accuracy'], label='Training Accuracy')
ax_acc.plot(curves['val_accuracy'], label='Validation Accuracy')
ax_acc.set_title('Model Accuracy')
ax_acc.set_xlabel('Epoch')
ax_acc.set_ylabel('Accuracy')

ax_loss.plot(curves['loss'], label='Training Loss')
ax_loss.plot(curves['val_loss'], label='Validation Loss')
ax_loss.set_title('Model Loss')
ax_loss.set_xlabel('Epoch')
ax_loss.set_ylabel('Loss')

for ax in (ax_acc, ax_loss):
    # Dashed line between the last head-only epoch and the first fine-tuning epoch.
    ax.axvline(fine_tune_start - 0.5, linestyle='--', color='gray', label='Fine-tuning starts')
    ax.legend()

fig.tight_layout()
plt.show()

# `test_generator` was never created anywhere in the notebook, so the
# evaluation raised NameError. Build it from the test split's annotations with
# the same rescale-only preprocessing as validation, and pin the class order to
# the one used for training so the label indices line up.
test_generator = valid_datagen.flow_from_dataframe(
    dataframe=pd.read_csv(os.path.join(dataset_dir, 'test_annotations.csv')),
    x_col='filename',
    y_col='class',
    classes=list(train_generator.class_indices),
    target_size=(224, 224),
    batch_size=batch_size,
    class_mode='categorical',
    shuffle=False
)

# No `steps=`: evaluate every test image instead of dropping the last partial batch.
test_loss, test_acc = model.evaluate(test_generator)
print(f'Test accuracy: {test_acc:.4f}')

model.save('animal_classification_model.tf')

# Inference on 25 Test Images, Saved to the Inference Examples Directory

In [19]:
# Rebuild the training architecture so the checkpoint weights can be copied in
# layer by layer. The explicit layer names are used to build the HDF5 paths below.
base_model = applications.ResNet50V2(
    weights='imagenet',
    include_top=False,
    input_shape=(224, 224, 3)
)
base_model.trainable = False

model = tf.keras.Sequential([
    base_model,
    layers.GlobalAveragePooling2D(name='global_average_pooling2d'),
    layers.BatchNormalization(name='batch_normalization'),
    layers.Dense(512, activation='relu', name='dense_4'),
    layers.Dropout(0.5),
    layers.Dense(256, activation='relu', name='dense_5'),
    layers.Dropout(0.3),
    layers.Dense(10, activation='softmax', name='dense_6')
])

# One forward pass so that every layer has created its variables before
# values are assigned to them.
dummy_input = tf.random.normal((1, 224, 224, 3))
model(dummy_input)

# Weights that could not be restored. Previously these were skipped without a
# summary, which leaves the affected layer at its random initial values.
unrestored_weights = []

with h5py.File('checkpoints/model_checkpoint.h5', 'r') as f:
    for layer in model.layers:
        # The nested ResNet50V2 backbone is skipped and keeps its ImageNet weights.
        # NOTE(review): if the checkpoint was saved after fine-tuning, the
        # fine-tuned backbone weights are NOT restored here - confirm this is intended.
        if isinstance(layer, tf.keras.Model):
            continue
        if not getattr(layer, 'weights', None):
            continue

        print(f"\nLoading weights for layer: {layer.name}")
        for weight in layer.weights:
            # Variables are looked up in the file under "<layer>/<layer>/<variable>".
            weight_name = weight.name.replace(layer.name + '/', '')
            weight_path = f"{layer.name}/{layer.name}/{weight_name}"
            if weight_path not in f:
                print(f"Weight not found in checkpoint: {weight_path}")
                unrestored_weights.append(weight_path)
                continue

            print(f"Loading {weight_path}")
            weight_value = f[weight_path][:]
            print(f"Weight shape in file: {weight_value.shape}")
            print(f"Layer weight shape: {weight.shape}")
            if weight_value.shape == weight.shape:
                weight.assign(weight_value)
            else:
                print(f"Shape mismatch for {weight_path}")
                unrestored_weights.append(weight_path)

if unrestored_weights:
    print(
        f"\nWARNING: {len(unrestored_weights)} weight(s) were not restored from the "
        f"checkpoint and keep their initial values: {unrestored_weights}"
    )

model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-5),
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

def preprocess_image(image_path):
    """Load one image and prepare it as a single-item batch for the model.

    Args:
        image_path: Path of the image file to load.

    Returns:
        A `(batch, image)` tuple: `batch` is the image resized to 224x224,
        converted to an array with a leading batch axis and divided by 255;
        `image` is the resized image object returned by `load_img`.
    """
    resized_image = load_img(image_path, target_size=(224, 224))
    batch = np.expand_dims(img_to_array(resized_image), axis=0) / 255.0
    return batch, resized_image

def predict_and_save(image_path, class_names, save_path):
    """Classify one image, save an annotated figure and print the top 3 classes.

    Uses the notebook-level `model` for the prediction.

    Args:
        image_path: Path of the image to classify.
        class_names: Class labels indexed by the model's output position.
        save_path: Where the figure with the predicted label is written.
    """
    model_input, shown_image = preprocess_image(image_path)
    predictions = model.predict(model_input)

    best_index = np.argmax(predictions[0])
    predicted_class = class_names[best_index]
    confidence = np.max(predictions[0]) * 100

    # Indices of the three highest scores, best first.
    ranked_top3 = np.argsort(predictions[0])[::-1][:3]

    plt.figure(figsize=(8, 6))
    plt.imshow(shown_image)
    plt.axis('off')
    plt.title(f'Predicted: {predicted_class}\nConfidence: {confidence:.2f}%')
    plt.savefig(save_path)
    plt.close()  # release the figure; this function is called in a loop

    print(f"\nResults for {os.path.basename(save_path)}:")
    for idx in ranked_top3:
        print(f"{class_names[idx]}: {predictions[0][idx]*100:.2f}%")

# The model's output index i corresponds to the i-th class label in sorted
# order, because flow_from_dataframe assigns label indices alphanumerically.
# The previous hard-coded English list was in a different order (and used
# "Goat" for the sheep class), so predictions were displayed under the wrong
# name. Derive the order from the training annotations instead and translate
# the Animals-10 folder names (Italian) for display; unknown labels pass
# through unchanged.
# NOTE(review): assumes the checkpoint was trained with the generators built
# from dataset/train_annotations.csv - confirm.
ITALIAN_TO_ENGLISH = {
    'cane': 'Dog', 'cavallo': 'Horse', 'elefante': 'Elephant',
    'farfalla': 'Butterfly', 'gallina': 'Chicken', 'gatto': 'Cat',
    'mucca': 'Cow', 'pecora': 'Sheep', 'ragno': 'Spider',
    'scoiattolo': 'Squirrel',
}
train_labels = pd.read_csv(os.path.join('dataset', 'train_annotations.csv'))['class']
class_names = [
    ITALIAN_TO_ENGLISH.get(label, label)
    for label in sorted(train_labels.astype(str).unique())
]
if len(class_names) != model.output_shape[-1]:
    raise ValueError(
        f"Found {len(class_names)} class labels but the model predicts "
        f"{model.output_shape[-1]} classes."
    )

os.makedirs('inference_examples', exist_ok=True)

test_folder = "dataset/test"
# The test split can contain more than '.jpeg' files; match case-insensitively.
image_extensions = ('.jpeg', '.jpg', '.png')
test_images = sorted(
    f for f in os.listdir(test_folder) if f.lower().endswith(image_extensions)
)
if not test_images:
    raise FileNotFoundError(f"No test images found in {test_folder}")

# NOTE(review): 23 images numbered from 3 yields classification_3 ..
# classification_25; presumably examples 1 and 2 were produced separately - confirm.
num_examples = 23
first_example_number = 3

# Sample without replacement so the same image is not shown twice.
selected_images = random.sample(test_images, min(num_examples, len(test_images)))

for i, random_image in enumerate(selected_images):
    test_image_path = os.path.join(test_folder, random_image)
    save_path = f'inference_examples/classification_{i + first_example_number}.png'
    predict_and_save(test_image_path, class_names, save_path)


Loading weights for layer: batch_normalization
Loading batch_normalization/batch_normalization/gamma:0
Weight shape in file: (2048,)
Layer weight shape: (2048,)
Loading batch_normalization/batch_normalization/beta:0
Weight shape in file: (2048,)
Layer weight shape: (2048,)
Loading batch_normalization/batch_normalization/moving_mean:0
Weight shape in file: (2048,)
Layer weight shape: (2048,)
Loading batch_normalization/batch_normalization/moving_variance:0
Weight shape in file: (2048,)
Layer weight shape: (2048,)

Loading weights for layer: dense_4
Loading dense_4/dense_4/kernel:0
Weight shape in file: (2048, 512)
Layer weight shape: (2048, 512)
Loading dense_4/dense_4/bias:0
Weight shape in file: (512,)
Layer weight shape: (512,)

Loading weights for layer: dense_5
Loading dense_5/dense_5/kernel:0
Weight shape in file: (512, 256)
Layer weight shape: (512, 256)
Loading dense_5/dense_5/bias:0
Weight shape in file: (256,)
Layer weight shape: (256,)

Loading weights for layer: dense_6
Loa

NameError: name 'np' is not defined