In [3]:
import os
import zipfile
import requests

def download_and_extract(url, extract_to, timeout=60):
    """Download a zip archive into ``extract_to`` and unpack it there.

    The archive is cached as ``<extract_to>/dataset.zip``. When that file
    already exists the download is skipped and only the extraction runs.

    Args:
        url: HTTP(S) address of the zip archive.
        extract_to: Directory that receives both the archive and its
            contents; created if it does not exist.
        timeout: Value forwarded to ``requests.get`` as its ``timeout``.

    Returns:
        ``extract_to``, unchanged.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        zipfile.BadZipFile: If ``dataset.zip`` is not a valid zip archive.
    """
    zip_path = os.path.join(extract_to, "dataset.zip")
    os.makedirs(extract_to, exist_ok=True)

    # Download the dataset if not already downloaded
    if not os.path.exists(zip_path):
        print("Downloading dataset...")
        # Stream into a temporary name first: a failed or interrupted download
        # must never leave a broken "dataset.zip" behind, because its mere
        # existence makes every later call skip the download.
        partial_path = zip_path + ".part"
        try:
            with requests.get(url, stream=True, timeout=timeout) as response:
                # Without this an HTTP error page would be saved as the archive.
                response.raise_for_status()
                with open(partial_path, "wb") as f:
                    for chunk in response.iter_content(chunk_size=1024 * 1024):
                        if chunk:
                            f.write(chunk)
            # Only a fully written file is promoted to the cached name.
            os.replace(partial_path, zip_path)
        finally:
            # No-op after a successful replace; removes leftovers otherwise.
            if os.path.exists(partial_path):
                os.remove(partial_path)
        print(f"Dataset downloaded to {zip_path}")

    # Extract the dataset
    print("Extracting dataset...")
    with zipfile.ZipFile(zip_path, "r") as zip_ref:
        zip_ref.extractall(extract_to)
    print("Extraction completed.")
    return extract_to  # Return the extraction directory path

def main():
    """Fetch the cats-vs-dogs archive, unpack it, and report what was extracted.

    Raises:
        FileNotFoundError: If the extraction directory does not contain a
            ``PetImages`` directory.
    """
    # Define dataset URL and extraction path
    dataset_url = "https://storage.googleapis.com/mlep-public/course_1/week2/kagglecatsanddogs_3367a.zip"
    extract_to = "./datasets"

    # Download and extract the dataset
    data_path = download_and_extract(dataset_url, extract_to)

    # Check available directories
    print("Available directories:", os.listdir(data_path))

    # Locate the PetImages folder directly. isdir (not exists) so that a plain
    # file with that name is reported here instead of failing in listdir below.
    pet_images_dir = os.path.join(data_path, "PetImages")
    if not os.path.isdir(pet_images_dir):
        raise FileNotFoundError(f"'PetImages' folder not found in {data_path}.")

    print("PetImages folder located at:", pet_images_dir)

    # Add your logic for data preparation and model training here
    # For example:
    # train_generator, validation_generator = prepare_data(pet_images_dir)
    print("Data preparation and training logic goes here.")
    # Report the paths actually used above rather than repeating literals that
    # would silently go stale if extract_to changes.
    print(f"Contents of '{data_path}':", os.listdir(data_path))
    print(f"Contents of '{pet_images_dir}':", os.listdir(pet_images_dir))


# Entry point: call main() when this cell/script is executed as __main__.
if __name__ == "__main__":
    main()


Downloading dataset...
Dataset downloaded to ./datasets/dataset.zip
Extracting dataset...
Extraction completed.
Available directories: ['readme[1].txt', 'MSR-LA - 3467.docx', 'dataset.zip', 'PetImages']
PetImages folder located at: ./datasets/PetImages
Data preparation and training logic goes here.
Contents of './datasets': ['readme[1].txt', 'MSR-LA - 3467.docx', 'dataset.zip', 'PetImages']
Contents of './datasets/PetImages': ['Cat', 'Dog']


In [14]:
import os
import tensorflow as tf
from PIL import Image
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras import layers, models
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import BinaryCrossentropy

# Function to clean the dataset
def clean_dataset(directory, categories=("Dog", "Cat")):
    """Delete files that Pillow cannot open and verify from each class folder.

    Only regular files directly inside ``<directory>/<category>`` are checked;
    sub-directories are left untouched. Deletion is permanent.

    Args:
        directory: Root folder that contains one sub-folder per class.
        categories: Names of the class sub-folders to scan.

    NOTE(review): ``Image.verify`` is documented as checking file integrity
    without decoding pixel data, so some truncated images may still pass —
    confirm if load errors show up during training.
    """
    for category in categories:
        folder = os.path.join(directory, category)
        # isdir also rejects a regular file with the class name, which would
        # otherwise make os.listdir raise.
        if not os.path.isdir(folder):
            print(f"Folder not found: {folder}")
            continue

        for filename in os.listdir(folder):
            file_path = os.path.join(folder, filename)
            if not os.path.isfile(file_path):
                # A sub-directory makes Image.open raise OSError, and the
                # os.remove in the handler below would then crash on it.
                continue
            try:
                # Try opening the file as an image
                with Image.open(file_path) as img:
                    img.verify()  # Check if it's a valid image
            except (IOError, SyntaxError):
                # Kept deliberately narrow: the handler deletes data, so
                # unrelated errors should surface instead of removing files.
                print(f"Removing corrupted or non-image file: {file_path}")
                os.remove(file_path)


def prepare_data(data_path, target_size=(64, 64), batch_size=32, validation_split=0.2):
    """Create training and validation generators over a class-per-folder tree.

    Both generators come from one ``ImageDataGenerator`` configured with
    ``rescale=1/255`` and the given ``validation_split``, and both use
    ``class_mode="binary"``.

    Args:
        data_path: Directory passed to ``flow_from_directory``.
        target_size: ``(height, width)`` forwarded as ``target_size``; it has
            to agree with the input shape of the model being trained.
        batch_size: Forwarded as ``batch_size``.
        validation_split: Fraction forwarded to ``ImageDataGenerator``.

    Returns:
        ``(train_generator, validation_generator)`` for the ``"training"`` and
        ``"validation"`` subsets respectively.
    """
    datagen = ImageDataGenerator(rescale=1.0 / 255, validation_split=validation_split)

    def _flow(subset):
        # One place for the shared arguments so the two subsets cannot drift.
        return datagen.flow_from_directory(
            data_path,
            target_size=target_size,
            batch_size=batch_size,
            class_mode="binary",
            subset=subset,
        )

    train_generator = _flow("training")
    validation_generator = _flow("validation")
    return train_generator, validation_generator
def build_transfer_model():
    """Alternative classifier: frozen MobileNetV2 backbone with a sigmoid head.

    This code was previously disabled inside a bare triple-quoted string under
    the name ``build_model``. It now has its own name so it stays importable
    and lint-checked without shadowing the CNN ``build_model`` that training
    actually uses.

    Returns:
        A compiled Sequential model: MobileNetV2 (``include_top=False``,
        ``weights="imagenet"``, marked non-trainable), global average pooling,
        and a single sigmoid unit; Adam at 0.001 with binary cross-entropy.

    NOTE(review): Keras documents MobileNetV2's ImageNet weights as expecting
    inputs scaled to [-1, 1], while prepare_data rescales to [0, 1] — confirm
    the preprocessing before relying on this model.
    """
    base_model = MobileNetV2(input_shape=(64, 64, 3), include_top=False, weights="imagenet")
    # Train only the new head; the pre-trained backbone is left unchanged.
    base_model.trainable = False
    model = models.Sequential([
        base_model,
        layers.GlobalAveragePooling2D(),
        layers.Dense(1, activation="sigmoid")
    ])
    model.compile(optimizer=Adam(learning_rate=0.001), loss=BinaryCrossentropy(), metrics=["accuracy"])
    return model
def build_model(input_shape=(64, 64, 3), learning_rate=0.001):
    """Build and compile a small CNN for binary classification (cats vs dogs).

    Architecture: three Conv2D(3x3, ReLU) + MaxPooling2D(2x2) stages with 32,
    64 and 128 filters, then Flatten, Dense(512, ReLU) and a single sigmoid
    output unit.

    Args:
        input_shape: ``(height, width, channels)`` of one input image; must
            agree with the size the data generators produce.
        learning_rate: Learning rate handed to the Adam optimizer.

    Returns:
        The compiled Sequential model (binary cross-entropy loss, accuracy
        metric).
    """
    model = models.Sequential([
        # The shape is declared with an explicit Input layer instead of an
        # input_shape= argument on the first Conv2D, which recent Keras
        # releases warn about for Sequential models.
        layers.Input(shape=input_shape),
        layers.Conv2D(32, (3, 3), activation="relu"),
        layers.MaxPooling2D((2, 2)),
        layers.Conv2D(64, (3, 3), activation="relu"),
        layers.MaxPooling2D((2, 2)),
        layers.Conv2D(128, (3, 3), activation="relu"),
        layers.MaxPooling2D((2, 2)),
        layers.Flatten(),
        layers.Dense(512, activation="relu"),
        layers.Dense(1, activation="sigmoid"),
    ])
    model.compile(
        optimizer=Adam(learning_rate=learning_rate),
        loss=BinaryCrossentropy(),
        metrics=["accuracy"],
    )
    return model

def main():
    """Clean the image folders, train the CNN briefly, save it, and evaluate it.

    Steps: remove unreadable files under ``./datasets/PetImages``, build the
    training/validation generators, fit for 5 epochs, save the model to
    ``cat_dog_classifier.h5``, then print validation loss and accuracy.
    """
    pet_images_dir = "./datasets/PetImages"  # Adjust based on your dataset path
    # Single source for the output name so the save call and the message agree.
    # NOTE(review): recent Keras releases describe HDF5 (.h5) as a legacy
    # format and recommend ".keras"; the name is kept so existing consumers of
    # this file keep working — confirm before changing.
    model_path = "cat_dog_classifier.h5"

    clean_dataset(pet_images_dir)
    train_generator, validation_generator = prepare_data(pet_images_dir)

    model = build_model()
    # The History object returned by fit() was never read, so it is not bound.
    model.fit(
        train_generator,
        steps_per_epoch=50,  # Reduced for speed: 50 training batches per epoch
        epochs=5,
        validation_data=validation_generator,
        validation_steps=10,  # Reduced for speed: 10 validation batches per epoch
    )
    print("Model training complete!")

    model.save(model_path)
    print(f"Model saved as {model_path}")

    # Evaluate the model. No step limit here, unlike the per-epoch validation
    # inside fit().
    print("Evaluating the model...")

    loss, accuracy = model.evaluate(validation_generator)
    print(f"Validation Loss: {loss:.4f}")
    print(f"Validation Accuracy: {accuracy:.4f}")


# Entry point: call main() when this cell/script is executed as __main__.
if __name__ == "__main__":
    main()



Found 20000 images belonging to 2 classes.
Found 4998 images belonging to 2 classes.
Epoch 1/5


  self._warn_if_super_not_called()


[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 321ms/step - accuracy: 0.5279 - loss: 0.7089 - val_accuracy: 0.5938 - val_loss: 0.6605
Epoch 2/5
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 305ms/step - accuracy: 0.6095 - loss: 0.6608 - val_accuracy: 0.5875 - val_loss: 0.6702
Epoch 3/5
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 297ms/step - accuracy: 0.6305 - loss: 0.6399 - val_accuracy: 0.6500 - val_loss: 0.6339
Epoch 4/5
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 295ms/step - accuracy: 0.6547 - loss: 0.6215 - val_accuracy: 0.6187 - val_loss: 0.6371
Epoch 5/5
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 297ms/step - accuracy: 0.6785 - loss: 0.6152 - val_accuracy: 0.6625 - val_loss: 0.6038




Model training complete!
Model saved as cat_dog_classifier.h5
Evaluating the model...
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 115ms/step - accuracy: 0.6541 - loss: 0.6197
Validation Loss: 0.6159
Validation Accuracy: 0.6575
