In [3]:
pip install opendatasets

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 25.0.1 -> 25.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip


# Import Library

In [4]:
# ======================
# 1. Setup and Imports
# ======================
import os
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPool2D, Flatten, Dense, Dropout, BatchNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.metrics import classification_report, confusion_matrix
import pandas as pd
import opendatasets as od

In [5]:
# Set random seeds for reproducibility
tf.random.set_seed(42)
np.random.seed(42)

# Data Preparation

In [None]:
# ======================
# 2. Data Preparation
# ======================
# Download dataset
od.download('https://www.kaggle.com/datasets/kelixirr/pizza-steak-image-classification-dataset/data')

# Define paths
base_path = '/content/pizza-steak-image-classification-dataset/pizza_steak'
train_dir = os.path.join(base_path, 'train')
test_dir = os.path.join(base_path, 'test')

# Verify data structure
print("\nData Structure:")
for root, dirs, files in os.walk(base_path):
    print(f"{root}: {len(dirs)} directories, {len(files)} files")

Please provide your Kaggle credentials to download this dataset. Learn more: http://bit.ly/kaggle-creds
Your Kaggle username:Your Kaggle Key:Dataset URL: https://www.kaggle.com/datasets/kelixirr/pizza-steak-image-classification-dataset


# Data Visualization

In [6]:
# ======================
# 3. Data Visualization
# ======================
def show_sample_images(class_name, num_samples=3):
    class_path = os.path.join(train_dir, class_name)
    plt.figure(figsize=(15, 5))
    for i in range(num_samples):
        img_path = os.path.join(class_path, np.random.choice(os.listdir(class_path)))
        img = plt.imread(img_path)

        plt.subplot(1, num_samples, i+1)
        plt.imshow(img)
        plt.title(f"{class_name} Sample {i+1}")
        plt.axis('off')
    plt.show()

show_sample_images('pizza')
show_sample_images('steak')

NameError: name 'train_dir' is not defined

# Data Preprocessing

In [2]:
# ======================
# 4. Data Preprocessing
# ======================
# Create validation set from training data
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
    validation_split=0.2  # 20% for validation
)

test_datagen = ImageDataGenerator(rescale=1./255)

NameError: name 'ImageDataGenerator' is not defined

In [None]:
# Data generators
train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=(224, 224),
    batch_size=32,
    class_mode='binary',
    subset='training',
    shuffle=True,
    seed=42
)

val_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=(224, 224),
    batch_size=32,
    class_mode='binary',
    subset='validation',
    shuffle=False
)

test_generator = test_datagen.flow_from_directory(
    test_dir,
    target_size=(224, 224),
    batch_size=32,
    class_mode='binary',
    shuffle=False
)

Found 1200 images belonging to 2 classes.
Found 300 images belonging to 2 classes.
Found 500 images belonging to 2 classes.


In [None]:
# ======================
# 5. Model Definitions
# ======================
def create_model_1():
    """Basic CNN model"""
    model = Sequential([
        Conv2D(16, (3, 3), activation='relu', input_shape=(224, 224, 3)),
        MaxPool2D((2, 2)),
        Conv2D(16, (3, 3), activation='relu'),
        MaxPool2D((2, 2)),
        Flatten(),
        Dense(1, activation='sigmoid')
    ])
    model.compile(optimizer=Adam(),
                 loss='binary_crossentropy',
                 metrics=['accuracy'])
    return model

def create_model_2():
    """Deeper CNN with augmentation"""
    model = Sequential([
        Conv2D(32, (3, 3), activation='relu', input_shape=(224, 224, 3)),
        MaxPool2D((2, 2)),
        Conv2D(64, (3, 3), activation='relu'),
        MaxPool2D((2, 2)),
        Conv2D(128, (3, 3), activation='relu'),
        MaxPool2D((2, 2)),
        Flatten(),
        Dense(128, activation='relu'),
        Dense(1, activation='sigmoid')
    ])
    model.compile(optimizer=Adam(learning_rate=0.0001),
                 loss='binary_crossentropy',
                 metrics=['accuracy'])
    return model

def create_model_3():
    """Advanced CNN with regularization"""
    model = Sequential([
        Conv2D(32, (3, 3), activation='relu', input_shape=(224, 224, 3)),
        BatchNormalization(),
        MaxPool2D((2, 2)),
        Dropout(0.2),

        Conv2D(64, (3, 3), activation='relu'),
        BatchNormalization(),
        MaxPool2D((2, 2)),
        Dropout(0.3),

        Conv2D(128, (3, 3), activation='relu'),
        BatchNormalization(),
        MaxPool2D((2, 2)),
        Dropout(0.4),

        Flatten(),
        Dense(256, activation='relu'),
        BatchNormalization(),
        Dropout(0.5),
        Dense(1, activation='sigmoid')
    ])
    model.compile(optimizer=Adam(learning_rate=0.0001),
                 loss='binary_crossentropy',
                 metrics=['accuracy'])
    return model


In [None]:
# ======================
# 6. Model Training
# ======================
def train_model(model, model_name):
    print(f"\nTraining {model_name}...")
    history = model.fit(
        train_generator,
        epochs=15,
        validation_data=val_generator,
        callbacks=[EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)]
    )
    return history

# Create and train all models
model_1 = create_model_1()
model_1.summary()
history_1 = train_model(model_1, "Model 1")

model_2 = create_model_2()
model_2.summary()
history_2 = train_model(model_2, "Model 2")

model_3 = create_model_3()
model_3.summary()
history_3 = train_model(model_3, "Model 3")

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


  self._warn_if_super_not_called()



Training Model 1...
Epoch 1/15
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m64s[0m 2s/step - accuracy: 0.5142 - loss: 0.8325 - val_accuracy: 0.6500 - val_loss: 0.6061
Epoch 2/15
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m61s[0m 2s/step - accuracy: 0.7081 - loss: 0.5617 - val_accuracy: 0.7033 - val_loss: 0.6145
Epoch 3/15
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m63s[0m 2s/step - accuracy: 0.7363 - loss: 0.5198 - val_accuracy: 0.8033 - val_loss: 0.4870
Epoch 4/15
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m61s[0m 2s/step - accuracy: 0.7693 - loss: 0.4719 - val_accuracy: 0.7133 - val_loss: 0.5184
Epoch 5/15
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m61s[0m 2s/step - accuracy: 0.7802 - loss: 0.4717 - val_accuracy: 0.7833 - val_loss: 0.5084
Epoch 6/15
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2s/step - accuracy: 0.7852 - loss: 0.4768

In [None]:
# ======================
# 7. Evaluation
# ======================
def plot_history(history, title):
    plt.figure(figsize=(12, 4))

    # Accuracy plot
    plt.subplot(1, 2, 1)
    plt.plot(history.history['accuracy'], label='Train Accuracy')
    plt.plot(history.history['val_accuracy'], label='Validation Accuracy')
    plt.title(f'{title} - Accuracy')
    plt.ylabel('Accuracy')
    plt.xlabel('Epoch')
    plt.legend()

    # Loss plot
    plt.subplot(1, 2, 2)
    plt.plot(history.history['loss'], label='Train Loss')
    plt.plot(history.history['val_loss'], label='Validation Loss')
    plt.title(f'{title} - Loss')
    plt.ylabel('Loss')
    plt.xlabel('Epoch')
    plt.legend()

    plt.tight_layout()
    plt.show()

plot_history(history_1, "Model 1")
plot_history(history_2, "Model 2")
plot_history(history_3, "Model 3")

# Evaluate all models
def evaluate_model(model, model_name):
    print(f"\nEvaluating {model_name}...")
    test_loss, test_acc = model.evaluate(test_generator)
    y_pred = np.round(model.predict(test_generator)).astype(int)

    print("\nClassification Report:")
    print(classification_report(test_generator.labels, y_pred))

    print("Confusion Matrix:")
    print(confusion_matrix(test_generator.labels, y_pred))

    return test_acc

acc1 = evaluate_model(model_1, "Model 1")
acc2 = evaluate_model(model_2, "Model 2")
acc3 = evaluate_model(model_3, "Model 3")


# Model Comparison

In [None]:
# ======================
# 8. Model Comparison
# ======================
results = pd.DataFrame({
    'Model': ['Basic CNN', 'Deeper CNN', 'Advanced CNN'],
    'Test Accuracy': [acc1, acc2, acc3],
    'Parameters': [
        f"{model_1.count_params():,}",
        f"{model_2.count_params():,}",
        f"{model_3.count_params():,}"
    ]
})

print("\nModel Comparison:")
display(results.sort_values('Test Accuracy', ascending=False))

# Visual comparison
plt.figure(figsize=(10, 5))
plt.bar(results['Model'], results['Test Accuracy'], color=['blue', 'green', 'red'])
plt.title('Model Comparison by Test Accuracy')
plt.ylabel('Accuracy')
plt.ylim(0.7, 0.95)
for i, v in enumerate(results['Test Accuracy']):
    plt.text(i, v+0.01, f"{v:.3f}", ha='center')
plt.show()


# Save Models


In [None]:
# ======================
# 9. Save Models
# ======================
model_1.save('pizza_steak_model_1.h5')
model_2.save('pizza_steak_model_2.h5')
model_3.save('pizza_steak_model_3.h5')
print("\nAll models saved successfully!")