# Raw vs Cooked Food Recognition - Interactive Notebook

This notebook provides an interactive environment for training and testing the food classification model.

## Table of Contents
1. [Setup and Imports](#setup)
2. [Data Exploration](#exploration)
3. [Model Training](#training)
4. [Model Evaluation](#evaluation)
5. [Predictions](#predictions)
6. [Visualization](#visualization)

## 1. Setup and Imports <a name="setup"></a>

In [None]:
# Import required libraries
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from PIL import Image
import os
import pandas as pd
from sklearn.metrics import confusion_matrix, classification_report

# Seed NumPy's and TensorFlow's global random generators with the same value
_seed_value = 42
np.random.seed(_seed_value)
tf.random.set_seed(_seed_value)

# Plot appearance: Matplotlib style sheet first, then the seaborn palette
plt.style.use('seaborn-v0_8-darkgrid')
sns.set_palette("husl")

# Report the TensorFlow build and the GPUs it can see
_gpu_devices = tf.config.list_physical_devices('GPU')
print("TensorFlow version: " + str(tf.__version__))
print("GPU Available: " + str(_gpu_devices))

## 2. Data Exploration <a name="exploration"></a>

In [None]:
# Dataset split folders; the cells below look for 'raw' and 'cooked'
# sub-folders inside each of them.
train_dir, val_dir = 'data/train', 'data/validation'

def count_images(directory):
    """Count the image files stored directly inside ``directory``.

    A file counts as an image when its extension, compared
    case-insensitively, is one of .jpg, .jpeg, .png, .bmp or .gif.
    Sub-directories are not descended into and are never counted, even
    when their name ends with an image extension.

    Args:
        directory: Path of the folder to inspect.

    Returns:
        The number of matching files, or 0 when ``directory`` does not
        exist or is not a directory.
    """
    # isdir() also rejects a path that exists but is a regular file, for
    # which listing the directory would raise NotADirectoryError.
    if not os.path.isdir(directory):
        return 0

    image_extensions = {'.jpg', '.jpeg', '.png', '.bmp', '.gif'}
    with os.scandir(directory) as entries:
        return sum(
            1
            for entry in entries
            if entry.is_file()
            and os.path.splitext(entry.name.lower())[1] in image_extensions
        )

# Tally the images of each class in both splits (raw first, then cooked)
train_raw, train_cooked = (
    count_images(os.path.join(train_dir, category)) for category in ('raw', 'cooked')
)
val_raw, val_cooked = (
    count_images(os.path.join(val_dir, category)) for category in ('raw', 'cooked')
)

# Text report: one section per split
print("Dataset Statistics:")
print("=" * 50)
for heading, n_raw, n_cooked in (
    ("Training Set:", train_raw, train_cooked),
    ("\nValidation Set:", val_raw, val_cooked),
):
    print(heading)
    print(f"  Raw: {n_raw}")
    print(f"  Cooked: {n_cooked}")
    print(f"  Total: {n_raw + n_cooked}")

# Bar charts: one panel per split, each bar annotated with its count
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 5))

# (axis, panel title, per-class counts, vertical gap between bar top and label)
panels = (
    (ax1, 'Training Set Distribution', (train_raw, train_cooked), 5),
    (ax2, 'Validation Set Distribution', (val_raw, val_cooked), 1),
)
for axis, panel_title, counts, label_gap in panels:
    axis.bar(['Raw', 'Cooked'], list(counts), color=['#95E1D3', '#FF6B6B'])
    axis.set_title(panel_title, fontsize=14, fontweight='bold')
    axis.set_ylabel('Number of Images')
    for position, count in enumerate(counts):
        axis.text(position, count + label_gap, str(count),
                  ha='center', va='bottom', fontweight='bold')

plt.tight_layout()
plt.show()

In [None]:
# Visualize sample images
def show_sample_images(directory, n_samples=5):
    """Display up to ``n_samples`` images of each category in a 2-row grid.

    Row 0 shows files from ``<directory>/raw`` and row 1 files from
    ``<directory>/cooked``. Cells without an image (missing category
    folder, or fewer files than ``n_samples``) are left blank.

    Args:
        directory: Dataset split folder holding the 'raw' and 'cooked'
            sub-folders.
        n_samples: Number of grid columns, i.e. the maximum number of
            images shown per category.
    """
    # Same extensions that count_images() treats as images
    image_suffixes = ('.jpg', '.jpeg', '.png', '.bmp', '.gif')

    # squeeze=False keeps `axes` two-dimensional even when n_samples == 1,
    # so axes[row, col] indexing always works.
    fig, axes = plt.subplots(2, n_samples, figsize=(15, 6), squeeze=False)

    # Blank every cell up front so unused cells do not show ticks/frames
    for ax in axes.flat:
        ax.axis('off')

    for row, category in enumerate(['raw', 'cooked']):
        cat_dir = os.path.join(directory, category)
        if not os.path.isdir(cat_dir):
            continue

        # os.listdir() order is arbitrary; sort for a reproducible selection
        images = sorted(f for f in os.listdir(cat_dir)
                        if f.lower().endswith(image_suffixes))

        for col, filename in enumerate(images[:n_samples]):
            # Context manager closes the file once the pixels are handed over;
            # RGB conversion shows palette/grayscale files in their own
            # colours rather than through a colormap.
            with Image.open(os.path.join(cat_dir, filename)) as img:
                axes[row, col].imshow(img.convert('RGB'))
            if col == 0:
                axes[row, col].set_title(category.capitalize(),
                                         fontsize=12, fontweight='bold', loc='left')

    plt.suptitle('Sample Images from Dataset', fontsize=16, fontweight='bold')
    plt.tight_layout()
    plt.show()

if train_raw > 0 and train_cooked > 0:
    show_sample_images(train_dir, n_samples=5)
else:
    print("No images found in training directory!")

## 3. Model Training <a name="training"></a>

In [None]:
# Hyperparameters used by the data generators, the model and the training loop
IMG_HEIGHT = IMG_WIDTH = 224  # square input resolution, in pixels
BATCH_SIZE = 32               # images per generator batch
EPOCHS = 50                   # number of epochs requested from model.fit

In [None]:
# Input pipelines. Only the training stream is augmented; validation images
# are just rescaled to the [0, 1] range.
_augmentation_options = dict(
    rotation_range=30,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
)
train_datagen = ImageDataGenerator(rescale=1./255, **_augmentation_options)
val_datagen = ImageDataGenerator(rescale=1./255)

# Settings shared by both directory iterators
_flow_options = dict(
    target_size=(IMG_HEIGHT, IMG_WIDTH),
    batch_size=BATCH_SIZE,
    class_mode='binary',
)

# Training batches are shuffled; validation keeps a fixed order so that
# predictions can later be lined up with `val_generator.classes`.
train_generator = train_datagen.flow_from_directory(
    train_dir, shuffle=True, **_flow_options
)
val_generator = val_datagen.flow_from_directory(
    val_dir, shuffle=False, **_flow_options
)

print(f"Class indices: {train_generator.class_indices}")

In [None]:
# Visualize augmented images
def visualize_augmentation(generator):
    """Show up to nine images from the next batch of ``generator``.

    The images are laid out in a 3x3 grid. One batch is consumed from
    ``generator`` by this call.

    Args:
        generator: Iterator whose ``next()`` returns a batch whose first
            element is a sequence of images that ``imshow`` can draw.
    """
    batch = next(generator)
    images = batch[0][:9]

    fig, axes = plt.subplots(3, 3, figsize=(12, 12))
    axes = axes.ravel()

    # Blank every cell first: a batch with fewer than nine images would
    # otherwise leave empty axes with ticks and frames in the grid.
    for ax in axes:
        ax.axis('off')

    for ax, img in zip(axes, images):
        ax.imshow(img)

    plt.suptitle('Data Augmentation Examples', fontsize=16, fontweight='bold')
    plt.tight_layout()
    plt.show()

visualize_augmentation(train_generator)

In [None]:
# Build model: four Conv -> BatchNorm -> MaxPool -> Dropout stages with
# 32/64/128/256 filters, then a 512/256 dense head and a single sigmoid unit
# for the binary decision.
model = keras.Sequential([
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=(IMG_HEIGHT, IMG_WIDTH, 3)),
    layers.BatchNormalization(),
    layers.MaxPooling2D((2, 2)),
    layers.Dropout(0.25),

    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.BatchNormalization(),
    layers.MaxPooling2D((2, 2)),
    layers.Dropout(0.25),

    layers.Conv2D(128, (3, 3), activation='relu'),
    layers.BatchNormalization(),
    layers.MaxPooling2D((2, 2)),
    layers.Dropout(0.25),

    layers.Conv2D(256, (3, 3), activation='relu'),
    layers.BatchNormalization(),
    layers.MaxPooling2D((2, 2)),
    layers.Dropout(0.25),

    layers.Flatten(),
    layers.Dense(512, activation='relu'),
    layers.BatchNormalization(),
    layers.Dropout(0.5),
    layers.Dense(256, activation='relu'),
    layers.BatchNormalization(),
    layers.Dropout(0.5),
    layers.Dense(1, activation='sigmoid')
])

model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=0.0001),
    loss='binary_crossentropy',
    # Name the metric objects explicitly. Unnamed instances receive an
    # auto-generated, uniquified name, so re-running this cell in the same
    # session can produce history keys such as 'precision_1' and break the
    # history.history['precision'] / ['recall'] lookups used when plotting.
    metrics=[
        'accuracy',
        keras.metrics.Precision(name='precision'),
        keras.metrics.Recall(name='recall'),
    ]
)

model.summary()

In [None]:
# Training callbacks

# Write 'notebook_best_model.h5' only when validation accuracy improves
checkpoint = keras.callbacks.ModelCheckpoint(
    filepath='notebook_best_model.h5',
    monitor='val_accuracy',
    mode='max',
    save_best_only=True,
)

# Halve the learning rate after 5 epochs without val_loss improvement,
# never going below 1e-7
reduce_lr = keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss', factor=0.5, patience=5, min_lr=1e-7,
)

# Stop after 10 epochs without val_loss improvement and restore the best weights
early_stopping = keras.callbacks.EarlyStopping(
    monitor='val_loss', patience=10, restore_best_weights=True,
)

In [None]:
# Fit on the augmented training stream, validating after every epoch.
# `history` keeps the per-epoch metric curves that are plotted further down.
history = model.fit(
    x=train_generator,
    epochs=EPOCHS,
    validation_data=val_generator,
    callbacks=[early_stopping, reduce_lr, checkpoint],
    verbose=1,
)

## 4. Model Evaluation <a name="evaluation"></a>

In [None]:
# Training curves: one panel per metric, train vs. validation
fig, axes = plt.subplots(2, 2, figsize=(15, 12))

# (history key, display name) in row-major panel order
curves = (
    ('accuracy', 'Accuracy'),
    ('loss', 'Loss'),
    ('precision', 'Precision'),
    ('recall', 'Recall'),
)

for panel, (metric_key, metric_label) in zip(axes.flat, curves):
    panel.plot(history.history[metric_key], label='Train', linewidth=2)
    panel.plot(history.history['val_' + metric_key], label='Validation', linewidth=2)
    panel.set_title('Model ' + metric_label, fontsize=14, fontweight='bold')
    panel.set_xlabel('Epoch')
    panel.set_ylabel(metric_label)
    panel.legend()
    panel.grid(True)

plt.tight_layout()
plt.savefig('notebook_training_history.png', dpi=300, bbox_inches='tight')
plt.show()

In [None]:
# Evaluate on validation set.
# The unpacking order is the loss followed by the metrics passed to
# model.compile(): accuracy, precision, recall.
val_loss, val_accuracy, val_precision, val_recall = model.evaluate(val_generator)

# F1 is the harmonic mean of precision and recall. When both are 0 the
# formula divides by zero, so report 0.0 instead of raising ZeroDivisionError.
_precision_recall_sum = val_precision + val_recall
if _precision_recall_sum > 0:
    val_f1 = 2 * (val_precision * val_recall) / _precision_recall_sum
else:
    val_f1 = 0.0

print("\nValidation Metrics:")
print("="*50)
print(f"Loss: {val_loss:.4f}")
print(f"Accuracy: {val_accuracy:.4f}")
print(f"Precision: {val_precision:.4f}")
print(f"Recall: {val_recall:.4f}")
print(f"F1-Score: {val_f1:.4f}")

In [None]:
# Generate predictions for confusion matrix.
# val_generator was created with shuffle=False, so after reset() the
# prediction order lines up with val_generator.classes.
val_generator.reset()
predictions = model.predict(val_generator, steps=len(val_generator))
predicted_classes = (predictions > 0.5).astype(int).flatten()
true_classes = val_generator.classes

# Take label ids and display names from the generator's own mapping instead
# of hard-coding them, so the tick labels cannot drift from the class indices.
_index_to_name = {
    index: name.capitalize()
    for name, index in val_generator.class_indices.items()
}
label_ids = sorted(_index_to_name)
label_names = [_index_to_name[index] for index in label_ids]

# Passing `labels` fixes the matrix shape even when one class never occurs
# among the true or predicted labels (otherwise the matrix would shrink and
# the tick labels would no longer match its rows/columns).
cm = confusion_matrix(true_classes, predicted_classes, labels=label_ids)

plt.figure(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=label_names,
            yticklabels=label_names)
plt.title('Confusion Matrix', fontsize=16, fontweight='bold')
plt.ylabel('True Label')
plt.xlabel('Predicted Label')
plt.tight_layout()
plt.savefig('confusion_matrix.png', dpi=300, bbox_inches='tight')
plt.show()

# Classification report (same explicit labels, so target_names always match)
print("\nClassification Report:")
print("="*50)
print(classification_report(true_classes, predicted_classes,
                            labels=label_ids,
                            target_names=label_names))

## 5. Predictions <a name="predictions"></a>

In [None]:
# Persist the trained model to an .h5 file in the working directory
_saved_model_path = 'notebook_food_classifier.h5'
model.save(_saved_model_path)
print(f"✓ Model saved as '{_saved_model_path}'")

In [None]:
# Function to predict single image
def predict_image(image_path, model):
    """Predict whether the food in one image file is raw or cooked.

    The image is converted to RGB, resized to IMG_WIDTH x IMG_HEIGHT and
    scaled to [0, 1] before being passed to ``model`` as a batch of one.

    Args:
        image_path: Path of the image file to classify.
        model: Model whose ``predict`` returns one sigmoid score per image.

    Returns:
        A dict with:
            'label': 'Raw' when the score is above 0.5, otherwise 'Cooked'.
            'confidence': score of the chosen label, in percent.
            'raw_score': model score, in percent.
            'cooked_score': 100 minus ``raw_score``.
            'image': the resized RGB PIL image that was classified.
        Scores are plain Python floats.
    """
    # The context manager closes the file; convert() returns an independent
    # copy, so `img` stays usable after the handle is released.
    with Image.open(image_path) as source:
        img = source.convert('RGB').resize((IMG_WIDTH, IMG_HEIGHT))

    img_array = np.expand_dims(np.array(img) / 255.0, axis=0)

    # Cast the NumPy scalar to float so the returned values are ordinary
    # Python numbers (e.g. serialisable with json).
    # NOTE(review): treats the sigmoid output as the 'raw' score, i.e. assumes
    # class index 1 is 'raw' - confirm against the printed class indices.
    prediction = float(model.predict(img_array, verbose=0)[0][0])
    raw_score = prediction * 100
    cooked_score = (1 - prediction) * 100

    if prediction > 0.5:
        label = 'Raw'
        confidence = raw_score
    else:
        label = 'Cooked'
        confidence = cooked_score

    return {
        'label': label,
        'confidence': confidence,
        'raw_score': raw_score,
        'cooked_score': cooked_score,
        'image': img
    }

# Test prediction (replace with your image path)
# result = predict_image('path/to/your/image.jpg', model)
# print(f"Prediction: {result['label']} ({result['confidence']:.2f}% confidence)")

## 6. Visualization <a name="visualization"></a>

In [None]:
# Visualize predictions on validation set: up to nine images from the first
# validation batch, titled with the true and predicted label.
val_generator.reset()
batch = next(val_generator)
images = batch[0][:9]
true_labels = batch[1][:9]

predictions = model.predict(images, verbose=0)

fig, axes = plt.subplots(3, 3, figsize=(15, 15))
axes = axes.ravel()

# Blank every cell first: when the first batch holds fewer than nine images
# the remaining cells would otherwise show empty axes with ticks.
for ax in axes:
    ax.axis('off')

class_names = ['Cooked', 'Raw']

for i, (img, true_label, pred) in enumerate(zip(images, true_labels, predictions)):
    axes[i].imshow(img)

    pred_class = int(pred[0] > 0.5)
    pred_label = class_names[pred_class]
    true_label_name = class_names[int(true_label)]
    # Confidence is the score of whichever class was predicted
    confidence = pred[0] * 100 if pred_class == 1 else (1 - pred[0]) * 100

    # Green title for a correct prediction, red for a wrong one
    color = 'green' if pred_class == int(true_label) else 'red'

    axes[i].set_title(
        f"True: {true_label_name}\nPred: {pred_label} ({confidence:.1f}%)",
        color=color,
        fontweight='bold'
    )

plt.suptitle('Predictions on Validation Set', fontsize=16, fontweight='bold')
plt.tight_layout()
plt.savefig('validation_predictions.png', dpi=300, bbox_inches='tight')
plt.show()

In [None]:
# Summary statistics

# F1 is the harmonic mean of precision and recall. Guard the division so a
# model with precision == recall == 0 reports 0.0 instead of raising
# ZeroDivisionError.
_summary_pr_sum = val_precision + val_recall
if _summary_pr_sum > 0:
    summary_f1 = 2 * (val_precision * val_recall) / _summary_pr_sum
else:
    summary_f1 = 0.0

# NOTE(review): 'Training Accuracy' is the last epoch's value, while the
# validation numbers come from model.evaluate(); if early stopping restored
# earlier weights the two may describe different epochs - confirm intent.
final_metrics = {
    'Metric': ['Training Accuracy', 'Validation Accuracy', 'Precision', 'Recall', 'F1-Score'],
    'Score': [
        f"{history.history['accuracy'][-1]:.4f}",
        f"{val_accuracy:.4f}",
        f"{val_precision:.4f}",
        f"{val_recall:.4f}",
        f"{summary_f1:.4f}"
    ]
}

df_metrics = pd.DataFrame(final_metrics)
print("\nFinal Model Metrics:")
print("="*50)
print(df_metrics.to_string(index=False))

# Save metrics
df_metrics.to_csv('model_metrics.csv', index=False)
print("\n✓ Metrics saved to 'model_metrics.csv'")

## Conclusion

Your model is now trained and ready to use! You can:

1. Use the saved model (`notebook_food_classifier.h5`) for predictions
2. Load it in the web app or prediction scripts
3. Continue experimenting with different architectures
4. Fine-tune hyperparameters for better performance

**Next Steps:**
- Try transfer learning with pre-trained models (VGG16, ResNet, EfficientNet)
- Experiment with different data augmentation techniques
- Collect more diverse training data
- Test on real-world images