# FreshFruitClassifier: Exploratory Data Analysis

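This notebook explores the processed food freshness dataset: it counts the fresh and spoiled images in the train, validation, and test splits, plots the class distribution, and displays sample images from each class.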

In [None]:
import os
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image
import seaborn as sns
from pathlib import Path

In [None]:
# Locate the processed dataset root and its train/val/test split folders
data_dir = Path('../data/processed')
train_dir, val_dir, test_dir = (
    data_dir / split_name for split_name in ('train', 'val', 'test')
)

In [None]:
# Count images in each split
def count_images(directory):
    """Count the image files of each freshness class under a split directory.

    Args:
        directory: ``pathlib.Path`` of a dataset split that is expected to
            contain ``fresh`` and ``spoiled`` sub-folders.

    Returns:
        A dict mapping ``'fresh'`` and ``'spoiled'`` to the number of image
        files found directly inside that class folder. Both keys are always
        present; a missing class folder is reported as 0.
    """
    # Compare the lower-cased suffix instead of using case-sensitive glob
    # patterns so that '.jpeg' and upper-case extensions such as '.JPG'
    # are counted the same way on every platform.
    image_suffixes = {'.jpg', '.jpeg', '.png'}
    counts = {}
    for class_name in ['fresh', 'spoiled']:
        class_dir = directory / class_name
        if not class_dir.is_dir():
            # Keep the key so callers can index both classes unconditionally.
            counts[class_name] = 0
            continue
        counts[class_name] = sum(
            1
            for entry in class_dir.iterdir()
            if entry.is_file() and entry.suffix.lower() in image_suffixes
        )
    return counts

# Tally the images per class for every split, in train/val/test order
train_counts, val_counts, test_counts = map(
    count_images, (train_dir, val_dir, test_dir)
)

# Report the per-split totals, one line per split
print(
    "Dataset Statistics:",
    f"Train: {train_counts}",
    f"Val: {val_counts}",
    f"Test: {test_counts}",
    sep="\n",
)

In [None]:
# Visualize dataset distribution as a grouped bar chart (one group per split)
splits = ['Train', 'Val', 'Test']
per_split_counts = [train_counts, val_counts, test_counts]
# Use .get(..., 0) so a class that is absent from a split's counts is drawn
# as an empty bar instead of aborting the cell with a KeyError.
fresh_counts = [counts.get('fresh', 0) for counts in per_split_counts]
spoiled_counts = [counts.get('spoiled', 0) for counts in per_split_counts]

x = np.arange(len(splits))
width = 0.35  # bar width; the two bars of a group sit side by side around x

fig, ax = plt.subplots(figsize=(10, 6))
ax.bar(x - width/2, fresh_counts, width, label='Fresh', color='green', alpha=0.7)
ax.bar(x + width/2, spoiled_counts, width, label='Spoiled', color='red', alpha=0.7)

ax.set_xlabel('Split')
ax.set_ylabel('Number of Images')
ax.set_title('Dataset Distribution')
ax.set_xticks(x)
ax.set_xticklabels(splits)
ax.legend()
ax.grid(True, alpha=0.3, axis='y')

plt.tight_layout()
plt.show()

In [None]:
# Display sample images
def show_samples(directory, class_name, n_samples=5):
    """Display up to ``n_samples`` images of one class in a single row.

    Args:
        directory: ``pathlib.Path`` of a dataset split containing one
            sub-folder per class.
        class_name: Name of the class sub-folder to sample from; also used
            (capitalized) in the figure title.
        n_samples: Maximum number of images to show. The figure always has
            this many columns; unused columns are left blank.

    Returns:
        None. The figure is rendered with ``plt.show()``. If there is nothing
        to display, a short notice is printed instead.
    """
    class_dir = directory / class_name
    image_suffixes = {'.jpg', '.jpeg', '.png'}

    images = []
    if n_samples > 0 and class_dir.is_dir():
        # Sort so the same files are shown on every run; directory listing
        # order is otherwise filesystem-dependent.
        images = sorted(
            path
            for path in class_dir.iterdir()
            if path.is_file() and path.suffix.lower() in image_suffixes
        )[:n_samples]

    if not images:
        print(f"No {class_name} images to display from {class_dir}")
        return

    # squeeze=False keeps `axes` two-dimensional even when n_samples == 1,
    # where plt.subplots would otherwise return a single Axes object.
    fig, axes = plt.subplots(1, n_samples, figsize=(15, 3), squeeze=False)
    fig.suptitle(f'{class_name.capitalize()} Samples', fontsize=16)

    # Hide every axis up front so columns without an image stay blank.
    for ax in axes[0]:
        ax.axis('off')

    for ax, img_path in zip(axes[0], images):
        # Close the file handle as soon as the pixels have been handed over.
        with Image.open(img_path) as img:
            ax.imshow(img)

    plt.tight_layout()
    plt.show()

# Show training samples for each class: fresh first, then spoiled
for class_name in ('fresh', 'spoiled'):
    show_samples(train_dir, class_name)