In [None]:
import os
import random
from collections import Counter
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from PIL import Image


: 

In [None]:
# Root of the training images; each label is read from its own sub-folder.
DATA_DIR = Path("../data/train")  # adjust path if needed
classes = ['yes', 'no']

# Number of .jpg files sitting directly inside each label folder
counts = dict(
    (label, len([*(DATA_DIR / label).glob("*.jpg")]))
    for label in classes
)
print("Image count per class:", counts)

# One bar per label; the title goes on the axes seaborn just drew into
sns.barplot(x=[*counts], y=[*counts.values()]).set_title(
    "Class Distribution (Tumor vs No Tumor)"
)
plt.show()


In [None]:
def show_random_images(cls, num=5, seed=None):
    """Display a row of randomly chosen images from one class folder.

    Parameters
    ----------
    cls : str
        Name of the sub-folder of ``DATA_DIR`` to sample from; also used as
        the title of every panel.
    num : int, default 5
        Maximum number of images to show. If the folder holds fewer ``.jpg``
        files than this, all of them are shown instead of failing.
    seed : int or None, default None
        When given, the selection is drawn from a private ``random.Random(seed)``
        so the same images are picked on every run. When ``None`` the global
        ``random`` module state is used, as before.

    Raises
    ------
    ValueError
        If ``num`` is smaller than 1 or the folder contains no ``.jpg`` files.
    """
    if num < 1:
        raise ValueError(f"num must be at least 1, got {num}")

    # Sorted so that a fixed seed selects the same files regardless of the
    # order in which the filesystem happens to list them.
    imgs = sorted((DATA_DIR / cls).glob("*.jpg"))
    if not imgs:
        raise ValueError(f"No .jpg images found in {DATA_DIR / cls}")

    rng = random if seed is None else random.Random(seed)
    count = min(num, len(imgs))
    selected = rng.sample(imgs, count)

    # squeeze=False keeps `axs` two-dimensional even when count == 1;
    # otherwise a single Axes object is returned and cannot be iterated.
    fig, axs = plt.subplots(1, count, figsize=(15, 5), squeeze=False)
    for ax, img_path in zip(axs[0], selected):
        # Close each file as soon as it has been handed to matplotlib.
        with Image.open(img_path) as img:
            ax.imshow(img, cmap='gray')
        ax.set_title(cls)
        ax.axis('off')
    fig.suptitle(f"Sample images from class: {cls}")
    plt.show()

# Preview a random handful of scans from each label folder, tumor class first.
for label in ('yes', 'no'):
    show_random_images(label)


In [None]:
# Record the size reported by PIL for every image in every class folder.
sizes = []
for cls in classes:
    for img_path in (DATA_DIR / cls).glob("*.jpg"):
        with Image.open(img_path) as img:
            sizes.append(img.size)

# Stop here with a readable message instead of a tuple-unpacking error below.
assert sizes, f"No .jpg images found under {DATA_DIR}"

# Most frequent sizes across the whole dataset
size_counts = Counter(sizes)
common_sizes = size_counts.most_common(5)
print("Most common image sizes:", common_sizes)

# Split the size pairs into two parallel sequences and plot one histogram each
widths, heights = zip(*sizes)
fig, (ax_w, ax_h) = plt.subplots(1, 2, figsize=(10, 5))

sns.histplot(widths, bins=20, kde=False, ax=ax_w)
ax_w.set_title("Image Widths")
ax_w.set_xlabel("Width (px)")

sns.histplot(heights, bins=20, kde=False, ax=ax_h)
ax_h.set_title("Image Heights")
ax_h.set_xlabel("Height (px)")

fig.tight_layout()
plt.show()


In [None]:
# Take the first .jpg the 'yes' folder yields, without listing the whole folder.
img_path = next(iter((DATA_DIR / 'yes').glob("*.jpg")), None)
assert img_path is not None, f"No .jpg images found in {DATA_DIR / 'yes'}"

# The context manager closes the underlying file once the image has been
# inspected and handed to matplotlib.
with Image.open(img_path) as img:
    print("Mode:", img.mode)  # L = grayscale, RGB = color
    plt.imshow(img, cmap='gray')

plt.title("Sample MRI")
plt.axis('off')
plt.show()


In [None]:
# Gather the grayscale pixels of every image as one flat array per file.
# Extending a Python list pixel by pixel would create a separate object for
# every pixel in the dataset; keeping NumPy arrays avoids that overhead.
pixel_chunks = []

for cls in classes:
    for img_path in (DATA_DIR / cls).glob("*.jpg"):
        # Close each file as soon as its pixels have been copied out.
        with Image.open(img_path) as img:
            pixel_chunks.append(np.array(img.convert('L')).ravel())  # grayscale

assert pixel_chunks, f"No .jpg images found under {DATA_DIR}"

# One flat array of all pixel values (note: an ndarray, not a list).
gray_values = np.concatenate(pixel_chunks)
del pixel_chunks  # the per-file pieces are no longer needed

sns.histplot(gray_values, bins=50, kde=True)
plt.title("Pixel Intensity Distribution (Grayscale)")
plt.xlabel("Pixel value (0-255)")
plt.show()
