In [None]:
!pip install scipy numpy matplotlib opencv-python

In [None]:
import scipy.io
import os
import cv2
import numpy as np
import matplotlib.pyplot as plt
from glob import glob
from collections import Counter

# Dataset locations: the folder of JPEG images and the MATLAB file holding their labels.
# img_dir keeps its trailing slash because later code appends a glob pattern to it.
img_dir = "data/images/"
labels_path = "data/imagelabels.mat"

# Read the .mat file and take the first row of its 'labels' entry as the label vector.
mat_contents = scipy.io.loadmat(labels_path)
labels = mat_contents['labels'][0]

num_images = len(labels)
num_classes = len(np.unique(labels))
print(f"Total images: {num_images}")
print(f"Unique classes: {num_classes}")

# Tally how many entries carry each label and report the five most common ones.
label_counts = Counter(labels)
top_five = label_counts.most_common(5)
print("\nTop 5 most frequent labels:", top_five)

# Gather the JPEG files in sorted order. os.path.join keeps the pattern correct
# whether or not img_dir ends with a path separator.
# NOTE(review): the label lookup below assumes the sorted file order matches the
# order of entries in `labels` -- confirm against the dataset's documentation.
img_paths = sorted(glob(os.path.join(img_dir, "*.jpg")))
sample_idx = 5

# Fail with a descriptive message when the directory is empty or too small,
# instead of a bare "list index out of range".
if sample_idx >= len(img_paths):
    raise IndexError(
        f"sample_idx={sample_idx} is out of range: "
        f"found {len(img_paths)} .jpg files in {img_dir!r}"
    )

img = cv2.imread(img_paths[sample_idx])
# cv2.imread reports a missing or corrupt file by returning None rather than
# raising, so check before handing the result to cvtColor.
if img is None:
    raise OSError(f"Could not read image: {img_paths[sample_idx]}")

# OpenCV loads pixels in BGR channel order; matplotlib expects RGB.
img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

plt.imshow(img_rgb)
plt.title(f"Label: {labels[sample_idx]}")
plt.axis('off')
plt.show()

# Record the array shape of up to the first 100 images. Only the shape tuple is
# kept, so each decoded image can be released as soon as the next one is loaded.
shapes = []
for path in img_paths[:100]:
    loaded = cv2.imread(path)
    # cv2.imread returns None for files it cannot decode; skip those instead of
    # crashing on `.shape`.
    if loaded is None:
        print(f"Warning: could not read {path}; skipping")
        continue
    shapes.append(loaded.shape)

# Image arrays are indexed (rows, columns, ...), i.e. height first, then width.
heights = [s[0] for s in shapes]
widths = [s[1] for s in shapes]

if shapes:
    print(f"\nAvg Height: {np.mean(heights):.2f} px")
    print(f"Avg Width: {np.mean(widths):.2f} px")

    plt.hist(heights, bins=10, alpha=0.7, label='Heights')
    plt.hist(widths, bins=10, alpha=0.7, label='Widths')
    plt.legend()
    plt.title("Distribution of Image Sizes (First 100)")
    plt.show()
else:
    # Averaging an empty list would produce nan plus a RuntimeWarning, and the
    # histogram would be blank, so report the situation explicitly.
    print("\nNo readable images found; skipping size statistics.")
