# ALL IMPORTS

In [32]:
from PIL import Image
import os
from collections import Counter
import pandas as pd

# PATHS TO DATASET

In [33]:
# Root folder of the dataset, relative to the notebook's working directory.
PATH = "humanSegmentation"
# Sub-folders with the images and the masks.
PATH_IMAGES = PATH + "/images"
PATH_MASKS = PATH + "/masks"
# CSV file stored at the dataset root.
PATH_CSV = PATH + "/df.csv"

# CHECK EVERY IMAGE SIZE AND PRINT THE DISTINCT SIZES AND THEIR COUNT

In [34]:
def _read_image_size(path):
    """Return the ``size`` tuple of the image at ``path`` and close the file.

    ``Image.open`` is lazy and keeps the underlying file open, so the image is
    opened in a ``with`` block to release the handle as soon as the size has
    been read.
    """
    with Image.open(path) as opened_image:
        return opened_image.size


# File names found in the image and mask folders (order as returned by the OS).
images = os.listdir(PATH_IMAGES)
masks = os.listdir(PATH_MASKS)

# One size tuple per image file, then a histogram of how often each size occurs.
sizes = [_read_image_size(os.path.join(PATH_IMAGES, img)) for img in images]
size_dict = Counter(sizes)

In [35]:
# Preview the first and last three distinct sizes, then report how many there are.
distinct_sizes = list(size_dict)
first_sizes, last_sizes = distinct_sizes[:3], distinct_sizes[-3:]
print(f'{first_sizes} ... {last_sizes}')
print(f'Count of different image sizes: {len(distinct_sizes)}')

[(532, 800), (800, 800), (533, 800)] ... [(1105, 800), (1033, 800), (1397, 800)]
Count of different image sizes: 489


# CHECKING IF EVERY IMAGE HAS ITS OWN MASK

In [36]:
# Load the dataset CSV and preview its first five rows.
dataframe = pd.read_csv(filepath_or_buffer=PATH_CSV)
dataframe.head(n=5)

Unnamed: 0.1,Unnamed: 0,images,masks,collages
0,0,images/ds10_pexels-photo-687782.png,masks/ds10_pexels-photo-687782.png,collage/ds10_pexels-photo-687782.jpg
1,1,images/ds10_pexels-photo-835971.png,masks/ds10_pexels-photo-835971.png,collage/ds10_pexels-photo-835971.jpg
2,2,images/ds10_pexels-photo-850708.png,masks/ds10_pexels-photo-850708.png,collage/ds10_pexels-photo-850708.jpg
3,3,images/ds10_pexels-photo-864937.png,masks/ds10_pexels-photo-864937.png,collage/ds10_pexels-photo-864937.jpg
4,4,images/ds10_pexels-photo-865908.png,masks/ds10_pexels-photo-865908.png,collage/ds10_pexels-photo-865908.jpg


In [37]:
# Pair each image file name with its mask file name, dropping the folder prefix.
# Zipping the two columns avoids building a Series per row as iterrows() does.
image_mask_pairs = [
    (os.path.basename(image_path), os.path.basename(mask_path))
    for image_path, mask_path in zip(dataframe['images'], dataframe['masks'])
]
print(f'First 3 image-mask pairs: {image_mask_pairs[:3]}')

First 3 image-mask pairs: [('ds10_pexels-photo-687782.png', 'ds10_pexels-photo-687782.png'), ('ds10_pexels-photo-835971.png', 'ds10_pexels-photo-835971.png'), ('ds10_pexels-photo-850708.png', 'ds10_pexels-photo-850708.png')]


In [39]:
# Count the pairs whose image and mask file names differ.
mismatches = sum(image_name != mask_name for image_name, mask_name in image_mask_pairs)
print(f'Total mismatches found: {mismatches}')
if mismatches == 0:
    print('All image-mask pairs match correctly.')
else:
    print(f'{mismatches} mismatches found.')

Total mismatches found: 0
All image-mask pairs match correctly.


# CHECK IF EVERY PAIR HAS THE SAME SIZE

In [41]:
def _sizes_differ(image_path, mask_path):
    """Return True when the two image files report different ``size`` tuples.

    Both files are opened in a ``with`` block so their handles are closed right
    after the comparison instead of staying open until garbage collection.
    """
    with Image.open(image_path) as opened_image, Image.open(mask_path) as opened_mask:
        return opened_image.size != opened_mask.size


# Count the image/mask pairs whose sizes disagree.
diff_size_count = sum(
    1
    for img_name, mask_name in image_mask_pairs
    if _sizes_differ(os.path.join(PATH_IMAGES, img_name), os.path.join(PATH_MASKS, mask_name))
)
print(f'Total image-mask pairs with different sizes: {diff_size_count}')
print('All image-mask pairs have the same size.' if diff_size_count == 0 else f'{diff_size_count} pairs have different sizes.')

Total image-mask pairs with different sizes: 0
All image-mask pairs have the same size.
