In [8]:
import os
import json
import pandas as pd
import cv2
import matplotlib.pyplot as plt
import numpy as np

# Load data

In [9]:
import random

# DeepFashion2 category id -> human-readable category name.
categories = {
    1: 'short sleeve top',
    2: 'long sleeve top',
    3: 'short sleeve outwear',
    4: 'long sleeve outwear',
    5: 'vest',
    6: 'sling',
    7: 'shorts',
    8: 'trousers',
    9: 'skirt',
    10: 'short sleeve dress',
    11: 'long sleeve dress',
    12: 'vest dress',
    13: 'sling dress'
}

# Seed used for the per-category drawing colours, so that a category keeps
# the same colour after every "Restart Kernel -> Run All".
COLOR_SEED = 42

# A dedicated generator keeps the colours reproducible without touching the
# state of the global `random` module.
_color_rng = random.Random(COLOR_SEED)

# Category name -> 3-tuple of ints in [0, 255], used as the drawing colour.
color_dict = {
    name: tuple(_color_rng.randint(0, 255) for _ in range(3))
    for name in categories.values()
}

In [10]:
def load_json(dataset, img_number):
    """Load the annotation JSON that belongs to one image.

    Args:
        dataset: Name of the sub-folder under ``../DeepFashion2``
            (the notebook passes ``'train'``).
        img_number: Annotation file name without the ``.json`` extension.

    Returns:
        The parsed JSON content.

    Raises:
        FileNotFoundError: If the annotation file does not exist.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    path = f'../DeepFashion2/{dataset}/annos/{img_number}.json'
    # Be explicit about the encoding instead of relying on the platform
    # default, which is not UTF-8 everywhere.
    with open(path, 'r', encoding='utf-8') as f:
        return json.load(f)

In [11]:
def load_image(dataset, img_number):
    """Read one image from disk and return it in RGB channel order.

    Args:
        dataset: Name of the sub-folder under ``../DeepFashion2``
            (the notebook passes ``'train'``).
        img_number: Image file name including its extension.

    Returns:
        The image converted from OpenCV's BGR order to RGB.

    Raises:
        FileNotFoundError: If the image file does not exist.
        ValueError: If the file exists but OpenCV cannot decode it.
    """
    path = f'../DeepFashion2/{dataset}/image/{img_number}'
    if not os.path.isfile(path):
        raise FileNotFoundError(f'Image file not found: {path}')

    img = cv2.imread(path, cv2.IMREAD_COLOR)
    # cv2.imread signals failure by returning None rather than raising, which
    # would otherwise surface as an opaque error inside cvtColor.
    if img is None:
        raise ValueError(f'OpenCV could not decode image: {path}')

    return cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

In [12]:
def draw_bbox(img, data):
    """Draw every annotated item's bounding box and category label.

    Args:
        img: Image array. It is copied first, so the input is not modified.
        data: Annotation dict. The ``'source'`` and ``'pair_id'`` entries are
            skipped; every other entry must provide ``"category_name"`` and
            ``"bounding_box"``.

    Returns:
        A copy of ``img`` with one rectangle and one text label per item,
        coloured via ``color_dict``.
    """
    img = img.copy()
    for key, item in data.items():
        if key in ('source', 'pair_id'):
            continue
        category_name = item["category_name"]
        color = color_dict[category_name]

        # DeepFashion2 stores boxes as corner coordinates [x1, y1, x2, y2]
        # (top-left and bottom-right), not as [x, y, width, height].
        # OpenCV drawing functions need plain ints.
        x1, y1, x2, y2 = (int(v) for v in item["bounding_box"])
        img = cv2.rectangle(img, (x1, y1), (x2, y2), color, 8)

        # Keep the label inside the image when the box touches the top edge.
        label_y = max(y1 - 10, 25)
        img = cv2.putText(img, category_name, (x1, label_y), cv2.FONT_HERSHEY_SIMPLEX, 0.9, color, 2)

    return img

In [25]:
import matplotlib.patches as patches

def draw_segmentation(img, data):
    """Outline every annotated item's segmentation polygons.

    Args:
        img: Image array. It is copied first, so the input is not modified.
        data: Annotation dict. The ``'source'`` and ``'pair_id'`` entries are
            skipped; every other entry must provide ``"category_name"`` and
            ``"segmentation"`` (a list of flat coordinate sequences).

    Returns:
        A copy of ``img`` with each polygon drawn as a closed outline in the
        colour ``color_dict`` assigns to the item's category.
    """
    canvas = img.copy()
    for key, annotation in data.items():
        if key in ('source', 'pair_id'):
            continue
        label = annotation["category_name"]
        polygons = annotation["segmentation"]

        for flat_coords in polygons:
            # Flat [x, y, x, y, ...] list -> (N, 2) int32 point array for OpenCV.
            points = np.array(flat_coords).reshape(-1, 2).astype(np.int32)
            canvas = cv2.polylines(canvas, [points], isClosed=True, color=color_dict[label], thickness=8)

    return canvas

# Data analysis

The DeepFashion2 dataset contains three folders:

- train (191161 images)
- validation
- test

For EDA I will use only the train split.

In [26]:
# Show 10 images with their bounding boxes and segmentation outlines.
dataset = 'train'
# os.listdir returns entries in arbitrary order; sort so that the same ten
# images are shown on every run.
img_numbers = sorted(os.listdir(f'../DeepFashion2/{dataset}/image'))[:10]

fig, axs = plt.subplots(2, 5, figsize=(20, 10))

for ax, img_number in zip(axs.flat, img_numbers):
    # The annotation file shares the image's base name; splitext does not
    # assume a particular extension length.
    data = load_json(dataset, os.path.splitext(img_number)[0])
    img = load_image(dataset, img_number)
    img = draw_bbox(img, data)
    img = draw_segmentation(img, data)

    ax.imshow(img)
    ax.set_title(f'Image {img_number}')

# Hide the axes of every panel, including any left empty when fewer than
# ten images are available.
for ax in axs.flat:
    ax.axis('off')

plt.show()

In [68]:
# Create a df with each image's height, width and number of annotated objects,
# and count how many objects belong to each category (cat_count).
# Note: every image is fully decoded just to read its shape, so this cell
# takes a while on the whole folder.

data_df = []
cat_count = {}
# Sorted so the row order of df is the same on every run.
for img in sorted(os.listdir(f'../DeepFashion2/{dataset}/image')):
    # The annotation file shares the image's base name; splitext does not
    # assume a particular extension length.
    data = load_json(dataset, os.path.splitext(img)[0])
    image = load_image(dataset, img)

    height, width = image.shape[:2]
    num_objects = 0
    for key, item in data.items():
        # 'source' and 'pair_id' are image-level metadata, not objects.
        if key in ('source', 'pair_id'):
            continue
        num_objects += 1

        category_name = item["category_name"]
        cat_count[category_name] = cat_count.get(category_name, 0) + 1

    data_df.append({
        'img': img,
        'height': height,
        'width': width,
        'num_objects': num_objects
    })

df = pd.DataFrame(data_df)
df


In [None]:
# Histogram of how many annotated objects each image contains.

hist_fig, hist_ax = plt.subplots(figsize=(10, 5))
hist_ax.hist(df['num_objects'], bins=20)
hist_ax.set_xlabel('Number of objects')
hist_ax.set_ylabel('Number of images')
hist_ax.set_title('Number of objects in image distribution')
plt.show()

In [None]:
# Plot how many annotated objects fall into each category.
plt.figure(figsize=(10, 5))
plt.bar(list(cat_count.keys()), list(cat_count.values()))
plt.xlabel('Category')
plt.ylabel('Number of objects')
plt.title('Categories distribution')
# Category names are multi-word; rotate them so neighbouring tick labels
# do not overlap.
plt.xticks(rotation=45, ha='right')
plt.tight_layout()
plt.show()
