In [100]:
import json
import os
import re
from PIL import Image
import cv2
import numpy as np
from matplotlib import pyplot as plt
from pathlib import Path

In [101]:
# Input locations. Both folders must already exist: the annotation folder
# holds the JSON files and the image folder holds the matching pictures.
input_dir_img = "./dataset/validation/image/"
input_dir_annos = "./dataset/validation/annos/"

# Root folder that receives one sub-folder per category.
output_dir = "./dataset/processed/"

# Garment categories for which output folders are created.
categories = [
    "trousers",
]

In [109]:
# Make directories: the output root plus, for every category, the
# "filtered/" and "edges/" sub-folders that the processing loop writes into.
# (The original referenced the misspelled name `otuput_dir`, which raises
# NameError on a fresh kernel.)
Path(output_dir).mkdir(parents=True, exist_ok=True)
for c in categories:
    par = output_dir+c+'/'
    Path(par).mkdir(parents=True, exist_ok=True)
    for sub_folder in ('filtered/', 'edges/'):
        Path(par+sub_folder).mkdir(parents=True, exist_ok=True)

In [103]:
# Filter if a certain file matches what we need and return it in a dict
def process_json(filename, dress_type = "trousers", seen_pairs = None):
    """Collect the items of one annotation file that match ``dress_type``.

    An item (a top-level key matching ``item<digit>``) is kept when:
      * the file's ``"source"`` is ``"shop"``,
      * every third landmark value is non-zero (presumably the visibility
        flag of each landmark -- confirm against the dataset spec),
      * its ``"category_name"`` equals ``dress_type``, and
      * the file's ``"pair_id"`` has not been recorded yet.
    Recording the pair id on the first match means at most one item per
    pair id is ever returned.

    Args:
        filename: Path (str) of the annotation JSON file.
        dress_type: Category name an item must have to be kept.
        seen_pairs: Dict used to remember already processed pair ids; it is
            updated in place. When omitted, the module-level
            ``pair_id_processed`` dict is used (the original behaviour).

    Returns:
        A non-empty list of dicts with the keys ``json``, ``item``,
        ``category_name``, ``segmentation``, ``bounding_box`` and
        ``filename`` (the file name without directory and extension), or
        ``None`` when nothing in the file matches.
    """
    global pair_id_processed
    if seen_pairs is None:
        seen_pairs = pair_id_processed

    with open(filename) as json_file:
        data = json.load(json_file)

    if data["source"] != "shop":
        return None

    # Stem of the file name; the original fixed-width slice only worked for
    # names that were exactly six characters plus ".json".
    stem = Path(filename).stem

    matched_files = []
    for key in data.keys():
        # Raw string: "\d" in a normal string literal is an invalid escape.
        if not re.match(r"item\d", key):
            continue

        item = data[key]
        # Indices 2, 5, 8, ... hold the per-landmark flag checked for zero.
        visible = all(flag != 0 for flag in item["landmarks"][2::3])

        if data["pair_id"] not in seen_pairs and visible and item["category_name"] == dress_type:
            matched_files.append({"json": data, "item": key, "category_name":dress_type, "segmentation": item["segmentation"], "bounding_box": item["bounding_box"], "filename": stem})
            seen_pairs[data["pair_id"]] = True

    return matched_files if len(matched_files) > 0 else None

In [104]:
# Load the files we are interested in and process them.
# `req_file` collects one list of matches per annotation file;
# `pair_id_processed` is the shared de-duplication dict that process_json
# reads and updates, so it is reset here to keep the cell re-runnable.
req_file = []
pair_id_processed = {}
for subdir, dirs, files in os.walk(input_dir_annos):
    # os.walk yields entries in arbitrary order; because only the first file
    # seen for a pair id is kept, sort to make the selection reproducible.
    dirs.sort()
    for file in sorted(files):
        # Skip stray non-annotation files, which would make json.load fail.
        if not file.lower().endswith(".json"):
            continue
        filepath = os.path.join(subdir, file)
        matches = process_json(filepath)
        if matches is not None:
            req_file.append(matches)
    

In [105]:
def crop_image(path, filename, bounding_box, dress_type, output, show=True):
    """Crop an image to a bounding box and save the result as a JPEG.

    Args:
        path: Folder containing the source image.
        filename: File name of the image inside ``path``; the same name is
            used for the saved crop.
        bounding_box: Sequence whose first four values are
            (left, top, right, bottom).
        dress_type: Name of the sub-folder of ``output`` to save into.
        output: Root output folder.
        show: When true (the default, matching the original behaviour) the
            cropped image is opened in the system image viewer. Pass
            ``False`` when cropping many images in a loop.
    """
    # os.path.join works whether or not the folders carry a trailing
    # separator; plain string concatenation required one.
    source_path = os.path.join(path, filename)
    target_path = os.path.join(output, dress_type, filename)

    # The context manager releases the underlying file handle when done.
    with Image.open(source_path) as img:
        # Bounding box = (left, top, right, bottom)
        cropped = img.crop(tuple(bounding_box[:4]))

        if show:
            # Shows the image in image viewer
            cropped.show()
        cropped.save(target_path, "JPEG", quality=100, optimize=True, progressive=True)


In [106]:
def filter_img_segmentation(img, segmentation):
    """Black out everything in ``img`` that lies outside the given polygons.

    Args:
        img: Image array; a mask of the same shape and dtype is built from it.
        segmentation: Iterable of polygons, each a flat sequence
            ``[x0, y0, x1, y1, ...]``. Polygons may have different lengths.
            Coordinates are truncated to integers.

    Returns:
        The bitwise AND of ``img`` with a mask that is 255 inside the
        polygons and 0 elsewhere.

    Raises:
        ValueError: If a polygon has an odd number of values.
    """
    polygons = []
    for polygon in segmentation:
        coords = np.asarray(polygon, dtype=np.float64).ravel()
        if coords.size == 0:
            continue
        if coords.size % 2 != 0:
            raise ValueError("segmentation polygon must hold x/y pairs, got {} values".format(coords.size))
        # cv2.fillPoly needs int32 points. Polygons are kept as a list of
        # separate arrays because they can differ in length, and stacking
        # ragged sequences into one ndarray fails on current NumPy.
        polygons.append(coords.astype(np.int32).reshape(-1, 2))

    mask = np.zeros_like(img)
    if polygons:
        cv2.fillPoly(mask, polygons, [255, 255, 255])

    return cv2.bitwise_and(img, mask)

In [107]:
# For every matched item: mask the image with its segmentation, run edge
# detection on the masked image, crop both to the bounding box and save them.
for matched_data_arr in req_file:
    for data in matched_data_arr:
        img_path = input_dir_img+data["filename"]+".jpg"
        img = cv2.imread(img_path, cv2.IMREAD_UNCHANGED)
        if img is None:
            # cv2.imread returns None instead of raising when the file is
            # missing or unreadable.
            print("Skipping unreadable image: " + img_path)
            continue

        # Filter the images. The mask is plain white, so the result does not
        # depend on channel order; the BGR->RGB->BGR round trip was dropped.
        img_filtered = filter_img_segmentation(img, data["segmentation"])

        # Edge detection. Canny returns a single-channel image, so it must be
        # expanded with GRAY2BGR (RGB2BGR rejects 1-channel input).
        img_edge = cv2.Canny(img_filtered,100,200)
        img_edge = cv2.cvtColor(img_edge, cv2.COLOR_GRAY2BGR)

        # Crop; bounding box = (left, top, right, bottom)
        left, top, right, bottom = (int(v) for v in data["bounding_box"][:4])
        img_edge = img_edge[top:bottom, left:right]
        img_filtered = img_filtered[top:bottom, left:right]

        # Save
        cv2.imwrite(output_dir+data["category_name"]+"/filtered/"+data["filename"]+".jpg", img_filtered)
        cv2.imwrite(output_dir+data["category_name"]+"/edges/"+data["filename"]+".jpg", img_edge)

In [108]:
# Average bounding-box width/height (and their ratio) over all matched items.
width_tot, height_tot = 0, 0
total = 0
for matched_data_arr in req_file:
    for data in matched_data_arr:
        # (left, top, right, bottom)
        left, top, right, bottom = data["bounding_box"][:4]
        width_tot += abs(left - right)
        height_tot += abs(top - bottom)
        total += 1

if total == 0:
    # Nothing matched: avoid dividing by zero and say so explicitly.
    width_avg, height_avg = 0, 0
    print("No matched items; bounding-box averages are undefined.")
else:
    width_avg = width_tot / total
    height_avg = height_tot / total
    # Guard against degenerate boxes whose heights are all zero.
    ratio = width_avg / height_avg if height_avg else float("nan")
    print("Averages:  Width: {:.2f}, Height: {:.2f}, Ratio (W/H): {:.2f}".format(width_avg, height_avg, ratio))

Averages:  Width: 221.56, Height: 428.07, Ratio (W/H): 0.52
