---
# Crop, Resize, and Seperate Classes

- Reads original image + bbox
- Crops to bbox
- resizes to 
---

In [1]:
import os
import imutils
import cv2
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from PIL import Image

**Create Preprecessor for images**

In [2]:
def area_bbox(bb):
    w = bb[2] - bb[0]
    h = bb[3] - bb[1]
    return w*h

In [3]:
# Pyimage search DL4CV, "Practioners Bundle", chapter 2
class AspectAwarePreprocessor:
    def __init__(self, width, height, inter=cv2.INTER_AREA):
        # store the target image width, height, and interpolation
        # method used when resizing
        self.width = width
        self.height = height
        self.inter = inter
        
    def pad_to_square(self,image):
        (w,h) = image.shape[:2]
        w_pad = (h-w)// 2 if h > w else 0
        h_pad = (w-h)// 2 if w > h else 0
        imagePad = cv2.copyMakeBorder(image,w_pad,w_pad,h_pad,h_pad,cv2.BORDER_CONSTANT,value=[0,0,0])
        #print(f"Image height={h}\nImage widt={w}")
        #print(f"This is crop width={w_pad}\nThis is crop height={h_pad}")
        return imagePad
    
    def crop_to_square(self,image,bbox):
        w,h = image.shape[:2]
        wBB,hBB = bbox[2]-bbox[0],bbox[3]-bbox[1]
        dW = (hBB-wBB) // 2 if hBB > wBB else 0
        dH = (wBB-hBB) // 2 if wBB > hBB else 0
        cropBB = [max(bbox[0]-dW,0),
                  max(bbox[1]-dH,0),
                  min(bbox[2]+dW,w),
                  min(bbox[3]+dH,h)]
        imgCrop = image[cropBB[1]:cropBB[3],cropBB[0]:cropBB[2],:]
        return imgCrop
        

    def preprocess(self, image,bbox=None,pad_to_square=False,crop_to_square=False):
        if bbox is not None:
            assert type(bbox) is tuple or \
                   type(bbox) is list  and \
                   len(bbox) == 4, \
                    "Invalid object supplied for bbox. Should be 4-tpl"
        if pad_to_square: image = self.pad_to_square(image)
        if crop_to_square: image = self.crop_to_square(image,bbox)
        # grab the dimensions of the image and then initialize
        # the deltas to use when cropping
        (h, w) = image.shape[:2]
        dW = 0
        dH = 0

        # if the width is smaller than the height, then resize
        # along the width (i.e., the smaller dimension) and then
        # update the deltas to crop the height to the desired
        # dimension
        if w < h:
            image = imutils.resize(image, width=self.width,
                inter=self.inter)
            dH = int((image.shape[0] - self.height) / 2.0)

        # otherwise, the height is smaller than the width so
        # resize along the height and then update the deltas
        # crop along the width
        else:
            image = imutils.resize(image, height=self.height,
                inter=self.inter)
            dW = int((image.shape[1] - self.width) / 2.0)

        # now that our images have been resized, we need to
        # re-grab the width and height, followed by performing
        # the crop
        (h, w) = image.shape[:2]
        image = image[dH:h - dH, dW:w - dW]

        # finally, resize the image to the provided spatial
        # dimensions to ensure our output image is always a fixed
        # size
        return cv2.resize(image, (self.width, self.height),
            interpolation=self.inter)

---

Build data structures for training partition

In [4]:
with open('../data/Anno/list_eval_partition.txt','r') as fp:
    allPartition = [ l.rstrip('\n') for l in fp ][2:]
    allPartition = [ l.split() for l in allPartition] 
                     # if l.split()[1] == 'train' or \
                     #    l.split()[1] == 'test']
                      #if l.split()[1] == 'val']

In [5]:
with open('../data/Anno/list_category_cloth.txt','r') as fp:
    lines = [ l.rstrip().replace('1','upper') \
                        .replace('2','lower') \
                        .replace('3','full').split() for l in fp][2:]  
    mainCatagories = { l[0]:l[1] for l in lines}

In [6]:
with open('../data/Anno/list_bbox.txt','r') as fp:
    lines = [l.rstrip('\n').split() for l in fp][2:]
    bbox_dict = { l[0]: [int(l[1]),int(l[2]),int(l[3]),int(l[4]) ] for l in lines}

Create directory structures

In [7]:
sourceDir = '../data/Img/'
targetDir = '../dataCropSQ/Img/'
targetAnnoDir = '../dataCropSQ/Anno/'

In [8]:
# create directories 
#dirSet = { l.split('/')[1] for l in bbox_dict.keys()}
#for d in dirSet:
#    targetPath = os.path.join(targetDir,d)
#    if not os.path.exists(targetPath): os.mkdir(targetPath)
#    #break



In [9]:
# create folders with all files
pp = AspectAwarePreprocessor(200,200)
trainImgList = []
#allPartition = allPartition[:500]

In [10]:
for sImgKey,partition in allPartition:
    sourceImgPath = sImgKey.replace('img/',sourceDir)
    targetImgPath = sImgKey.replace('img/',targetDir)
    tDir = os.path.dirname(targetImgPath)
    if not os.path.exists(tDir): os.mkdir(tDir)
    if area_bbox(bbox_dict[sImgKey]) < 0:
        print(f"negative bbox area for img={sImgKey}, ..skipping")
        continue
    try:
        sImg = plt.imread(sourceImgPath)
        tImg = pp.preprocess(sImg,bbox=bbox_dict[sImgKey],crop_to_square=True)
        plt.imsave(targetImgPath,tImg)
    except:
        print(f"failed on img={sImgKey}, ..skipping")
        continue
    trainImgList.append([sImgKey,partition])  # only save image if was success
    
              
# write the images which were processed
for cl_main in ['upper','lower','full']:
    with open(os.path.join(targetAnnoDir,cl_main + '_train_list.csv'),'w') as trainfp, \
         open(os.path.join(targetAnnoDir,cl_main + '_test_list.csv'),'w') as testfp, \
         open(os.path.join(targetAnnoDir,cl_main + '_val_list.csv'),'w') as valfp:
        trainfp.write('x_col,y_col\n')
        testfp.write('x_col,y_col\n')
        valfp.write('x_col,y_col\n')
        for imgk,part in trainImgList:
            cl_class = imgk.split('/')[1].split('_')[-1]
            if not mainCatagories[cl_class] == cl_main: continue
            arg1 = imgk.replace('img/',targetDir)
            arg2 = cl_class
            #print(f"arg1={arg1}\narg2={arg2}\npart={part}")
            if part == 'train': trainfp.write(arg1 + ',' + arg2 + '\n')
            if part == 'test':   testfp.write(arg1 + ',' + arg2 + '\n')
            if part == 'val':     valfp.write(arg1 + ',' + arg2 + '\n')


failed on img=img/Side-Slit_Striped_Blouse/img_00000046.jpg, ..skipping
failed on img=img/Split_V-Neck_Chiffon_Blouse/img_00000077.jpg, ..skipping
failed on img=img/Tartan_Plaid_Blouse/img_00000025.jpg, ..skipping
failed on img=img/Tuxedo-Inspired_Blazer/img_00000005.jpg, ..skipping
failed on img=img/Tuxedo-Inspired_Blazer/img_00000110.jpg, ..skipping
failed on img=img/Windowpane_Print_Blouse/img_00000021.jpg, ..skipping
failed on img=img/Windowpane_Print_Blouse/img_00000025.jpg, ..skipping
failed on img=img/Windowpane_Print_Blouse/img_00000026.jpg, ..skipping
failed on img=img/Windowpane_Print_Blouse/img_00000041.jpg, ..skipping
failed on img=img/Zip_Pocket_Blazer/img_00000084.jpg, ..skipping
failed on img=img/Cap-Sleeved_Crepe_Blouse/img_00000045.jpg, ..skipping
failed on img=img/Chambray_Popover_Blouse/img_00000018.jpg, ..skipping
failed on img=img/Chambray_Popover_Blouse/img_00000032.jpg, ..skipping
failed on img=img/Chambray_Popover_Blouse/img_00000059.jpg, ..skipping
failed on im

failed on img=img/Today_Workout_Tank/img_00000055.jpg, ..skipping
failed on img=img/Tribal_Pattern_Cardigan/img_00000014.jpg, ..skipping
failed on img=img/Tribal_Pattern_Tee/img_00000072.jpg, ..skipping
failed on img=img/Trouble_Graphic_Sweater/img_00000052.jpg, ..skipping
failed on img=img/Trouble_Graphic_Sweater/img_00000078.jpg, ..skipping
failed on img=img/V-Neck_Pocket_Tee/img_00000024.jpg, ..skipping
failed on img=img/Varsity-Striped_18_Tee/img_00000007.jpg, ..skipping
failed on img=img/Varsity-Striped_Dolphin_Tee/img_00000001.jpg, ..skipping
failed on img=img/Wild_MTV_Tee/img_00000011.jpg, ..skipping
failed on img=img/Wild_MTV_Tee/img_00000035.jpg, ..skipping
failed on img=img/Young_Romance_Sweater/img_00000032.jpg, ..skipping
failed on img=img/Young,_Beautiful,_&_Glamorous_Sweater/img_00000001.jpg, ..skipping
failed on img=img/Young,_Beautiful,_&_Glamorous_Sweater/img_00000002.jpg, ..skipping
failed on img=img/Young,_Beautiful,_&_Glamorous_Sweater/img_00000003.jpg, ..skipping
f

failed on img=img/Cowl_Neck_Tunic_Sweater/img_00000043.jpg, ..skipping
failed on img=img/Cr&egrave;me_De_La_Cr&egrave;me_Muscle_Tee/img_00000012.jpg, ..skipping
failed on img=img/Crew_Neck_Knit_Tee/img_00000027.jpg, ..skipping
failed on img=img/Crochet_Lace-Trimmed_Sweater/img_00000045.jpg, ..skipping
failed on img=img/Crochet-Back_Tank/img_00000051.jpg, ..skipping
failed on img=img/Cropped_Faux_Fur_Jacket/img_00000085.jpg, ..skipping
failed on img=img/Cuffed-Sleeve_Pocket_Tee/img_00000021.jpg, ..skipping
failed on img=img/Daisy_Crochet_Cardigan/img_00000049.jpg, ..skipping
failed on img=img/Deep_V-Neck_Sweater/img_00000067.jpg, ..skipping
failed on img=img/Denim_Hoodie_Jacket/img_00000016.jpg, ..skipping
failed on img=img/Denim_Utility_Jacket/img_00000062.jpg, ..skipping
failed on img=img/Designer_Bags_Graphic_Tee/img_00000039.jpg, ..skipping
failed on img=img/Different_Tee/img_00000030.jpg, ..skipping
failed on img=img/Different_Tee/img_00000034.jpg, ..skipping
failed on img=img/Diff

failed on img=img/Metallic_Tribal-Inspired_Shorts/img_00000003.jpg, ..skipping
failed on img=img/Metallic_Brocade_Mini_Skirt/img_00000042.jpg, ..skipping
failed on img=img/Mesh_Zippered_Mini_Skirt/img_00000008.jpg, ..skipping
failed on img=img/Mesh-Paneled_Colorblock_Workout_Shorts/img_00000025.jpg, ..skipping
failed on img=img/Mesh_Overlay_Shorts/img_00000009.jpg, ..skipping
failed on img=img/Marled_Tuxedo-Stripe_Joggers/img_00000026.jpg, ..skipping
failed on img=img/Marled_Knit_Sweatpants/img_00000031.jpg, ..skipping
failed on img=img/Marled_Knee-Length_Shorts/img_00000027.jpg, ..skipping
failed on img=img/Luxe_Voyager_Shorts/img_00000029.jpg, ..skipping
failed on img=img/Low-Rise_-_Distressed_Skinny_Jeans/img_00000053.jpg, ..skipping
failed on img=img/Linen_Chambray_Shorts/img_00000034.jpg, ..skipping
failed on img=img/Life_in_Progress_Pinstripe_Denim_Shorts/img_00000011.jpg, ..skipping
failed on img=img/Life_in_Progress_Pinstripe_Denim_Shorts/img_00000016.jpg, ..skipping
failed on 

failed on img=img/Distressed_Chino_Joggers/img_00000019.jpg, ..skipping
failed on img=img/Distressed_Cropped_Jeans/img_00000036.jpg, ..skipping
failed on img=img/Distressed_Cropped_Jeans/img_00000042.jpg, ..skipping
failed on img=img/Distressed_Denim_Bermuda_Shorts/img_00000027.jpg, ..skipping
failed on img=img/Distressed_Denim_Bermuda_Shorts/img_00000033.jpg, ..skipping
failed on img=img/Distressed_Denim_Bermuda_Shorts/img_00000052.jpg, ..skipping
failed on img=img/Distressed_Denim_Skirt/img_00000021.jpg, ..skipping
failed on img=img/Distressed_Light_Wash_-_Slim_Fit_Jeans/img_00000005.jpg, ..skipping
failed on img=img/Distressed_Low-Rise_Skinny_Jeans/img_00000001.jpg, ..skipping
failed on img=img/Distressed_Straight-Leg_Jeans/img_00000054.jpg, ..skipping
failed on img=img/Ditsy_Floral_PJ_Shorts/img_00000001.jpg, ..skipping
failed on img=img/Ditsy_Floral-Patterned_Shorts/img_00000037.jpg, ..skipping
failed on img=img/Knife-Pleated_Floral_Skirt/img_00000058.jpg, ..skipping
failed on img

failed on img=img/Palm_Tree_Print_Shorts/img_00000043.jpg, ..skipping
failed on img=img/Paisley_Print_Maxi_Skirt/img_00000026.jpg, ..skipping
failed on img=img/Painted_Floral_Denim_Shorts/img_00000033.jpg, ..skipping
failed on img=img/Paisley_Crochet_Skirt/img_00000015.jpg, ..skipping
failed on img=img/Ombr&eacute;_Contrast_Workout_Shorts/img_00000009.jpg, ..skipping
failed on img=img/Mom_Fit_Jeans/img_00000016.jpg, ..skipping
failed on img=img/Mom_Fit_Jeans/img_00000051.jpg, ..skipping
failed on img=img/Must-Have_Slit_Maxi_Skirt/img_00000042.jpg, ..skipping
failed on img=img/Nautical_Print_Shorts/img_00000041.jpg, ..skipping
failed on img=img/Striped_Southwestern-Patterned_Shorts/img_00000029.jpg, ..skipping
failed on img=img/Mixed_Print_Maxi_Skirt/img_00000065.jpg, ..skipping
failed on img=img/Striped_Midi_Skirt/img_00000063.jpg, ..skipping
failed on img=img/Striped_Pencil_Skirt/img_00000074.jpg, ..skipping
failed on img=img/Striped_Pencil_Skirt/img_00000077.jpg, ..skipping
failed on

failed on img=img/Daisy-Embroidered_Denim_Shorts/img_00000040.jpg, ..skipping
failed on img=img/Dark_Wash_-_Paneled_Skinny_Jeans/img_00000022.jpg, ..skipping
failed on img=img/Dark_Wash_-_Slim_Fit_Jeans/img_00000004.jpg, ..skipping
failed on img=img/Darling_Dots_Denim_Shorts/img_00000029.jpg, ..skipping
failed on img=img/Denim_A-Line_Skirt/img_00000062.jpg, ..skipping
failed on img=img/Denim_Dolphin_Hem_Shorts/img_00000046.jpg, ..skipping
failed on img=img/Destroyed_Cropped_Jeans/img_00000021.jpg, ..skipping
failed on img=img/Tie-Dye_Ikat_Print_Kimono/img_00000008.jpg, ..skipping
failed on img=img/Textured_Floral_Bodycon_Dress/img_00000045.jpg, ..skipping
failed on img=img/Textured_Cutout_Jumpsuit/img_00000028.jpg, ..skipping
failed on img=img/Boxy_Shirt_Dress/img_00000004.jpg, ..skipping
failed on img=img/Buttoned_Denim_Shirt_Dress/img_00000019.jpg, ..skipping
failed on img=img/Abstract_Self-Tie_Back_Dress/img_00000027.jpg, ..skipping
failed on img=img/Abstract_Splatter_Print_Dress/im

In [13]:
print("Total training images=",len(allPartition))
print("Actual images written=",len(trainImgList))
print("Number of failed     =",len(allPartition)-len(trainImgList))

Total training images= 289222
Actual images written= 288474
Number of failed     = 748


In [14]:
!head ../dataCropSQ/Anno/full_train_list.csv

x_col,y_col
../dataCropSQ/Img/Open-Knit_High-Slit_Maxi_Dress/img_00000001.jpg,Dress
../dataCropSQ/Img/Open-Knit_High-Slit_Maxi_Dress/img_00000002.jpg,Dress
../dataCropSQ/Img/Open-Knit_High-Slit_Maxi_Dress/img_00000004.jpg,Dress
../dataCropSQ/Img/Open-Knit_High-Slit_Maxi_Dress/img_00000005.jpg,Dress
../dataCropSQ/Img/Open-Knit_High-Slit_Maxi_Dress/img_00000007.jpg,Dress
../dataCropSQ/Img/Open-Knit_High-Slit_Maxi_Dress/img_00000008.jpg,Dress
../dataCropSQ/Img/Open-Knit_High-Slit_Maxi_Dress/img_00000010.jpg,Dress
../dataCropSQ/Img/Open-Knit_High-Slit_Maxi_Dress/img_00000011.jpg,Dress
../dataCropSQ/Img/Open-Knit_High-Slit_Maxi_Dress/img_00000012.jpg,Dress


In [16]:
!wc -l ../dataCropSQ/Anno/*.csv

   12506 ../dataCropSQ/Anno/full_test_list.csv
   65464 ../dataCropSQ/Anno/full_train_list.csv
   12520 ../dataCropSQ/Anno/full_val_list.csv
    8039 ../dataCropSQ/Anno/lower_test_list.csv
   42466 ../dataCropSQ/Anno/lower_train_list.csv
    8110 ../dataCropSQ/Anno/lower_val_list.csv
   19364 ../dataCropSQ/Anno/upper_test_list.csv
  100735 ../dataCropSQ/Anno/upper_train_list.csv
   19279 ../dataCropSQ/Anno/upper_val_list.csv
  288483 total


---
# Create Unique Name dataset

- for use with train_iterator.flow_from_dataframe
  which requires unique names
---

In [52]:
from shutil import copy2

In [50]:
sourceDir = '../dataCropSQ/Img'
targetDir = '../dataCropUN/Img'
partitionDict = {'/'.join(k.split('/')[-2:]):v for k,v in allPartition}
class_count ={}


In [27]:
for fullpath,_,files in os.walk(sourceDir,topdown=False):
    dirname = fullpath.split('/')[-1]
    cl_name = dirname.split('_')[-1]
    
    if class_count.get(cl_name) is None: class_count[cl_name] = 0
    for f in files:
        DAN YOU LEFT OFF HERE: if this flow_from_datafram did not work with the current defintion
            then you have to rename the files to a unique name and try again
    
    

In [30]:
f"{1:012d}"

'000000000001'

In [32]:
cl = 'Anorak'
f"{cl}_{class_count[cl]:07d}.jpg"

'Anorak_0000160.jpg'

In [45]:
foo=list(os.walk(sourceDir,topdown=False))
foo

[('../dataCropSQ/Img/Young,_Beautiful,_&_Glamorous_Sweater', [], []),
 ('../dataCropSQ/Img/Hooded_Puffer_Jacket',
  [],
  ['img_00000015.jpg',
   'img_00000003.jpg',
   'img_00000051.jpg',
   'img_00000020.jpg',
   'img_00000013.jpg',
   'img_00000039.jpg',
   'img_00000044.jpg',
   'img_00000047.jpg',
   'img_00000050.jpg',
   'img_00000072.jpg',
   'img_00000043.jpg',
   'img_00000064.jpg',
   'img_00000021.jpg',
   'img_00000025.jpg',
   'img_00000073.jpg',
   'img_00000054.jpg',
   'img_00000069.jpg',
   'img_00000066.jpg',
   'img_00000024.jpg',
   'img_00000011.jpg',
   'img_00000018.jpg',
   'img_00000056.jpg',
   'img_00000078.jpg',
   'img_00000063.jpg',
   'img_00000017.jpg',
   'img_00000046.jpg',
   'img_00000071.jpg',
   'img_00000008.jpg',
   'img_00000057.jpg',
   'img_00000053.jpg',
   'img_00000060.jpg',
   'img_00000070.jpg',
   'img_00000031.jpg',
   'img_00000019.jpg',
   'img_00000034.jpg',
   'img_00000012.jpg',
   'img_00000042.jpg',
   'img_00000074.jpg',
   'im