In [None]:
import glob
from pathlib import Path
from tqdm import tqdm
import pandas as pd
import os,sys

In [None]:
import cv2
from PIL import Image
import re
import pickle

In [None]:
def bbox_voc2yolo(bx, w, h):
    """Convert a corner box to a normalised centre box.

    Args:
        bx: indexable holding ``x1, y1, x2, y2`` at positions 0-3.
        w: value the x-centre and box width are divided by (image width).
        h: value the y-centre and box height are divided by (image height).

    Returns:
        ``[x_center / w, y_center / h, box_width / w, box_height / h]``.
    """
    left, top, right, bottom = bx[0], bx[1], bx[2], bx[3]
    box_w = right - left
    box_h = bottom - top
    center_x = left + box_w / 2
    center_y = top + box_h / 2
    return [center_x / w, center_y / h, box_w / w, box_h / h]

def bbox_yolo2voc(bx, w, h):
    """Convert a normalised centre box to integer corner coordinates.

    Args:
        bx: indexable holding ``x_center, y_center, width, height`` at
            positions 0-3; each entry is passed through ``float()``, so
            numeric strings are accepted.
        w: factor applied to the x-centre and width (image width).
        h: factor applied to the y-centre and height (image height).

    Returns:
        ``[x1, y1, x2, y2]`` as ints; ``int()`` truncates toward zero.
    """
    center_x, center_y = float(bx[0]) * w, float(bx[1]) * h
    box_w, box_h = float(bx[2]) * w, float(bx[3]) * h
    half_w, half_h = box_w / 2, box_h / 2
    return [
        int(center_x - half_w),
        int(center_y - half_h),
        int(center_x + half_w),
        int(center_y + half_h),
    ]

def bbox_coco2voc(bbox):
    """Turn ``[x, y, width, height]`` into ``[x, y, x + width, y + height]``.

    The conversion is written into ``bbox`` itself (positions 2 and 3) and the
    same object is returned; pass a copy if the input must be kept.
    """
    left, top = bbox[0], bbox[1]
    bbox[2] = left + bbox[2]
    bbox[3] = top + bbox[3]
    return bbox

In [None]:
def draw_bbox(img_np, box_coco):
    """Draw one labelled rectangle on ``img_np`` and return the image.

    Args:
        img_np: image array handed to ``cv2.rectangle`` / ``cv2.putText``;
            it is drawn on in place.
        box_coco: indexable ``[label, x1, y1, x2, y2]``. Despite the name, the
            four numbers are used as two corner points, not x/y/width/height.
            A non-string label is cast to ``int`` before being rendered.

    Returns:
        The same ``img_np`` object.
    """
    label = box_coco[0]
    if not isinstance(label, str):
        label = int(label)
    x1, y1, x2, y2 = (int(box_coco[i]) for i in range(1, 5))

    cv2.rectangle(img_np, (x1, y1), (x2, y2), (0, 0, 255), thickness=2)
    # The label is anchored 5 px right of and below the first corner.
    cv2.putText(
        img_np,
        text=str(label),
        org=(x1 + 5, y1 + 5),
        fontFace=cv2.FONT_HERSHEY_SIMPLEX,
        fontScale=0.7,
        thickness=2,
        lineType=cv2.LINE_AA,
        color=(0, 255, 0),
    )
    return img_np

In [None]:
def read_bboxes(txtp):
    """Return every line of the text file ``txtp`` as a list of strings.

    Line terminators are kept, exactly as ``readlines()`` would return them.
    """
    with open(txtp, 'r') as handle:
        return list(handle)

def get_bboxes_text(txtp):
    """Parse a whitespace-separated label file into numeric rows.

    Each non-blank line becomes ``[int(first_field), float(f2), float(f3), ...]``.

    Args:
        txtp: path of the text file; it is read through ``read_bboxes``.

    Returns:
        A list with one row per non-blank line, in file order.

    Raises:
        ValueError: if a field cannot be converted to int/float.
    """
    bboxes = []
    for line in read_bboxes(txtp):
        # str.split() collapses runs of whitespace and ignores leading/trailing
        # whitespace, so indented lines parse correctly.
        fields = line.split()
        if not fields:
            # A blank line (e.g. a trailing newline at end of file) used to
            # crash on int(''); it carries no box, so skip it.
            continue
        bboxes.append([int(fields[0])] + [float(tok) for tok in fields[1:]])
    return bboxes

In [None]:
def bbox_str2num(line):
    """Parse one label line into ``[int(first_field), float(f2), float(f3), ...]``.

    The previous body appended the row to, and returned, a name ``bboxes`` that
    is not defined in this function (copy-paste from ``get_bboxes_text``); it
    either raised NameError or silently grew an unrelated global. The parsed
    row itself is returned now.

    Args:
        line: a whitespace-separated string; the first field is the integer
            class id, the remaining fields are floats.

    Returns:
        The parsed row as a list.

    Raises:
        ValueError: if ``line`` is blank or a field is not numeric.
    """
    fields = line.split()
    if not fields:
        raise ValueError('empty bbox line')
    return [int(fields[0])] + [float(tok) for tok in fields[1:]]

In [None]:
def copy_data(plist, mode='copy'):
    """Copy the file ``plist[0]`` to the path ``plist[1]``.

    Args:
        plist: indexable ``(source_path, target_path)``; packed in one argument
            so the function can be mapped over a list of pairs.
        mode: accepted for interface compatibility; currently unused, the file
            is always copied with ``shutil.copyfile``.

    Returns:
        None.
    """
    # The notebook's import cells never import shutil, so the original call
    # raised NameError; import it where it is used.
    import shutil

    source_path, target_path = plist[0], plist[1]
    shutil.copyfile(source_path, target_path)
    
def write_txt(input_list):
    """Append ``input_list[1]`` (text) to the file at ``input_list[0]``.

    The file is opened in ``'a+'`` mode, so it is created when missing and
    existing content is kept. Nothing is returned.
    """
    label_path = input_list[0]
    text = input_list[1]
    with open(label_path, 'a+') as out:
        out.write(text)

In [None]:
# Level-1 (coarse) category name -> class id; -1 is the catch-all '其他'.
label_1_map = {
    '其他': -1,   # other
    '腰带': 0,    # belt
    '帽子': 1,    # hat
    '鞋子': 2,    # shoes
    '包': 3,      # bag
    '上装': 4,    # tops
    '裤子': 5,    # trousers
    '裙子': 6,    # skirt
    '连体装': 7,  # one-piece
}

In [None]:
# Fine-grained class id -> coarse class id.
# NOTE(review): the values look like label_1_map ids (2=shoes, 3=bag, 4=tops,
# 5=trousers, 6=skirt, 7=one-piece) — confirm against the 44-class label list.
# The original literal listed key 43 twice (both times mapped to 2); the
# duplicate is dropped. Keys are kept in their original insertion order.
l2_map_l1 = {
    **dict.fromkeys(range(23, 35), 2),   # 23..34
    35: 3,
    43: 2,
    **dict.fromkeys(range(0, 15), 4),    # 0..14
    15: 6,
    **dict.fromkeys(range(16, 21), 5),   # 16..20
    21: 7,
    22: 7,
    **dict.fromkeys(range(36, 43), 4),   # 36..42
}

In [None]:
# Level-2 (fine) category name -> class id, grouped by level-1 category.
label_2_map = {
    '其他': -1,  # other

    # tops
    '小西装': 0,
    '毛衣/绒衣/针织衫': 1,
    '女衫类': 2,
    '皮衣': 3,
    '夹克/外套': 4,
    '风衣': 5,
    '马甲/背心': 6,
    '吊带': 7,
    '牛仔': 8,
    '羽绒服': 9,
    '大衣': 10,
    '皮草': 11,
    '卫衣': 12,
    '棉衣/羊羔绒': 13,
    '斗篷': 14,

    # skirt
    '裙子': 15,

    # trousers
    '西装裤': 16,
    '牛仔裤': 17,
    '休闲裤': 18,
    '打底裤': 19,
    '运动裤': 20,

    # one-piece
    '连衣裙': 21,
    '连衣裤': 22,

    # bag
    '包': 23,

    # shoes
    '正装女鞋': 24,
    '休闲鞋': 25,
}

In [None]:
# Load a pickled class mapping into `dfashon2_map_l1`; judging by the file
# name it is the DeepFashion2 -> level-1 mapping (TODO confirm contents).
# NOTE(review): pickle.load can execute arbitrary code — only load trusted files.
with open('/home/chenyi/workspace/myPyModule99/class_mapping/dfashion2_map_l1.dict', 'rb') as f:
    dfashon2_map_l1 = pickle.load(f)

In [None]:
# Load the pickled mapping stored in pedia_map_l1.dict.
# It is bound to `pedia_map_l1`, the name later cells read. The original
# assigned it to `dfashon2_map_l1`, silently replacing the mapping loaded from
# dfashion2_map_l1.dict in the previous cell, so DeepFashion2 labels would have
# been remapped with the wrong table.
# NOTE(review): pickle.load can execute arbitrary code — only load trusted files.
with open('/home/chenyi/workspace/myPyModule99/class_mapping/pedia_map_l1.dict', 'rb') as f:
    pedia_map_l1 = pickle.load(f)

In [None]:
with open('/home/chenyi/workspace/myPyModule99/class_mapping/category45.dict', 'rb') as f:
    names = pickle.load(f)

In [None]:
# Lookup tables keyed by entry [1] of each `names` value.
# NOTE(review): from the variable names, v[0] is presumably the Chinese label
# and v[1] the English label, with the dict key being the numeric class id.
eng2chi = {v[1]: v[0] for v in names.values()}
eng2num = {v[1]: k for k, v in names.items()}

### Image Show

In [None]:
# with open('/home/chenyi/workspace/dataset/gallery_detect/csv/bboxes_df.csv', 'rb') as f:
#     bboxes_df = pickle.load(f)

In [None]:
# with open('/home/chenyi/workspace/dataset/gallery_detect/csv/bboxes_df_temp.pickle', 'wb') as f:
#     pickle.dump(bboxes_df,f)

In [None]:
# Fashionpedia rows only. .copy() gives an independent frame, so the column
# assignments below do not write into a slice of dp_bx (SettingWithCopyWarning,
# and under copy-on-write the write may not land where expected).
# NOTE(review): dp_bx is only created further down in this notebook.
df = dp_bx[dp_bx['source']=='fashionpedia'].copy()
# Rescale the box width/height columns by the image size.
df['bx3'] = df['bx3'].values/df['width'].values
df['bx4'] = df['bx4'].values/df['height'].values
# Keep a single class (label8 == 6) for the visual check below.
df = df[df['label8'] == 6]

In [None]:
df.head(2)

In [None]:
# ind = 1000
# # fname = str(df.loc[ind]['fname'])
# # p = root /'images/train'/fname
# p = str(df.loc[ind]['img_path'])
# # bboxes = [df.loc[ind][['label_num', 'xmin','ymin', 'xmax', 'ymax']].values.tolist()]
# # df.loc[ind]['label43']

In [None]:
label_1_map

In [None]:
i = 0
ind = df.index[i]
p = str(df.loc[ind]['img_sp'])
w,h = df.loc[ind][['width', 'height']].values.tolist()
bboxes = [df.loc[ind][['label8', 'bx1','bx2', 'bx3', 'bx4']].values.tolist()]
# df.loc[ind]['label43']

In [None]:
w,h

In [None]:
bboxes

In [None]:
img_np = cv2.imread(str(p))
if img_np is None:
    # cv2.imread signals an unreadable/missing file by returning None; fail
    # here with the path instead of a cryptic cvtColor assertion.
    raise FileNotFoundError(f'cv2.imread could not read image: {p}')
img_np = cv2.cvtColor(img_np, cv2.COLOR_BGR2RGB)
h, w = img_np.shape[:2]
for bx in bboxes:
    # Draw from a converted copy: `bboxes` keeps its normalised values, so
    # re-running this cell no longer converts already-converted boxes.
    draw_bbox(img_np, [bx[0], *bbox_yolo2voc(bx[1:], w, h)])

In [None]:
Image.fromarray(img_np)

### 批量转yolo

In [None]:
def get_yolo_bbox(row):
    """Convert one row's corner box and append it to the global ``bboxes_list``.

    Args:
        row: a pandas Series with ``xmin, ymin, xmax, ymax, width, height``.

    Returns:
        None. The converted box from ``bbox_voc2yolo`` is collected as a side
        effect, so ``bboxes_list`` must exist before the function is called.
    """
    corners = row.loc[['xmin', 'ymin', 'xmax', 'ymax']].values
    bboxes_list.append(bbox_voc2yolo(corners, row['width'], row['height']))

In [None]:
# %%time
# df = hw_df
# bboxes_list = []
# _ = df.apply(lambda row: get_yolo_bbox(row), axis=1)

In [None]:
# col_name = 'xmin'
# ind = 0
# df[col_name] = [x[ind] for x in bboxes_list]

In [None]:
!ls /nas/chenyi/datasets_nas/openfashion

In [None]:
dp23 = pd.read_csv('/nas/chenyi/datasets_nas/openfashion/deepedia2/deepedia_catg23_yolo.csv')

In [None]:
dp23.head()

In [None]:
dp23.describe()

In [None]:
dp23.value_counts('source')

In [None]:
dp23[dp23['source']==dp23.value_counts('source').axes[0][0]].describe()

In [None]:
opf = pd.read_csv('/nas/chenyi/datasets_nas/openfashion/openfashion45_coco.csv')

In [None]:
# /nas/chenyi/datasets_nas/openfashion/openfashion_info.csv

In [None]:
opf.head()

In [None]:
len(opf)

In [None]:
opf.value_counts('source')

In [None]:
opf.value_counts('source').axes[0][0]

In [None]:
opf.describe()

In [None]:
opf[opf['source']==opf.value_counts('source').axes[0][0]].describe()

In [None]:
opf[opf['source']==opf.value_counts('source').axes[0][1]].describe()

In [None]:
opf[opf['source']==opf.value_counts('source').axes[0][2]].describe()

In [None]:
fp_df = opf[opf['source']==opf.value_counts('source').axes[0][2]]

In [None]:
opf.columns

In [None]:
dp23.loc[dp23[dp23['source']==dp23.value_counts('source').axes[0][1]].index, ['x_1', 'y_1', 'x_2', 'y_2', 'path']].head(2).values

In [None]:
opf.loc[opf[opf['source']==opf.value_counts('source').axes[0][2]].index, ['xmin', 'ymin', 'xmax', 'ymax', 'path']].head(2).values

In [None]:
# opf.loc[opf[opf['source']==opf.value_counts('source').axes[0][2]].index, ['xmin', 'ymin', 'xmax', 'ymax', 'path']] = dp23.loc[dp23[dp23['source']==dp23.value_counts('source').axes[0][1]].index, ['x_1', 'y_1', 'x_2', 'y_2', 'path']].values

In [None]:
opf = pd.read_csv('/nas/chenyi/datasets_nas/openfashion/allfashion/openfashion45_data_clear.csv')

In [None]:
# opf.loc[opf[opf['source']==opf.value_counts('source').axes[0][2]].index, ['xmin']] = \
#             opf.loc[opf[opf['source']==opf.value_counts('source').axes[0][2]].index, ['xmin']].values* opf.loc[opf[opf['source']==opf.value_counts('source').axes[0][2]].index, ['width']].values

In [None]:
# Divide 'ymin' by the image height for every row, writing back into opf.
# NOTE(review): not idempotent — each re-run of this cell divides 'ymin' by
# 'height' again; reload opf from the CSV before re-running.
# NOTE(review): only 'ymin' is rescaled here; xmin/xmax/ymax stay as read from
# the CSV — confirm that is intended.
opf.loc[:, ['ymin']] = \
            opf.loc[:, ['ymin']].values/ opf.loc[:, ['height']].values

In [None]:
opf[opf['source']==opf.value_counts('source').axes[0][2]].head(2)

In [None]:
opf.describe()

In [None]:
# opf.to_csv('/nas/chenyi/datasets_nas/openfashion/allfashion/openfashion45_data_clear.csv', index=False)

### 数据分析

In [None]:
!tree -L 2 /home/chenyi/workspace/dataset/data

In [None]:
!tree -L 2 /home/chenyi/workspace/dataset/deepedia/info

In [None]:
!tree -L 2 /home/chenyi/workspace/yolov5/datasets/deepedia_catg8

In [None]:
!tree -L 2 /home/chenyi/workspace/yolov5/datasets/deepedia_catg8

In [None]:
dpedia_bx4 = pd.read_csv('/home/chenyi/workspace/dataset/deepedia/info/deepedia/dpedia_bx4.csv')

In [None]:
dpedia_bx4.head(2)

In [None]:
len(dpedia_bx4)

#### fashionpedia的数据

In [None]:
import pickle

In [None]:
# of_df  = pd.read_csv('/nas/lichangjian/open_fashion/df_OpenFashion.csv')

In [None]:
# Persist the per-box frame to pedia94.csv.
# NOTE(review): dp_bx_df is first built further down in this section, so this
# cell raises NameError on a fresh kernel; it only works after the later cells ran.
dp_bx_df.to_csv('/home/chenyi/workspace/dataset/deepedia/info/deepedia/pedia94.csv', index=False)

In [None]:
with open('/home/chenyi/workspace/dataset/deepedia/info/pickle/pedia_train_dict.pickle', 'rb') as f:
    dp_dict = pickle.load(f)

In [None]:
!ls /home/chenyi/workspace/dataset/deepedia/info

In [None]:
pedia_map_l1.keys()

In [None]:
pedia2yolo_df.tail(2)

In [None]:
# Source class id -> level-1 class id (one line per target id).
# NOTE(review): presumably Fashionpedia category ids mapped onto label_1_map
# ids — confirm. Ids missing here are filled in with -1 by a later cell.
pedia_map_l1 = {
    19: 0,
    14: 1,
    23: 2,
    24: 3,
    0: 4, 1: 4, 2: 4, 3: 4, 4: 4, 5: 4,
    6: 5, 7: 5,
    8: 6,
    9: 4,
    10: 7, 11: 7,
}

In [None]:
# One row per image, built from the parallel sequences stored in dp_dict.
# 'file_name' is exposed as 'fname'; the other keys keep their names.
dp_df = pd.DataFrame({
    'fname': dp_dict['file_name'],
    'bbox': dp_dict['bbox'],
    'width': dp_dict['width'],
    'height': dp_dict['height'],
})

In [None]:
dpedia_df = dp_df

In [None]:
# Flatten the per-image frame into one row per bounding box.
# The collector lists stay module-level because other cells append to lists
# with these names.
img_sp_list, bboxes_list, w_list, h_list = [], [], [], []
mode_list = []      # no 'mode' column in this pass; stays empty
bx_id_list = []
for ind, row in tqdm(dpedia_df.iterrows()):
    bboxes, p, w, h = row['bbox'], row['fname'], row['width'], row['height']
    # bx_id is the position of the box within its image (0-based).
    for bx_id, bx in enumerate(bboxes):
        bboxes_list.append(bx)
        img_sp_list.append(p)
        w_list.append(w)
        h_list.append(h)
        bx_id_list.append(bx_id)

dp_bx_df = pd.DataFrame({
    'fname': img_sp_list,
    'bbox': bboxes_list,
    'id_bbox': bx_id_list,
    'width': w_list,
    'height': h_list,
})

In [None]:
dp_bx_df['label8'] = dp_bx_df['bbox'].apply(lambda x: int(x[0]))

In [None]:
dp_bx_df['bx3'] = dp_bx_df['bbox'].apply(lambda x: x[3])

In [None]:
dp_bx_df['bx4'] = dp_bx_df['bbox'].apply(lambda x: x[4])

In [None]:
# Every label id present in the data but missing from pedia_map_l1 is mapped
# to -1; ids already in the mapping are left untouched.
for ind in dp_bx_df.value_counts('label8').index:
    pedia_map_l1.setdefault(ind, -1)

In [None]:
dp_bx_df.rename(columns={'label8':'label_l2'}, inplace=True)

In [None]:
dp_bx_df['label8'] = dp_bx_df['bbox'].apply(lambda x: pedia_map_l1[int(x[0])])

In [None]:
dp_bx_df.value_counts('label8').sort_index()

In [None]:
pedia_df =  dp_bx_df

In [None]:
pedia_df.tail(2)

In [None]:
pedia_df['img_sp'] = pedia_df['fname'].apply(lambda x: f'/home/chenyi/workspace/dataset/data/data_fashionpedia/train/{x}')

In [None]:
!ls /home/chenyi/workspace/dataset/deepedia/info/deepedia/dpedia_bx4.csv /home/chenyi/workspace/dataset/deepedia/info/deepedia/dpedia_bx4_old.csv

In [None]:
dp_bx_old = pd.read_csv('/home/chenyi/workspace/dataset/deepedia/info/deepedia/dpedia_bx4.csv')

In [None]:
len(dp_bx_old)

In [None]:
df2_df = dp_bx_old[dp_bx_old['source']=='deepfashion2']
fp_df2 = dp_bx_old[dp_bx_old['source']=='fashionpedia']

In [None]:
len(df2_df),len(fp_df2)

In [None]:
fp_df2.value_counts('label8')

In [None]:
dp_bx = pd.concat([df2_df, pedia_df])

In [None]:
dp_bx.tail()

In [None]:
len(df2_df),len(dp_bx)

#### deepfashion2

In [None]:
!tree -L 1 /home/chenyi/workspace/dataset/deepedia/info

In [None]:
fashion2_df.head()

In [None]:
# fashion2_df.to_csv('/home/chenyi/workspace/dataset/deepedia/info/fashion2_img.csv', index=False)

In [None]:
def get_wh(p):
    """Append the size of image ``p`` to the global ``w_list`` / ``h_list``.

    Args:
        p: path (or file object) accepted by ``PIL.Image.open``.

    Returns:
        None. ``w_list`` and ``h_list`` must exist before the call.
    """
    # Use Image.open as a context manager so the file is closed right away;
    # the original left one open file handle per image when looping over a
    # whole dataset.
    with Image.open(p) as img:
        w, h = img.size
    w_list.append(w)
    h_list.append(h)

In [None]:
h_list = []
w_list = []
for p in tqdm(fashion2_df['img_path']):
    get_wh(p)
    # break

In [None]:
# fashion2_df['width'] = w_list

In [None]:
# fashion2_df['height'] = h_list

In [None]:
fashion2_df = pd.concat([f2yolo_train, f2yolo_val], axis=0)

In [None]:
fashion2_df.tail(2)

In [None]:
import re

In [None]:
fashion2_df['train'] = fashion2_df['img_path'].apply(lambda x: re.findall(r'/images/([a-z]{3,5})/', x)[0])

In [None]:
len(fashion2_df)

In [None]:
fashion2_df.head()

#### deepedia BBox

In [None]:
!tree -L 2 /home/chenyi/workspace/dataset/deepedia/info/

In [None]:
dpedia_df.head(2)

In [None]:
dpedia_df = pd.DataFrame()

In [None]:
dpedia_df['img_sp'] = pedia2yolo_df['img_path'].values.tolist() + fashion2_df['img_path'].values.tolist()

In [None]:
dpedia_df['bboxes'] = pedia2yolo_df['bbox'].values.tolist() + fashion2_df['bbox'].values.tolist()

In [None]:
dpedia_df['width'] = pedia2yolo_df['width'].values.tolist() + fashion2_df['width'].values.tolist()

In [None]:
dpedia_df['height'] = pedia2yolo_df['height'].values.tolist() + fashion2_df['height'].values.tolist()

In [None]:
dpedia_df['mode'] = pedia2yolo_df['train'].values.tolist() + fashion2_df['train'].values.tolist()

In [None]:
len(dpedia_df), len(pedia2yolo_df), len(fashion2_df)

In [None]:
dpedia_df.tail()

In [None]:
# Number of boxes per image (length of each 'bboxes' entry).
dpedia_df['len'] = dpedia_df['bboxes'].apply(len)

In [None]:
dpedia_df.to_csv('/home/chenyi/workspace/dataset/deepedia/info/dpedia_img.csv', index=False)

In [None]:
# Persist the per-box frame (without the raw 'bbox' column) to dpedia_bx4.csv.
# NOTE(review): dp_bx4_df is first created further down (dp_bx_df.drop(columns='bbox')),
# so this cell raises NameError on a fresh kernel and otherwise writes whatever
# state the frame had when the cell was last run.
dp_bx4_df.to_csv('/home/chenyi/workspace/dataset/deepedia/info/deepedia/dpedia_bx4.csv', index=False)

In [None]:
# with open('/home/chenyi/workspace/dataset/deepedia/info/dp_bxes_df.pickle', 'wb') as f:
#     pickle.dump(dp_bx_df, f)

In [None]:
# Flatten the merged per-image frame into one row per bounding box, carrying
# the image path, size and train/val mode along with each box.
img_sp_list, bboxes_list, w_list, h_list = [], [], [], []
mode_list, bx_id_list = [], []
for ind, row in tqdm(dpedia_df.iterrows()):
    bboxes, p = row['bboxes'], row['img_sp']
    w, h, mode = row['width'], row['height'], row['mode']
    # bx_id is the position of the box within its image (0-based).
    for bx_id, bx in enumerate(bboxes):
        bboxes_list.append(bx)
        img_sp_list.append(p)
        w_list.append(w)
        h_list.append(h)
        mode_list.append(mode)
        bx_id_list.append(bx_id)

dp_bx_df = pd.DataFrame({
    'img_sp': img_sp_list,
    'bbox': bboxes_list,
    'id_bbox': bx_id_list,
    'width': w_list,
    'height': h_list,
    'mode': mode_list,
})

In [None]:
dp_bx_df.head(2)

In [None]:
dp_bx_df['source'] = dp_bx_df['img_sp'].apply(lambda x:re.findall('/data/(.*?)/', x)[0])

In [None]:
dp_bx_df[dp_bx_df['source']=='fashionpedia'].value_counts('label8').sort_index()

In [None]:
dp_bx_df[dp_bx_df['source']=='deepfashion2'].value_counts('label8').sort_index()

In [None]:
len(dp_bx_df)

In [None]:
# Split the raw 'bbox' entries ([label, v1, v2, v3, v4]) into columns.
dp_bx_df['label8'] = dp_bx_df['bbox'].apply(lambda x: int(x[0]))
# 'bx3' was missing here although the following cells multiply
# dp_bx_df['bx3'] by the image width, which raised KeyError on the freshly
# rebuilt frame. Extract it the same way as 'bx4'.
dp_bx_df['bx3'] = dp_bx_df['bbox'].apply(lambda x: x[3])
dp_bx_df['bx4'] = dp_bx_df['bbox'].apply(lambda x: x[4])

In [None]:
dp_bx4_df = dp_bx_df.drop(columns='bbox')

In [None]:
dp_bx_df['bx3'] = dp_bx_df['bx3'].values*dp_bx_df['width'].values

In [None]:
dp_bx_df['bx4'] = dp_bx_df['bx4'].values*dp_bx_df['height'].values

In [None]:
dp6_bx_df = dp_bx_df[dp_bx_df['label8']>=2]

In [None]:
len(dp6_bx_df)

In [None]:
dp_bx4_df.head()

In [None]:
df2 = dp_bx4_df[dp_bx4_df['source']=='deepfashion2']

In [None]:
dp_df = dp_bx4_df[dp_bx4_df['source']=='fashionpedia']

In [None]:
dp_df.head(2)

In [None]:
label_df2 = df2['label8'].apply(lambda x: dfashon2_map_l1[x])

In [None]:
len(label_df2), len(dp_bx4_df),len(dp_df)

In [None]:
dp_bx4_df.loc[label_df2.index, 'label8'] = label_df2.values

In [None]:
# Replace the whole 'label8' column with the values currently in dp_bx_df.
# NOTE(review): this also overwrites the DeepFashion2 rows that the previous
# cell just remapped through dfashon2_map_l1 — looks like a leftover from
# out-of-order execution; confirm which of the two cells should win.
dp_bx4_df['label8'] = dp_bx_df['label8'].values

In [None]:
dp_df.value_counts('label8')

In [None]:
dp_df.value_counts('label8').sort_index()

In [None]:
dp_bx4_df.value_counts('label8').sort_index()

In [None]:
dp_bx4_df.head()

### 整体数据清洗

In [None]:
dp_bx = pd.read_csv('/home/chenyi/workspace/dataset/deepedia/info/deepedia/dpedia_bx_94.csv')

In [None]:
# dp_bx.to_csv('/home/chenyi/workspace/dataset/deepedia/info/deepedia/dpedia_bx_94.csv', index=False)

In [None]:
# Persist the cleaned per-box frame to dpedia_clr_temp.csv.
# NOTE(review): dp_clr_temp is only assembled at the end of the per-class
# cleaning section below, so this cell raises NameError on a fresh kernel.
dp_clr_temp.to_csv('/home/chenyi/workspace/dataset/deepedia/info/deepedia/dpedia_clr_temp.csv', index=False)

In [None]:
dp_bx.value_counts('label8').sort_index()

In [None]:
dp_bx_temp  = dp_bx[dp_bx['label8']>=0]

In [None]:
len(dp_bx_temp), len(dp_bx_temp.drop_duplicates('img_sp'))

In [None]:
dp_bx_temp.head()

In [None]:
dp_bx_temp.describe()

#### 图片大小

In [None]:
dp_bx_temp = dp_bx_temp[(dp_bx_temp['height']>300) & (dp_bx_temp['width']>300)]

#### BBOX大小

In [None]:
# Boxes that the size filter below throws away: anything that is NOT larger
# than 10 on both sides. The original mask (bx3<10 AND bx4<10) missed boxes
# that are small on only one side (or exactly 10), so the inspected set did not
# match what was actually dropped.
# NOTE(review): bx3/bx4 are presumably box width/height in pixels — confirm.
dp_bx_small = dp_bx_temp[~((dp_bx_temp['bx3']>10) & (dp_bx_temp['bx4']>10))]

In [None]:
len(dp_bx_small)

In [None]:
dp_bx_small.value_counts('label8')

In [None]:
dp_bx_temp = dp_bx_temp[(dp_bx_temp['bx3']>10) & (dp_bx_temp['bx4']>10)]

In [None]:
dp_bx_temp.describe()

#### 位置

In [None]:
# Drop boxes whose bx1/bx2 value lies within `thred` of 0 or 1.
# NOTE(review): bx1/bx2 are presumably the normalised box centre, i.e. this
# removes boxes centred on the image border — confirm.
thred = 0.01
near_edge = (
    (dp_bx_temp['bx1'] < thred)
    | (dp_bx_temp['bx2'] < thred)
    | (dp_bx_temp['bx1'] > 1 - thred)
    | (dp_bx_temp['bx2'] > 1 - thred)
)
dp_bx_temp = dp_bx_temp[~near_edge]

In [None]:
dp_bx_temp.describe()

In [None]:
dp_bx_temp.value_counts('label8').sort_index()

In [None]:
len(dp_bx_temp),len(dp_bx)

### 每个类的清洗

In [None]:
dp_bx_temp.value_counts('label8').sort_index()

In [None]:
# Split the cleaned boxes by label8 so each group can get its own size filter.
# NOTE(review): ids presumably follow label_1_map (2 shoes, 3 bag, 4-6
# tops/trousers/skirt, 7 one-piece) — confirm. Labels 0 and 1 are not selected.
dp_bx_shoe = dp_bx_temp[dp_bx_temp['label8']==2]

dp_bx_bag = dp_bx_temp[dp_bx_temp['label8']==3]

# Labels 4..6 are handled together as one group.
dp_bx_456 = dp_bx_temp[(dp_bx_temp['label8']>=4) & (dp_bx_temp['label8']<=6)]

dp_bx_lt = dp_bx_temp[dp_bx_temp['label8']==7]

In [None]:
dp_bx_bag.describe()

##### 大物体

In [None]:
dp_bx_456.describe()

In [None]:
thred = 32
dp_bx_456_temp = dp_bx_456[((dp_bx_456['bx3']>thred) & (dp_bx_456['bx4']>thred))]

In [None]:
thred = 32
len(dp_bx_456[~((dp_bx_456['bx3']>thred) & (dp_bx_456['bx4']>thred))])

##### 连体装

In [None]:
dp_bx_lt.describe()

In [None]:
thred = 40
dp_bx_lt_temp = dp_bx_lt[((dp_bx_lt['bx3']>thred) & (dp_bx_lt['bx4']>thred))]

In [None]:
len(dp_bx_lt[~((dp_bx_lt['bx3']>thred) & (dp_bx_lt['bx4']>thred))])

##### 包

In [None]:
dp_bx_bag.describe()

In [None]:
thred = 24
dp_bx_bag_temp = dp_bx_bag[~((dp_bx_bag['bx3']<thred) | (dp_bx_bag['bx4']<thred))]

In [None]:
dp_bx_bag_temp.describe()

##### 鞋

In [None]:
dp_bx_shoe.describe()

In [None]:
thred = 24
dp_bx_shoe_temp = dp_bx_shoe[~((dp_bx_shoe['bx3']<thred) | (dp_bx_shoe['bx4']<thred))]

In [None]:
# NOTE(review): verbatim repeat of the split cell at the top of this section.
# It rebuilds the unfiltered per-class frames from dp_bx_temp; the *_temp
# frames produced by the size filters above are not affected.
dp_bx_shoe = dp_bx_temp[dp_bx_temp['label8']==2]

dp_bx_bag = dp_bx_temp[dp_bx_temp['label8']==3]

dp_bx_456 = dp_bx_temp[(dp_bx_temp['label8']>=4) & (dp_bx_temp['label8']<=6)]

dp_bx_lt = dp_bx_temp[dp_bx_temp['label8']==7]

In [None]:
dp_clr_temp = pd.concat([dp_bx_shoe_temp, dp_bx_bag_temp, dp_bx_456_temp, dp_bx_lt_temp], axis=0)

In [None]:
dp_clr_temp.describe()

In [None]:
len(dp_bx_temp), len(dp_clr_temp)

In [None]:
dp_clr_temp.value_counts('label8').sort_index()

##### CHECK Area

In [None]:
%%time
# 计算bbox的面积
df = ofashion43
area_list = []
for ind,row in tqdm(df.iterrows()):
    whWH = row.loc[['xmax', 'ymax']].values
    area  = 1
    for item in whWH:
        area *= item
    area_list.append(area)
    # break

In [None]:
ofashion43['area'] = area_list

##### bbox的个数

In [None]:
# %%time
# 计算bbox的个数
# ofashion43['len'] = 0
# for ind,p_ in tqdm(enumerate(pcounter.index)):
#     num = pcounter.values[ind]
#     ofashion43.loc[ofashion43[ofashion43['path'] == p_].index, 'len'] = num

In [None]:
len(ofashion31_clr.value_counts('label43'))

In [None]:
ofashion31_clr.value_counts('label43').sort_index()

In [None]:
hw_df[hw_df['len']==1].value_counts('label8').sort_index()

In [None]:
dp_df = ofashion31_clr[~(ofashion31_clr['source'] == 'haowei34k')]
dp_len1 = dp_df[dp_df['len']==1]

In [None]:
num2eng[1]

In [None]:
dp_len1.value_counts('label43').sort_index()

In [None]:
len(dp_len1)

In [None]:
ofashion31_clr_temp = ofashion31_clr[~ofashion31_clr['path'].isin(dp_len1['path'])]

In [None]:
len(ofashion31_clr_temp)

In [None]:
ofashion31_clr_temp.to_csv('/nas/chenyi/datasets_nas/openfashion/allfashion/openfashion31_clr_temp.csv', index=False)

##### 类别均衡

In [None]:
len(catg_df)

In [None]:
catg_df.describe()

In [None]:
catg_df.value_counts('label43')

In [None]:
counter_index = counter_hw[(counter_hw.values<7000) & (counter_hw.values>500)].index.tolist()

In [None]:
hw_stay3000_df = hw_df[hw_df['label43'].isin(counter_index)]

In [None]:
hw_stay_df = hw_df[hw_df['path'].isin(hw_stay3000_df['path'])]

In [None]:
len(hw_stay3000_df), len(hw_stay_df), len(hw_stay3000_df.drop_duplicates('path')), len(hw_df.drop_duplicates('path'))

In [None]:
counter_stay = hw_stay_df.value_counts('label43')

In [None]:
counter_hw[(counter_hw.values>5000)], len(counter_hw)

In [None]:
counter_stay[counter_stay.values>3000], len(counter_stay)

In [None]:
len(hw_stay_df[hw_stay_df['len']==1])

In [None]:
hw_stay_df[hw_stay_df['len']==1].value_counts('label43')

In [None]:
names[3]

In [None]:
hw_stay_df

In [None]:
len(counter_hw[(counter_hw.values>300)].axes[0]), len(counter_stay[(counter_stay.values>300)].axes[0])

In [None]:
of_cls_df = ofashion43[ofashion43['label43']>0]

### Display

In [None]:
hw_df = ofashion43[ofashion43['source']=='haowei34k']

In [None]:
hw_df.describe()

In [None]:
df = df[df['source']=='Fashionpedia']

In [None]:
def bbox_coco2voc(bbox):
    """Turn ``[x, y, width, height]`` into ``[x, y, x + width, y + height]``.

    ``x, y`` are used as given (no centre-to-corner shift is applied). The
    result is written into ``bbox`` itself and the same object is returned.

    NOTE(review): this re-defines the function of the same name declared near
    the top of the notebook, with the same behaviour.
    """
    left, top = bbox[0], bbox[1]
    bbox[2] = left + bbox[2]
    bbox[3] = top + bbox[3]
    return bbox

In [None]:
p = df.iloc[300,1]

In [None]:
df.head(3)

In [None]:
x

In [None]:
# All rows that belong to image `p`, then one [label, x_1, y_1, x_2, y_2]
# list per row.
x = df[df['path']==p]
bboxes = [
    row.loc[['label_2', 'x_1','y_1', 'x_2', 'y_2']].values.tolist()
    for ind, row in x.iterrows()
]

In [None]:
# bboxes = []
# x = df[df['path']==p]
# for ind, row in x.iterrows():
#     bbox = row.loc[['label43', 'xmin','ymin', 'xmax', 'ymax']].values.tolist()
#     bbox[1:] = bbox_coco2voc(bbox[1:])
#     bbox[0] = row['label2_eng']
#     bboxes.append(bbox)

In [None]:
# Load the image and convert OpenCV's BGR channel order to RGB for PIL display.
# NOTE(review): cv2.imread returns None for an unreadable path, in which case
# cvtColor fails — check `p` first if this cell errors.
img_np = cv2.imread(str(p))
img_np = cv2.cvtColor(img_np, cv2.COLOR_BGR2RGB)
# h / w are recorded but not used by this cell.
h = img_np.shape[0]
w = img_np.shape[1]
for bx in bboxes:
    # Treats bx[1:] as x, y, width, height and rewrites it to corner form
    # inside `bboxes`; re-running the cell applies the conversion again.
    # NOTE(review): the columns are named x_1, y_1, x_2, y_2 — confirm they
    # really hold width/height before converting.
    bx[1:] = bbox_coco2voc(bx[1:])
    draw_bbox(img_np, bx)

In [None]:
bboxes

In [None]:
Image.fromarray(img_np)

#### Pandas画图

In [None]:
ofashion43_small.head()

In [None]:
df = ofashion43_small
i = 1100

In [None]:
ind = df.index[i]
p = df.loc[ind]['path']
bboxes = [df.loc[ind][['label43', 'xmin','ymin', 'xmax', 'ymax']].values.tolist()]
id_ = df.loc[ind, 'label2_eng']
df.loc[ind, 'source']

In [None]:
img_np = cv2.imread(str(p))
img_np = cv2.cvtColor(img_np, cv2.COLOR_BGR2RGB)
h = img_np.shape[0]
w = img_np.shape[1]
for bx in bboxes:
    bx[0] = id_
    bx[1:] = bbox_coco2voc(bx[1:])
    draw_bbox(img_np, bx)

In [None]:
Image.fromarray(img_np)

### Display HTML

In [None]:

# Count of label-7 boxes that are not larger than `thred` on both sides.
# NOTE(review): same expression as in the 连体装 cleaning section, but `thred`
# is a shared global: run top-to-bottom it holds 24 here (set for the bag/shoe
# filters), not the 40 used for this class there.
len(dp_bx_lt[~((dp_bx_lt['bx3']>thred) & (dp_bx_lt['bx4']>thred))])

In [None]:
dis_df = dp_bx_bag_temp.sort_values('bx3',ascending=False)

In [None]:
dis_df.head(2)

In [None]:
dis_img_df = dis_df.drop_duplicates('img_sp')

In [None]:
len(dis_df), len(dis_img_df)

In [None]:
ind = 0
p = dis_img_df['img_sp'].values[ind]
# All boxes of the selected image. .copy() makes df_temp an independent frame,
# so the in-place column updates below no longer target a slice of dp_bx_temp
# (SettingWithCopyWarning; the write is not guaranteed to land on a slice).
df_temp = dp_bx_temp[dp_bx_temp['img_sp'] == p].copy()
# Rescale bx3/bx4 by the image size before handing them to bbox_yolo2voc.
df_temp.loc[:,'bx3'] = df_temp.loc[:,'bx3'].values/df_temp.loc[:,'width'].values
df_temp.loc[:,'bx4'] = df_temp.loc[:,'bx4'].values/df_temp.loc[:,'height'].values
bboxes = df_temp.loc[:,['label8', 'bx1', 'bx2', 'bx3', 'bx4']].values.tolist()
# Cell output: the source dataset of the first box.
df_temp.loc[:,['source']].values[0]

In [None]:
label_1_map

In [None]:
df_temp[df_temp['img_sp'] == p]

In [None]:
img_np = cv2.imread(str(p))
if img_np is None:
    # cv2.imread signals an unreadable/missing file by returning None; fail
    # here with the path instead of a cryptic cvtColor assertion.
    raise FileNotFoundError(f'cv2.imread could not read image: {p}')
img_np = cv2.cvtColor(img_np, cv2.COLOR_BGR2RGB)
h, w = img_np.shape[:2]
for bx in bboxes:
    # Draw from a converted copy: `bboxes` keeps its normalised values, so
    # re-running this cell no longer converts already-converted boxes.
    draw_bbox(img_np, [bx[0], *bbox_yolo2voc(bx[1:], w, h)])

In [None]:
Image.fromarray(img_np)

#### Recording dirty images to delete (found via display)

In [None]:
# if 0:
#     del_df = pd.read_csv('/nas/chenyi/datasets_nas/openfashion/allfashion/del_df_temp0511.csv')
#     p_del_list = del_df['path'].values.tolist()

In [None]:
# del_df = pd.DataFrame()
# del_df['path'] = ''
# p_del_list = []

In [None]:
# p_del_list.pop(0)

In [None]:
# Add every path in dis_img_df to the delete list.
# NOTE(review): p_del_list must already exist — the cells that initialise it
# above are commented out, so this raises NameError on a fresh kernel.
# NOTE(review): dis_img_df is de-duplicated on 'img_sp' above; confirm the
# frame also has a 'path' column, otherwise this raises KeyError.
p_del_list += dis_img_df['path'].values.tolist()

In [None]:
len(p_del_list)

In [None]:
# Record the currently displayed image `p` as one to delete.
p_del = p
p_del_list.append(p)
# De-duplicate while keeping first-seen order. set() returned the paths in an
# arbitrary order, which made the row order of the saved CSV differ between runs.
p_del_list = list(dict.fromkeys(p_del_list))
len(p_del_list)

In [None]:
# Single-column frame holding the paths collected for deletion.
del_df = pd.DataFrame({'path': p_del_list})

In [None]:
del_df.tail().values, len(del_df)

In [None]:
# Manual on/off switch for persisting the delete list; flip to False to skip the write.
if True:
    del_df.to_csv('/nas/chenyi/datasets_nas/openfashion/allfashion/del_df_temp0511.csv', index=False)