In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import numpy as np
import math
import torch
from PIL import Image
import cv2
import matplotlib.pyplot as plt
import json
import pandas as pd
import pickle
import glob
import re
from tqdm import tqdm
from pathlib import Path
import sys,os

In [None]:
def bbox_yolo2voc(bx, w, h):
    """Convert a normalised centre/size box into absolute corner coordinates.

    Args:
        bx: sequence whose first four items are ``x_center, y_center, width,
            height``; each item is passed through ``float()``.
        w: factor applied to the horizontal values (image width).
        h: factor applied to the vertical values (image height).

    Returns:
        ``[x1, y1, x2, y2]`` where every value has been truncated by ``int()``.
    """
    center_x = float(bx[0]) * w
    center_y = float(bx[1]) * h
    half_w = float(bx[2]) * w / 2
    half_h = float(bx[3]) * h / 2
    return [
        int(center_x - half_w),
        int(center_y - half_h),
        int(center_x + half_w),
        int(center_y + half_h),
    ]

In [None]:
def bbox_voc2yolo(bx, w, h):
    """Convert corner coordinates into a centre/size box scaled by ``w`` and ``h``.

    Args:
        bx: sequence whose first four items are ``x1, y1, x2, y2``.
        w: divisor for the horizontal values (image width).
        h: divisor for the vertical values (image height).

    Returns:
        ``[x_center / w, y_center / h, box_width / w, box_height / h]``.
    """
    left, top, right, bottom = bx[0], bx[1], bx[2], bx[3]
    box_w = right - left
    box_h = bottom - top
    center_x = left + box_w / 2
    center_y = top + box_h / 2
    return [center_x / w, center_y / h, box_w / w, box_h / h]

In [None]:
def bbox_draw(img_np, box_coco, label='test'):
    """Draw one rectangle and a text label onto ``img_np``.

    Args:
        img_np: image array handed to ``cv2.rectangle`` and ``cv2.putText``;
            the drawing is done on this array.
        box_coco: sequence whose first four items are used as the corner
            coordinates ``x1, y1, x2, y2`` (despite the parameter name).
        label: text drawn 5 px right of and below the top-left corner.
            Defaults to ``'test'``, the value that was previously hard-coded.

    Returns:
        The ``img_np`` object that was passed in.
    """
    # Cast to plain ints so float / numpy scalar coordinates are accepted by
    # the OpenCV drawing calls.
    x1, y1, x2, y2 = (int(value) for value in box_coco[:4])
    cv2.rectangle(img_np, (x1, y1), (x2, y2), (0, 0, 255), thickness=2)
    cv2.putText(
        img_np,
        text=str(label),
        org=(x1 + 5, y1 + 5),
        fontFace=cv2.FONT_HERSHEY_SIMPLEX,
        fontScale=1,
        thickness=2,
        lineType=cv2.LINE_AA,
        color=(0, 255, 0),
    )
    return img_np

In [None]:
def bbox_coco2voc(bbox):
    """Turn ``[x, y, width, height]`` into ``[x1, y1, x2, y2]`` in place.

    The list that is passed in is modified (items 2 and 3 are overwritten)
    and the very same object is returned.
    """
    left, top = bbox[0], bbox[1]
    bbox[2] = left + bbox[2]
    bbox[3] = top + bbox[3]
    return bbox

In [None]:
def draw_bbox(img_np, box_coco):
    """Draw a labelled rectangle for a ``[id, x1, y1, x2, y2]`` record.

    The first five items of ``box_coco`` are converted with ``int()``; the
    first is used as the label text and the other four as corner coordinates.
    The drawing is done on ``img_np``, which is also the return value.
    """
    label, left, top, right, bottom = (int(box_coco[i]) for i in range(5))
    cv2.rectangle(img_np, (left, top), (right, bottom), (0, 0, 255), thickness=2)
    cv2.putText(
        img_np,
        text=str(label),
        org=(left + 5, top + 5),
        fontFace=cv2.FONT_HERSHEY_SIMPLEX,
        fontScale=1,
        thickness=2,
        lineType=cv2.LINE_AA,
        color=(0, 255, 0),
    )
    return img_np

In [None]:
def bboxes2string(bboxes):
    """Serialise box records to text, one space-separated record per line.

    For every record the first item is written as ``str(int(item))`` and each
    remaining item as ``str(item)``; every record is terminated by a newline.
    An empty input gives an empty string.
    """
    rows = []
    for bx in bboxes:
        fields = [str(int(bx[0]))]
        fields.extend(str(item) for item in bx[1:])
        rows.append(' '.join(fields) + '\n')
    return ''.join(rows)

def write_bboxes(p, bboxes_text):
    """Write ``bboxes_text`` to the file at ``p``, replacing existing content."""
    with open(p, mode='w') as out:
        out.write(bboxes_text)

In [None]:
def read_bboxes(txtp):
    """Return the lines of the text file at ``txtp`` (line endings kept)."""
    with open(txtp, 'r') as f:
        return f.readlines()

def get_bboxes_text(txtp):
    """Parse a label file into a list of ``[class_id, value, value, ...]`` rows.

    Every non-blank line is split on whitespace; the first field is converted
    with ``int()`` and every following field with ``float()``.

    Args:
        txtp: path of the label text file.

    Returns:
        A list with one list per non-blank line of the file.

    Raises:
        ValueError: if a field cannot be converted to ``int`` / ``float``.
    """
    bboxes = []
    for line in read_bboxes(txtp):
        # str.split() without arguments collapses runs of whitespace and
        # ignores leading/trailing whitespace, so indented lines parse too.
        fields = line.split()
        if not fields:
            # Blank line (e.g. a trailing empty line): nothing to parse.
            continue
        bboxes.append([int(fields[0])] + [float(value) for value in fields[1:]])
    return bboxes

### Haowei34k

In [None]:
root = Path('/nas/chenyi/dataset_apparel_nas/haowei34k/')

In [None]:
!tree -L 1 $root

In [None]:
haow_df = pd.read_csv(str(root /'haowei34k_lcj_ok_data.csv'))

In [None]:
# Keep only rows with confidence above 0.7. Take an explicit copy so that
# adding columns to df07 later does not trigger pandas'
# SettingWithCopyWarning or depend on haow_df.
df07 = haow_df[haow_df['confidence']>0.7].copy()

In [None]:
haow_df.sort_values(by='confidence').head(20)

In [None]:
ROOT = root
id_ = '20141022225156504'
row = haow_df[haow_df['id'].astype(str)==id_]

In [None]:
row

In [None]:
txtp = row['label_path'].values[0]
txtp = txtp.replace('/labels/', '/labels_2/')
imgp = row['img_path'].values[0]

In [None]:
get_bboxes_text(txtp)

In [None]:
img_np = cv2.imread(str(imgp))
img_np = cv2.cvtColor(img_np, cv2.COLOR_BGR2RGB)

In [None]:
# Parse the label file and overlay every box on the loaded image.
bboxes = get_bboxes_text(txtp)
h,w,c = img_np.shape
for bx in bboxes:
    # bx[0] is the class id; the remaining items are replaced by the integer
    # corner coordinates that bbox_yolo2voc returns, then drawn onto img_np.
    bx[1:] = bbox_yolo2voc(bx[1:], w, h)
    draw_bbox(img_np, bx)

In [None]:
Image.fromarray(img_np)

### Data Transfer

In [None]:
sys.path.append('/home/chenyi/workspace/myPyModule/')

In [None]:
from data.transferData import copy_data_multithread, run_task_multithread

In [None]:
# !ls -l /nas/chenyi/dataset_apparel_nas/haowei_conf07/labels

In [None]:
!tree -L 1 /nas/chenyi/dataset_apparel_nas/haowei_conf07/labels

In [None]:
# !mkdir /nas/chenyi/dataset_apparel_nas/haowei_conf07_multi/labels/

In [None]:
source_l = ['/nas/lichangjian/labels_2/20141215090521289.txt']
target_l = ['/nas/chenyi/dataset_apparel_nas/haowei_conf07/labels/20141215090521289.txt']
copy_data_multithread(source_l, target_l, thread_num=2)

In [55]:
import shutil
def copy_data(plist, mode='copy'):
    """Copy the file at ``plist[0]`` to ``plist[1]`` via ``shutil.copyfile``.

    ``mode`` is accepted but is not used by the body. Returns ``None``.
    """
    source_path, target_path = plist[0], plist[1]
    shutil.copyfile(source_path, target_path)
    
def write_txt(input_list):
    """Append ``input_list[1]`` to the file named by ``input_list[0]``.

    The file is opened in ``'a+'`` mode, so repeated calls for the same path
    accumulate text instead of replacing it.
    """
    label_path = input_list[0]
    text = input_list[1]
    with open(label_path, mode='a+') as out:
        out.write(text)

In [None]:
# NOTE(review): this cell used to run
#     run_task_multithread(input_list, input_list, thread_num=10)
# which passes the work list where the task function belongs and executes
# before `input_list` is built further down. The working call,
# run_task_multithread(input_list, write_txt, thread_num=10), is in a later
# cell, so the broken call is disabled here.

In [None]:
df07.head()

In [None]:
save_dir = '/nas/chenyi/dataset_apparel_nas/haowei_conf07_multi/labels/'
df07['label_tp'] = df07['label_path'].apply(lambda x: x.replace(
            '/nas/lichangjian/labels/',
            save_dir))

In [51]:
%%time
input_list = df07.apply(lambda x: 
                        [x['label_tp'], 
                         '{0:.0f} {1:.6f} {2:.6f} {3:.6f} {4:.6f}\n'.format(*x.loc[['label_2', 'x_c', 'y_c', 'w', 'h']].values)], 
                        axis=1)

In [52]:
len(input_list)

60244

In [53]:
input_list[0]

['/nas/chenyi/dataset_apparel_nas/haowei_conf07_multi/labels/20141215090521289.txt',
 '0 0.454839 0.320250 0.412903 0.362912\n']

In [56]:
%%time
run_task_multithread(input_list, write_txt, thread_num=10)

CPU times: user 19 s, sys: 11 s, total: 30 s
Wall time: 5min 1s


In [None]:
# save_dir = '/nas/chenyi/dataset_apparel_nas/haowei_conf07/labels/'
# for ind,row in tqdm(df07.iterrows()):
#     id_ = row['id']
#     label_path = f'{save_dir}{id_}.txt'
#     row_num = row.loc[['label_2', 'x_c', 'y_c', 'w', 'h']].values
#     write_txt(label_path, row_num)

In [None]:
# %%time
# def write_txt(label_path, row):
#     with open(label_path, 'a+') as f:
#         f.write('{0:.0f} {1:.6f} {2:.6f} {3:.6f} {4:.6f}\n'.format(*row))
        

# save_dir = '/nas/chenyi/dataset_apparel_nas/haowei_conf07/labels/'
# for ind,row in tqdm(df07.iterrows()):
#     id_ = row['id']
#     label_path = f'{save_dir}{id_}.txt'
#     row_num = row.loc[['label_2', 'x_c', 'y_c', 'w', 'h']].values
#     write_txt(label_path, row_num)

### OpenFashion

In [None]:
root = Path('/nas/lichangjian/open_fashion/')

In [None]:
!ls $root

In [None]:
!tree -d L 0 $root

In [None]:
ofashion_df = pd.read_csv(str(root /'df_OpenFashion.csv'))

In [None]:
ofashion_df.head()

In [None]:
fname = 'cat_pedia2ours.pickle'
# `root` is a pathlib.Path; its string form has no trailing slash, so
# f'{root}info/...' would resolve to '<root>info/...'. Join the parts instead.
with open(Path(root) / 'info' / fname, 'rb') as f:
    id2name = pickle.load(f)

In [None]:
pedia_data_p = '/home/chenyi/workspace/dataset/data_fashionpedia/pedia_train_info.pickle'

In [None]:
with open(pedia_data_p, 'rb') as f:
    pedia_info_pd = pickle.load(f)

In [None]:
id2name

In [None]:
pedia_info_pd.head()

In [None]:
id2name[9] = 8

#### fashion2的label问题

In [None]:
# str(Path) drops the trailing slash, so the separator has to be written out;
# otherwise the result would be '<root>labels/train/'.
label_p = f'{root}/labels/train/'

In [None]:
# Join with pathlib instead of f'{root}info/...': the string form of a Path
# has no trailing slash, which would yield '<root>info/...'.
with open(Path(root) / 'info' / 'fashion2info_train_pd.pickle', 'rb') as f:
    fashion2info_pd = pickle.load(f)

In [None]:
fashion2info_pd.head()

In [None]:
fashion2info_pd['img_path'][0]

In [None]:
ind = 1001
img_np = cv2.imread(fashion2info_pd['img_path'][ind])
h,w,c = img_np.shape
bbox = fashion2info_pd['bbox'][ind][0][1:]

In [None]:
# bbox_yolo2coco is not defined anywhere in this notebook; bbox_draw reads its
# box as corner coordinates (x1, y1, x2, y2), which is what bbox_yolo2voc returns.
box_coco = bbox_yolo2voc(bbox,w,h)

In [None]:
img_np = bbox_draw(img_np, box_coco)
Image.fromarray(img_np)

In [None]:
fashion2_ours = \
            {0: 4, 1: 4,  2:4, 3:4, 4:4, 5:4,  6:5,  7:5,  8:6,  9:7, 10:7,  11:7,  12:7}

In [None]:
# Replace every box's class id (first item) by its fashion2_ours mapping,
# overwriting the item inside the box list itself.
# NOTE(review): presumably these are the same list objects stored in the
# frame, in which case running this cell twice remaps the already-remapped
# ids again (e.g. 6 -> 5 -> 4) — confirm before re-running.
for ind,row in fashion2info_pd.iterrows():
    bboxes = row['bbox']

    for bx in bboxes:
        id_ = int(bx[0])
        id_new = fashion2_ours[id_]
        bx[0] = int(id_new)

In [None]:
def bboxes2string(bboxes):
    """Serialise box records to text, one space-separated record per line.

    Re-definition of the helper declared earlier in this notebook, with the
    same behaviour: the first item of each record is written as
    ``str(int(item))``, the rest as ``str(item)``, and each record ends with a
    newline.
    """
    return ''.join(
        ' '.join([str(int(bx[0])), *(str(item) for item in bx[1:])]) + '\n'
        for bx in bboxes
    )

In [None]:
def write_bboxes(p, bboxes_text):
    """Write ``bboxes_text`` to the file at ``p``, replacing existing content.

    Re-definition of the helper declared earlier in this notebook.
    """
    with open(p, mode='w') as out:
        out.write(bboxes_text)

In [None]:
for ind,row in fashion2info_pd.iterrows():
    bboxes = row['bbox']
    label_p = row['label_path']
    text = bboxes2string(bboxes)
    write_bboxes(label_p, text)

    # break

In [None]:
with open(label_p, 'r') as f:
    print(f.read())

In [None]:
ind = 1001
row_pd = fashion2info_pd.loc[ind, :]

In [None]:
# Load the sample image and overlay its first box.
img_np = cv2.imread(row_pd['img_path'])
h,w,c = img_np.shape
bbox = row_pd['bbox'][0][1:]
# bbox_yolo2coco is not defined anywhere in this notebook; bbox_draw reads its
# box as corner coordinates (x1, y1, x2, y2), which is what bbox_yolo2voc returns.
box_coco = bbox_yolo2voc(bbox,w,h)
img_np = bbox_draw(img_np, box_coco)
Image.fromarray(img_np)

#### pedia的label问题

In [None]:
with open('/home/chenyi/workspace/dataset/deepedia/info/pedia_train_dict.pickle', 'rb') as f:
    pedia_info = pickle.load(f)

In [None]:
with open('/home/chenyi/workspace/dataset/deepedia/info/pedia_info_pd.pickle', 'rb') as f:
    pedia_info_pd = pickle.load(f)

In [None]:
train_root = '/home/chenyi/workspace/dataset/data_fashionpedia/train/'

In [None]:
pedia_info_pd['img_path'] = train_root + pedia_info_pd['file_name'].values

In [None]:
pedia_info_pd.head()

In [None]:
def bbox_draw(img_np, box_coco, label='test'):
    """Draw one rectangle and a text label onto ``img_np``.

    Re-definition of the helper declared earlier in this notebook.

    Args:
        img_np: image array handed to ``cv2.rectangle`` and ``cv2.putText``;
            the drawing is done on this array.
        box_coco: sequence whose first four items are used as the corner
            coordinates ``x1, y1, x2, y2`` (despite the parameter name).
        label: text drawn 5 px right of and below the top-left corner.
            Defaults to ``'test'``, the value that was previously hard-coded.

    Returns:
        The ``img_np`` object that was passed in.
    """
    # Cast to plain ints so float / numpy scalar coordinates are accepted by
    # the OpenCV drawing calls.
    x1, y1, x2, y2 = (int(value) for value in box_coco[:4])
    cv2.rectangle(img_np, (x1, y1), (x2, y2), (0, 0, 255), thickness=2)
    cv2.putText(
        img_np,
        text=str(label),
        org=(x1 + 5, y1 + 5),
        fontFace=cv2.FONT_HERSHEY_SIMPLEX,
        fontScale=1,
        thickness=2,
        lineType=cv2.LINE_AA,
        color=(0, 255, 0),
    )
    return img_np

In [None]:
ind = 20113
row_pd = pedia_info_pd.iloc[ind,:]

In [None]:
# Load the sample image and overlay the annotation at index 3.
img_np = cv2.imread(row_pd['img_path'])
h, w, c = img_np.shape
row = row_pd['bbox'][3]
id_ = row[0]
box_coco = row[1:]
box_item = [int(item) for item in box_coco]
# bbox_coco2voc edits the list it is given and returns it, so box_item itself
# holds corner coordinates after this call.
bbox_item = bbox_coco2voc(box_item)
img_np = bbox_draw(img_np, box_item)
Image.fromarray(img_np)

### 写操作

In [None]:
with open(f'/home/chenyi/workspace/dataset/deepedia/info/pedia_info_pd.pickle', 'rb') as f:
    pedia_info_pd = pickle.load(f)

In [None]:
pedia_info_pd.head()

In [None]:
# Rewrite every box of every row: class id as int, then the four coordinates
# from (x, y, width, height) to corners and finally to the centre/size form
# divided by the row's image width and height.
# NOTE(review): the box lists are overwritten item by item; presumably they
# are the objects stored in the frame, so re-running this cell would convert
# already-converted values again — confirm before re-running.
for ind, row in pedia_info_pd.iterrows():
    bboxes = row['bbox']
    w,h = row['width'], row['height']
    # bx_new is assigned but not used below.
    bx_new = []
    for bx in bboxes:
        bx[0] = int(bx[0])
        # (x, y, width, height) -> (x1, y1, x2, y2)
        bx[1:] = bbox_coco2voc(bx[1:])
        # (x1, y1, x2, y2) -> centre/size divided by w and h
        bx[1:] = bbox_voc2yolo(bx[1:], w, h)


In [None]:
train_s_root = '/home/chenyi/workspace/dataset/data_fashionpedia/val/'

In [None]:
# Despite its name, this frame holds the rows whose 'train' column is 'val'.
# Take an explicit copy so that the columns assigned to it in the following
# cells do not trigger pandas' SettingWithCopyWarning.
pedia_train = pedia_info_pd[pedia_info_pd['train']=='val'].copy()

In [None]:
train_t_root = '/home/chenyi/workspace/dataset/deepedia/images/val/'
label_t_root = '/home/chenyi/workspace/dataset/deepedia/labels/val/'

In [None]:
pedia_train.head()

In [None]:
pedia_train['img_path'] = train_s_root +  pedia_train['file_name'].values

In [None]:
pedia_train['img_target'] = train_t_root +  pedia_train['file_name'].values

In [None]:
pedia_train['label_target'] = label_t_root +  pedia_train['file_name'].values

In [None]:
pedia_train['label_target'] = pedia_train['label_target'].apply(
    lambda x:x.replace('.jpg', '.txt')
)

In [None]:
pedia_train.head()

In [None]:
pedia_train['label_target'].values[0]

In [None]:
def write_bboxes_text(bboxes, p):
    """Write box records to ``p``, one ``id v1 v2 v3 v4`` line per record.

    The first item of each record is written as ``str(int(item))`` and items
    1-4 as ``str(item)``; fields are separated by a single space and each
    line ends with a newline. Existing file content is replaced.

    Args:
        bboxes: iterable of records with at least five items each.
        p: path of the file to write.
    """
    # Build each line with join rather than a backslash-continued f-string:
    # the continuation lines' indentation would otherwise end up inside the
    # written text as long runs of spaces.
    lines = [
        ' '.join([str(int(bx[0]))] + [str(bx[i]) for i in range(1, 5)]) + '\n'
        for bx in bboxes
    ]
    with open(p, 'w') as f:
        f.write(''.join(lines))

In [None]:
import shutil

In [None]:
for ind,row in pedia_train.iterrows():
    bboxes = row['bbox']
    img_s_path = row['img_path']
    img_t_path = row['img_target']
    label_path = row['label_target']
    write_bboxes_text(bboxes, label_path)
    # shutil.copyfile(img_s_path, img_t_path)
    # break

In [None]:
# with open('/home/chenyi/workspace/dataset/deepedia/info/pedia2yolo_val.pickle', 'wb') as f:
#     pickle.dump(pedia_train, f)

In [None]:
# with open('/home/chenyi/workspace/dataset/deepedia/info/pedia2yolo_train.pickle', 'rb') as f:
#     pedia_train = pickle.load(f)

In [None]:
l = glob.glob('/home/chenyi/workspace/dataset/deepedia/images/train/*')

In [None]:
len(l)