In [None]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
%matplotlib inline

from PIL import Image
from PIL import ImageEnhance,ImageChops,ImageOps
from sklearn.preprocessing import OneHotEncoder

from sklearn.model_selection import train_test_split
import categories
import helper_function
import math
import copy
# import pillow

In [None]:
# Side length (pixels) later passed to set_x_one_hot as the resize target.
imsize = 64
# Raw training annotations; used by the exploratory cells near the end of the notebook.
annotation = pd.read_csv("train/Annotations/train.csv")

In [None]:
"""
Preprocess images smaller than 512x512:
pad each image so that it becomes 512x512
and update the landmark coordinates accordingly.
Width is shape[1], height is shape[0].
"""


# Record the pixel size of every image.
def write_img_size(df , file_dir ="train/"):
    """Return the pixel size of every image listed in ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain an ``image_id`` column holding paths relative to
        ``file_dir``.
    file_dir : str
        Prefix prepended to every ``image_id``.

    Returns
    -------
    pandas.DataFrame
        Columns ``width`` and ``height``, indexed like ``df``.

    Notes
    -----
    The previous implementation stored ``shape[0]`` (the row count, i.e. the
    height) under ``width`` and ``shape[1]`` under ``height``. The values are
    now taken from ``Image.size``, which is ``(width, height)``, matching what
    ``pad_images`` writes to ``test_size.csv``.
    """
    records = []
    for _, row in df.iterrows():
        # The context manager closes the file handle; only the header is
        # needed to read the size, so the pixels are never decoded.
        with Image.open(file_dir + row['image_id']) as img:
            width, height = img.size
        records.append((width, height))
    return pd.DataFrame(records, columns=["width", "height"], index=df.index)

def pad_img(np_img , size = 512):
    """Zero-pad an image array so that it is centred in a ``size`` x ``size`` canvas.

    Parameters
    ----------
    np_img : numpy.ndarray
        Image of shape ``(height, width)`` or ``(height, width, channels)``.
    size : int
        Target side length in pixels.

    Returns
    -------
    (img_pad, left, up) : tuple
        The padded array and the number of pixels added on the left and on
        the top, i.e. the offsets to add to existing x / y coordinates.

    Notes
    -----
    A dimension that is already ``>= size`` is left untouched (offset 0).
    Previously a negative pad width was passed to ``np.pad``, which raises.
    """
    height, width = np_img.shape[:2]
    # Clamp at zero so an oversized dimension is simply not padded.
    wid_diff = max(size - width, 0)
    height_diff = max(size - height, 0)
    left = wid_diff // 2
    right = wid_diff - left      # gets the extra pixel when the difference is odd
    up = height_diff // 2
    down = height_diff - up

    # Only the two spatial axes are padded; any trailing axes (channels) are kept.
    pad_width = [(up, down), (left, right)] + [(0, 0)] * (np_img.ndim - 2)
    img_pad = np.pad(np_img, pad_width, 'constant', constant_values=0)

    return img_pad,left,up


def pad_images(df,size = 512 , pre_path = 'train/' ,is_train=True):
    """Pad every undersized image in place and shift its landmarks to match.

    For each row the image at ``pre_path + image_id`` is opened. If either of
    its dimensions is smaller than ``size`` it is padded with ``pad_img``, the
    file is overwritten, and every annotated landmark (x != -1) in that row is
    shifted by the padding offsets.

    Parameters
    ----------
    df : pandas.DataFrame
        Has ``image_id`` and ``image_category``; every other column holds
        ``x_y_visibility`` strings. The frame is modified in place.
    size : int
        Target side length passed to ``pad_img``.
    pre_path : str
        Directory prefix of the images (and of ``test_size.csv``).
    is_train : bool
        When false, the original ``(width, height)`` of every image is also
        written to ``pre_path + "test_size.csv"``.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, with updated landmark strings.
    """
    l_m_columns = df.columns.drop(['image_id' , 'image_category'])
    # Collected as plain tuples and turned into a frame once at the end;
    # DataFrame.append was removed in pandas 2.0 and was quadratic anyway.
    size_records = []
    for idx,row in df.iterrows():
        filepath = pre_path+row['image_id']
        # Read everything we need, then close the file before overwriting it.
        with Image.open(filepath) as img:
            original_width, original_height = img.size
            np_img = np.array(img)
        if not is_train:
            size_records.append((row['image_id'], row['image_category'],
                                 original_width, original_height))
        if np_img.shape[0] >= size and np_img.shape[1] >= size:
            continue

        (np_img,left,up) = pad_img(np_img , size)
        Image.fromarray(np_img, 'RGB').save(filepath)
        for col in l_m_columns:
            coord_list = [int(part) for part in row[col].split('_')]
            if coord_list[0] == -1:
                # -1 marks a landmark that is not annotated; leave it alone.
                continue
            # Shift the coordinates by the padding added on the left / top.
            coord_list[0] += left
            coord_list[1] += up
            df.loc[idx,col] = '_'.join(map(str, coord_list))
    if not is_train:
        test_offset_df = pd.DataFrame(
            size_records, columns=['image_id' , 'image_category','width','height'])
        test_offset_df.to_csv(pre_path+"test_size.csv" , index=False)
    return df



In [None]:
"""
pad train dataset
"""
# pad_images overwrites the image files under pre_path, so this directory is
# presumably a copy of the raw training set -- TODO confirm before re-running.
pre_path = "train_pad/"
intput_file_name = "Annotations/train.csv"
output_file_name = "Annotations/train_pad.csv"

data_small_eg = pd.read_csv(pre_path + intput_file_name)
data_small_pad = pad_images(data_small_eg,pre_path=pre_path)
# Persist the annotations with the shifted landmark coordinates.
data_small_pad.to_csv(pre_path + output_file_name,index=False)

In [None]:
"""
pad warm up dataset
"""
# Same padding pass as for the training set, applied to the warm-up images.
pre_path = "train_warm_up_pad/"
intput_file_name = "Annotations/annotations.csv"
output_file_name = "Annotations/train_pad.csv"

data_small_eg = pd.read_csv(pre_path + intput_file_name)
data_small_pad = pad_images(data_small_eg,pre_path=pre_path)
# Persist the annotations with the shifted landmark coordinates.
data_small_pad.to_csv(pre_path + output_file_name,index=False)

In [None]:
"""
pad test dataset
"""
# Pad the test images. With is_train=False, pad_images additionally writes the
# original image sizes to pre_path + "test_size.csv".
pre_path = "test_pad/"
intput_file_name = "test.csv"
output_file_name = "test_pad.csv"

data_small_eg = pd.read_csv(pre_path + intput_file_name)
data_small_pad = pad_images(data_small_eg,pre_path=pre_path , is_train=False)
data_small_pad.to_csv(pre_path + output_file_name,index=False)

In [None]:
"""
image augmentation
"""

import os, errno
from random import randint,uniform,choice

# Left -> right landmark column pairs, one dict per garment category.
# aug_images(mode="mirror") swaps the two columns of every pair after flipping
# the image horizontally.
blouse_dict = {"neckline_left":"neckline_right","shoulder_left":'shoulder_right','armpit_left':'armpit_right',
               "cuff_left_in":"cuff_right_in","cuff_left_out":"cuff_right_out","top_hem_left":"top_hem_right"}

skirt_dict = {"waistband_left":"waistband_right","hemline_left":'hemline_right'}

outwear_dict = {"neckline_left":"neckline_right","shoulder_left":'shoulder_right','armpit_left':'armpit_right',
                'waistline_left' :'waistline_right',
               "cuff_left_in":"cuff_right_in","cuff_left_out":"cuff_right_out","top_hem_left":"top_hem_right"}

trousers_dict = {"waistband_left":"waistband_right","bottom_left_in":'bottom_right_in',
                 "bottom_left_out":"bottom_right_out"}

dress_dict = {"neckline_left":"neckline_right","shoulder_left":'shoulder_right','armpit_left':'armpit_right',
                'waistline_left' :'waistline_right',
               "cuff_left_in":"cuff_right_in","cuff_left_out":"cuff_right_out","hemline_left":"hemline_right"}
# Lookup keyed by the value of the image_category column.
lm_all_dict = {"blouse": blouse_dict , 'skirt':skirt_dict , "outwear": outwear_dict , "trousers":trousers_dict,
              "dress":dress_dict}


def _parse_landmark(text):
    """Split an ``x_y_visibility`` string into a list of ints."""
    return [int(part) for part in text.split('_')]


def _format_landmark(values):
    """Join landmark components back into an ``x_y_visibility`` string."""
    return '_'.join(str(value) for value in values)


def _visible_landmarks(row, l_m_columns):
    """Return ``[(column, [x, y, ...]), ...]`` for landmarks whose x is not -1."""
    parsed = ((col, _parse_landmark(row[col])) for col in l_m_columns)
    return [(col, coords) for col, coords in parsed if coords[0] != -1]


def _random_mask_crop(img, landmarks, im_size, edge=10):
    """Black out everything outside a random box that contains all ``landmarks``.

    The image is cropped to a random box that keeps every landmark at least
    ``edge`` pixels inside (where the canvas allows) and is then re-expanded
    to ``im_size`` x ``im_size`` at the same position, so landmark coordinates
    are unchanged. Images without annotated landmarks are returned untouched.
    """
    if not landmarks:
        return img
    # The 0 / im_size+1 seeds reproduce the original running min/max bounds.
    max_wid = max([0] + [coords[0] for coords in landmarks])
    min_wid = min([im_size + 1] + [coords[0] for coords in landmarks])
    max_h = max([0] + [coords[1] for coords in landmarks])
    min_h = min([im_size + 1] + [coords[1] for coords in landmarks])
    # Keep the draw order (left, right, up, down) so seeded runs are stable.
    left = randint(0, max(min_wid - edge, 0))
    right = randint(min(im_size, max_wid + edge), im_size)
    up = randint(0, max(min_h - edge, 0))
    down = randint(min(im_size, max_h + edge), im_size)
    img = img.crop((left, up, right, down))
    # Negative offsets make PIL pad with black, restoring the full canvas.
    return img.crop((-left, -up,
                     img.size[0] + im_size - right,
                     img.size[1] + im_size - down))


def _rotate_landmark(coords, radian, im_size):
    """Rotate one landmark about the image centre; returns integer ``(x, y)``."""
    half = im_size / 2
    x = coords[0] - half
    # Flip y so the rotation is done in a y-up coordinate system.
    y = (im_size - coords[1]) - half
    new_x = int(x * math.cos(radian) - y * math.sin(radian) + half)
    new_y = im_size - int(x * math.sin(radian) + y * math.cos(radian) + half)
    return new_x, new_y


def _augment_mirror(row, l_m_columns, im_size, pre_path, mode):
    """Flip the image left-right, mirror x coordinates and swap left/right columns."""
    new_row = row.copy()
    pairs = lm_all_dict[row['image_category']]
    with Image.open(pre_path + row['image_id']) as source:
        source.transpose(Image.FLIP_LEFT_RIGHT).save(pre_path + mode + "/" + row['image_id'])
    for col, coords in _visible_landmarks(row, l_m_columns):
        coords[0] = im_size - coords[0]
        new_row[col] = _format_landmark(coords)
    # After the flip, what was annotated as "left" is now on the right.
    for left_name, right_name in pairs.items():
        new_row[left_name], new_row[right_name] = new_row[right_name], new_row[left_name]
    new_row['image_id'] = mode + "/" + row['image_id']
    return new_row


def _augment_crop(row, l_m_columns, im_size, pre_path, mode):
    """Mask the image outside a random landmark-preserving box."""
    new_row = row.copy()
    landmarks = [coords for _, coords in _visible_landmarks(row, l_m_columns)]
    with Image.open(pre_path + row['image_id']) as source:
        img = _random_mask_crop(source, landmarks, im_size)
        img.save(pre_path + mode + "/" + row['image_id'])
    new_row['image_id'] = mode + '/' + row['image_id']
    return new_row


def _augment_shear(row, l_m_columns, im_size, pre_path, mode):
    """Shear the image by a random factor and move the landmarks with it."""
    new_row = row.copy()
    # The original hard-codes horizontal shearing; the vertical variant is
    # kept so it can be re-enabled.
    shear_horizontally = True
    with Image.open(pre_path + row['image_id']) as source:
        width, height = source.size
        m = round(uniform(0.4, 0.6), 1)
        shift = abs(m) * width
        offset = -shift if m > 0 else 0
        if shear_horizontally:
            img = source.transform((width + int(round(shift)), height), Image.AFFINE,
                                   (1, m, offset, 0, 1, 0), Image.BICUBIC)
        else:
            img = source.transform((width, height + int(round(shift))), Image.AFFINE,
                                   (1, 0, 0, m, 1, offset), Image.BICUBIC)
    moved, fixed = (0, 1) if shear_horizontally else (1, 0)
    padding = int((im_size - img.size[moved]) / 2)
    for col, coords in _visible_landmarks(row, l_m_columns):
        # NOTE(review): the affine matrix also translates by ``shift``; the
        # original landmark formula does not add it -- confirm against a
        # rendered sample before relying on this mode.
        # Rounded to int because every reader of these strings parses ints.
        coords[moved] = int(round(coords[moved] - coords[fixed] * m + padding))
        new_row[col] = _format_landmark(coords)
    if shear_horizontally:
        img = img.crop((-padding, 0, img.size[0] - (shift + padding), im_size))
    else:
        img = img.crop((0, -padding, im_size, img.size[1] - (shift + padding)))
    img.save(pre_path + mode + "/" + row['image_id'])
    new_row['image_id'] = mode + '/' + row['image_id']
    return new_row


def _augment_jitter(row, l_m_columns, im_size, pre_path, mode, variant=0):
    """Random rotation + mask crop + one photometric change selected by ``variant``."""
    new_row = row.copy()
    angle_bound = 25
    angle = randint(-angle_bound, angle_bound)
    radian = math.pi / 180 * angle

    rotated = [(col, _rotate_landmark(coords, radian, im_size), coords[2:])
               for col, coords in _visible_landmarks(row, l_m_columns)]
    # Give up on rotating if any landmark would leave the canvas.
    if any(x > im_size or x < 0 or y > im_size or y < 0 for _, (x, y), _ in rotated):
        angle = 0
    if angle != 0:
        for col, (x, y), rest in rotated:
            new_row[col] = _format_landmark([x, y] + rest)

    with Image.open(pre_path + row['image_id']) as source:
        img = source.rotate(angle)

    # Crop around the landmarks as they are *after* rotation; the original
    # used the un-rotated positions, which could mask out rotated landmarks.
    landmarks = [coords for _, coords in _visible_landmarks(new_row, l_m_columns)]
    img = _random_mask_crop(img, landmarks, im_size)

    # Either darken/desaturate (0.3-0.7) or strengthen (1.5-2.0).
    value = round(choice([uniform(0.3, 0.7), uniform(1.5, 2.0)]), 1)
    if variant == 1:
        img = ImageEnhance.Contrast(img).enhance(value)
    if variant == 0:
        img = ImageEnhance.Brightness(img).enhance(value)
        img = ImageOps.equalize(img)
    if variant == 2:
        img = ImageEnhance.Color(img).enhance(value)
        img = ImageOps.invert(img)

    new_id = row['image_id'][:-4] + str(variant) + ".jpg"
    img.save(pre_path + mode + "/" + new_id)
    new_row['image_id'] = mode + "/" + new_id
    return new_row


def aug_images(df,im_size = 512, pre_path = './train_pad/',mode="mirror"):
    """Write an augmented copy of every image in ``df`` plus a matching CSV.

    Augmented images are saved under ``pre_path + mode + "/"`` and the
    annotations (with ``image_id`` pointing at the new files) are written to
    ``pre_path + "Annotations/train_pad_<mode>.csv"``.

    Parameters
    ----------
    df : pandas.DataFrame
        Has ``image_id`` and ``image_category``; every other column holds
        ``x_y_visibility`` landmark strings. Not modified.
    im_size : int
        Side length of the (square, padded) images.
    pre_path : str
        Dataset root; must already contain an ``Annotations/`` directory.
    mode : str
        One of ``"mirror"``, ``"crop"``, ``"shear"`` or ``"jittering"``.

    Raises
    ------
    ValueError
        If ``mode`` is not one of the supported names (previously this
        surfaced as a NameError after the directories had been created).
    """
    handlers = {
        "mirror": _augment_mirror,
        "crop": _augment_crop,
        "shear": _augment_shear,
        "jittering": _augment_jitter,
    }
    if mode not in handlers:
        raise ValueError("unknown augmentation mode: " + str(mode))

    for name in df.image_category.unique():
        os.makedirs(pre_path+mode+"/"+"Images/"+name, exist_ok=True)
    l_m_columns = df.columns.drop(['image_id' , 'image_category'])

    # Rows are gathered in a list and turned into a frame once;
    # DataFrame.append no longer exists in pandas 2.x.
    records = []
    if mode == "jittering":
        # Only variant 0 (brightness + equalise) is generated, as before.
        for variant in range(1):
            for _, row in df.iterrows():
                records.append(_augment_jitter(row, l_m_columns, im_size,
                                               pre_path, mode, variant).to_dict())
    else:
        handler = handlers[mode]
        for _, row in df.iterrows():
            records.append(handler(row, l_m_columns, im_size, pre_path, mode).to_dict())

    new_df = pd.DataFrame(records, columns=df.columns)
    new_df.to_csv(pre_path+"Annotations/train_pad_"+str(mode)+".csv" , index=False)

    
    

    
#     return df
# Run the mirror augmentation over the padded training annotations.
# aug_images writes the flipped images and Annotations/train_pad_mirror.csv.
pre_path = "train_pad/"
intput_file_name = "Annotations/train_pad.csv"

data= pd.read_csv(pre_path + intput_file_name)
# Earlier experiments on a 10-row sample, kept for reference:
# aug_images(data[:10],mode="crop")
# aug_images(data[:10],mode="mirror")
# data= pd.read_csv(pre_path + intput_file_name)
# # aug_images(data[:10],mode="crop")
# aug_images(data[:10],mode="crop")

aug_images(data,mode="mirror")
# data.to_csv(pre_path + output_file_name,index=False)

# img = Image.open("a.jpg")
# img =img.transpose(Image.FLIP_LEFT_RIGHT)
# plt.imshow(img)

In [None]:
# Visual sanity check of the mirrored annotations (row 0, scale 1).
# NOTE(review): helper_function.show_im_lms is assumed to match the
# show_im_lms defined later in this notebook -- confirm.
a = pd.read_csv("train_pad/Annotations/train_pad_mirror.csv")
helper_function.show_im_lms(a,0,1,"./train_pad/")

In [None]:
"""
Data augmentation:
write the annotations of the augmented images to separate CSV files, one per category.
"""

pre_path = "train_pad/"
intput_file_name = "Annotations/train_pad.csv"
output_file_name = "Annotations/train_pad_aug.csv"


# Append the annotations of every augmentation method to the padded originals
# and write the combined table to train_pad_aug.csv.
aug_methods = ["mirror"]
origin_df = pd.read_csv(pre_path + intput_file_name)
for method in aug_methods:
    # e.g. Annotations/train_pad_mirror.csv, as written by aug_images.
    new_df = pd.read_csv(pre_path + intput_file_name[:-4]+"_"+method+".csv")
    origin_df = pd.concat([origin_df,new_df],ignore_index=True)
origin_df.to_csv(pre_path+output_file_name,index=False)

# Split the augmented annotations by category and write one coordinate CSV each.
def write_with_category_aug(df  , pre_path="train/"):
    """Write ``train_<category>_coord_augs.csv`` for every category in ``df``.

    For each category the rows of that category are selected, the landmark
    columns returned by ``categories.get_columns(category)`` are split from
    ``x_y_vis`` strings into ``<col>_x`` / ``<col>_y`` / ``<col>_vis`` columns,
    and all original landmark columns are dropped before writing.

    Parameters
    ----------
    df : pandas.DataFrame
        Combined annotation table with ``image_id``, ``image_category`` and
        landmark string columns (optionally ``width`` / ``height``).
    pre_path : str
        Dataset root; files go to ``pre_path + "Annotations/"``.
    """
    write_path = pre_path+"Annotations/train_"
    for category in df.image_category.unique():
        # Work on an explicit copy: new columns are added below and must not
        # be written into a slice of the caller's frame.
        df_new = df.loc[df.image_category==category,:].copy()
        columns = df_new.columns
        meta_columns = [name for name in ('image_id', 'image_category', 'height', 'width')
                        if name in columns]
        l_m_columns = columns.drop(meta_columns)
        for col in categories.get_columns(category):
            # Split only this category's rows rather than the whole table.
            parts = pd.DataFrame(df_new[col].str.split('_').tolist(), index=df_new.index)
            df_new[[col+"_x" , col+"_y", col+"_vis"]] = parts
        df_new = df_new.drop(l_m_columns,axis=1)
        df_new.to_csv(write_path+category+"_coord_augs.csv",index =False)

# Re-read the combined table from disk and write the per-category coordinate files.
origin_df = pd.read_csv(pre_path+output_file_name)
write_with_category_aug(origin_df,pre_path)
    

In [None]:
"""
Split the data by category.
"""

# Write one CSV per image category.
def write_with_category(df  , pre_path="train/" ,is_train = True):
    """Split ``df`` by ``image_category`` and save each part as its own CSV.

    Training data goes to ``pre_path + "Annotations/train_<category>.csv"``,
    anything else to ``pre_path + "test_<category>.csv"``. Returns nothing.
    """
    prefix = pre_path + ("Annotations/train_" if is_train else "test_")
    for label in df.image_category.unique():
        subset = df.loc[df.image_category == label, :]
        subset.to_csv(prefix + label + ".csv", index=False)
        
# Per-category CSVs for the padded training set ...
pre_path = "train_pad/"
intput_file_name =  "Annotations/train_pad.csv"
data_pad = pd.read_csv(pre_path + intput_file_name)
write_with_category(data_pad,pre_path=pre_path)

# ... and for the padded warm-up set.
pre_path = "train_warm_up_pad/"
intput_file_name =  "Annotations/train_pad.csv"
data_pad = pd.read_csv(pre_path + intput_file_name)
write_with_category(data_pad,pre_path=pre_path)



In [None]:
# Per-category CSVs for the padded test set (written as test_<category>.csv).
pre_path = "./test_pad/"
intput_file_name =  "test_pad.csv"
data_pad = pd.read_csv(pre_path + intput_file_name)
write_with_category(data_pad,pre_path=pre_path,is_train=False)

In [None]:
"""
Split each landmark into separate x, y and visibility columns.
"""
cates = ["blouse" ,"outwear","trousers","skirt","dress" ]
pre_path = "train_pad/"
# Input: a per-category annotation table in the original layout.
# Output: the same table with the category's landmarks split into coordinates.
def split_coord(df ,cate, output_path):
    """Split one category's landmark strings into x / y / visibility columns.

    Parameters
    ----------
    df : pandas.DataFrame
        Annotation table with ``image_id``, ``image_category`` and landmark
        string columns (optionally ``width`` / ``height``). The split columns
        are added to this frame in place.
    cate
        Category key handed to ``categories.get_columns`` to obtain the
        landmark columns relevant to this category.
    output_path : str
        CSV file the result is written to.

    Returns
    -------
    pandas.DataFrame
        ``df`` without the original landmark string columns.
    """
    columns = df.columns
    meta_columns = [name for name in ('image_id', 'image_category', 'height', 'width')
                    if name in columns]
    l_m_columns = columns.drop(meta_columns)
    # Use the ``cate`` argument. The previous code read the notebook-level
    # loop variable ``idx``, which only worked when called from that loop.
    cols = categories.get_columns(cate)
    for col in cols:
        coord_list = df[col].str.split('_')
        df[[col+"_x" , col+"_y", col+"_vis"]] = pd.DataFrame(coord_list.tolist(), index= df.index)
    df = df.drop(l_m_columns,axis=1)
    df.to_csv( output_path, index = False)
    return df
# Produce train_<category>_coord.csv for every category.
for idx,cate in enumerate(cates):
    intput_file =  "Annotations/train_"+cate+".csv"
    output_coord_file = pre_path +"Annotations/train_"+cate+"_coord.csv"
    
    data_blouse = pd.read_csv(pre_path + intput_file)
    # Note: the positional index idx, not the category name, is passed as `cate`.
    data_blouse_coord = split_coord(data_blouse,idx,output_coord_file)



In [None]:
"""
Write all landmark coordinates to CSV:
every landmark column is split into separate x, y and visibility columns.
"""
cates = ["blouse" ,"outwear","trousers","skirt","dress" ]
pre_path = "train_pad/"
# Input: a per-category annotation table in the original layout.
# Output: the same table with every landmark split into coordinates.
def split_coord_all(df ,cate, output_path):
    """Split every landmark string column into x / y / visibility columns.

    Unlike ``split_coord`` this ignores ``cate`` and processes all landmark
    columns. The new columns are added to ``df`` in place; the returned frame
    (also written to ``output_path``) has the original string columns removed.
    """
    all_columns = df.columns
    has_size = "height" in all_columns or "width" in all_columns
    if has_size:
        meta = ['image_id' , 'image_category','height','width']
    else:
        meta = ['image_id' , 'image_category']
    landmark_columns = all_columns.drop(meta)

    for name in landmark_columns:
        pieces = df[name].str.split('_')
        targets = [name+"_x" , name+"_y", name+"_vis"]
        df[targets] = pd.DataFrame(pieces.tolist(), index= df.index)

    result = df.drop(landmark_columns,axis=1)
    result.to_csv( output_path, index = False)
    return result
# Produce train_<category>_coord_all.csv for every category.
for idx,cate in enumerate(cates):
    intput_file =  "Annotations/train_"+cate+".csv"
    output_coord_file = pre_path +"Annotations/train_"+cate+"_coord_all.csv"
    
    data_blouse = pd.read_csv(pre_path + intput_file)
    # The second argument is not used by split_coord_all.
    data_blouse_coord = split_coord_all(data_blouse,idx,output_coord_file)

In [None]:
pre_path = "./train_pad/"
cates = ["blouse" ,"outwear","trousers","skirt","dress" ]
def clean_non_landmark(category_name, pre_path):
    """Drop every row that contains a -1 value and save the rest.

    Reads ``Annotations/train_<category_name>_coord.csv`` under ``pre_path``,
    removes each row in which any cell equals -1, and writes the remainder to
    ``Annotations/train_<category_name>_coord_cleaned.csv``. The frame shape
    is printed before and after filtering.
    """
    file_name = pre_path + "Annotations/train_"+ category_name +"_coord.csv"
    output_name = pre_path + "Annotations/train_"+ category_name +"_coord_cleaned.csv"
    df = pd.read_csv(file_name)
    print("read: "+ file_name)
    print(df.shape)
    # Vectorised replacement for the row-by-row ``-1 in row.values`` scan.
    has_missing = df.eq(-1).any(axis=1)
    # The old ``df.reset_index()`` discarded its result; actually reset here.
    df = df.loc[~has_missing].reset_index(drop=True)
    print(df.shape)
    df.to_csv(output_name,index=False)
# Clean the coordinate file of every category.
for cate in cates:
    clean_non_landmark(cate , pre_path)

# Everything below is test / scratch code.

In [None]:

# Scratch check: plot the landmarks of row 28172 at half scale.
# NOTE(review): show_im_lms is only defined in the next cell, and
# output_file_name is left over from an earlier cell, so on a fresh
# top-to-bottom run this cell presumably fails -- confirm and reorder.
data_pad = pd.read_csv(pre_path + output_file_name)
show_im_lms(data_pad,28172,2,pre_dir=pre_path)


In [None]:

#helper function:

def show_im_lms(df,index,scale=1 , pre_dir = 'train/'):
    """Plot the image of row ``index`` with its annotated landmarks on top.

    Landmark columns are all columns except ``image_id`` / ``image_category``
    and hold ``x_y_...`` strings; entries whose x is -1 are skipped. Both the
    coordinates and the image are divided by ``scale`` before drawing.
    """
    landmark_columns = df.columns.drop(['image_id' , 'image_category'])
    for name in landmark_columns:
        # Parse as floats so non-integer coordinate strings are accepted too.
        values = [float(part) for part in df.loc[index, name].split('_')]
        if values[0] == -1:
            continue
        plt.plot(values[0] / scale, values[1] / scale, '*')

    img = Image.open(pre_dir + df.loc[index, 'image_id'])
    pixels = np.array(img)
    scaled_width = int(pixels.shape[1] / scale)
    scaled_height = int(pixels.shape[0] / scale)
    plt.imshow(img.resize((scaled_width, scaled_height)))
    
def make_small_df(df , size =99):
    """Build a small sample by taking label slices of ``size + 1`` rows.

    The first slice is ``df.loc[:size]``. Then, walking the categories in
    order of first appearance, the running total of category sizes is used as
    the start label of a further slice ``df.loc[start:start + size]``. The
    slices are concatenated and the index is reset (the old index is kept as
    an ``index`` column).
    """
    category_size = {
        cate: df.loc[df['image_category'] == cate, :].shape[0]
        for cate in df.image_category.unique()
    }

    pieces = [df.loc[:size, :]]
    start = 0
    for count in category_size.values():
        start += count
        if start > df.shape[0]:
            break
        pieces.append(df.loc[start:start + size, :])
    return pd.concat(pieces).reset_index()
"""

x_onehot (m, wid*height*3)
"""    
def set_y(df):
    """Encode ``df.image_category`` as integers.

    Categories are numbered 0, 1, 2, ... in order of first appearance.

    Returns
    -------
    pandas.Series
        Integer code per row, indexed like ``df``.
    """
    # The former per-category size/array bookkeeping was never used, and its
    # call to Series.as_matrix() fails on pandas >= 1.0; it has been removed.
    category_encode = {cate: code for code, cate in enumerate(df.image_category.unique())}
    return df.image_category.map(category_encode)
"""

x_onehot (m, wid*height*3)
"""  



##########################################






def set_x_one_hot(df , im_size = 128):
    """Load every image in ``df``, resize it and flatten it into one row.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain an ``image_id`` column with paths relative to ``train/``.
    im_size : int
        Images are resized to ``im_size`` x ``im_size`` before flattening.

    Returns
    -------
    numpy.ndarray
        One flattened image per row of ``df``. For an empty ``df`` an array of
        shape ``(0, im_size * im_size * 3)`` is returned, which assumes
        3-channel images -- TODO confirm.

    Notes
    -----
    The old loop called ``pad_img`` on non-512 images but threw the result
    away, so images were always resized without padding; that observable
    behaviour is kept. NOTE(review): padding before resizing may have been
    the intent.
    """
    flat_images = []
    for _, row in df.iterrows():
        with Image.open('train/'+row['image_id']) as img:
            resized = img.resize((im_size,im_size))
        flat_images.append(np.array(resized).reshape((-1)))
    if not flat_images:
        return np.empty((0, im_size * im_size * 3), dtype=np.uint8)
    # Stack once instead of growing the array with np.append on every row.
    return np.stack(flat_images)
#     x_all=x_all.reshape((size,im_size,im_size,3))
#     print(x_all.shape)
#     np.savetxt('images.txt' , x_all)
    #plt.imshow(new_img)
    #np.array(new_img).shape





def update_coord(df,idx, size = 512):
    """Shift the split coordinates of row ``idx`` by the centring pad offsets.

    The image of row ``idx`` is opened to get its size, the left / top offsets
    that centring it in a ``size`` x ``size`` canvas would add are computed,
    and those offsets are added to the x and y columns.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with ``image_id``, ``image_category`` and coordinate columns
        assumed to be laid out as repeating (x, y, visibility) triples --
        TODO confirm against the CSV being passed in.
    idx
        Index label of the row to update.
    size : int
        Target canvas side length.

    Returns
    -------
    pandas.DataFrame
        A one-row frame holding only the shifted coordinate columns.
    """
    with Image.open('train/'+df.loc[idx,'image_id']) as img:
        width, height = img.size

    # x offsets come from the width and y offsets from the height; the
    # previous code had the two axes swapped.
    left = int((size - width)/2)
    up = int((size - height)/2)

    coord_columns = (df.columns
                     .drop(['image_id' , 'image_category'])
                     .drop(['width','height'], errors='ignore'))
    # Select with a list so the result stays a DataFrame; a scalar label
    # returned a Series, on which ``.columns`` does not exist.
    coords = df.loc[[idx], coord_columns].copy()

    x_columns = coord_columns[0::3]
    y_columns = coord_columns[1::3]
    coords[x_columns] = coords[x_columns] + left
    coords[y_columns] = coords[y_columns] + up

    print(coords[x_columns].shape)
    return coords



    

def get_x_y(df,df_size,scale=1):
    """Build the image matrix and targets for the first ``df_size`` rows of ``df``.

    NOTE(review): this function looks unfinished -- ``scale`` is unused,
    nothing is returned, ``y`` is built from the notebook-level variable
    ``data_train_blouse_split`` rather than from ``df``, and ``set_y_coord``
    is not defined in the visible part of the notebook. Confirm before use.
    """
    df=df[:df_size]
    
    # imsize is the notebook-level resize target.
    x=set_x_one_hot(df,imsize)
    y=set_y_coord(data_train_blouse_split)

In [None]:
# Add the image sizes to the annotation tables.
# The size columns are assigned by index alignment, so train_size.csv /
# test_size.csv are assumed to list the images in the same order -- TODO confirm.
train_data = pd.read_csv("train/Annotations/train.csv")
train_size = pd.read_csv("train_size.csv")
train_data[["width" ,"height"]] = train_size[["width" ,"height"]]
train_data.to_csv("train/Annotations/train_with_size.csv",index =False)


test_data= pd.read_csv("test/test.csv")
test_size = pd.read_csv("test_size.csv")
test_data[["width" ,"height"]] = test_size[["width" ,"height"]]
test_data.to_csv("test/test_with_size.csv",index =False)



In [None]:
# Uses the defaults: writes train/Annotations/train_<category>.csv.
write_with_category(train_data)

In [None]:
# Write the blouse coordinate table.
# NOTE(review): clean_columns is not defined in the visible notebook, and
# split_coord as defined above takes (df, cate, output_path), so this cell
# presumably predates those definitions -- confirm before running.
data_train_blouse = pd.read_csv("train/Annotations/train_blouse.csv")
data_train_blouse = clean_columns(data_train_blouse,1)
data_train_blouse_split = split_coord(data_train_blouse)
data_train_blouse_split.to_csv("train/Annotations/train_blouse_coord.csv" , index = False)

In [None]:
# Shape check of the flattened image matrix for the first 100 blouse rows.
set_x_one_hot(data_train_blouse_split[:100]).shape
# set_y_coord(data_train_blouse_split)

In [None]:
# Try the coordinate shift on the row with index label 1.
update_coord(data_train_blouse_split,1)

In [None]:
# x_all  = np.array([])
# set_x_one_hot(annotation).
# Build a small sample of the annotations and its flattened image matrix
# (images resized to imsize x imsize).
small_df = make_small_df(annotation)
x_one_hot = set_x_one_hot(small_df,imsize)


In [None]:
# Integer-encode the categories, then one-hot encode them as a NumPy array.
y_cate = set_y(small_df)
y_cate.shape
# DataFrame.as_matrix() was removed in pandas 1.0; .values is the
# equivalent that works on old and new versions alike.
y_cate=pd.get_dummies(y_cate).values


In [None]:
import sklearn.model_selection as sk

# Hold out 33% of the sample; the fixed random_state makes the split repeatable.
X_train, X_test, y_train, y_test = sk.train_test_split(x_one_hot,y_cate,test_size=0.33, random_state = 42)
X_train.shape
# Only this last expression is displayed by the cell.
y_train.shape

In [None]:
# Scratch experiment: fit a OneHotEncoder on a single three-feature sample.
enc = OneHotEncoder()
enc.fit([[1,2,3]])

In [None]:
# Path of the first training image; reused by the scratch cells below.
filepath_test = 'train/'+annotation.loc[0,'image_id']

print(annotation.shape)
annotation.head()
# type(annotation)

# `columns` was never defined at notebook level; take it from the frame.
l_m_columns = annotation.columns.drop(['image_id' , 'image_category'])
test = annotation.loc[0,l_m_columns]
print(test.shape)
test.str.split('_')
# Series.as_matrix() was removed in pandas 1.0; .values is the equivalent.
type(test.str.split('_').values)
test.str.split('_').values

In [None]:
# NOTE(review): imageio is not imported in the visible import cell -- confirm
# it is imported elsewhere, or this cell fails on a fresh kernel.
im = imageio.imread(filepath_test)

In [None]:
# Print the number of pixel rows and display the image.
print(im.shape[0])
plt.imshow(im)
# Bare attribute access: nothing is resized here, the cell only shows the method.
im.resize

In [None]:
# Find the smallest extent along each of the first two array axes over all
# training images, starting from an upper bound of 600.
# Note: axis 0 of the array is rows (height) and axis 1 is columns (width),
# so min_wid actually tracks the minimum height and min_len the minimum width.
min_wid = 600
min_len = 600
for idx ,row in annotation.iterrows():
    filepath_test = 'train/'+row['image_id']
    im = imageio.imread(filepath_test)
    min_wid = min(min_wid,im.shape[0])
    min_len = min(min_len,im.shape[1])
print(min_wid , min_len)

In [None]:
# Scratch: save a copy of the current test image and try constant padding.

img = Image.open(filepath_test)
# new_img = img.resize((1700,200))

img.save("a.jpg")
# plt.imshow(new_img)
img = np.array(img)
img.shape

# Add 50 pixels on every side of the two spatial axes; channels are untouched.
img_pad = np.pad(img , ((50,50),(50,50),(0,0)) , 'constant')
plt.imshow(img_pad)
img_pad.shape


In [None]:
# NOTE(review): the previous cell saves "a.jpg"; "a.png" is not written
# anywhere in the visible notebook -- confirm the file exists.
img = Image.open("a.png")
np.array(img).shape