In [1]:
import os
import shutil
import cv2
import random
import re
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from PIL import Image

%matplotlib inline

# 数据集目录结构
```
FashionAI-Attributes
├── data/
│   ├── base/
│   ├── rank/
│   └── web/
└── solution.ipynb
```

# Load one sample image to confirm the dataset is in place and to inspect
# the array shape of a raw image.
demo_path = './data/base/Images/coat_length_labels/fff3f9da02b33c0d2619a1dde0914737.jpg'
image_demo = cv2.imread(demo_path)
# cv2.imread reports an unreadable/missing file by returning None rather than
# raising, which would otherwise surface as an opaque AttributeError on .shape.
if image_demo is None:
    raise FileNotFoundError('Could not read demo image: {0}'.format(demo_path))
image_demo.shape

In [2]:
# The annotation CSV has no header row, so the column names are supplied here.
label_csv = './data/base/Annotations/label.csv'
column_names = ['image_id', 'class', 'label']

df_train = pd.read_csv(label_csv, header=None)
df_train.columns = column_names
df_train.head()

Unnamed: 0,image_id,class,label
0,Images/collar_design_labels/4d8a38b29930a403e5...,collar_design_labels,nnynn
1,Images/collar_design_labels/bd0981f231180d2b00...,collar_design_labels,nynnn
2,Images/collar_design_labels/26937e1724feadfe39...,collar_design_labels,ynnnn
3,Images/collar_design_labels/cf4140ec542887270f...,collar_design_labels,nynnn
4,Images/collar_design_labels/50644b2b9de045f2d1...,collar_design_labels,nynnn


In [3]:
# Attribute names that appear in the 'class' column of the annotations.
# The order matters: later cells pick an attribute by position.
classes = [
    'collar_design_labels',
    'neckline_design_labels',
    'skirt_length_labels',
    'sleeve_length_labels',
    'neck_design_labels',
    'coat_length_labels',
    'lapel_design_labels',
    'pant_length_labels',
]

In [4]:
# Pick the attribute to train on and keep only its annotation rows.
# The index is renumbered from 0 because later cells look rows up by
# integer position (df_load['label'][i]).
cur_class = classes[0]
is_cur_class = df_train['class'] == cur_class
df_load = df_train[is_cur_class].copy().reset_index(drop=True)

print('{0}: {1}'.format(cur_class, len(df_load)))
df_load.head()

collar_design_labels: 8393


Unnamed: 0,image_id,class,label
0,Images/collar_design_labels/4d8a38b29930a403e5...,collar_design_labels,nnynn
1,Images/collar_design_labels/bd0981f231180d2b00...,collar_design_labels,nynnn
2,Images/collar_design_labels/26937e1724feadfe39...,collar_design_labels,ynnnn
3,Images/collar_design_labels/cf4140ec542887270f...,collar_design_labels,nynnn
4,Images/collar_design_labels/50644b2b9de045f2d1...,collar_design_labels,nynnn


In [5]:
# Both values come from the length of the first label string (e.g. 'nnynn'
# has length 5): label_length drives how many label folders are created,
# n_class sizes the model's output layer.
label_length = len(df_load.loc[0, 'label'])
n_class = label_length

In [6]:
# Create the folder layout that the directory-based generators read later:
#   <base_dir>/<train|valid>/<cur_class>/<label index>/
base_dir = './data/base/'
train_dir = os.path.join(base_dir, 'train', cur_class)
valid_dir = os.path.join(base_dir, 'valid', cur_class)
data_sets = ['train', 'valid']

for data_set in data_sets:
    set_dir = os.path.join(base_dir, data_set, cur_class)
    # exist_ok=True makes the cell safe to re-run and avoids the
    # check-then-create race of `if not os.path.exists(...)`.
    os.makedirs(set_dir, exist_ok=True)
    for i in range(label_length):
        # One sub-folder per label position, named '0' .. str(label_length - 1).
        os.makedirs(os.path.join(set_dir, str(i)), exist_ok=True)

In [7]:
# Copy every image of the current attribute into
#   <base_dir>/<train|valid>/<cur_class>/<label index>/<file name>
# The split follows file order (not shuffled): the first 90% of rows go to
# 'train', the remainder to 'valid'.
data_length = len(df_load)

for i in range(data_length):
    tmp_label = df_load['label'][i]
    image_id = df_load['image_id'][i]
    # image_id is a '/'-separated relative path; the last part is the file name.
    jpg_name = image_id.split('/')[-1]

    # The label index is the position of the first 'y' in the label string.
    label_index = tmp_label.find('y')
    if label_index < 0:
        # str.find returns -1 when there is no 'y'. No '-1' folder exists, so
        # copying would fail with FileNotFoundError; skip the row instead.
        print('skip {0}: no "y" in label {1!r}'.format(image_id, tmp_label))
        continue
    label = str(label_index)

    src = os.path.join(base_dir, image_id)
    data_set = 'train' if i < data_length * 0.9 else 'valid'
    dst = os.path.join(base_dir, data_set, cur_class, label, jpg_name)

    # Do not re-copy files left over from a previous run of this cell.
    if not os.path.exists(dst):
        shutil.copyfile(src, dst)

In [8]:
from keras import models
from keras import layers
from keras import optimizers
from keras.applications import ResNet50
from keras import backend as K
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.preprocessing.image import ImageDataGenerator

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [9]:
# Training images are rescaled to [0, 1] and randomly augmented (rotation up
# to 40 degrees, horizontal and vertical flips); validation images are only
# rescaled.
augmentation = dict(
    rotation_range=40,
    horizontal_flip=True,
    vertical_flip=True,
)
train_datagen = ImageDataGenerator(rescale=1./255, **augmentation)
valid__datagen = ImageDataGenerator(rescale=1./255)

In [10]:
# Stream batches from the train/valid folders; both iterators share the same
# square target size, batch size, one-hot ("categorical") labels and shuffling.
image_width = 512
flow_options = dict(
    target_size=(image_width, image_width),
    batch_size=32,
    class_mode="categorical",
    shuffle=True,
)

train_generator = train_datagen.flow_from_directory(train_dir, **flow_options)
valid_generator = valid__datagen.flow_from_directory(valid_dir, **flow_options)

Found 7554 images belonging to 5 classes.
Found 839 images belonging to 5 classes.


In [11]:
# ResNet50 with ImageNet weights and without its top classification layers,
# sized for square RGB inputs of image_width pixels.
conv_base = ResNet50(
    weights='imagenet',
    include_top=False,
    input_shape=(image_width, image_width, 3),
)

In [12]:
# Print the layer-by-layer summary of the pretrained base; the layer names
# shown here are what the freezing step below matches against.
conv_base.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 512, 512, 3)  0                                            
__________________________________________________________________________________________________
conv1_pad (ZeroPadding2D)       (None, 518, 518, 3)  0           input_1[0][0]                    
__________________________________________________________________________________________________
conv1 (Conv2D)                  (None, 256, 256, 64) 9472        conv1_pad[0][0]                  
__________________________________________________________________________________________________
bn_conv1 (BatchNormalization)   (None, 256, 256, 64) 256         conv1[0][0]                      
__________________________________________________________________________________________________
activation

In [13]:
# Only layers whose name starts with 'res5' are left trainable; every other
# layer of the pretrained base is frozen.
for layer in conv_base.layers:
    layer.trainable = re.search(r'^res5', layer.name) is not None

In [14]:
# Classification head on top of the pretrained convolutional base:
# flatten -> 256-unit ReLU layer -> dropout -> n_class-way output.
model = models.Sequential()
model.add(conv_base)
model.add(layers.Flatten())
model.add(layers.Dense(256, activation='relu'))
model.add(layers.Dropout(0.5))
# Each image carries exactly one label (one-hot targets from
# class_mode="categorical") and the model is trained with
# categorical_crossentropy, so the outputs must form a probability
# distribution: softmax, not independent per-class sigmoids.
model.add(layers.Dense(n_class, activation='softmax'))

In [15]:
# Print the full model summary, including trainable vs. non-trainable
# parameter counts, to confirm the freezing above took effect.
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
resnet50 (Model)             (None, 2, 2, 2048)        23587712  
_________________________________________________________________
flatten_1 (Flatten)          (None, 8192)              0         
_________________________________________________________________
dense_1 (Dense)              (None, 256)               2097408   
_________________________________________________________________
dropout_1 (Dropout)          (None, 256)               0         
_________________________________________________________________
dense_2 (Dense)              (None, 5)                 1285      
Total params: 25,686,405
Trainable params: 17,052,165
Non-trainable params: 8,634,240
_________________________________________________________________


In [None]:

# Compile for single-label classification and train from the generators.
model.compile(optimizer=optimizers.Adam(lr=0.001),
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# Derive the step counts from the generators instead of hard-coding them, so
# the cell stays correct when cur_class (and therefore the dataset size) changes.
# Training: number of full batches per epoch (7554 // 32 == 236 for collar_design).
train_steps = max(1, train_generator.samples // train_generator.batch_size)
# Validation: exactly one pass over the validation set (ceil division), rather
# than a fixed 50 steps that would cycle through it almost twice per epoch.
valid_steps = max(
    1,
    (valid_generator.samples + valid_generator.batch_size - 1) // valid_generator.batch_size,
)

# NOTE: the variable name `histroy` (sic) is kept because later cells may refer to it.
histroy = model.fit_generator(
    train_generator,
    steps_per_epoch=train_steps,
    epochs=80,
    validation_data=valid_generator,
    validation_steps=valid_steps
)

Epoch 1/80
