In [1]:
import os
import shutil
import cv2
import random
import re
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from PIL import Image

%matplotlib inline

# 数据集目录结构
```
FashionAI-Attributes
├── models/
├── data/
│   ├── base/
│   ├── rank/
│   └── web/
└── solution.ipynb
```

image_demo = cv2.imread('./data/base/Images/coat_length_labels/fff3f9da02b33c0d2619a1dde0914737.jpg')
image_demo.shape

In [2]:
df_train = pd.read_csv('./data/base/Annotations/label.csv', header=None)
df_train.columns = ['image_id', 'class', 'label']
df_train.head()

Unnamed: 0,image_id,class,label
0,Images/collar_design_labels/4d8a38b29930a403e5...,collar_design_labels,nnynn
1,Images/collar_design_labels/bd0981f231180d2b00...,collar_design_labels,nynnn
2,Images/collar_design_labels/26937e1724feadfe39...,collar_design_labels,ynnnn
3,Images/collar_design_labels/cf4140ec542887270f...,collar_design_labels,nynnn
4,Images/collar_design_labels/50644b2b9de045f2d1...,collar_design_labels,nynnn


In [3]:
classes = ['collar_design_labels', 'neckline_design_labels', 'skirt_length_labels', 
           'sleeve_length_labels', 'neck_design_labels', 'coat_length_labels', 'lapel_design_labels', 
           'pant_length_labels']

In [4]:
cur_class = classes[0]
#df_load = df_train[(df_train['class'] == cur_class)].copy()
df_load = df_train[(df_train['class'] == cur_class)].head(1000)
df_load.reset_index(inplace=True)
del df_load['index']

#print('{0}: {1}'.format(cur_class, len(df_load)))
df_load.head()

Unnamed: 0,image_id,class,label
0,Images/collar_design_labels/4d8a38b29930a403e5...,collar_design_labels,nnynn
1,Images/collar_design_labels/bd0981f231180d2b00...,collar_design_labels,nynnn
2,Images/collar_design_labels/26937e1724feadfe39...,collar_design_labels,ynnnn
3,Images/collar_design_labels/cf4140ec542887270f...,collar_design_labels,nynnn
4,Images/collar_design_labels/50644b2b9de045f2d1...,collar_design_labels,nynnn


In [5]:
n_class = len(df_load['label'][0])

In [6]:
n_class

5

In [7]:
len(df_load)

1000

In [8]:
base_dir = './data/base/'
train_dir = os.path.join(base_dir, 'train', cur_class)
valid_dir = os.path.join(base_dir, 'valid', cur_class)
data_sets = ['train','valid']

for data_set in data_sets:
    set_dir = os.path.join(base_dir, data_set, cur_class)
    if not os.path.exists(set_dir):
        os.makedirs(set_dir)
    for i in range(n_class):
        label_dir = os.path.join(set_dir, str(i))
        if not os.path.exists(label_dir):
            os.makedirs(label_dir)

In [9]:
data_length = len(df_load)

for i in range(data_length):
    tmp_label = df_load['label'][i]
    image_id = df_load['image_id'][i]
    image_id_array = image_id.split('/')
    jpg_name = image_id_array[len(image_id_array) - 1]
    label = str(tmp_label.find('y'))
    
    src = os.path.join(base_dir, image_id)
    
    if i < data_length * 0.9: 
        dst = os.path.join(base_dir, 'train', cur_class, label, jpg_name)
    else:
        dst = os.path.join(base_dir, 'valid', cur_class, label, jpg_name)
    if not os.path.exists(dst):
        shutil.copyfile(src, dst)

In [10]:
from keras import models
from keras import layers
from keras import optimizers
from keras.applications import ResNet50
from keras import backend as K
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.preprocessing.image import ImageDataGenerator

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [11]:
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=40
) 
valid__datagen = ImageDataGenerator(rescale=1./255)

In [12]:
#image_width = 512
image_width = 224
train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=(image_width, image_width),
    batch_size=20,
    shuffle=True
)
valid_generator = valid__datagen.flow_from_directory(
    valid_dir,
    target_size=(image_width, image_width)
)

Found 900 images belonging to 5 classes.
Found 100 images belonging to 5 classes.


In [13]:
conv_base = ResNet50(weights='imagenet', include_top=False, input_shape=(image_width, image_width, 3))

In [None]:
conv_base.summary()

In [14]:
for layer in conv_base.layers:
    if re.search(r'^res5', layer.name) is not None:
        layer.trainable = True
    else:
         layer.trainable = False

In [15]:
model = models.Sequential() 
model.add(conv_base) 
model.add(layers.Flatten())
model.add(layers.Dense(256, activation='relu')) 
model.add(layers.Dropout(0.5))
model.add(layers.Dense(n_class, activation='softmax'))

In [None]:
model.summary()

In [16]:
model.compile(optimizer=optimizers.Adam(lr=0.001),
              loss='categorical_crossentropy',
              metrics=['accuracy'])

histroy = model.fit_generator(
    train_generator,
    steps_per_epoch=45,
    epochs=10,
    validation_data=valid_generator
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [17]:
model.save("/models/train_1000.h5")

In [18]:
import matplotlib.pyplot as plt

history 
acc = history.history['acc'] 
val_acc = history.history['val_acc'] 
loss = history.history['loss'] 
val_loss = history.history['val_loss']

epochs = range(1, len(acc) + 1)

plt.plot(epochs, acc, 'bo', label='Training acc') 
plt.plot(epochs, val_acc, 'b', label='Validation acc') 
plt.title('Training and validation accuracy') 
plt.legend()

plt.figure()

plt.plot(epochs, loss, 'bo', label='Training loss') 
plt.plot(epochs, val_loss, 'b', label='Validation loss') 
plt.title('Training and validation loss') 
plt.legend()

plt.show()

NameError: name 'history' is not defined

In [None]:
from keras.preprocessing import image


def load_image(img_path, show=False):

    img = image.load_img(img_path, target_size=(224, 224))
    img_tensor = image.img_to_array(img)                    # (height, width, channels)
    img_tensor = np.expand_dims(img_tensor, axis=0)         # (1, height, width, channels), add a dimension because the model expects this shape: (batch_size, height, width, channels)
    img_tensor /= 255.                                      # imshow expects values in the range [0, 1]

    if show:
        plt.imshow(img_tensor[0])                           
        plt.axis('off')
        plt.show()

    return img_tensor

df_new = df_train[(df_train['class'] == cur_class)].head(2000)
image_id = df_new['image_id'][1200]
new_image_label = df_new['label'][1200]

img_path = os.path.join(base_dir, image_id)
new_image = load_image(img_path)
pred = model.predict(new_image) 
# test_data = os.path.join('data/rank/Images', cur_class)

# generator = datagen.flow_from_directory(
#         'data/test',
#         target_size=(150, 150),
#         batch_size=16,
#         class_mode=None,  # only data, no labels
#         shuffle=False)  # keep data in same order as labels

# probabilities = model.predict_generator(generator, )

In [None]:
pred

In [None]:
new_image_label