In [None]:
import os
from glob import glob

import numpy as np
from keras.utils import np_utils
from sklearn.datasets import load_files

# 定义函数来加载train，test和validation数据集
def load_dataset(path, num_classes=None):
    """Load image file paths and one-hot labels from a class-per-folder directory.

    Args:
        path: Root directory containing one sub-directory per class, as read by
            ``sklearn.datasets.load_files``.
        num_classes: Width of the one-hot encoding. When ``None`` (the default)
            it is inferred from the number of class folders found under ``path``,
            so the targets always match the dataset instead of a fixed constant.

    Returns:
        A ``(filenames, classes)`` tuple: a NumPy array of file paths and the
        one-hot encoded targets produced by ``np_utils.to_categorical``.
    """
    # Only paths and labels are used here, so skip reading every file's bytes.
    data = load_files(path, load_content=False)
    if num_classes is None:
        num_classes = len(data['target_names'])
    filenames = np.array(data['filenames'])
    classes = np_utils.to_categorical(np.array(data['target']), num_classes)
    return filenames, classes

# Load the train, validation and test datasets
train_files, train_targets = load_dataset('data/train')
valid_files, valid_targets = load_dataset('data/valid')
test_files, test_targets = load_dataset('data/test')

# Print dataset statistics (the messages no longer claim these are dog images)
print('There are %s total images.\n' % (len(train_files) + len(valid_files) + len(test_files)))
print('There are %d training images.' % len(train_files))
print('There are %d validation images.' % len(valid_files))
print('There are %d test images.' % len(test_files))

Using TensorFlow backend.


In [None]:

# Load the list of class names: one sub-directory of data/train per class.
# The folder name is taken with os.path rather than by slicing a fixed number of
# characters, so it does not depend on the length of the "data/train/" prefix.
kind_names = [os.path.basename(os.path.normpath(item)) for item in sorted(glob("data/train/*/"))]
print(kind_names)

In [None]:
from keras.preprocessing import image                  
from tqdm import tqdm

def path_to_tensor(img_path, target_size=(224, 224)):
    """Load one image file as a 4-D tensor with a leading batch axis of size 1.

    Args:
        img_path: Path of the image file to read.
        target_size: ``(height, width)`` the image is resized to while loading.
            Defaults to ``(224, 224)``, the input size used by the models in
            this notebook.

    Returns:
        Array of shape ``(1, height, width, 3)`` built from the loaded RGB image.
    """
    # Load the image with PIL, resized to target_size
    img = image.load_img(img_path, target_size=target_size)
    # Convert the PIL image to a 3-D array of shape (height, width, 3)
    x = image.img_to_array(img)
    # Add the batch axis so single images can be stacked or fed to predict()
    return np.expand_dims(x, axis=0)

def paths_to_tensor(img_paths):
    """Load every image in ``img_paths`` and stack them along the batch axis.

    A tqdm progress bar is shown while the files are read. Each image is
    converted with ``path_to_tensor`` and the results are combined with
    ``np.vstack``.
    """
    tensors = []
    for img_path in tqdm(img_paths):
        tensors.append(path_to_tensor(img_path))
    return np.vstack(tensors)


In [None]:
from PIL import ImageFile                            
# Let PIL load image files that are truncated instead of raising an error
ImageFile.LOAD_TRUNCATED_IMAGES = True

# Data preprocessing for Keras: load each split as float32 and rescale by 1/255
train_tensors, valid_tensors, test_tensors = (
    paths_to_tensor(split_files).astype('float32') / 255
    for split_files in (train_files, valid_files, test_files)
)

In [None]:
from keras.layers import Conv2D, MaxPooling2D, GlobalAveragePooling2D
from keras.layers import Dropout, Flatten, Dense
from keras.models import Sequential

### TODO: define your network architecture
# Three convolution + max-pooling stages (16, 32 and 64 filters), followed by
# global average pooling and a 3-way softmax classifier.
model = Sequential([
    Conv2D(16, kernel_size=2, input_shape=(224, 224, 3), activation='relu'),
    MaxPooling2D(pool_size=2, strides=2),
    Conv2D(32, kernel_size=2, activation='relu'),
    MaxPooling2D(pool_size=2, strides=2),
    Conv2D(64, kernel_size=2, activation='relu'),
    MaxPooling2D(pool_size=2, strides=2),
    GlobalAveragePooling2D(),
    Dense(3, activation='softmax'),
])

model.summary()

In [None]:
## Compile the model: categorical cross-entropy loss, RMSprop optimizer, accuracy metric
model.compile(
    loss='categorical_crossentropy',
    optimizer='rmsprop',
    metrics=['accuracy'],
)

In [None]:
from keras.preprocessing.image import ImageDataGenerator
# Augmentation settings applied to the training images while fitting
augmentation_options = dict(
    rotation_range=180,
    width_shift_range=0.3,
    height_shift_range=0.3,
    zoom_range=0.5,
    horizontal_flip=True,
    vertical_flip=True,
)
datagen = ImageDataGenerator(**augmentation_options)

In [36]:
from keras.callbacks import ModelCheckpoint  

### TODO: set the number of epochs used to train the model

epochs = 20
batch_size = 20

### Do not modify the code below

# Make sure the checkpoint directory exists before training starts
os.makedirs('saved_models', exist_ok=True)

# Keep only the weights that achieve the best validation loss seen so far
checkpointer = ModelCheckpoint(filepath='saved_models/weights.best.from_scratch.hdf5',
                               verbose=1, save_best_only=True)

# steps_per_epoch must be a whole number of batches; round up so the final,
# possibly smaller, batch is still part of every epoch
steps_per_epoch = int(np.ceil(train_tensors.shape[0] / batch_size))

model.fit_generator(datagen.flow(train_tensors, train_targets, batch_size=batch_size),
                    steps_per_epoch=steps_per_epoch,
                    validation_data=(valid_tensors, valid_targets),
                    epochs=epochs, callbacks=[checkpointer], verbose=1)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x7fec0cfff5f8>

In [37]:
## Load the weights that achieved the best validation loss

best_weights_path = 'saved_models/weights.best.from_scratch.hdf5'
model.load_weights(best_weights_path)

In [38]:
# Predict class probabilities for the whole test set in one batched call (instead
# of one predict() call per image) and take the most probable class index per image
predictions = np.argmax(model.predict(test_tensors), axis=1)

# Report test accuracy: percentage of images whose predicted class matches the label
test_accuracy = 100 * np.mean(predictions == np.argmax(test_targets, axis=1))
print('Test accuracy: %.2f%%' % test_accuracy)

Test accuracy: 65.83%


In [62]:
# Collect every file under images/ and show which files were picked up
samples = glob("images/*")
print(samples)
# Load the samples and rescale them the same way as the dataset tensors
sample_batch = paths_to_tensor(samples)
sample_tensors = sample_batch.astype('float32') / 255

 50%|█████     | 5/10 [00:00<00:00, 40.78it/s]

['images/zhi2.jpeg', 'images/cat_3.png', 'images/sample_ROC_curve.png', 'images/cat_2.jpeg', 'images/skin_disease_classes.png', 'images/zhi.jpeg', 'images/ISIC_0014872.jpg', 'images/cat_1.jpeg', 'images/sample_confusion_matrix.png', 'images/ISIC_0013739.jpg']


100%|██████████| 10/10 [00:01<00:00,  9.65it/s]


In [63]:
# Predict the class index of each sample image, one image at a time
sample_predictions = []
for tensor in sample_tensors:
    single_image_batch = np.expand_dims(tensor, axis=0)
    sample_predictions.append(np.argmax(model.predict(single_image_batch)))

# Report the predicted class name next to each sample's path
for sample_path, class_index in zip(samples, sample_predictions):
    print(" %s be predicted as %s " %(sample_path, kind_names[class_index]))

 images/zhi2.jpeg be predicted as nevus 
 images/cat_3.png be predicted as nevus 
 images/sample_ROC_curve.png be predicted as nevus 
 images/cat_2.jpeg be predicted as nevus 
 images/skin_disease_classes.png be predicted as nevus 
 images/zhi.jpeg be predicted as nevus 
 images/ISIC_0014872.jpg be predicted as seborrheic_keratosis 
 images/cat_1.jpeg be predicted as nevus 
 images/sample_confusion_matrix.png be predicted as nevus 
 images/ISIC_0013739.jpg be predicted as nevus 


In [73]:
# The two imported names must be separated by a comma; without it this line is a SyntaxError
from keras.applications.resnet50 import ResNet50, preprocess_input

ValueError: Data is not binary and pos_label is not specified

### 迁移学习（基于 ResNet50 预训练特征）

In [None]:
# The two imported names must be separated by a comma; without it this line is a SyntaxError
from keras.applications.resnet50 import ResNet50, preprocess_input

In [None]:
# ResNet50 with ImageNet weights and without its top classification layers
# (include_top=False), built for 224x224 RGB inputs
ResnetInstance = ResNet50(
    include_top=False,
    weights='imagenet',
    input_shape=(224, 224, 3),
)

In [None]:
# ResNet50's preprocess_input expects pixel values in the 0-255 range, but the
# *_tensors arrays were divided by 255 when they were built, so scale them back
# before preprocessing. The multiplication also yields a fresh array, so
# preprocess_input (which may modify a float array in place) cannot alter the
# tensors used by the from-scratch model.
train_RestnetResult = ResnetInstance.predict(preprocess_input(train_tensors * 255))
valid_RestnetResult = ResnetInstance.predict(preprocess_input(valid_tensors * 255))
test_RestnetResult = ResnetInstance.predict(preprocess_input(test_tensors * 255))

In [None]:
import numpy as np
# np.savez does not create missing directories, so make sure the folder exists first
os.makedirs('bottleneck', exist_ok=True)
# Cache the ResNet50 outputs for the three splits in a single .npz archive
np.savez('bottleneck/DermRestnet50Data.npz', train=train_RestnetResult, valid=valid_RestnetResult, test=test_RestnetResult)

In [None]:
# Read the cached bottleneck features. The context manager closes the .npz archive
# once the three arrays have been loaded into memory.
with np.load('bottleneck/DermRestnet50Data.npz') as bottleneck_features:
    train_resnet50 = bottleneck_features['train']
    valid_resnet50 = bottleneck_features['valid']
    test_resnet50 = bottleneck_features['test']

In [None]:
from keras.models import Sequential
from keras.layers import Dense, Conv2D, Dropout, MaxPooling2D, GlobalAveragePooling2D

# Small classifier trained on top of the cached ResNet50 bottleneck features
resnet50_model = Sequential()
# The input shape comes from the loaded features (the variable is train_resnet50;
# the previous spelling "train_restnet50" raised a NameError)
resnet50_model.add(GlobalAveragePooling2D(input_shape=train_resnet50.shape[1:]))
# Without a non-linearity this layer and the output layer would collapse into a
# single linear map, so the hidden layer uses ReLU
resnet50_model.add(Dense(100, activation='relu'))
resnet50_model.add(Dropout(rate=0.2))
# One output unit per class
resnet50_model.add(Dense(3, activation='softmax'))
resnet50_model.summary()

In [None]:
# Compile the transfer-learning classifier: RMSprop, categorical cross-entropy, accuracy
resnet50_model.compile(
    optimizer='rmsprop',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
from keras.callbacks import ModelCheckpoint
# Make sure the checkpoint directory exists before training starts
os.makedirs('savemodels', exist_ok=True)
# Keep only the weights that achieve the best validation loss seen so far
checkpointer = ModelCheckpoint(filepath='savemodels/resnet50modelbestweight.hdf5', save_best_only=True, verbose=1)
# The keyword is `callbacks` (plural); `callback=` is rejected by fit()
resnet50_model.fit(train_resnet50, train_targets,
                   validation_data=(valid_resnet50, valid_targets),
                   callbacks=[checkpointer], epochs=20, batch_size=20)


In [None]:
# Restore the checkpointed weights with the best validation loss before predicting,
# mirroring the from-scratch workflow; otherwise the last-epoch weights are used
resnet50_model.load_weights('savemodels/resnet50modelbestweight.hdf5')
# Class probabilities for every test image, one row per image
predictions = resnet50_model.predict(test_resnet50)

In [None]:
# Build the submission table: one row per test image with its path and two scores.
# `predictions` was computed on the test features, so the rows are paired with
# test_files (not train_files).
task_1 = predictions[:, 0]
# NOTE(review): task_2 keeps the original formula (sum of the probabilities of
# classes 1 and 2) - confirm this is the score get_results.py expects.
task_2 = predictions[:, 1] + predictions[:, 2]
# column_stack puts the three 1-D arrays side by side; the scores are converted to
# text so that all columns share a string dtype
save_content = np.column_stack([test_files, task_1.astype(str), task_2.astype(str)])
# NOTE(review): header names assumed from the original identifiers; confirm the
# exact spelling/casing against get_results.py.
header = np.array([['Id', 'task_1', 'task_2']])
save_result = np.vstack([header, save_content])

In [None]:
# The keyword is `delimiter`. The table holds text (header and file paths), so every
# cell is written with the '%s' format instead of the default numeric format.
np.savetxt('prediction.csv', save_result, delimiter=',', fmt='%s')

In [None]:
# Run the project's get_results.py script on the prediction.csv file.
# NOTE(review): the trailing 0.4 is passed to the script as an argument; its meaning
# is defined by get_results.py (presumably a threshold - confirm).
%run get_results.py prediction.csv 0.4