## 基础信息及配置

In [1]:
import sys; print('Python %s on %s' % (sys.version, sys.platform))

sys.path.extend(['./cnn_finetune','./lib'])

from load_cifar import load_cifar10_data, load_cifar100_data

import numpy as np; print('numpy:', np.__version__)
import sklearn; print('sklearn:', sklearn.__version__)
import tensorflow as tf; print('tensorflow', tf.__version__)
import keras; print('keras', keras.__version__)

import os
import tensorflow as tf
from keras.backend.tensorflow_backend import set_session

from lib.DataGenerator import DataGenerator

# set gpu visible environment variable
os.environ["CUDA_VISIBLE_DEVICES"] = "1"

Python 3.5.2 (default, Nov 12 2018, 13:43:14) 
[GCC 5.4.0 20160609] on linux


Using TensorFlow backend.


numpy: 1.16.2
sklearn: 0.20.3
tensorflow 1.13.1
keras 2.2.4


In [2]:
# TensorFlow session configuration shared with the Keras backend.
#
# CPU-only alternative, kept for reference (`num_cores` is not defined in the
# visible cells):
# config = tf.ConfigProto(intra_op_parallelism_threads=num_cores, inter_op_parallelism_threads=num_cores, allow_soft_placement=True, device_count={'CPU': 4})
#
# Optional cap on the fraction of GPU memory this process may use:
# config.gpu_options.per_process_gpu_memory_fraction = 0.5
config = tf.ConfigProto()

# Turn on allow_growth for the GPU options of this config.
config.gpu_options.allow_growth = True

# Create the session from the config and register it with Keras.
session = tf.Session(config=config)
set_session(session)

## 加载模型

In [3]:
from keras.optimizers import SGD, Adam
from keras.callbacks import ModelCheckpoint, Callback, EarlyStopping, ReduceLROnPlateau, TensorBoard, CSVLogger


from cnn_finetune.resnet_152 import resnet152_model

In [4]:
# Batch size handed to every DataGenerator created in this notebook.
batch_size = 16

In [5]:
def get_model(model_name, img_rows=224, img_cols=224, channel=3,
              num_classes=2019, fine_tuning=False, load_weights=False):
    """Build a network by calling one of the model constructors.

    The values that used to be hard-coded inside the function are now keyword
    parameters with the same defaults, so ``get_model(constructor)`` behaves
    exactly as before.

    Args:
        model_name: The model constructor itself (a callable, despite the
            parameter name), e.g. ``resnet152_model``. It is invoked as
            ``model_name(img_rows, img_cols, channel, num_classes=...,
            fine_tuning=..., load_weights=...)``.
        img_rows: Input height forwarded to the constructor.
        img_cols: Input width forwarded to the constructor.
        channel: Number of input channels forwarded to the constructor.
        num_classes: Forwarded as ``num_classes``.
        fine_tuning: Forwarded as ``fine_tuning``.
        load_weights: Forwarded as ``load_weights``.

    Returns:
        Whatever the constructor returns (the built model).
    """
    return model_name(img_rows, img_cols, channel,
                      num_classes=num_classes,
                      fine_tuning=fine_tuning,
                      load_weights=load_weights)

## 运用val数据集进行验证

In [6]:
def validate_val_dataset(model):
    """Compile ``model`` and evaluate it on the validation generator.

    The model is compiled with an Adam optimizer, categorical cross-entropy
    loss and the ``acc`` metric, then evaluated over every batch of a
    non-shuffled ``DataGenerator(file_type='val', ...)``.

    Args:
        model: Object exposing ``compile`` and ``evaluate_generator``.

    Returns:
        The value returned by ``evaluate_generator``; it is also printed.
    """
    optimizer = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-8, decay=0.0, amsgrad=False)
    model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['acc'])

    # Validation data; the keyword spelling `corp` is kept exactly as in the
    # original call (presumably "crop" -- verify against lib.DataGenerator).
    generator = DataGenerator(file_type='val', corp=True, batch_size=batch_size, shuffle=False)
    n_steps = len(generator)

    # One evaluation pass over all batches of the generator.
    eval_options = dict(steps=n_steps, workers=60, use_multiprocessing=True, verbose=1)
    scores = model.evaluate_generator(generator, **eval_options)
    print(scores)
    return scores

## 预测test数据集

In [7]:
def predict(model, data_generator):
    """Run ``model`` over every batch produced by ``data_generator``.

    Args:
        model: Object exposing ``predict_generator``.
        data_generator: Sized generator that also exposes
            ``image_iterator.filenames``.

    Returns:
        tuple: ``(filenames, predictions)`` where ``filenames`` is
        ``data_generator.image_iterator.filenames`` and ``predictions`` is
        the result of ``model.predict_generator``.
    """
    n_steps = len(data_generator)
    outputs = model.predict_generator(
        data_generator,
        steps=n_steps,
        workers=60,
        use_multiprocessing=True,
        verbose=1,
    )
    return data_generator.image_iterator.filenames, outputs

## 预测过程

In [8]:
!ls -l models

total 1259764
-rw-r--r-- 1 root root 127695716 May  8 08:03 denesnet_161_20190508_2.9911_0.3629.hd5
-rw-r--r-- 1 root root 127695716 May 12 12:29 denesnet_161_4.76-0.18-model.hd5
-rw-r--r-- 1 root root 127393236 May 12 12:29 denesnet_161_4.76-0.18-weights.hd5
-rw-r--r-- 1 root root  33552480 May  7 15:29 densenet121_weights_tf.h5
drwxr-xr-x 4 root root      4096 May 14 03:44 densenet161
drwxr-xr-x 3 root root      4096 May 12 04:49 densenet161_0512_val未收敛
drwxr-xr-x 3 root root      4096 May 14 19:21 densenet161_former_3_GPU
drwxr-xr-x 3 root root      4096 May  9 16:32 densenet161_train_1
drwxr-xr-x 4 root root      4096 May  8 12:40 densenet161_val
drwxr-xr-x 3 root root      4096 May 10 00:39 densenet161_val_2
drwxr-xr-x 3 root root      4096 May 10 01:21 densenet161_val_3
drwxr-xr-x 3 root root      4096 May 10 08:48 densenet161_verifiy
-rw-r--r-- 1 root root 117815848 May  7 15:29 densenet161_weights_tf.h5
-rw-r--r-- 1 root root  59218328 May  7 15:29 densenet169_wei

In [None]:
!ls -l models/resnet_152

### resnet 152 

In [8]:
# Build the ResNet-152 network through get_model (which passes load_weights=False
# by default); the trained checkpoint is loaded in a later cell.
resnet152_predict_model = get_model(resnet152_model)

Instructions for updating:
Colocations handled automatically by placer.
load_weights False
resnet152_model: classes: 2019 fine-tuning: False
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
data (InputLayer)               (None, 224, 224, 3)  0                                            
__________________________________________________________________________________________________
conv1_zeropadding (ZeroPadding2 (None, 230, 230, 3)  0           data[0][0]                       
__________________________________________________________________________________________________
conv1 (Conv2D)                  (None, 112, 112, 64) 9408        conv1_zeropadding[0][0]          
__________________________________________________________________________________________________
bn_conv1 (BatchNormalization)   (None, 112, 112, 64) 256         co

res4b5_relu (Activation)        (None, 14, 14, 1024) 0           res4b5[0][0]                     
__________________________________________________________________________________________________
res4b6_branch2a (Conv2D)        (None, 14, 14, 256)  262144      res4b5_relu[0][0]                
__________________________________________________________________________________________________
bn4b6_branch2a (BatchNormalizat (None, 14, 14, 256)  1024        res4b6_branch2a[0][0]            
__________________________________________________________________________________________________
scale4b6_branch2a (Scale)       (None, 14, 14, 256)  512         bn4b6_branch2a[0][0]             
__________________________________________________________________________________________________
res4b6_branch2a_relu (Activatio (None, 14, 14, 256)  0           scale4b6_branch2a[0][0]          
__________________________________________________________________________________________________
res4b6_bra

In [10]:
# Load the trained ResNet-152 checkpoint into the model built above.
weights_path="./models/imaterialist-e01-2.3851.hd5"
resnet152_predict_model.load_weights(weights_path)

In [11]:
# Sanity-check the loaded checkpoint on the validation set; the result of
# evaluate_generator is printed and returned (loss first, then the 'acc' metric).
validate_val_dataset(resnet152_predict_model)

Found 10095 images belonging to 2019 classes.
[2.3850735998082833, 0.43605745419409675]


[2.3850735998082833, 0.43605745419409675]

In [12]:
# Generator over the images to predict. shuffle=False keeps the batch order fixed,
# presumably so predictions line up with image_iterator.filenames -- verify in lib.DataGenerator.
test_data_generator = DataGenerator(file_type='predict', corp=True, batch_size=batch_size, shuffle=False)

Found 90833 images belonging to 1 classes.


In [13]:
# Run ResNet-152 inference over the test generator; predict() returns the
# generator's filenames together with the model outputs.
rest152_filenames, rest152_pred = predict(resnet152_predict_model, test_data_generator)



In [14]:
import os

# Folder that receives the intermediate ResNet-152 outputs for this checkpoint.
directory = "./submissions/middle/resnet152/imaterialist-e01-2.3851"
# exist_ok=True keeps the cell re-runnable: without it a second run raises
# FileExistsError because the folder was already created by the first run.
os.makedirs(directory, exist_ok=True)
np.save(directory+"/resnet152_pred.npy",rest152_pred)
np.save(directory+"/resnet152_filenames.npy",rest152_filenames)

### densenet 161 

In [8]:
# DenseNet-161 constructor from the local cnn_finetune package.
from cnn_finetune.densenet161_weights_path import densenet161_model

# Build the DenseNet-161 network through get_model; the trained checkpoint is
# loaded in a later cell.
densenet161_predict_model = get_model(densenet161_model)

Instructions for updating:
Colocations handled automatically by placer.
dense161_model: classes: 2019 fine-tuning: False
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
data (InputLayer)               (None, 224, 224, 3)  0                                            
__________________________________________________________________________________________________
conv1_zeropadding (ZeroPadding2 (None, 230, 230, 3)  0           data[0][0]                       
__________________________________________________________________________________________________
conv1 (Conv2D)                  (None, 112, 112, 96) 14112       conv1_zeropadding[0][0]          
__________________________________________________________________________________________________
conv1_bn (BatchNormalization)   (None, 112, 112, 96) 384         conv1[0][0]           

relu5_6_x1 (Activation)         (None, 7, 7, 1296)   0           conv5_6_x1_scale[0][0]           
__________________________________________________________________________________________________
conv5_6_x1 (Conv2D)             (None, 7, 7, 192)    248832      relu5_6_x1[0][0]                 
__________________________________________________________________________________________________
conv5_6_x2_bn (BatchNormalizati (None, 7, 7, 192)    768         conv5_6_x1[0][0]                 
__________________________________________________________________________________________________
conv5_6_x2_scale (Scale)        (None, 7, 7, 192)    384         conv5_6_x2_bn[0][0]              
__________________________________________________________________________________________________
relu5_6_x2 (Activation)         (None, 7, 7, 192)    0           conv5_6_x2_scale[0][0]           
__________________________________________________________________________________________________
conv5_6_x2

In [9]:
# Load the trained DenseNet-161 checkpoint into the model built above.
weights_path="./models/densenet161_newtrain3/imaterialist-e01-1.6757.hd5"
densenet161_predict_model.load_weights(weights_path)

In [10]:
# Sanity-check the loaded checkpoint on the validation set; the result of
# evaluate_generator is printed and returned (loss first, then the 'acc' metric).
validate_val_dataset(densenet161_predict_model)

Found 10095 images belonging to 2019 classes.
[1.6756566318443, 0.5551263001840147]


[1.6756566318443, 0.5551263001840147]

In [11]:
# Generator over the images to predict. shuffle=False keeps the batch order fixed,
# presumably so predictions line up with image_iterator.filenames -- verify in lib.DataGenerator.
test_data_generator = DataGenerator(file_type='predict', corp=True, batch_size=batch_size, shuffle=False)

Found 90833 images belonging to 1 classes.


In [12]:
# Run DenseNet-161 inference over the test generator; predict() returns the
# generator's filenames together with the model outputs.
densenet161_filenames, densenet161_pred = predict(densenet161_predict_model, test_data_generator)



In [13]:
import os

# Folder that receives the intermediate DenseNet-161 outputs for this checkpoint.
directory = "./submissions/middle/densenet161_newtrain3/imaterialist-e01-1.6757"
# exist_ok=True keeps the cell re-runnable: without it a second run raises
# FileExistsError because the folder was already created by the first run.
os.makedirs(directory, exist_ok=True)
np.save(directory+"/densenet161_pred.npy",densenet161_pred)
np.save(directory+"/densenet161_filenames.npy",densenet161_filenames)

## 融合模型答案

In [15]:
import numpy as np
import pandas as pd

In [16]:
def _load_saved_predictions(directory, prefix):
    """Load ``<directory>/<prefix>_filenames.npy`` and ``<directory>/<prefix>_pred.npy``.

    Returns:
        tuple: ``(filenames, predictions)`` as returned by ``np.load``.
    """
    saved_filenames = np.load(directory + "/" + prefix + "_filenames.npy")
    saved_predictions = np.load(directory + "/" + prefix + "_pred.npy")
    return saved_filenames, saved_predictions


# Model a: DenseNet-161 checkpoint.
a_dir = "./submissions/middle/densenet161/imaterialist-e01-1.8250"
a_filenames, a_predictions = _load_saved_predictions(a_dir, "densenet161")

# Model b: DenseNet-161 checkpoint (an alternative run is kept for reference).
# b_dir = "./submissions/middle/densenet161_newtrain2/imaterialist-e01-1.6591"
b_dir = "./submissions/middle/densenet161_newtrain3/imaterialist-e01-1.6757"
b_filenames, b_predictions = _load_saved_predictions(b_dir, "densenet161")

# Model c: ResNet-152 checkpoint.
c_dir = "./submissions/middle/resnet152/imaterialist-e01-2.1856"
c_filenames, c_predictions = _load_saved_predictions(c_dir, "resnet152")

# Model d: ResNet-152 checkpoint.
d_dir = "./submissions/middle/resnet152/imaterialist-e01-2.3851"
d_filenames, d_predictions = _load_saved_predictions(d_dir, "resnet152")

In [17]:
# Number of entries in model a's saved filename list.
print(len(a_filenames))

90833


In [18]:
# Element-wise comparison of the filename lists saved for models a and b.
answer = a_filenames == b_filenames

In [19]:
# `answer` holds the element-wise comparison of a_filenames and b_filenames.
# Report where the two lists disagree (index plus both names) instead of a
# bare "False", then stop: the ensemble adds prediction rows by position, so
# misaligned filenames would silently mix scores of different images.
mismatch_idx = [idx for idx, same in enumerate(answer) if not same]
for idx in mismatch_idx[:10]:
    print(idx, a_filenames[idx], b_filenames[idx])
assert not mismatch_idx, "%d filenames differ between model a and model b" % len(mismatch_idx)

In [20]:
# The ensemble adds prediction rows by position, so models a and c must have
# scored the images in the same order.
# Compare shapes first: with different shapes `==` does not give an
# element-wise result and iterating over it fails with an unrelated error.
assert np.shape(a_filenames) == np.shape(c_filenames), \
    "model a and model c saved a different number of filenames"
answer = a_filenames == c_filenames
# Show the first few disagreements with index and both names, then stop.
mismatch_idx = np.flatnonzero(~answer)
for idx in mismatch_idx[:10]:
    print(idx, a_filenames[idx], c_filenames[idx])
assert mismatch_idx.size == 0, "%d filenames differ between model a and model c" % mismatch_idx.size

In [21]:
# The ensemble adds prediction rows by position, so models a and d must have
# scored the images in the same order.
# Compare shapes first: with different shapes `==` does not give an
# element-wise result and iterating over it fails with an unrelated error.
assert np.shape(a_filenames) == np.shape(d_filenames), \
    "model a and model d saved a different number of filenames"
answer = a_filenames == d_filenames
# Show the first few disagreements with index and both names, then stop.
mismatch_idx = np.flatnonzero(~answer)
for idx in mismatch_idx[:10]:
    print(idx, a_filenames[idx], d_filenames[idx])
assert mismatch_idx.size == 0, "%d filenames differ between model a and model d" % mismatch_idx.size

### 1. 直接相加

In [41]:
# Two-model sum (a + b), computed twice so that `new_pred` and `new_pred_temp`
# are independent arrays; `new_pred_temp` is the partial ensemble fed to the
# top-N mix further down.
new_pred = np.add(a_predictions, b_predictions)
new_pred_temp = np.add(a_predictions, b_predictions)

In [22]:
# Plain sum of the scores of all four models, accumulated left to right
# (a, then b, c and d).
new_pred = a_predictions
for extra_pred in (b_predictions, c_predictions, d_predictions):
    new_pred = new_pred + extra_pred

In [23]:
# Total of the first image's fused scores. If every model outputs a probability
# distribution this should be close to the number of models summed -- assumption, confirm.
print(sum(new_pred[0]))

3.9999999021615826


### 2. 取topN相加

In [14]:
def get_topN(pred, N):
    """Return the indices of the ``N`` largest entries of ``pred``, best first.

    The earlier implementation marked picked entries with ``-1``. That made an
    already-picked index win again whenever the remaining scores were below
    ``-1``, and produced duplicate indices when ``N`` exceeded ``len(pred)``.
    Sorting once avoids both problems.

    Args:
        pred: 1-D array-like of numeric scores. It is not modified.
        N: Number of indices wanted. Values larger than ``len(pred)`` are
            clamped to ``len(pred)``; values <= 0 give an empty list.

    Returns:
        list: Integer indices ordered by descending score. Equal scores keep
        the lower index first (the order repeated ``argmax`` would give).
    """
    scores = np.asarray(pred, dtype=float)
    # A stable sort of the negated scores yields descending order while
    # preserving the original (ascending-index) order among ties.
    order = np.argsort(-scores, kind='stable')
    return list(order[:max(N, 0)])

In [15]:
def mixTopN(pred_a, pred_b, N):
    """Fuse two score vectors, keeping only each vector's top-``N`` entries.

    Every index that is in the top ``N`` of ``pred_a`` contributes
    ``pred_a[index]`` and every index in the top ``N`` of ``pred_b``
    contributes ``pred_b[index]``; all other positions stay 0.

    Args:
        pred_a: 1-D score vector of the first model.
        pred_b: 1-D score vector of the second model, same length as ``pred_a``.
        N: How many top entries of each vector to keep.

    Returns:
        numpy.ndarray: Float vector with the same length as the inputs.

    Raises:
        ValueError: If the two vectors have different lengths.
    """
    if len(pred_a) != len(pred_b):
        raise ValueError(
            "pred_a and pred_b must have the same length, got %d and %d"
            % (len(pred_a), len(pred_b)))

    # Size the result from the inputs instead of the hard-coded 2019 classes.
    new_pred = np.zeros(len(pred_a), dtype=float)
    for scores in (pred_a, pred_b):
        for idx in get_topN(scores, N):
            new_pred[idx] += scores[idx]
    return new_pred

In [16]:
# Top-5 fusion, one fused vector per test image: the (a + b) partial ensemble
# held in `new_pred_temp` is mixed with model c's scores.
new_pred = [
    mixTopN(new_pred_temp[idx], c_predictions[idx], 5)
    for idx in range(len(a_filenames))
]

In [17]:
# Total of the first image's fused scores after the top-N mix.
print(sum(new_pred[0]))

0.818539647385478


## top3获取并且包装为 csv 格式

In [24]:
import copy

In [44]:
# Single-model path: package the DenseNet-161 outputs on their own.
# Copies are taken so later cells never touch the original arrays.
predictions = copy.deepcopy(densenet161_pred)
filenames = copy.deepcopy(densenet161_filenames)

In [25]:
# Ensemble path: package the fused scores in `new_pred`, using model a's filename list.
# Copies are taken so later cells never touch the original arrays.
predictions = copy.deepcopy(new_pred)
filenames = copy.deepcopy(a_filenames)

In [26]:
# Build the submission rows: [image file name, "best second third" class indices].
#
# The per-image scores are bound to `scores`, not `predict`, so the predict()
# helper defined earlier in the notebook is no longer overwritten by this cell.
assert len(filenames) == len(predictions), "filenames and predictions are misaligned"

top3 = []
for filename, scores in zip(filenames, predictions):
    # Stable sort of the negated scores = descending order with ties resolved
    # toward the lower class index, i.e. what repeated argmax would select,
    # without relying on a -1 sentinel.
    ranked = np.argsort(-np.asarray(scores, dtype=float), kind='stable')[:3]
    # Keep only the file name, dropping any directory prefix.
    image_id = filename.split('/')[-1]
    top3.append([image_id, ' '.join(str(pos) for pos in ranked)])

In [27]:
# Number of submission rows built so far.
print(len(top3))

90833


In [28]:
# The checker cell expects 90834 rows while the generator found 90833 images,
# so one image gets a placeholder answer. Presumably this file could not be
# read by the generator -- TODO confirm.
# The membership test keeps the cell idempotent: re-running it must not add
# the same id twice.
missing_image_id = "e156a54a448d0e120593bbe1e50470f5.jpg"
if all(row[0] != missing_image_id for row in top3):
    top3.append([missing_image_id, "0 1 2"])

In [29]:
import csv

# Destination of the submission file and its two column names.
submission_filename = "./submissions/submission_0602_result_4.csv"
headers = ['id', 'predicted']
result = top3

# newline='' lets the csv module control line endings itself.
with open(submission_filename, "w", newline='') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(headers)
    # One [id, predicted] row per image, in list order.
    writer.writerows(result)

In [30]:
import csv
import re

# Format check for the submission file written above: the header must be
# ['id', 'predicted'], every row must have two fields, the second field must
# be exactly three space-separated integers, and the row count must equal
# NUM_TEST.

# A prediction is exactly 'int int int'. Raw string: "\ " is an invalid escape
# in a normal string literal.
pattern = re.compile(r"\d+ \d+ \d+")

check_flag = True

# Expected number of data rows (header excluded).
NUM_TEST = 90834

# newline='' is what the csv module expects for files it reads.
with open(submission_filename, newline='') as csvfile:
    print(submission_filename)
    csv_reader = csv.reader(csvfile)
    # Default of None: an empty file is reported as a wrong header instead of
    # raising StopIteration.
    header = next(csv_reader, None)

    # judge the header
    if header == ['id', 'predicted']:
        print("Header is right.")
    else:
        check_flag = False
        print("Wrong header:", header)
        print("Accepted header is ['id', 'predicted']")

    # judge each answer
    cnt = 0
    for each in csv_reader:
        cnt = cnt + 1
        if len(each) != 2:
            check_flag = False
            print(cnt)
            print(each)
            print("Accepted result is [id, class]")
            # No well-defined prediction column to validate (a blank line
            # yields an empty row, where each[-1] would raise IndexError).
            continue
        # fullmatch, not match: match only anchors the start, so values such
        # as "1 2 3 4" or "1 2 3x" used to be accepted.
        if not pattern.fullmatch(each[-1]):
            check_flag = False
            print(cnt)
            print(each)
            print("Wrong result format:", each[-1])

    if NUM_TEST != cnt:
        check_flag = False
        print("Wrong number of submission file.")

    if check_flag:
        print("You can submit the result")
    else:
        print("!!! Check the result again")

./submissions/submission_0602_result_4.csv
Header is right.
You can submit the result
