In [1]:
import pandas as pd
import numpy as np
import pickle
from IPython.display import display
from tqdm.notebook import tqdm

from tensorflow import keras
from tensorflow.keras.preprocessing import image
from tensorflow.keras import models
from tensorflow.keras.applications.efficientnet import preprocess_input

from sklearn.metrics import roc_auc_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

## CFG

In [2]:
class CFG:
    """Notebook-wide settings, read everywhere as class attributes (``CFG.<name>``)."""

    # When True, the test frame is cut down to a few rows and the model
    # artifacts are read from the local ./models/ directory.
    debug = True

    # Every directory ends with "/" because paths are built by plain string
    # concatenation inside f-strings.
    input_dir = "../input/efficientnet_output_straight/"
    dataset_dir = "../input/ranzcr-clip-catheter-line-classification/"
    if debug:
        models_dir = "./models/"
    else:
        models_dir = "../input/efficientnet-lightgbm-models/"

    # NOTE(review): n_folds and num_features are not read by any cell in this
    # section -- presumably carried over from the training notebook; confirm.
    n_folds = 4
    num_features = 100

    # Label columns, in the order used for the per-target model files and
    # for the submission columns.
    target_cols = [
        'ETT - Abnormal',
        'ETT - Borderline',
        'ETT - Normal',
        'NGT - Abnormal',
        'NGT - Borderline',
        'NGT - Incompletely Imaged',
        'NGT - Normal',
        'CVC - Abnormal',
        'CVC - Borderline',
        'CVC - Normal',
        'Swan Ganz Catheter Present',
    ]

In [3]:
# Load the submission template: one row per test study (StudyInstanceUID)
# followed by one placeholder column per target.
test=pd.read_csv(f"{CFG.dataset_dir}sample_submission.csv")

if CFG.debug:
    # Debug runs keep only a handful of rows. The split is seeded so that
    # Restart & Run All selects the same rows every time; the larger part of
    # the split is not needed and is discarded.
    n_samples=10
    _unused,test=train_test_split(test,test_size=n_samples,random_state=42)

display(test)

Unnamed: 0,StudyInstanceUID,ETT - Abnormal,ETT - Borderline,ETT - Normal,NGT - Abnormal,NGT - Borderline,NGT - Incompletely Imaged,NGT - Normal,CVC - Abnormal,CVC - Borderline,CVC - Normal,Swan Ganz Catheter Present
3219,1.2.826.0.1.3680043.8.498.12126834418646094795...,0,0,0,0,0,0,0,0,0,0,0
3369,1.2.826.0.1.3680043.8.498.25679526645412462862...,0,0,0,0,0,0,0,0,0,0,0
1206,1.2.826.0.1.3680043.8.498.96154222694757788294...,0,0,0,0,0,0,0,0,0,0,0
3032,1.2.826.0.1.3680043.8.498.42051546658503359881...,0,0,0,0,0,0,0,0,0,0,0
627,1.2.826.0.1.3680043.8.498.11665192673224035444...,0,0,0,0,0,0,0,0,0,0,0
212,1.2.826.0.1.3680043.8.498.12361605029931768086...,0,0,0,0,0,0,0,0,0,0,0
2319,1.2.826.0.1.3680043.8.498.13233430745466640843...,0,0,0,0,0,0,0,0,0,0,0
741,1.2.826.0.1.3680043.8.498.11086677407703525147...,0,0,0,0,0,0,0,0,0,0,0
1821,1.2.826.0.1.3680043.8.498.11397896435572402386...,0,0,0,0,0,0,0,0,0,0,0
1178,1.2.826.0.1.3680043.8.498.67238847089694621353...,0,0,0,0,0,0,0,0,0,0,0


## EfficientNetB7による出力の取得

In [4]:
# Load the saved Keras model (HDF5 file) from CFG.models_dir. Its predictions
# on the test images are what the scaler and the dense head consume later on.
nn_model=models.load_model(f"{CFG.models_dir}efficientnetB7.h5")



In [5]:
def _load_image_array(uid):
    """Read the test JPEG for ``uid`` and return it as an array resized to 256x256."""
    img_pil=image.load_img(f"{CFG.dataset_dir}test/{uid}.jpg", target_size=(256,256)) # load as a PIL image and resize
    return image.img_to_array(img_pil)


# Score the images in small batches instead of calling ``predict`` once per
# image: ``predict`` carries a fixed per-call overhead and is not meant to be
# invoked inside a loop over single samples. Batches stay small so the whole
# test set never has to sit in memory at once.
batch_size=32
uids=test["StudyInstanceUID"].tolist()

pred_list=[]
for start in tqdm(range(0,len(uids),batch_size)):
    batch=np.stack([_load_image_array(uid) for uid in uids[start:start+batch_size]])
    pred_list.append(nn_model.predict_on_batch(preprocess_input(batch)))

# One row per test image, in the same order as ``test``.
nn_pred=np.concatenate(pred_list,axis=0)

  0%|          | 0/10 [00:00<?, ?it/s]

## LightGBMによる推論

### 正規化する

In [6]:
# Load the pickled scaler. The ``with`` block closes the file handle, which
# the bare ``pickle.load(open(...))`` form leaves open.
# NOTE: pickle.load can execute arbitrary code -- only load files you produced.
with open(f"{CFG.models_dir}minmaxscaler.pickle","rb") as f:
    scaler=pickle.load(f)

# Column 0 is the UID, the remaining columns are the scaled network outputs.
# The index of ``test`` is reset so its rows line up with the fresh 0..n-1
# index of the scaled frame during the column-wise concat.
nn_pred_norm=pd.concat([test.reset_index(drop=True)["StudyInstanceUID"],pd.DataFrame(scaler.transform(nn_pred))],axis=1)

### AutoEncoderによる次元削減

In [7]:
# Load the saved autoencoder and expose its "dense_1" layer as the output of a
# second model, so that predicting with it yields that layer's activations.
autoencoder=models.load_model(f"{CFG.models_dir}autoencoder_splits10")
layer_name="dense_1"
bottleneck_layer=autoencoder.get_layer(layer_name)
hidden_layer_model=models.Model(inputs=autoencoder.input,outputs=bottleneck_layer.output)

# Column 0 of nn_pred_norm holds the StudyInstanceUID; everything after it is
# the scaled feature vector fed to the network.
scaled_features=nn_pred_norm.iloc[:,1:]
lgb_features=pd.DataFrame(hidden_layer_model.predict(scaled_features))
display(lgb_features)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,90,91,92,93,94,95,96,97,98,99
0,0.0,4.610896,2.287433,1.079081,3.113541,2.699734,2.507464,0.0,4.774673,2.344668,...,0.0,1.375791,1.445181,2.725537,3.927274,3.677275,2.129272,2.285991,2.836764,3.729412
1,0.0,6.321053,2.028604,9.139301,6.072942,4.005536,6.073335,0.0,3.072227,4.146487,...,0.0,0.677671,8.266397,4.416893,3.822499,3.787554,5.842286,5.339317,6.235743,5.241741
2,0.0,4.349279,2.15665,6.099969,4.143853,4.484516,1.819214,0.0,3.188516,4.143863,...,0.0,1.061399,3.660784,3.562563,4.687096,3.942639,6.152863,4.082253,6.297458,2.862765
3,0.0,3.295934,2.945225,4.809831,5.967846,4.727466,5.091046,0.0,3.888574,4.716721,...,0.0,0.955105,4.463363,5.155314,1.467798,3.610177,3.089406,5.189339,2.285418,5.03569
4,0.0,3.627803,1.614951,2.115899,4.266964,3.094385,2.092641,0.0,5.469833,4.76927,...,0.0,1.082381,1.982177,2.827104,2.772093,2.432705,3.793418,2.890932,3.698867,2.216501
5,0.0,2.830598,1.627711,3.576441,4.651927,3.141523,1.459785,0.0,5.029336,1.142954,...,0.0,0.95203,2.820015,1.253415,4.05907,4.229985,1.601971,3.282743,2.178496,1.816425
6,0.0,2.860663,1.220108,2.974442,3.687876,3.085447,4.876204,0.0,3.701144,3.918498,...,0.0,1.48849,4.101911,2.209322,3.09329,3.137137,0.712516,3.58156,0.846042,3.300781
7,0.0,3.385466,1.949323,5.726854,3.638127,3.732755,2.819599,0.0,3.805809,3.246527,...,0.0,1.48967,4.457733,3.673737,1.961911,1.651317,4.34696,3.335839,3.545384,5.883523
8,0.0,5.638882,2.871177,4.898509,3.419362,2.481212,3.275216,0.0,3.894957,5.025724,...,0.0,1.42678,1.042239,3.691808,2.156105,3.863074,3.740673,3.573404,1.723875,4.66383
9,0.0,2.041071,1.784242,1.867171,1.907627,2.833152,2.128181,0.0,2.359791,7.038349,...,0.0,1.528909,3.074055,3.443143,3.060911,2.215248,2.943035,5.199098,4.301796,4.346797


### LightGBMモデルによる推論

In [8]:
# Start from a copy of the template so the UID column and index are kept and
# ``test`` itself is left untouched.
lgb_pred=test.copy()

for i,col_name in enumerate(CFG.target_cols,start=1):
    # One pickled model per target; the files are numbered from 1 in the order
    # of CFG.target_cols. ``with`` closes each file handle after loading.
    # NOTE: pickle.load can execute arbitrary code -- only load files you produced.
    with open(f"{CFG.models_dir}autoencoder_smallLR/lgb_model_{i}.pickle","rb") as f:
        model=pickle.load(f)
    pred=model.predict(lgb_features)
    # Replace the whole column instead of writing through ``.loc[:, col]``:
    # the template columns hold integers, and writing floats into them in
    # place is deprecated in recent pandas.
    lgb_pred[col_name]=pred

display(lgb_pred)

Unnamed: 0,StudyInstanceUID,ETT - Abnormal,ETT - Borderline,ETT - Normal,NGT - Abnormal,NGT - Borderline,NGT - Incompletely Imaged,NGT - Normal,CVC - Abnormal,CVC - Borderline,CVC - Normal,Swan Ganz Catheter Present
3219,1.2.826.0.1.3680043.8.498.12126834418646094795...,2.803708e-06,0.002593,0.051365,0.004701,0.009444,0.004217,0.016914,0.075955,0.225435,0.723785,5.3e-05
3369,1.2.826.0.1.3680043.8.498.25679526645412462862...,3.293098e-06,0.002041,0.012535,0.00669,0.010043,0.000617,0.015517,0.037513,0.369701,0.864656,1e-05
1206,1.2.826.0.1.3680043.8.498.96154222694757788294...,4.049137e-07,0.001347,0.019568,0.002014,0.00309,0.000219,0.014508,0.121895,0.406926,0.470737,1.2e-05
3032,1.2.826.0.1.3680043.8.498.42051546658503359881...,1.758675e-06,0.002053,0.004068,0.001427,0.002841,0.000184,0.004057,0.075709,0.349084,0.733381,1.3e-05
627,1.2.826.0.1.3680043.8.498.11665192673224035444...,1.27804e-06,0.002819,0.017908,0.005862,0.003266,0.003253,0.003661,0.120294,0.236891,0.662604,7e-06
212,1.2.826.0.1.3680043.8.498.12361605029931768086...,6.230805e-07,0.000409,0.004627,0.001781,0.006965,0.000225,0.004573,0.11967,0.152003,0.788167,4e-06
2319,1.2.826.0.1.3680043.8.498.13233430745466640843...,6.391392e-07,0.001689,0.018813,0.004151,0.002596,0.002203,0.002499,0.13291,0.192934,0.691537,1.3e-05
741,1.2.826.0.1.3680043.8.498.11086677407703525147...,1.402018e-06,0.00154,0.013388,0.0174,0.006546,0.000745,0.009481,0.053175,0.198195,0.833107,1.1e-05
1821,1.2.826.0.1.3680043.8.498.11397896435572402386...,9.333147e-07,0.001518,0.018155,0.003774,0.003276,0.000446,0.007727,0.045477,0.294294,0.719398,5e-06
1178,1.2.826.0.1.3680043.8.498.67238847089694621353...,2.792796e-06,0.143112,0.692782,0.005688,0.03181,0.123156,0.445049,0.172956,0.247399,0.664242,0.001746


## NNによる予測値の準備

In [9]:
# Rebuild the dense head in two steps: architecture from its JSON description,
# then weights from the checkpoint saved next to it.
with open(f"{CFG.models_dir}eff_dense/model_structure","rt") as structure_file:
    dense_model=models.model_from_json(structure_file.read())

dense_model.load_weights(f"{CFG.models_dir}eff_dense/checkpoint")
dense_model.compile(
    optimizer="adam",
    loss="binary_crossentropy",
    metrics=[keras.metrics.AUC(multi_label=True)],
)
dense_model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dropout (Dropout)            (None, 2560)              0         
_________________________________________________________________
dense (Dense)                (None, 11)                28171     
Total params: 28,171
Trainable params: 28,171
Non-trainable params: 0
_________________________________________________________________


In [10]:
# Run the dense head on the unscaled network outputs and label the resulting
# columns with the target names.
dense_scores=dense_model.predict(nn_pred)
dense_pred=pd.DataFrame(dense_scores,columns=CFG.target_cols)
display(dense_pred)

Unnamed: 0,ETT - Abnormal,ETT - Borderline,ETT - Normal,NGT - Abnormal,NGT - Borderline,NGT - Incompletely Imaged,NGT - Normal,CVC - Abnormal,CVC - Borderline,CVC - Normal,Swan Ganz Catheter Present
0,0.003303,0.005226,0.0928,0.003363,0.008134,0.062013,0.041829,0.144047,0.21989,0.689127,0.014735
1,0.000304,0.004209,0.007972,0.002308,0.005092,0.007019,0.0138,0.028063,0.440158,0.697681,0.000441
2,0.000298,0.002647,0.012649,0.000326,0.001204,0.001282,0.016383,0.10469,0.382761,0.529028,0.000304
3,0.000439,0.002533,0.002026,0.000211,0.001014,0.006315,0.004297,0.076696,0.236585,0.670058,0.003372
4,0.001155,0.003494,0.03208,0.002272,0.004759,0.015004,0.011981,0.133886,0.201644,0.623409,0.007068
5,0.000224,0.000385,0.009272,0.000388,0.005886,0.002291,0.004574,0.108913,0.152641,0.690991,0.000355
6,0.000208,0.001573,0.014661,0.001238,0.001,0.048118,0.001748,0.094073,0.197476,0.730662,0.002744
7,0.000154,0.000472,0.006778,0.002043,0.002552,0.005677,0.006775,0.043456,0.199305,0.754521,0.000639
8,0.000827,0.004066,0.073085,0.001671,0.002812,0.057779,0.019507,0.051358,0.314613,0.612936,0.001001
9,0.003123,0.188135,0.8178,0.008107,0.047738,0.483714,0.547041,0.130608,0.220911,0.720041,0.366497


## アンサンブル

In [17]:
# Load the two pickled scalers, one per prediction source. ``with`` closes the
# file handles, which the bare ``pickle.load(open(...))`` form leaves open.
# NOTE: pickle.load can execute arbitrary code -- only load files you produced.
with open(f"{CFG.models_dir}standardscaler_lgb.pickle","rb") as f:
    scaler_lgb=pickle.load(f)
with open(f"{CFG.models_dir}standardscaler_dense.pickle","rb") as f:
    scaler_dense=pickle.load(f)

# Rescale both prediction sets before they are blended. Each result is wrapped
# in a new frame, so both carry a fresh 0..n-1 index.
lgb_pred_norm=pd.DataFrame(scaler_lgb.transform(lgb_pred[CFG.target_cols]),columns=CFG.target_cols)
dense_pred_norm=pd.DataFrame(scaler_dense.transform(dense_pred[CFG.target_cols]),columns=CFG.target_cols)

In [12]:
# Per-target blend weights, keyed by target column name. ``with`` closes the
# file handle, which the bare ``pickle.load(open(...))`` form leaves open.
# NOTE: pickle.load can execute arbitrary code -- only load files you produced.
with open(f"{CFG.models_dir}weightedsum_params.pickle","rb") as f:
    best_params=pickle.load(f)

# The index is reset so the rows align with the 0..n-1 index of the two
# rescaled prediction frames when their columns are assigned below.
submission=test.copy().reset_index(drop=True)

for col_name in CFG.target_cols:
    # p is the weight of the LightGBM score; the dense head gets the rest.
    p=best_params[col_name]
    ensemble_pred=lgb_pred_norm[col_name]*p+dense_pred_norm[col_name]*(1-p)

    submission[col_name]=ensemble_pred

submission.to_csv("submission.csv",index=False)
display(submission)

Unnamed: 0,StudyInstanceUID,ETT - Abnormal,ETT - Borderline,ETT - Normal,NGT - Abnormal,NGT - Borderline,NGT - Incompletely Imaged,NGT - Normal,CVC - Abnormal,CVC - Borderline,CVC - Normal,Swan Ganz Catheter Present
0,1.2.826.0.1.3680043.8.498.12126834418646094795...,1.488457,-0.314101,-0.167382,-0.148198,0.173944,-0.182737,-0.270916,-0.46556,-0.522848,0.08311,-0.246775
1,1.2.826.0.1.3680043.8.498.25679526645412462862...,0.957575,-0.327119,-0.358785,0.306841,0.24548,-0.384097,-0.281583,-1.378666,1.27947,1.440715,-0.348699
2,1.2.826.0.1.3680043.8.498.96154222694757788294...,-1.013486,-0.343483,-0.32412,-0.763035,-0.58496,-0.405495,-0.289285,0.625645,1.744531,-2.355572,-0.347793
3,1.2.826.0.1.3680043.8.498.42051546658503359881...,-0.049834,-0.326834,-0.400523,-0.897438,-0.614752,-0.393547,-0.369078,-0.471403,1.021899,0.175587,-0.334708
4,1.2.826.0.1.3680043.8.498.11665192673224035444...,-0.170904,-0.308753,-0.332301,0.117522,-0.563934,-0.317289,-0.3721,0.587612,-0.379728,-0.506507,-0.327182
5,1.2.826.0.1.3680043.8.498.12361605029931768086...,-0.885904,-0.365587,-0.397768,-0.816415,-0.122102,-0.402875,-0.365137,0.57278,-1.440233,0.703574,-0.356578
6,1.2.826.0.1.3680043.8.498.13233430745466640843...,-0.879586,-0.335403,-0.32784,-0.274184,-0.643979,-0.253244,-0.380972,0.887264,-0.928891,-0.227674,-0.336932
7,1.2.826.0.1.3680043.8.498.11086677407703525147...,-0.374918,-0.33893,-0.354581,2.75768,-0.172155,-0.385165,-0.327672,-1.006647,-0.863157,1.136665,-0.347554
8,1.2.826.0.1.3680043.8.498.11397896435572402386...,-0.500671,-0.33945,-0.331083,-0.360437,-0.562781,-0.260333,-0.341057,-1.189507,0.337411,0.040826,-0.352253
9,1.2.826.0.1.3680043.8.498.67238847089694621353...,1.429269,2.999659,2.994381,0.077664,2.845239,2.984782,2.997799,1.838482,-0.248456,-0.490722,2.998474
