In [46]:
import pandas as pd
import numpy as np
from IPython.display import display
from tqdm import tqdm
import pickle

from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.efficientnet import preprocess_input
from tensorflow.keras.models import Model

from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score

## CFG

In [50]:
debug=True

dataset_dir="../input/ranzcr-clip-catheter-line-classification/"
models_dir="./models/" if debug else "../input/efficientnet-lightgbm-models/"

num_features=100
target_cols=['ETT - Abnormal', 'ETT - Borderline', 'ETT - Normal', 'NGT - Abnormal', 'NGT - Borderline', 'NGT - Incompletely Imaged', 'NGT - Normal', 'CVC - Abnormal', 'CVC - Borderline', 'CVC - Normal', 'Swan Ganz Catheter Present']

In [14]:
test=pd.read_csv(dataset_dir+"sample_submission.csv")

if debug:
    n_samples=100
    train,test=train_test_split(test,test_size=n_samples)

display(test)

Unnamed: 0,StudyInstanceUID,ETT - Abnormal,ETT - Borderline,ETT - Normal,NGT - Abnormal,NGT - Borderline,NGT - Incompletely Imaged,NGT - Normal,CVC - Abnormal,CVC - Borderline,CVC - Normal,Swan Ganz Catheter Present,PatientID
3714,1.2.826.0.1.3680043.8.498.92511081648859045512...,0,0,0,0,0,0,0,0,0,1,0,5c2d087c4
1827,1.2.826.0.1.3680043.8.498.72202199038776592560...,0,0,1,0,0,0,1,0,0,1,0,998742a32
1163,1.2.826.0.1.3680043.8.498.80876314882630833396...,0,0,0,0,0,0,0,0,0,1,0,cdbe6c04e
26642,1.2.826.0.1.3680043.8.498.86958603893703989109...,0,0,0,0,0,0,0,0,0,1,0,0c9913892
11050,1.2.826.0.1.3680043.8.498.57208477845735660395...,0,0,0,0,0,0,0,1,0,1,0,663828d52
...,...,...,...,...,...,...,...,...,...,...,...,...,...
9799,1.2.826.0.1.3680043.8.498.64554487703874739869...,0,0,0,0,0,0,0,0,0,1,0,05781e73d
27012,1.2.826.0.1.3680043.8.498.81727313586288439162...,0,0,0,0,0,0,0,0,0,1,0,e8c275cb5
25168,1.2.826.0.1.3680043.8.498.21415798723606531651...,0,0,0,0,0,0,0,0,0,1,0,475886299
15685,1.2.826.0.1.3680043.8.498.69893073810882891467...,0,0,1,0,0,0,1,0,1,1,0,e65a98a3e


## EfficientNetB7による出力の取得

In [10]:
nn_model=load_model(models_dir+"efficientnetB7.h5")



In [21]:
pred_list=[]
for uid in tqdm(test["StudyInstanceUID"]):
    img_pil=image.load_img(f"{dataset_dir}test/{uid}.jpg", target_size=(256,256)) # PIL形式で画像を読み込み、リサイズ
    img=image.img_to_array(img_pil)
    img=np.expand_dims(img,axis=0)
    nn_pred=nn_model.predict(preprocess_input(img))
    pred_list.append(nn_pred[0])

nn_pred=np.array(pred_list)

100%|██████████| 100/100 [04:41<00:00,  2.82s/it]


### 正規化する

In [40]:
scaler=pickle.load(open(f"{models_dir}minmaxscaler.pickle","rb"))

input_df=pd.concat([test.reset_index(drop=True)["StudyInstanceUID"],pd.DataFrame(scaler.transform(nn_pred))],axis=1)
display(input_df)

Unnamed: 0,StudyInstanceUID,0,1,2,3,4,5,6,7,8,...,2550,2551,2552,2553,2554,2555,2556,2557,2558,2559
0,1.2.826.0.1.3680043.8.498.92511081648859045512...,0.119070,0.263644,0.278235,0.209357,0.161661,0.526716,0.367052,0.250044,0.184465,...,0.289838,0.266785,0.384940,0.190104,0.020290,0.215851,0.320358,0.139523,0.295440,0.504105
1,1.2.826.0.1.3680043.8.498.72202199038776592560...,0.239098,0.297745,0.397188,0.672767,0.343613,0.060023,0.306063,0.219429,0.441474,...,0.646253,0.261628,0.221452,0.164254,0.183663,0.201685,0.449417,0.171543,0.443069,0.160373
2,1.2.826.0.1.3680043.8.498.80876314882630833396...,0.177597,0.245297,0.187547,0.151344,0.090139,0.363749,0.096986,0.259431,0.258411,...,0.228972,0.236976,0.240752,0.229009,0.102513,0.211103,0.122351,0.209610,0.193169,0.169419
3,1.2.826.0.1.3680043.8.498.86958603893703989109...,0.160498,0.238281,0.236376,0.512910,0.075272,0.081705,0.307845,0.255978,0.536275,...,0.425582,0.249524,0.153275,0.113976,0.544672,0.545505,0.323274,0.172273,0.524870,0.231440
4,1.2.826.0.1.3680043.8.498.57208477845735660395...,0.010415,0.119126,0.171180,0.059051,0.068936,0.322570,0.197295,0.325978,0.198289,...,0.252918,0.123291,0.291169,0.118990,0.058406,0.052565,0.152916,0.058091,0.293701,0.168928
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,1.2.826.0.1.3680043.8.498.64554487703874739869...,0.075960,0.250470,0.368195,0.397741,0.192918,0.360085,0.293927,0.325759,0.415852,...,0.307494,0.256166,0.267720,0.059916,0.079012,0.205138,0.246832,0.245874,0.200497,0.286569
96,1.2.826.0.1.3680043.8.498.81727313586288439162...,0.189078,0.166092,0.241421,0.148257,0.088885,0.225218,0.169655,0.471320,0.251202,...,0.251284,0.258276,0.361873,0.263943,0.124246,0.362161,0.223106,0.277254,0.195396,0.113756
97,1.2.826.0.1.3680043.8.498.21415798723606531651...,0.363163,0.264887,0.188068,0.218152,0.192549,0.133488,0.061303,0.259696,0.504851,...,0.278206,0.064305,0.362654,0.063864,0.074330,0.280635,0.219060,0.075135,0.229701,0.299472
98,1.2.826.0.1.3680043.8.498.69893073810882891467...,0.175664,0.153901,0.221214,0.411451,0.055273,0.121665,0.252395,0.257925,0.447162,...,0.422317,0.274531,0.300598,0.125339,0.061230,0.245083,0.335377,0.230916,0.347108,0.366749


## AutoEncoderによる次元削減

In [44]:
autoencoder=load_model(models_dir+"autoencoder_splits10")
layer_name="dense_1"
hidden_layer_model=Model(inputs=autoencoder.input,outputs=autoencoder.get_layer(layer_name).output)

pred=hidden_layer_model.predict(input_df.iloc[:,1:])
features=pd.DataFrame(pred)
display(features)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,90,91,92,93,94,95,96,97,98,99
0,0.0,4.071238,2.036263,5.921571,5.921043,3.770345,3.212769,0.0,3.320835,4.148574,...,0.0,1.002130,5.348242,4.684763,4.894108,6.209260,2.895662,3.209881,3.613510,3.648977
1,0.0,2.854680,2.750374,3.024800,2.014940,2.875407,4.439881,0.0,3.118279,4.547942,...,0.0,0.005073,4.097000,2.415063,2.701719,1.504981,1.903784,3.942729,2.788665,2.748391
2,0.0,2.996636,2.032366,3.741539,3.420367,3.992173,1.367723,0.0,3.529947,4.728153,...,0.0,1.247855,1.715913,2.419096,1.967377,3.802683,2.102259,3.431104,2.279551,4.326679
3,0.0,5.119683,1.653423,3.126246,2.197192,4.220846,5.762908,0.0,5.082413,3.480234,...,0.0,1.421378,4.115449,2.397384,4.499605,1.243202,2.823926,3.378100,1.103126,2.883510
4,0.0,5.439157,1.649039,4.936476,3.128970,5.116446,3.104952,0.0,1.527074,4.432976,...,0.0,1.074558,2.741527,2.533481,2.462220,3.836189,4.059120,4.323339,2.290179,1.666896
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,0.0,3.077638,2.296414,6.066879,3.391178,2.847261,4.514524,0.0,2.693018,7.698250,...,0.0,1.717739,6.152410,2.026255,5.147166,3.931139,3.577106,5.506383,3.287870,1.856724
96,0.0,2.990176,1.980487,1.841533,2.292038,2.224017,2.024535,0.0,1.930074,2.060823,...,0.0,1.358291,0.741607,2.351303,1.801081,2.889492,2.037588,6.047063,2.764158,3.314187
97,0.0,4.075650,2.292416,5.602129,4.397439,4.928271,3.629215,0.0,4.481905,5.343220,...,0.0,1.461045,2.854066,2.550186,0.000000,4.443690,3.698866,5.718501,1.701826,1.560364
98,0.0,3.128833,0.456956,3.907800,0.585200,3.276345,3.019457,0.0,2.161268,4.375148,...,0.0,0.938636,2.700490,2.477148,2.496736,1.698159,0.973040,3.344852,0.634442,3.074343


## 訓練済みLightGBMモデルによる推論

In [45]:
submission=test.copy()

for i,col_name in enumerate(target_cols):
    model=pickle.load(open(f"{models_dir}autoencoder/lgb_model_{i+1}.pickle","rb"))
    pred=model.predict(features)
    submission.loc[:,col_name]=pred
    
submission.to_csv("submission.csv",index=False)
display(submission)

Unnamed: 0,StudyInstanceUID,ETT - Abnormal,ETT - Borderline,ETT - Normal,NGT - Abnormal,NGT - Borderline,NGT - Incompletely Imaged,NGT - Normal,CVC - Abnormal,CVC - Borderline,CVC - Normal,Swan Ganz Catheter Present,PatientID
3714,1.2.826.0.1.3680043.8.498.92511081648859045512...,3.331270e-07,0.001175,0.009735,0.000240,0.000114,0.000098,0.003578,0.085243,0.175534,0.759680,0.000117,5c2d087c4
1827,1.2.826.0.1.3680043.8.498.72202199038776592560...,5.563977e-07,0.082850,0.585364,0.003241,0.008104,0.055872,0.362256,0.075507,0.321947,0.806377,0.004336,998742a32
1163,1.2.826.0.1.3680043.8.498.80876314882630833396...,4.516759e-07,0.023068,0.087228,0.001259,0.001579,0.006639,0.042180,0.129194,0.310962,0.699059,0.003999,cdbe6c04e
26642,1.2.826.0.1.3680043.8.498.86958603893703989109...,8.251645e-07,0.007628,0.114408,0.000890,0.001386,0.010438,0.097977,0.090246,0.271806,0.787730,0.001839,0c9913892
11050,1.2.826.0.1.3680043.8.498.57208477845735660395...,3.608875e-07,0.000853,0.031204,0.000490,0.000302,0.000168,0.025144,0.311791,0.246107,0.720156,0.000094,663828d52
...,...,...,...,...,...,...,...,...,...,...,...,...,...
9799,1.2.826.0.1.3680043.8.498.64554487703874739869...,3.720634e-07,0.000950,0.039971,0.000503,0.001425,0.000350,0.016289,0.084588,0.299466,0.677561,0.000230,05781e73d
27012,1.2.826.0.1.3680043.8.498.81727313586288439162...,3.913753e-07,0.031818,0.481371,0.011278,0.005315,0.009961,0.356581,0.112897,0.371261,0.734415,0.005605,e8c275cb5
25168,1.2.826.0.1.3680043.8.498.21415798723606531651...,1.272518e-06,0.026294,0.410843,0.004312,0.003509,0.014203,0.048747,0.114908,0.291271,0.682628,0.001322,475886299
15685,1.2.826.0.1.3680043.8.498.69893073810882891467...,7.870610e-07,0.027954,0.669998,0.001611,0.048213,0.002471,0.722928,0.135318,0.363715,0.748413,0.004788,e65a98a3e
