In [10]:
import pandas as pd
import numpy as np
from IPython.display import display
from tqdm.notebook import tqdm
import pickle

from tensorflow.keras.models import Model, load_model
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.efficientnet import preprocess_input

from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score

## CFG

In [2]:
class CFG:
    """Notebook-wide settings: run mode, input locations and target columns."""

    # Debug switch; it selects the models directory below and is read by
    # later cells to shrink the amount of data processed.
    debug = True

    # Root directory of the competition dataset.
    dataset_dir = "../input/ranzcr-clip-catheter-line-classification/"

    # Directory holding the trained models: the local folder in debug mode,
    # the attached input dataset otherwise.
    models_dir = "../input/efficientnet-lightgbm-models/"
    if debug:
        models_dir = "./models/"

    # Label columns to predict, in the order they appear in the submission file.
    target_cols = [
        'ETT - Abnormal',
        'ETT - Borderline',
        'ETT - Normal',
        'NGT - Abnormal',
        'NGT - Borderline',
        'NGT - Incompletely Imaged',
        'NGT - Normal',
        'CVC - Abnormal',
        'CVC - Borderline',
        'CVC - Normal',
        'Swan Ganz Catheter Present',
    ]

In [3]:
# Load the sample submission; its StudyInstanceUID column lists the images to
# score and its remaining columns are the prediction placeholders.
test=pd.read_csv(f"{CFG.dataset_dir}sample_submission.csv")

if CFG.debug:
    # Debug runs keep only a small random subset of rows. The seed is fixed so
    # that Restart & Run All always selects the same rows.
    n_samples=10
    train,test=train_test_split(test,test_size=n_samples,random_state=42)

display(test)

Unnamed: 0,StudyInstanceUID,ETT - Abnormal,ETT - Borderline,ETT - Normal,NGT - Abnormal,NGT - Borderline,NGT - Incompletely Imaged,NGT - Normal,CVC - Abnormal,CVC - Borderline,CVC - Normal,Swan Ganz Catheter Present
2764,1.2.826.0.1.3680043.8.498.53187330532968389747...,0,0,0,0,0,0,0,0,0,0,0
2293,1.2.826.0.1.3680043.8.498.23628841926681382432...,0,0,0,0,0,0,0,0,0,0,0
916,1.2.826.0.1.3680043.8.498.95814725756838594874...,0,0,0,0,0,0,0,0,0,0,0
26,1.2.826.0.1.3680043.8.498.30188537311791900160...,0,0,0,0,0,0,0,0,0,0,0
2872,1.2.826.0.1.3680043.8.498.12633219127030026453...,0,0,0,0,0,0,0,0,0,0,0
2458,1.2.826.0.1.3680043.8.498.11169256056227577367...,0,0,0,0,0,0,0,0,0,0,0
1275,1.2.826.0.1.3680043.8.498.10205354356085088542...,0,0,0,0,0,0,0,0,0,0,0
1240,1.2.826.0.1.3680043.8.498.24239184506702911292...,0,0,0,0,0,0,0,0,0,0,0
3577,1.2.826.0.1.3680043.8.498.81464483108873296584...,0,0,0,0,0,0,0,0,0,0,0
510,1.2.826.0.1.3680043.8.498.32689913031429495918...,0,0,0,0,0,0,0,0,0,0,0


## EfficientNetB7による出力の取得

In [4]:
# Load the saved EfficientNetB7 Keras model from the configured models directory.
efficientnet_path=f"{CFG.models_dir}efficientnetB7.h5"
nn_model=load_model(efficientnet_path)



In [11]:
# Number of images sent through the network per predict() call. Calling
# predict() once per image inside a Python loop is slow, so images are grouped
# into chunks; chunking (rather than one giant array) keeps memory bounded.
batch_size=32

uids=test["StudyInstanceUID"].tolist()
pred_list=[]
for start in tqdm(range(0,len(uids),batch_size)):
    # Load each image with PIL, resized to 256x256, and stack into one batch.
    batch=np.stack([
        image.img_to_array(image.load_img(f"{CFG.dataset_dir}test/{uid}.jpg", target_size=(256,256)))
        for uid in uids[start:start+batch_size]
    ])
    pred_list.append(nn_model.predict(preprocess_input(batch),verbose=0))

# One row of network outputs per image, in the same order as `test`.
nn_pred=np.concatenate(pred_list,axis=0)

  0%|          | 0/10 [00:00<?, ?it/s]

### 正規化する

In [6]:
# Load the fitted scaler saved at training time (presumably a MinMaxScaler,
# judging by the file name). The context manager closes the file handle.
# NOTE: pickle can execute arbitrary code on load; only load trusted files.
with open(f"{CFG.models_dir}minmaxscaler.pickle","rb") as scaler_file:
    scaler=pickle.load(scaler_file)

# Scale the network outputs and attach the image ids as the first column.
# The index of `test` is reset so rows align positionally with `nn_pred`.
scaled_pred=pd.DataFrame(scaler.transform(nn_pred))
input_df=pd.concat([test.reset_index(drop=True)["StudyInstanceUID"],scaled_pred],axis=1)
display(input_df)

Unnamed: 0,StudyInstanceUID,0,1,2,3,4,5,6,7,8,...,2550,2551,2552,2553,2554,2555,2556,2557,2558,2559
0,1.2.826.0.1.3680043.8.498.53187330532968389747...,0.392083,0.204822,0.302095,0.312136,0.02979,0.127774,0.08718,0.401984,0.301424,...,0.288278,0.240811,0.592575,0.084094,0.039363,0.374288,0.184754,0.232877,0.19965,0.300287
1,1.2.826.0.1.3680043.8.498.23628841926681382432...,0.178802,0.305499,0.180544,0.162037,0.175688,0.322159,0.27978,0.150125,0.286821,...,0.113365,0.109818,0.220005,0.192193,0.055772,0.20243,0.06487,0.072565,0.416423,0.18191
2,1.2.826.0.1.3680043.8.498.95814725756838594874...,0.163736,0.299436,0.252858,0.171036,0.06202,0.340633,0.091703,0.36962,0.452058,...,0.246682,0.212359,0.362382,0.275446,0.06972,0.237833,0.150405,0.073818,0.184411,0.390313
3,1.2.826.0.1.3680043.8.498.30188537311791900160...,0.476096,0.360794,0.383778,0.140472,0.352782,0.234401,0.185029,0.385253,0.187532,...,0.651302,0.142143,0.04762,0.255281,0.082383,0.322772,0.147672,0.074314,0.152038,0.07618
4,1.2.826.0.1.3680043.8.498.12633219127030026453...,0.067392,0.311888,0.185603,0.067984,0.064778,0.397559,0.178609,0.471995,0.279235,...,0.151908,0.194794,0.533105,0.191994,0.070432,0.094274,0.024481,0.132054,0.345809,0.413144
5,1.2.826.0.1.3680043.8.498.11169256056227577367...,0.110847,0.314346,0.101048,0.262023,0.198968,0.110342,0.173974,0.241759,0.53213,...,0.402385,0.220766,0.131296,0.172787,0.032482,0.07531,0.430619,0.054897,0.213571,0.22708
6,1.2.826.0.1.3680043.8.498.10205354356085088542...,0.100611,0.251807,0.176557,0.207098,0.16347,0.15286,0.244016,0.243979,0.216611,...,0.180387,0.292911,0.285579,0.181616,0.082373,0.208384,0.241177,0.639229,0.399528,0.220507
7,1.2.826.0.1.3680043.8.498.24239184506702911292...,0.045561,0.189722,0.125817,0.108249,0.124685,0.420566,0.131521,0.457789,0.124929,...,0.225335,0.167912,0.175035,0.352255,0.05688,0.06281,0.308791,0.178002,0.390412,0.228199
8,1.2.826.0.1.3680043.8.498.81464483108873296584...,0.107708,0.281665,0.123436,0.205817,0.040849,0.230082,0.153888,0.275494,0.411286,...,0.43155,0.065627,0.279459,0.266971,0.05569,0.286073,0.370544,0.306568,0.355896,0.427009
9,1.2.826.0.1.3680043.8.498.32689913031429495918...,0.49814,0.143177,0.205309,0.195642,0.79083,0.377499,0.145924,0.209813,0.342672,...,0.250451,0.502635,0.016645,0.149513,0.095956,0.245531,0.247173,0.081134,0.140821,0.04371


## AutoEncoderによる次元削減

In [7]:
# Load the trained autoencoder and build a sub-model that stops at one of its
# internal layers, so predict() returns that layer's activations.
layer_name="dense_1"
autoencoder=load_model(f"{CFG.models_dir}autoencoder_splits10")
hidden_layer_model=Model(
    inputs=autoencoder.input,
    outputs=autoencoder.get_layer(layer_name).output,
)

# Column 0 of input_df is StudyInstanceUID; every remaining column is fed in.
pred=hidden_layer_model.predict(input_df.iloc[:,1:])
features=pd.DataFrame(pred)
display(features)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,90,91,92,93,94,95,96,97,98,99
0,0.0,2.119918,2.163121,1.638946,1.774398,2.635539,2.673605,0.0,4.758869,2.559144,...,0.0,0.625272,2.247873,2.022233,1.800856,2.635757,3.127333,3.520048,2.474971,0.441091
1,0.0,6.994092,1.520887,6.04098,5.490166,6.354824,3.617496,0.0,4.291549,3.477937,...,0.0,0.802288,4.719494,3.308662,3.003226,3.323771,2.264355,2.295338,3.773238,3.643555
2,0.0,4.068262,2.226102,4.700896,4.807385,4.45591,4.222445,0.0,3.404594,5.438682,...,0.0,0.534936,5.008849,3.124704,2.355565,2.582374,2.943917,3.659654,4.58408,4.367057
3,0.0,3.802217,2.394319,1.924707,2.650949,2.074474,4.063462,0.0,5.06134,2.609853,...,0.0,0.931609,1.494502,5.468631,2.814267,5.034492,2.992605,5.976534,3.288343,3.697389
4,0.0,5.065516,1.367345,1.851728,6.016154,2.456596,3.236768,0.0,5.093782,4.536272,...,0.0,1.484924,3.19248,2.372754,3.575408,2.197106,2.391522,4.348204,3.665531,3.04424
5,0.0,4.018206,2.824412,3.413541,2.145518,4.25348,5.424338,0.0,3.980549,5.242438,...,0.0,0.513382,7.126354,4.003417,2.32712,1.742249,2.223447,4.478882,4.510432,5.91934
6,0.0,4.036886,1.722131,3.165283,1.4036,3.939363,2.435838,0.0,4.529491,6.601109,...,0.0,1.045035,4.207119,2.738962,3.11416,4.521884,2.896721,3.442829,1.166439,4.666277
7,0.0,5.05086,1.400325,7.252046,2.580305,5.157672,3.146062,0.0,1.395355,3.458122,...,0.0,1.762309,3.7676,3.649881,2.757871,3.681825,4.330676,3.529988,2.506806,3.011085
8,0.0,2.991557,0.888041,3.216361,4.019028,3.142505,2.05965,0.0,2.568055,5.940868,...,0.0,0.969799,4.074456,2.861773,1.722694,4.261704,1.689005,2.930497,3.842574,4.586554
9,0.0,3.775364,4.191988,2.737906,1.98805,4.231436,5.739973,0.0,4.257288,2.626848,...,0.0,1.439401,2.200976,5.199461,2.246084,3.191299,2.648046,5.860524,5.349447,4.809613


## 訓練済みLightGBMモデルによる推論

In [8]:
# Start from a copy of the test frame so the ids and column layout are kept.
submission=test.copy()

# One pickled model per target column; the files are numbered from 1 in the
# same order as CFG.target_cols.
# NOTE: pickle can execute arbitrary code on load; only load trusted files.
for model_no,col_name in enumerate(CFG.target_cols,start=1):
    with open(f"{CFG.models_dir}autoencoder_smallLR/lgb_model_{model_no}.pickle","rb") as model_file:
        model=pickle.load(model_file)
    pred=model.predict(features)
    # Replace the whole column instead of writing through .loc, so the float
    # predictions do not have to be cast into the integer placeholder column.
    submission[col_name]=pred

submission.to_csv("submission.csv",index=False)
display(submission)

Unnamed: 0,StudyInstanceUID,ETT - Abnormal,ETT - Borderline,ETT - Normal,NGT - Abnormal,NGT - Borderline,NGT - Incompletely Imaged,NGT - Normal,CVC - Abnormal,CVC - Borderline,CVC - Normal,Swan Ganz Catheter Present
2764,1.2.826.0.1.3680043.8.498.53187330532968389747...,3.185251e-06,0.010526,0.141956,0.011539,0.020773,0.011377,0.031457,0.18393,0.240046,0.622818,1e-05
2293,1.2.826.0.1.3680043.8.498.23628841926681382432...,3.84395e-07,0.000371,0.001602,0.002281,0.002176,6.1e-05,0.002445,0.030874,0.254463,0.68619,4e-06
916,1.2.826.0.1.3680043.8.498.95814725756838594874...,7.748613e-07,0.003852,0.010606,0.009762,0.007555,0.000644,0.018923,0.342428,0.235184,0.537059,2.6e-05
26,1.2.826.0.1.3680043.8.498.30188537311791900160...,2.981536e-05,0.03231,0.441387,0.00585,0.021594,0.008178,0.222218,0.06742,0.286824,0.840132,0.000784
2872,1.2.826.0.1.3680043.8.498.12633219127030026453...,7.223724e-07,0.004105,0.025737,0.002667,0.004933,0.001183,0.016607,0.113602,0.248911,0.776015,2.9e-05
2458,1.2.826.0.1.3680043.8.498.11169256056227577367...,1.298099e-05,0.029196,0.195649,0.007573,0.014218,0.012873,0.158682,0.089207,0.336271,0.670927,0.000161
1275,1.2.826.0.1.3680043.8.498.10205354356085088542...,3.935094e-06,0.011818,0.250393,0.006377,0.007944,0.020498,0.062057,0.12535,0.22857,0.776946,0.000849
1240,1.2.826.0.1.3680043.8.498.24239184506702911292...,4.255825e-07,0.001663,0.006533,0.002939,0.003734,0.000236,0.029199,0.078613,0.346147,0.751859,6e-06
3577,1.2.826.0.1.3680043.8.498.81464483108873296584...,3.653417e-06,0.029812,0.168171,0.006091,0.005628,0.006944,0.068484,0.136121,0.350397,0.573343,0.000179
510,1.2.826.0.1.3680043.8.498.32689913031429495918...,1.897673e-05,0.022418,0.394889,0.007548,0.020646,0.073976,0.175717,0.120453,0.410998,0.754833,0.001394
