In [17]:
import pandas as pd
import numpy as np
from IPython.display import display
from tqdm.notebook import tqdm
import pickle

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import models

from sklearn.model_selection import GroupKFold

In [14]:
class CFG:
   debug=True
   dataset_dir="../input/ranzcr-clip-catheter-line-classification/"
   models_dir="./models/" if debug else "../input/efficientnet-lightgbm-models/"
   batch_size=8
   input_shape=(260,260)

   target_cols=['ETT - Abnormal', 'ETT - Borderline', 'ETT - Normal', 'NGT - Abnormal', 'NGT - Borderline',
      'NGT - Incompletely Imaged', 'NGT - Normal', 'CVC - Abnormal', 'CVC - Borderline', 'CVC - Normal', 'Swan Ganz Catheter Present']
   n_folds=4

In [8]:
test=pd.read_csv(f"{CFG.dataset_dir}train.csv")

In [7]:
with open(f"{CFG.models_dir}effnet_tuned/model.json","rt") as f:
    effnet_json=f.read()
effnet=models.model_from_json(effnet_json)
effnet.load_weights(f"{CFG.models_dir}effnet_tuned/weight.hdf5")

layer_name="global_average_pooling2d"
hidden_layer_model=models.Model(inputs=effnet.input,outputs=effnet.get_layer(layer_name).output)
hidden_layer_model.summary()

Model: "functional_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
efficientnetb7_input (InputL [(None, 260, 260, 3)]     0         
_________________________________________________________________
efficientnetb7 (Functional)  (None, 9, 9, 2560)        64097687  
_________________________________________________________________
global_average_pooling2d (Gl (None, 2560)              0         
Total params: 64,097,687
Trainable params: 63,786,960
Non-trainable params: 310,727
_________________________________________________________________


In [15]:
def preprocess(record):
    file_bytes=tf.io.read_file(f"{CFG.dataset_dir}train/"+record+".jpg") # f"{}"形式ではTensorの型変換が働かない
    image=tf.io.decode_jpeg(file_bytes,channels=3)
    image=tf.cast(image,tf.float32)
    image=tf.image.resize(image,CFG.input_shape)
    image/=255
    return image

def decode_string(tensor):
    return [str_bytes.decode() for str_bytes in tensor.numpy()]


dset=tf.data.Dataset.from_tensor_slices(test["StudyInstanceUID"])
dset=dset.map(preprocess,num_parallel_calls=tf.data.experimental.AUTOTUNE).batch(CFG.batch_size)

uid_dset=tf.data.Dataset.from_tensor_slices(test["StudyInstanceUID"]).batch(CFG.batch_size)

In [28]:
ndarray_dict={}
for imgs,uids in tqdm(zip(dset,uid_dset)):
    preds=hidden_layer_model.predict(imgs)
    uids_decoded=decode_string(uids)
    
    for uid,pred in zip(uids_decoded,preds):
        ndarray_dict[uid]=pred

0it [00:00, ?it/s]

{'1.2.826.0.1.3680043.8.498.26697628953273228189375557799582420561': array([-0.08567494,  0.00229176,  0.01544561, ..., -0.06187971,
         0.00742798,  0.01092294], dtype=float32),
 '1.2.826.0.1.3680043.8.498.46302891597398758759818628675365157729': array([-0.07505051, -0.00155438, -0.01137567, ...,  0.05553032,
        -0.02079361, -0.01061307], dtype=float32),
 '1.2.826.0.1.3680043.8.498.23819260719748494858948050424870692577': array([-0.06143351, -0.01030844, -0.00388086, ..., -0.00936987,
        -0.00535191, -0.00462459], dtype=float32),
 '1.2.826.0.1.3680043.8.498.68286643202323212801283518367144358744': array([-0.08273388, -0.01611442,  0.00568651, ...,  0.02443826,
        -0.00774911,  0.00436746], dtype=float32),
 '1.2.826.0.1.3680043.8.498.10050203009225938259119000528814762175': array([-0.02100174,  0.00621759,  0.00972148, ..., -0.01103061,
         0.00239548,  0.00745044], dtype=float32),
 '1.2.826.0.1.3680043.8.498.11707076266253086830850990314496191758': array([-0.0

In [27]:
features_list=[ndarray_dict[uid] for uid in tqdm(test["StudyInstanceUID"])]
features=np.array(features_list)

  0%|          | 0/30083 [00:00<?, ?it/s]

KeyError: '1.2.826.0.1.3680043.8.498.91266853519846682206191056121106632892'

In [36]:
scaler=pickle.load(open(f"{CFG.models_dir}minmaxscaler_effnet_tuned.pickle","rb"))
X=scaler.transform(features)

In [37]:
autoencoder_dir=f"{CFG.models_dir}autoencoder_tuned/"
with open(f"{autoencoder_dir}model.json","rt") as f:
    model_json=f.read()
autoencoder=models.model_from_json(model_json)
autoencoder.load_weights(f"{autoencoder_dir}ckpt")
autoencoder.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 1800)              4609800   
_________________________________________________________________
dense_1 (Dense)              (None, 100)               180100    
_________________________________________________________________
dense_2 (Dense)              (None, 1800)              181800    
_________________________________________________________________
dropout (Dropout)            (None, 1800)              0         
_________________________________________________________________
dense_3 (Dense)              (None, 2560)              4610560   
Total params: 9,582,260
Trainable params: 9,582,260
Non-trainable params: 0
_________________________________________________________________


In [38]:
layer_name="dense_1"
compressing_model=models.Model(inputs=autoencoder.input,outputs=autoencoder.get_layer(layer_name).output)

ae_pred=compressing_model.predict(X)
display(ae_pred.shape)

(8, 100)

In [39]:
dense_dir=f"{CFG.models_dir}effnet_tuned_dense/"
with open(f"{dense_dir}model.json","rt") as f:
    model_json=f.read()
dense_model=models.model_from_json(model_json)
dense_model.load_weights(f"{dense_dir}ckpt")
dense_model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1 (Dense)              (None, 11)                1111      
Total params: 1,111
Trainable params: 1,111
Non-trainable params: 0
_________________________________________________________________


In [42]:
dense_pred=pd.DataFrame(dense_model.predict(ae_pred),columns=CFG.target_cols)
submission=pd.concat([test["StudyInstanceUID"].reset_index(drop=True),dense_pred],axis=1)
display(submission)

submission.to_csv("submission.csv",index=False)

Unnamed: 0,StudyInstanceUID,ETT - Abnormal,ETT - Borderline,ETT - Normal,NGT - Abnormal,NGT - Borderline,NGT - Incompletely Imaged,NGT - Normal,CVC - Abnormal,CVC - Borderline,CVC - Normal,Swan Ganz Catheter Present
0,1.2.826.0.1.3680043.8.498.26697628953273228189...,0.002183,0.005840,0.009135,0.063458,0.191302,0.090042,0.839361,0.170036,0.473462,0.182635,0.000447
1,1.2.826.0.1.3680043.8.498.46302891597398758759...,0.002403,0.034220,0.935055,0.063382,0.010251,0.852127,0.013372,0.021895,0.179859,0.942377,0.001326
2,1.2.826.0.1.3680043.8.498.23819260719748494858...,0.000396,0.000668,0.000454,0.001235,0.001506,0.000504,0.000924,0.057376,0.345900,0.624623,0.000219
3,1.2.826.0.1.3680043.8.498.68286643202323212801...,0.001456,0.008681,0.004323,0.003235,0.001273,0.008788,0.005655,0.506668,0.320332,0.247447,0.000321
4,1.2.826.0.1.3680043.8.498.10050203009225938259...,0.002330,0.003120,0.008720,0.013459,0.018858,0.025655,0.011007,0.005441,0.104196,0.945665,0.000539
...,...,...,...,...,...,...,...,...,...,...,...,...
30078,1.2.826.0.1.3680043.8.498.74257566841157531124...,,,,,,,,,,,
30079,1.2.826.0.1.3680043.8.498.46510939987173529969...,,,,,,,,,,,
30080,1.2.826.0.1.3680043.8.498.43173270582850645437...,,,,,,,,,,,
30081,1.2.826.0.1.3680043.8.498.95092491950130838685...,,,,,,,,,,,
