In [1]:
import pandas as pd
import numpy as np
import os
from IPython.display import display
from tqdm.notebook import tqdm
import tensorflow as tf
from tensorflow.keras import models
from sklearn.metrics import roc_auc_score
from glob import glob

In [2]:
class CFG:
    """Notebook-wide settings: data location, model input size, batch size and label names."""

    # Dataset root; the path f-strings in this notebook append directly to it,
    # so the trailing slash is required.
    dataset_dir = "../input/ranzcr-clip-catheter-line-classification/"
    # Target size handed to tf.image.resize in preprocess().
    input_shape = (260, 260)
    # Number of images per batch in the tf.data pipelines.
    batch_size = 8
    # Label columns of train.csv, in the order used when scoring.
    target_cols = [
        "ETT - Abnormal",
        "ETT - Borderline",
        "ETT - Normal",
        "NGT - Abnormal",
        "NGT - Borderline",
        "NGT - Incompletely Imaged",
        "NGT - Normal",
        "CVC - Abnormal",
        "CVC - Borderline",
        "CVC - Normal",
        "Swan Ganz Catheter Present",
    ]

## ndarrayを保存する

In [3]:
# Read the training label table from the dataset directory and render it.
train_csv_path = CFG.dataset_dir + "train.csv"
train = pd.read_csv(train_csv_path)
display(train)

Unnamed: 0,StudyInstanceUID,ETT - Abnormal,ETT - Borderline,ETT - Normal,NGT - Abnormal,NGT - Borderline,NGT - Incompletely Imaged,NGT - Normal,CVC - Abnormal,CVC - Borderline,CVC - Normal,Swan Ganz Catheter Present,PatientID
0,1.2.826.0.1.3680043.8.498.26697628953273228189...,0,0,0,0,0,0,1,0,0,0,0,ec89415d1
1,1.2.826.0.1.3680043.8.498.46302891597398758759...,0,0,1,0,0,1,0,0,0,1,0,bf4c6da3c
2,1.2.826.0.1.3680043.8.498.23819260719748494858...,0,0,0,0,0,0,0,0,1,0,0,3fc1c97e5
3,1.2.826.0.1.3680043.8.498.68286643202323212801...,0,0,0,0,0,0,0,1,0,0,0,c31019814
4,1.2.826.0.1.3680043.8.498.10050203009225938259...,0,0,0,0,0,0,0,0,0,1,0,207685cd1
...,...,...,...,...,...,...,...,...,...,...,...,...,...
30078,1.2.826.0.1.3680043.8.498.74257566841157531124...,0,0,1,0,0,0,0,0,1,1,0,5b5b9ac30
30079,1.2.826.0.1.3680043.8.498.46510939987173529969...,0,0,0,0,0,0,0,0,0,1,0,7192404d8
30080,1.2.826.0.1.3680043.8.498.43173270582850645437...,0,0,1,0,0,1,0,1,0,1,0,d4d1b066d
30081,1.2.826.0.1.3680043.8.498.95092491950130838685...,0,0,0,0,0,0,0,0,1,0,0,01a6602b8


In [4]:
# Rebuild the fine-tuned network from its saved JSON architecture,
# then attach the stored weights and print the layer summary.
with open("./models/effnet_tuned/model.json", mode="rt") as json_file:
    architecture_json = json_file.read()

effnet = models.model_from_json(architecture_json)
effnet.load_weights("./models/effnet_tuned/weight_best")
effnet.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
efficientnetb7 (Functional)  (None, 9, 9, 2560)        64097687  
_________________________________________________________________
global_average_pooling2d (Gl (None, 2560)              0         
_________________________________________________________________
dense (Dense)                (None, 11)                28171     
Total params: 64,125,858
Trainable params: 63,815,131
Non-trainable params: 310,727
_________________________________________________________________


In [5]:
# Build a second model that shares effnet's input but stops at the named layer,
# so it outputs that layer's activations instead of the final dense output.
layer_name = "global_average_pooling2d"
pooled_output = effnet.get_layer(layer_name).output
hidden_layer_model = models.Model(inputs=effnet.input, outputs=pooled_output)
hidden_layer_model.summary()

Model: "functional_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
efficientnetb7_input (InputL [(None, 260, 260, 3)]     0         
_________________________________________________________________
efficientnetb7 (Functional)  (None, 9, 9, 2560)        64097687  
_________________________________________________________________
global_average_pooling2d (Gl (None, 2560)              0         
Total params: 64,097,687
Trainable params: 63,786,960
Non-trainable params: 310,727
_________________________________________________________________


In [7]:
def preprocess(record):
    """Load one training JPEG by its UID and return it as a scaled float image.

    Args:
        record: string tensor holding the file stem of an image under
            ``{CFG.dataset_dir}train/`` (the pipeline feeds StudyInstanceUID values).

    Returns:
        A 3-channel float32 image tensor resized to ``CFG.input_shape`` and
        divided by 255.
    """
    # The tensor is joined with `+` instead of being interpolated into the
    # f-string: tensor type conversion does not work in the embedded form.
    jpeg_path = f"{CFG.dataset_dir}train/" + record + ".jpg"
    raw_jpeg = tf.io.read_file(jpeg_path)
    pixels = tf.io.decode_jpeg(raw_jpeg, channels=3)
    pixels = tf.cast(pixels, tf.float32)
    resized = tf.image.resize(pixels, CFG.input_shape)
    return resized / 255

def decode_string(tensor):
    """Turn a batch of byte strings into a list of Python ``str`` values.

    Args:
        tensor: object whose ``.numpy()`` yields an iterable of ``bytes`` items
            (the notebook passes a batch of UIDs from a tf.data pipeline).

    Returns:
        A list with one ``str`` per item, each produced by ``bytes.decode()``
        with its default arguments.
    """
    decoded = []
    for raw in tensor.numpy():
        decoded.append(raw.decode())
    return decoded

# Two pipelines over the same UID column, neither shuffled and both batched with
# CFG.batch_size: `dset` yields decoded image batches, `uid_dset` the matching UIDs.
AUTO = tf.data.experimental.AUTOTUNE
study_uids = train["StudyInstanceUID"]

dset = (
    tf.data.Dataset.from_tensor_slices(study_uids)
    .map(preprocess, num_parallel_calls=AUTO)
    .batch(CFG.batch_size)
    .prefetch(AUTO)
)

uid_dset = (
    tf.data.Dataset.from_tensor_slices(study_uids)
    .batch(CFG.batch_size)
    .prefetch(AUTO)
)

In [8]:
# Run every image batch through the truncated model and write one .npy file
# per study, named after its StudyInstanceUID.
output_dir = "../input/effnet_tuned_output"
# np.save does not create missing directories, so make sure the target exists.
os.makedirs(output_dir, exist_ok=True)

# zip() has no length, so tqdm cannot size the bar on its own (it only showed
# "0it"); pass the number of batches explicitly.
n_batches = int(np.ceil(len(train) / CFG.batch_size))

for imgs, uids in tqdm(zip(dset, uid_dset), total=n_batches):
    # predict() is meant for large inputs and adds per-call overhead when
    # invoked in a loop on small batches; predict_on_batch handles one batch.
    preds = hidden_layer_model.predict_on_batch(imgs)
    uids_decoded = decode_string(uids)

    for uid, pred in zip(uids_decoded, preds):
        np.save(f"{output_dir}/{uid}", pred)

0it [00:00, ?it/s]

In [99]:
# Per-label ROC AUC of the predictions in `output` against the training labels.
# NOTE(review): `output` is not defined in any cell visible in this notebook and
# this cell's execution count is out of sequence — it depends on leftover kernel
# state and will fail on Restart & Run All until the producing cell is restored.
# NOTE(review): the scores pair rows by position, so `output` is assumed to list
# studies in the same order as `train_part` — confirm against the producing cell.
train_part = train[train["StudyInstanceUID"].isin(output["StudyInstanceUID"])]

auc_by_col = {}
for col_name in CFG.target_cols:
    try:
        auc_by_col[col_name] = roc_auc_score(train_part[col_name], output[col_name])
    except ValueError as err:
        # roc_auc_score raises ValueError when it cannot score a column (for
        # example when only one class is present in the labels). Keep the
        # best-effort behaviour, but report the reason instead of hiding it.
        print(f"AUC skipped for {col_name!r}: {err}")
        auc_by_col[col_name] = np.nan

# Build the one-row table in a single step so the values stay numeric.
result = pd.DataFrame(auc_by_col, index=["AUC"], columns=CFG.target_cols)
display(result, result.mean(axis=1))

Unnamed: 0,ETT - Abnormal,ETT - Borderline,ETT - Normal,NGT - Abnormal,NGT - Borderline,NGT - Incompletely Imaged,NGT - Normal,CVC - Abnormal,CVC - Borderline,CVC - Normal,Swan Ganz Catheter Present
AUC,,0.983871,0.998748,0.666667,0.984127,0.935268,0.982372,0.921875,0.871089,0.893688,


AUC    0.9153
dtype: float64

## NPZにまとめる

In [9]:
# Collect every per-study file into one {uid: array} mapping; the key is the
# file name with its extension removed.
ndarray_dict = {
    os.path.splitext(os.path.basename(npy_path))[0]: np.load(npy_path)
    for npy_path in tqdm(glob("../input/effnet_tuned_output/*"))
}

# Store all arrays in a single compressed archive, one entry per UID.
np.savez_compressed("../input/effnet_tuned_output", **ndarray_dict)

  0%|          | 0/30083 [00:00<?, ?it/s]

In [46]:
# Open the consolidated feature archive; arrays are looked up by UID below.
npz = np.load(file="../input/effnet_tuned_output.npz")

In [49]:
%%timeit
# Benchmark variant 1: explicit for-loop with a pre-bound list.append.
# Looks up one array per StudyInstanceUID in `npz`, following the row order of
# `train`, then stacks the collected arrays into a single ndarray.
# NOTE(review): this runs under %%timeit — names assigned here are presumably
# not kept in the notebook namespace afterwards; confirm before relying on
# `features` in a later cell.
features_list=[]
append=features_list.append  # bind the method once, outside the loop
for uid in tqdm(train["StudyInstanceUID"]):
    append(npz[uid])
features=np.array(features_list)

  0%|          | 0/30083 [00:00<?, ?it/s]

  0%|          | 0/30083 [00:00<?, ?it/s]

  0%|          | 0/30083 [00:00<?, ?it/s]

  0%|          | 0/30083 [00:00<?, ?it/s]

  0%|          | 0/30083 [00:00<?, ?it/s]

  0%|          | 0/30083 [00:00<?, ?it/s]

  0%|          | 0/30083 [00:00<?, ?it/s]

  0%|          | 0/30083 [00:00<?, ?it/s]

49.7 s ± 2.13 s per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [48]:
%%timeit
# Benchmark variant 2: the same per-UID lookup in `npz`, written as a list
# comprehension (no progress bar), then stacked into a single ndarray.
comprehension_list=[npz[uid] for uid in train["StudyInstanceUID"]]
comprehension=np.array(comprehension_list)

0
0
0
0
0
0
0
0
47 s ± 2.83 s per loop (mean ± std. dev. of 7 runs, 1 loop each)
