In [1]:
import pandas as pd
import numpy as np
from IPython.display import display
from tqdm.notebook import tqdm
import pickle

import tensorflow as tf
from tensorflow.keras import models

from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score
import efficientnet.tfkeras

In [2]:
class CFG:
    debug=True

    dataset_dir="../input/ranzcr-clip-catheter-line-classification/"
    models_dir="./models/" if debug else "../input/ranzcr-chibamed/"
    batch_size=4 if debug else 128
    input_shape=(600,600)

    target_cols=['ETT - Abnormal', 'ETT - Borderline', 'ETT - Normal', 'NGT - Abnormal', 'NGT - Borderline', 'NGT - Incompletely Imaged',           'NGT - Normal', 'CVC - Abnormal', 'CVC - Borderline', 'CVC - Normal', 'Swan Ganz Catheter Present']

In [3]:
test=pd.read_csv(f"{CFG.dataset_dir}train.csv") if CFG.debug else pd.read_csv(f"{CFG.dataset_dir}sample_submission.csv")

In [4]:
effnet=models.load_model(f"{CFG.models_dir}xhlulu/model.h5")
layer_name="global_average_pooling2d"
hidden_layer_model=models.Model(inputs=effnet.input,outputs=effnet.get_layer(layer_name).output)
hidden_layer_model.summary()

Model: "functional_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
efficientnet-b7_input (Input [(None, 600, 600, 3)]     0         
_________________________________________________________________
efficientnet-b7 (Functional) (None, 19, 19, 2560)      64097680  
_________________________________________________________________
global_average_pooling2d (Gl (None, 2560)              0         
Total params: 64,097,680
Trainable params: 63,786,960
Non-trainable params: 310,720
_________________________________________________________________


In [5]:
def preprocess(uid):
    if CFG.debug:
        file_bytes=tf.io.read_file(f"{CFG.dataset_dir}train/"+uid+".jpg") # f"{}"形式ではTensorの型変換が働かない
    else:
        file_bytes=tf.io.read_file(f"{CFG.dataset_dir}test/"+uid+".jpg")
    image=tf.io.decode_jpeg(file_bytes,channels=3)
    image=tf.cast(image,tf.float32)
    image=tf.image.resize(image,CFG.input_shape)
    image/=255
    return uid,image

def decode_string(tensor):
    return [str_bytes.decode() for str_bytes in tensor.numpy()]

AUTO=tf.data.experimental.AUTOTUNE
dset=tf.data.Dataset.from_tensor_slices(test["StudyInstanceUID"])
dset=dset.map(preprocess,num_parallel_calls=AUTO)
dset=dset.batch(CFG.batch_size).prefetch(AUTO)

In [6]:
ndarray_dict={}
for uids,imgs in tqdm(dset):
    preds=hidden_layer_model.predict(imgs)
    uids_decoded=decode_string(uids)
    for uid,pred in zip(uids_decoded,preds):
        ndarray_dict[uid]=pred

    if CFG.debug:
        break

  0%|          | 0/7521 [00:00<?, ?it/s]

In [7]:
if CFG.debug:
    features_list=[ndarray_dict[uid] for uid in ndarray_dict.keys()]
else:
    features_list=[ndarray_dict[uid] for uid in tqdm(test["StudyInstanceUID"])]
features=np.array(features_list)

In [8]:
def compress_with_autoencoder(features):
    scaler=pickle.load(open(f"{CFG.models_dir}minmaxscaler_effnet_best.pickle","rb"))
    X=scaler.transform(features)

    autoencoder_dir=f"{CFG.models_dir}autoencoder_best/"
    with open(f"{autoencoder_dir}model.json","rt") as f:
        model_json=f.read()
    autoencoder=models.model_from_json(model_json)
    autoencoder.load_weights(f"{autoencoder_dir}ckpt")

    layer_name="dense_1"
    compressing_model=models.Model(inputs=autoencoder.input,outputs=autoencoder.get_layer(layer_name).output)

    ae_pred=compressing_model.predict(X)
    ae_pred_df=pd.DataFrame(ae_pred)

    return ae_pred_df

X=compress_with_autoencoder(features)



In [9]:
valuless_columns=[3, 4, 6, 8, 9, 13, 14, 17, 18, 23, 27, 35, 36, 37, 38, 44, 45,
            47, 50, 51, 52, 57, 58, 61, 62, 67, 68, 72, 73, 74, 76, 85, 86, 87,
            90, 91, 92, 98, 99]
X_dropped=X.drop(columns=valuless_columns)
display(X_dropped)

Unnamed: 0,0,1,2,5,7,10,11,12,15,16,...,82,83,84,88,89,93,94,95,96,97
0,2.673171,2.893724,0.450781,1.060409,2.383523,0.602805,0.247275,4.546678,2.601019,2.746942,...,2.845104,1.222311,2.439301,0.743164,5.553793,1.139012,2.191009,1.756444,1.531287,5.458641
1,2.044054,1.257803,4.117214,1.095964,2.815533,2.502601,2.922095,1.949331,4.941281,0.661702,...,1.585451,3.151549,4.557868,2.256857,1.380925,2.510921,1.794874,1.713381,1.276319,0.395076
2,0.957074,2.805676,2.284654,0.570213,1.604304,1.60777,1.134657,3.9697,2.496218,1.417869,...,1.166086,0.489576,1.096345,0.0,1.794175,1.730872,2.893353,1.455251,1.80405,2.715652
3,0.934214,4.42417,3.021042,0.521625,2.399586,1.063171,0.865111,3.438043,2.433294,3.138256,...,1.842545,1.764452,2.348569,0.0,3.490618,2.038463,2.977326,1.24814,1.353673,2.733519


In [11]:
submission=test.copy()

for i,col_name in enumerate(CFG.target_cols):
    model=pickle.load(open(f"{CFG.models_dir}lgbm_effnet_best_dropped/model_{i}.pickle","rb"))
    pred=model.predict(X_dropped)
    submission[col_name]=pd.Series(pred)

if not CFG.debug:
    submission.to_csv("submission.csv",index=False)
display(submission)

Unnamed: 0,StudyInstanceUID,ETT - Abnormal,ETT - Borderline,ETT - Normal,NGT - Abnormal,NGT - Borderline,NGT - Incompletely Imaged,NGT - Normal,CVC - Abnormal,CVC - Borderline,CVC - Normal,Swan Ganz Catheter Present,PatientID
0,1.2.826.0.1.3680043.8.498.26697628953273228189...,0.003388,0.020587,0.588440,0.021597,0.017320,0.680882,0.814131,0.459130,0.279040,0.256390,0.000303,ec89415d1
1,1.2.826.0.1.3680043.8.498.46302891597398758759...,0.003813,0.024238,0.508809,0.018454,0.016593,0.619067,0.883625,0.292774,0.330958,0.375061,0.001885,bf4c6da3c
2,1.2.826.0.1.3680043.8.498.23819260719748494858...,0.016453,0.024301,0.507936,0.012596,0.018927,0.645346,0.922774,0.353919,0.387217,0.391197,0.000268,3fc1c97e5
3,1.2.826.0.1.3680043.8.498.68286643202323212801...,0.003306,0.029268,0.555438,0.019360,0.017148,0.609939,0.870666,0.395821,0.328002,0.368925,0.009753,c31019814
4,1.2.826.0.1.3680043.8.498.10050203009225938259...,,,,,,,,,,,,207685cd1
...,...,...,...,...,...,...,...,...,...,...,...,...,...
30078,1.2.826.0.1.3680043.8.498.74257566841157531124...,,,,,,,,,,,,5b5b9ac30
30079,1.2.826.0.1.3680043.8.498.46510939987173529969...,,,,,,,,,,,,7192404d8
30080,1.2.826.0.1.3680043.8.498.43173270582850645437...,,,,,,,,,,,,d4d1b066d
30081,1.2.826.0.1.3680043.8.498.95092491950130838685...,,,,,,,,,,,,01a6602b8
