In [1]:
import pandas as pd
import numpy as np
from IPython.display import display
from tqdm.notebook import tqdm
import pickle

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import models

from sklearn.model_selection import GroupKFold

In [2]:
class CFG:
    """Notebook-wide settings, read everywhere as plain class attributes."""

    # True: quick dry run on train.csv with tiny batches; False: full inference run.
    debug = True

    # Root of the competition data (CSV files plus the train/ and test/ image folders).
    dataset_dir = "../input/ranzcr-clip-catheter-line-classification/"

    # Location of the saved model artifacts; switches with the debug flag above.
    models_dir = "./models/" if debug else "../input/efficientnet-lightgbm-models/"

    batch_size = 4 if debug else 128

    # Size passed to tf.image.resize, i.e. (height, width).
    input_shape = (260, 260)

    # Column names given to the model outputs and written to the submission frame.
    target_cols = [
        'ETT - Abnormal',
        'ETT - Borderline',
        'ETT - Normal',
        'NGT - Abnormal',
        'NGT - Borderline',
        'NGT - Incompletely Imaged',
        'NGT - Normal',
        'CVC - Abnormal',
        'CVC - Borderline',
        'CVC - Normal',
        'Swan Ganz Catheter Present',
    ]

    # Number of per-fold dense checkpoints that get loaded and averaged.
    n_folds = 4

In [3]:
# Debug runs iterate over train.csv; real runs iterate over the sample submission rows.
test=pd.read_csv(CFG.dataset_dir+("train.csv" if CFG.debug else "sample_submission.csv"))

In [4]:
# Rebuild the network stored under effnet_tuned/ from its JSON architecture,
# then attach the saved weights.
with open(f"{CFG.models_dir}effnet_tuned/model.json", "rt") as f:
    effnet_json = f.read()
effnet = models.model_from_json(effnet_json)
effnet.load_weights(f"{CFG.models_dir}effnet_tuned/weight.hdf5")

# Cut the network at the pooling layer so that predictions are the pooled
# activations rather than the network's final output.
layer_name = "global_average_pooling2d"
hidden_layer_model = models.Model(
    inputs=effnet.input,
    outputs=effnet.get_layer(name=layer_name).output,
)
hidden_layer_model.summary()

Model: "functional_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
efficientnetb7_input (InputL [(None, 260, 260, 3)]     0         
_________________________________________________________________
efficientnetb7 (Functional)  (None, 9, 9, 2560)        64097687  
_________________________________________________________________
global_average_pooling2d (Gl (None, 2560)              0         
Total params: 64,097,687
Trainable params: 63,786,960
Non-trainable params: 310,727
_________________________________________________________________


In [5]:
def preprocess(record):
    """Read the JPEG named by `record` and return it as a resized float32 image.

    `record` is the file stem (without directory or ".jpg"); it is applied to
    the StudyInstanceUID column via `Dataset.map`, so it is presumably a
    scalar string tensor. The image is decoded with 3 channels, resized to
    `CFG.input_shape`, and divided by 255.
    """
    # Debug runs read from train/, real runs from test/.
    split_dir="train/" if CFG.debug else "test/"
    # The path is assembled with `+` because an f-string would not apply
    # the Tensor type conversion to `record`.
    jpeg_path=f"{CFG.dataset_dir}{split_dir}"+record+".jpg"

    raw_bytes=tf.io.read_file(jpeg_path)
    pixels=tf.io.decode_jpeg(raw_bytes,channels=3)
    pixels=tf.cast(pixels,tf.float32)
    pixels=tf.image.resize(pixels,CFG.input_shape)
    return pixels/255

def decode_string(tensor):
    """Convert a batch of byte strings into a list of Python `str`.

    `tensor` must provide `.numpy()` returning an iterable whose items have a
    `.decode()` method; each item is decoded with the default codec.
    """
    decoded=[]
    for raw in tensor.numpy():
        decoded.append(raw.decode())
    return decoded


# Image stream: StudyInstanceUID -> preprocessed image -> batches.
dset=tf.data.Dataset.from_tensor_slices(test["StudyInstanceUID"])
dset=dset.map(preprocess,num_parallel_calls=tf.data.experimental.AUTOTUNE)
dset=dset.batch(CFG.batch_size)

# Companion stream of the raw UIDs with the same batch size, so each image
# batch can be paired with its UIDs when the two datasets are zipped.
uid_dset=tf.data.Dataset.from_tensor_slices(test["StudyInstanceUID"])
uid_dset=uid_dset.batch(CFG.batch_size)

In [6]:
# Collect one feature vector per study, keyed by its decoded StudyInstanceUID.
ndarray_dict={}

# zip() has no length, so tqdm cannot show progress unless it is told the
# number of batches; this is the ceiling of len(test) / batch_size.
n_batches=-(-len(test)//CFG.batch_size)

for imgs,uids in tqdm(zip(dset,uid_dset),total=n_batches):
    # predict_on_batch does a single forward pass on the given batch.
    # Model.predict is meant for large inputs and is documented as not
    # intended for per-batch calls inside a Python loop.
    preds=hidden_layer_model.predict_on_batch(imgs)
    uids_decoded=decode_string(uids)

    # Pair each UID with the matching row of the batch output.
    ndarray_dict.update(zip(uids_decoded,preds))

    if CFG.debug:
        # A debug run only processes the first batch.
        break

0it [00:00, ?it/s]

In [7]:
# Stack the per-study vectors into one 2-D array.
if CFG.debug:
    # Only some UIDs were processed; keep them in the order they were inserted.
    features_list=list(ndarray_dict.values())
else:
    # Follow the row order of `test` so features line up with the submission rows.
    features_list=[]
    for uid in tqdm(test["StudyInstanceUID"]):
        features_list.append(ndarray_dict[uid])
features=np.array(features_list)

In [8]:
def compress_with_autoencoder(features):
    """Scale `features` and encode them with the saved autoencoder.

    Steps:
      1. Load the pickled scaler and apply its `transform` to `features`.
      2. Rebuild the autoencoder from `model.json` and load its checkpoint.
      3. Cut the autoencoder at the layer named "dense_1" and run the scaled
         features through that sub-model.

    Args:
        features: array-like accepted by the scaler's `transform`.

    Returns:
        pandas.DataFrame holding the "dense_1" activations returned by
        `predict` for the scaled input.
    """
    # The scaler is read from CFG.models_dir like every other artifact, so the
    # function also works when debug is off. The context manager closes the
    # file handle, which the previous bare open() call leaked.
    # NOTE(security): pickle.load can execute arbitrary code; only load
    # scaler files from a trusted source.
    with open(f"{CFG.models_dir}minmaxscaler_effnet_tuned.pickle","rb") as f:
        scaler=pickle.load(f)
    X=scaler.transform(features)

    autoencoder_dir=f"{CFG.models_dir}autoencoder_tuned/"
    with open(f"{autoencoder_dir}model.json","rt") as f:
        model_json=f.read()
    autoencoder=models.model_from_json(model_json)
    autoencoder.load_weights(f"{autoencoder_dir}ckpt")

    # Keep only the part of the autoencoder up to "dense_1".
    layer_name="dense_1"
    compressing_model=models.Model(inputs=autoencoder.input,outputs=autoencoder.get_layer(layer_name).output)

    ae_pred=compressing_model.predict(X)
    ae_pred_df=pd.DataFrame(ae_pred)

    return ae_pred_df

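# Autoencoder compression is currently disabled: the dense models loaded below
# (effnet_tuned_dense_withoutAE) are fed the raw `features` instead.
# X=compress_with_autoencoder(features)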



In [9]:
def create_model(fold:int):
    """Rebuild the dense model and load the checkpoint saved for `fold`.

    The architecture is read from the shared `model.json`; the weights come
    from the file `ckpt_{fold}` in the same directory.
    """
    base_dir=f"{CFG.models_dir}effnet_tuned_dense_withoutAE/"
    with open(f"{base_dir}model.json","rt") as json_file:
        architecture=json_file.read()

    model=models.model_from_json(architecture)
    model.load_weights(f"{base_dir}ckpt_{fold}")
    return model

# One dense model per fold index 0 .. CFG.n_folds-1.
dense_model_list=list(map(create_model,range(CFG.n_folds)))

In [12]:
# Per-fold predictions, each wrapped in a DataFrame with one column per target.
dense_pred_list=[]
for fold in range(CFG.n_folds):
    fold_pred=dense_model_list[fold].predict(features)
    dense_pred_list.append(pd.DataFrame(fold_pred,columns=CFG.target_cols))

submission=test.copy()

# Average each target across folds. The assignment aligns on the row index,
# so rows of `test` that have no prediction end up as NaN.
for col_name in CFG.target_cols:
    fold_columns={}
    for n,pred in enumerate(dense_pred_list):
        fold_columns[f"fold_{n}"]=pred[col_name]
    pred_col=pd.DataFrame(fold_columns)
    submission[col_name]=pred_col.mean(axis=1)
display(submission)

# The CSV is only written for a real (non-debug) run.
if not CFG.debug:
    submission.to_csv("submission.csv",index=False)

Unnamed: 0,fold_0,fold_1,fold_2,fold_3
0,0.002316,0.003725,0.001753,0.00184
1,0.001032,0.001803,0.000864,0.000553
2,0.00014,0.000625,5.4e-05,3.4e-05
3,0.000773,0.002006,0.000594,0.000511
4,0.001517,0.002681,0.000889,0.001077
5,0.009185,0.012368,0.006619,0.006484
6,0.000116,0.000722,3.4e-05,1.8e-05
7,0.017139,0.018919,0.01072,0.009762


Unnamed: 0,fold_0,fold_1,fold_2,fold_3
0,0.003147,0.009438,0.001831,0.001539
1,0.037771,0.043849,0.03219,0.036346
2,0.000105,0.000581,3.3e-05,1.6e-05
3,0.002058,0.004864,0.001604,0.001116
4,0.001294,0.004938,0.000569,0.000417
5,0.12857,0.147799,0.147766,0.146079
6,0.000132,0.000608,4.7e-05,2.8e-05
7,0.097101,0.149593,0.086947,0.094671


Unnamed: 0,fold_0,fold_1,fold_2,fold_3
0,0.00522,0.01273,0.003448,0.004813
1,0.946967,0.926086,0.950689,0.950159
2,0.000109,0.000489,7.3e-05,5.9e-05
3,0.00195,0.005359,0.001302,0.000963
4,0.004767,0.011248,0.003821,0.003444
5,0.761013,0.690791,0.764391,0.75849
6,0.003411,0.009649,0.002551,0.002129
7,0.695039,0.598742,0.742728,0.718223


Unnamed: 0,fold_0,fold_1,fold_2,fold_3
0,0.031259,0.020622,0.027672,0.039753
1,0.042656,0.035592,0.039609,0.056895
2,0.000278,0.000886,0.000116,6.4e-05
3,0.001523,0.003381,0.000733,0.000408
4,0.016456,0.017316,0.010815,0.012338
5,0.023426,0.030027,0.027773,0.025252
6,0.000653,0.001682,0.000252,0.000222
7,0.055419,0.056134,0.046266,0.06753


Unnamed: 0,fold_0,fold_1,fold_2,fold_3
0,0.118515,0.07336,0.149856,0.14015
1,0.006178,0.009147,0.006636,0.006028
2,0.000328,0.001084,0.000119,8.3e-05
3,0.000677,0.002399,0.000341,0.000202
4,0.022588,0.022643,0.019214,0.017536
5,0.089784,0.093915,0.098772,0.111354
6,0.000621,0.001809,0.000282,0.000194
7,0.127443,0.150659,0.145304,0.15224


Unnamed: 0,fold_0,fold_1,fold_2,fold_3
0,0.049534,0.029932,0.057853,0.062825
1,0.852309,0.896922,0.862209,0.856086
2,6.8e-05,0.00039,1.8e-05,8e-06
3,0.002605,0.005803,0.001464,0.001055
4,0.013768,0.026264,0.011195,0.008274
5,0.030596,0.040367,0.030976,0.026905
6,0.000359,0.001873,0.000113,5.1e-05
7,0.011255,0.01838,0.010593,0.010022


Unnamed: 0,fold_0,fold_1,fold_2,fold_3
0,0.796005,0.674515,0.829795,0.81556
1,0.011235,0.015091,0.00808,0.010465
2,0.00011,0.00064,5.1e-05,2.8e-05
3,0.001431,0.002768,0.00112,0.000736
4,0.007506,0.018338,0.005427,0.003704
5,0.733451,0.720308,0.742893,0.728916
6,0.000159,0.000979,8.8e-05,5e-05
7,0.569242,0.629733,0.594245,0.555478


Unnamed: 0,fold_0,fold_1,fold_2,fold_3
0,0.177924,0.1645,0.182627,0.14379
1,0.035409,0.036019,0.039825,0.026254
2,0.053775,0.051515,0.048939,0.044629
3,0.559082,0.49009,0.558135,0.525578
4,0.007814,0.007831,0.00903,0.006999
5,0.095001,0.09966,0.091929,0.089758
6,0.100373,0.094517,0.085848,0.075507
7,0.085194,0.086557,0.076762,0.079307


Unnamed: 0,fold_0,fold_1,fold_2,fold_3
0,0.418132,0.409906,0.382422,0.420282
1,0.221942,0.211673,0.218631,0.200685
2,0.385649,0.352125,0.362784,0.40293
3,0.273789,0.268152,0.261636,0.281042
4,0.099306,0.083905,0.093074,0.097934
5,0.151611,0.142537,0.142393,0.148184
6,0.624018,0.607475,0.609965,0.643704
7,0.349033,0.337081,0.335458,0.351507


Unnamed: 0,fold_0,fold_1,fold_2,fold_3
0,0.214351,0.232492,0.210563,0.221307
1,0.926387,0.911924,0.930675,0.926447
2,0.59122,0.624725,0.601051,0.606948
3,0.261613,0.296324,0.27114,0.26082
4,0.947302,0.950571,0.952314,0.944797
5,0.750842,0.775496,0.777027,0.779792
6,0.624171,0.628503,0.64275,0.656178
7,0.504137,0.565989,0.536898,0.561092


Unnamed: 0,fold_0,fold_1,fold_2,fold_3
0,0.00031,0.001558,0.000251,0.000174
1,0.000935,0.0042,0.000536,0.000315
2,6.3e-05,0.000365,3.4e-05,1e-05
3,0.000199,0.000957,0.000145,6e-05
4,0.000747,0.002849,0.000318,0.000188
5,0.004351,0.01629,0.002422,0.001342
6,0.000273,0.001048,0.000154,5.8e-05
7,0.001722,0.007286,0.000738,0.000277


Unnamed: 0,StudyInstanceUID,ETT - Abnormal,ETT - Borderline,ETT - Normal,NGT - Abnormal,NGT - Borderline,NGT - Incompletely Imaged,NGT - Normal,CVC - Abnormal,CVC - Borderline,CVC - Normal,Swan Ganz Catheter Present,PatientID
0,1.2.826.0.1.3680043.8.498.26697628953273228189...,0.002409,0.003989,0.006553,0.029827,0.120470,0.050036,0.778969,0.167210,0.407685,0.219678,0.000573,ec89415d1
1,1.2.826.0.1.3680043.8.498.46302891597398758759...,0.001063,0.037539,0.943475,0.043688,0.006997,0.866881,0.011218,0.034377,0.213233,0.923858,0.001496,bf4c6da3c
2,1.2.826.0.1.3680043.8.498.23819260719748494858...,0.000213,0.000184,0.000183,0.000336,0.000404,0.000121,0.000207,0.049714,0.375872,0.605986,0.000118,3fc1c97e5
3,1.2.826.0.1.3680043.8.498.68286643202323212801...,0.000971,0.002411,0.002394,0.001511,0.000905,0.002732,0.001514,0.533221,0.271155,0.272474,0.000340,c31019814
4,1.2.826.0.1.3680043.8.498.10050203009225938259...,0.001541,0.001804,0.005820,0.014232,0.020495,0.014875,0.008744,0.007918,0.093555,0.948746,0.001026,207685cd1
...,...,...,...,...,...,...,...,...,...,...,...,...,...
30078,1.2.826.0.1.3680043.8.498.74257566841157531124...,,,,,,,,,,,,5b5b9ac30
30079,1.2.826.0.1.3680043.8.498.46510939987173529969...,,,,,,,,,,,,7192404d8
30080,1.2.826.0.1.3680043.8.498.43173270582850645437...,,,,,,,,,,,,d4d1b066d
30081,1.2.826.0.1.3680043.8.498.95092491950130838685...,,,,,,,,,,,,01a6602b8
