In [1]:
import pandas as pd
from IPython.display import display
from sklearn.model_selection import GroupKFold
import matplotlib.pyplot as plt
import numpy as np
import cv2
from tqdm import tqdm

import tensorflow as tf
from tensorflow.keras import models
from tensorflow.io import FixedLenFeature

In [2]:
class CFG:
    debug=True if "get_ipython" in globals() else False
    dataset_dir="../input/ranzcr-clip-catheter-line-classification/"
    models_dir="./models/"
    n_folds=4
    input_shape=(260,260)
    batch_size=8 if debug else 256
    target_cols=['ETT - Abnormal', 'ETT - Borderline', 'ETT - Normal', 'NGT - Abnormal', 'NGT - Borderline', 'NGT - Incompletely Imaged',           'NGT - Normal', 'CVC - Abnormal', 'CVC - Borderline', 'CVC - Normal', 'Swan Ganz Catheter Present']

print(f"CFG.debug: {CFG.debug}")
train=pd.read_csv(CFG.dataset_dir+"train.csv")

CFG.debug: True


In [3]:
def visualize_annotations(file_id):
    plt.figure(figsize=(15, 8))
    
    image = cv2.imread(f"{dataset_dir}train/{file_id}.jpg")
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    plt.imshow(image)
    
    df_patient = annot_df.loc[annot_df["StudyInstanceUID"] == file_id]
    
    if df_patient.shape[0]:        
        labels = df_patient["label"].values.tolist()
        lines = df_patient["data"].apply(eval).values.tolist()

        for line, label in zip(lines, labels):         
            line = np.asarray(line)
            plt.scatter(line[:, 0], line[:, 1], s=40, label=label)
        
        plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0, prop={'size': 20})
        
    plt.tick_params(axis="x", labelsize=15)
    plt.tick_params(axis="y", labelsize=15)
    
    plt.savefig(f"../input/annotated/{file_id}.png")

In [4]:
with open(f"{CFG.models_dir}effnet_tuned/model.json","rt") as f:
    effnet_json=f.read()
effnet=models.model_from_json(effnet_json)
effnet.load_weights(f"{CFG.models_dir}effnet_tuned/weight.hdf5")
effnet.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
efficientnetb7 (Functional)  (None, 9, 9, 2560)        64097687  
_________________________________________________________________
global_average_pooling2d (Gl (None, 2560)              0         
_________________________________________________________________
dense (Dense)                (None, 11)                28171     
Total params: 64,125,858
Trainable params: 63,815,131
Non-trainable params: 310,727
_________________________________________________________________


In [5]:
def get_fold(train):
    fold=train.copy()
    splitter=GroupKFold(n_splits=CFG.n_folds)
    for n,(train_idx,val_idx) in enumerate(splitter.split(train,groups=train["PatientID"])):
        fold.loc[val_idx,"folds"]=n
    fold["folds"]=fold["folds"].astype(int)
    return fold

fold=get_fold(train)

In [6]:
feature_description={
    "CVC - Abnormal":FixedLenFeature([],tf.int64),
    "CVC - Borderline":FixedLenFeature([],tf.int64),
    "CVC - Normal":FixedLenFeature([],tf.int64),
    "ETT - Abnormal":FixedLenFeature([],tf.int64),
    "ETT - Borderline":FixedLenFeature([],tf.int64),
    "ETT - Normal":FixedLenFeature([],tf.int64),
    "NGT - Abnormal":FixedLenFeature([],tf.int64),
    "NGT - Borderline":FixedLenFeature([],tf.int64),
    "NGT - Incompletely Imaged":FixedLenFeature([],tf.int64),
    "NGT - Normal":FixedLenFeature([],tf.int64),
    "Swan Ganz Catheter Present":FixedLenFeature([],tf.int64),
    "StudyInstanceUID":FixedLenFeature([],tf.string),
    "image":FixedLenFeature([],tf.string),
}

def parse_examples(example):
    return tf.io.parse_example(example,feature_description)

In [7]:
def preprocess(record):
    image_bytes=record["image"]
    image=tf.io.decode_png(image_bytes,channels=3)
    image=tf.cast(image,tf.float32)
    image=tf.image.resize(image,CFG.input_shape)
    image/=255
    uid=record["StudyInstanceUID"]
    return image,uid

def decode_string(tensor):
    return [str_bytes.decode() for str_bytes in tensor.numpy()]

AUTO=tf.data.experimental.AUTOTUNE
recordname="../input/train_300x300.tfrec"
raw_dataset=tf.data.TFRecordDataset(recordname)
dset=raw_dataset.map(parse_examples,num_parallel_calls=AUTO)
dset=dset.map(preprocess,num_parallel_calls=AUTO)
dset=dset.batch(CFG.batch_size).prefetch(AUTO)

Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: module 'gast' has no attribute 'Index'
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: module 'gast' has no attribute 'Index'


In [21]:
submission=train.copy()

for imgs,uids in tqdm(dset):
    preds=effnet.predict(imgs)
    uids_decoded=decode_string(uids)
    
    for pred,uid in zip(preds,uids_decoded):
        submission.loc[submission["StudyInstanceUID"]==uid,CFG.target_cols]=pred
    
display(submission)
if not CFG.debug:
    submission.to_csv("effnet_tuned_pred.csv",index=False)

0it [00:00, ?it/s]

['1.2.826.0.1.3680043.8.498.10000428974990117276582711948006105617',
 '1.2.826.0.1.3680043.8.498.10001065121843652267743449160233082683',
 '1.2.826.0.1.3680043.8.498.10001175380298620851477409998730672515',
 '1.2.826.0.1.3680043.8.498.10001274045312501651093242392099983211',
 '1.2.826.0.1.3680043.8.498.10001645884963994872672157437761279872',
 '1.2.826.0.1.3680043.8.498.10002090428422465477470152567720822894',
 '1.2.826.0.1.3680043.8.498.10002446304107330308555550280339793610',
 '1.2.826.0.1.3680043.8.498.10002644592366004745415171692737199745']

1it [00:04,  4.60s/it]

['1.2.826.0.1.3680043.8.498.10003638361010097105432298560780077394',
 '1.2.826.0.1.3680043.8.498.10004680757941111877058507971361382698',
 '1.2.826.0.1.3680043.8.498.10005525179325187531381719227940587547',
 '1.2.826.0.1.3680043.8.498.10005674150895565088475033916741013432',
 '1.2.826.0.1.3680043.8.498.10006237848798781176388878065969229683',
 '1.2.826.0.1.3680043.8.498.10006345652257803920839267162509630051',
 '1.2.826.0.1.3680043.8.498.10006912914592266370941646234626874697',
 '1.2.826.0.1.3680043.8.498.10007325230839039576266816449875945674']

2it [00:09,  4.56s/it]

['1.2.826.0.1.3680043.8.498.10008193698745944671737283077530844393',
 '1.2.826.0.1.3680043.8.498.10008918603569082587093755882910898138',
 '1.2.826.0.1.3680043.8.498.10009200682227193533239127301911164377',
 '1.2.826.0.1.3680043.8.498.10009230449649555463787687058581068839',
 '1.2.826.0.1.3680043.8.498.10009460636373382407643984386773372138',
 '1.2.826.0.1.3680043.8.498.10009804582155067294620004418359998775',
 '1.2.826.0.1.3680043.8.498.10010621324226224265011850078370952894',
 '1.2.826.0.1.3680043.8.498.10010886604924023073322417250625757580']

3it [00:13,  4.54s/it]

['1.2.826.0.1.3680043.8.498.10011225322111935956709340361664308988',
 '1.2.826.0.1.3680043.8.498.10011303716164813363951330003247248699',
 '1.2.826.0.1.3680043.8.498.10011326142914171276323218618410985958',
 '1.2.826.0.1.3680043.8.498.10011467445158645691510394489893787804',
 '1.2.826.0.1.3680043.8.498.10011645222415989937696288265656661857',
 '1.2.826.0.1.3680043.8.498.10012076044416201719708790381665812951',
 '1.2.826.0.1.3680043.8.498.10012289245645788930532993508716817528',
 '1.2.826.0.1.3680043.8.498.10012868114746340015145674058758665450']

3it [00:18,  6.04s/it]


Unnamed: 0,StudyInstanceUID,ETT - Abnormal,ETT - Borderline,ETT - Normal,NGT - Abnormal,NGT - Borderline,NGT - Incompletely Imaged,NGT - Normal,CVC - Abnormal,CVC - Borderline,CVC - Normal,Swan Ganz Catheter Present,PatientID
0,1.2.826.0.1.3680043.8.498.26697628953273228189...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,ec89415d1
1,1.2.826.0.1.3680043.8.498.46302891597398758759...,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,bf4c6da3c
2,1.2.826.0.1.3680043.8.498.23819260719748494858...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,3fc1c97e5
3,1.2.826.0.1.3680043.8.498.68286643202323212801...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,c31019814
4,1.2.826.0.1.3680043.8.498.10050203009225938259...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,207685cd1
...,...,...,...,...,...,...,...,...,...,...,...,...,...
30078,1.2.826.0.1.3680043.8.498.74257566841157531124...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,5b5b9ac30
30079,1.2.826.0.1.3680043.8.498.46510939987173529969...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,7192404d8
30080,1.2.826.0.1.3680043.8.498.43173270582850645437...,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,d4d1b066d
30081,1.2.826.0.1.3680043.8.498.95092491950130838685...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,01a6602b8


In [24]:
display(submission[(submission["ETT - Abnormal"]!=0)&(submission["ETT - Abnormal"]!=1)].shape)

(32, 13)

In [9]:
for imgs,uids in tqdm(dset):
    if imgs.shape !=(CFG.batch_size,260,260,3):
        display(imgs)

3759it [01:04, 43.72it/s]

<tf.Tensor: shape=(3, 260, 260, 3), dtype=float32, numpy=
array([[[[0.02420234, 0.02420234, 0.02420234],
         [0.0367328 , 0.0367328 , 0.0367328 ],
         [0.03951735, 0.03951735, 0.03951735],
         ...,
         [0.03438915, 0.03438915, 0.03438915],
         [0.07696924, 0.07696924, 0.07696924],
         [0.29812938, 0.29812938, 0.29812938]],

        [[0.0214178 , 0.0214178 , 0.0214178 ],
         [0.04313726, 0.04313726, 0.04313726],
         [0.04313726, 0.04313726, 0.04313726],
         ...,
         [0.04107206, 0.04107206, 0.04107206],
         [0.0686853 , 0.0686853 , 0.0686853 ],
         [0.27042335, 0.27042335, 0.27042335]],

        [[0.0214178 , 0.0214178 , 0.0214178 ],
         [0.04313726, 0.04313726, 0.04313726],
         [0.04313726, 0.04313726, 0.04313726],
         ...,
         [0.03921569, 0.03921569, 0.03921569],
         [0.06450849, 0.06450849, 0.06450849],
         [0.25336814, 0.25336814, 0.25336814]],

        ...,

        [[0.02810072, 0.02810072, 

3761it [01:04, 58.46it/s]
