In [1]:
import os

import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
import tensorflow as tf
import tensorflow.keras.applications.efficientnet as efn

## Helper functions

The following functions are hidden:
```python
auto_select_accelerator()

build_decoder(with_labels=True, target_size=(300, 300), ext='jpg')

build_augmenter(with_labels=True)

build_dataset(paths, labels=None, bsize=32, cache=True,
              decode_fn=None, augment_fn=None,
              augment=True, repeat=True, shuffle=1024, 
              cache_dir="")
```

Unhide below to see:

In [2]:
import tensorflow as tf
# Sanity check: list the GPU devices TensorFlow can see (empty list = no GPU visible).
tf.config.list_physical_devices('GPU')

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

In [3]:
import tensorflow as tf

def auto_select_accelerator():
    """Choose a ``tf.distribute`` strategy for the available hardware.

    When at least one GPU is visible, memory growth is requested on every GPU
    (so TensorFlow does not reserve all GPU memory up front) and a
    ``OneDeviceStrategy`` bound to ``/GPU:0`` is returned. Otherwise the
    default strategy is returned.

    Returns:
        The selected ``tf.distribute.Strategy``.
    """
    gpus = tf.config.list_physical_devices('GPU')
    if gpus:
        for gpu in gpus:
            try:
                tf.config.experimental.set_memory_growth(gpu, True)
            except RuntimeError as e:
                # Memory growth can no longer be changed once the GPUs have
                # been initialized. That is not a reason to give up on the
                # GPU, so report it and carry on with the GPU strategy.
                print(e)

        strategy = tf.distribute.OneDeviceStrategy(device="/GPU:0")
        print("Running on GPU")
    else:
        strategy = tf.distribute.get_strategy()
        print("Running on CPU")

    print(f"Running on {strategy.num_replicas_in_sync} replicas")
    return strategy


def build_decoder(with_labels=True, target_size=(300, 300), ext='jpg'):
    """Build a function that loads an image file into a float tensor.

    The returned function reads the file at ``path``, decodes it as a
    3-channel PNG or JPEG, scales pixel values to ``[0, 1]`` and resizes the
    image to ``target_size``.

    Args:
        with_labels: If true, the returned function has the signature
            ``(path, label) -> (image, label)``; otherwise ``path -> image``.
        target_size: Size passed to ``tf.image.resize``.
        ext: Image format: ``'png'``, ``'jpg'`` or ``'jpeg'``. Matching is
            case-insensitive and a leading dot is ignored.

    Returns:
        The decode function, suitable for ``tf.data.Dataset.map``.

    Raises:
        ValueError: If ``ext`` is not a supported format. This is raised
            here, when the decoder is built, rather than later while the
            dataset pipeline is being traced.
    """
    fmt = str(ext).lower().lstrip('.')
    if fmt not in ('png', 'jpg', 'jpeg'):
        raise ValueError("Image extension not supported")
    is_png = fmt == 'png'

    def decode(path):
        file_bytes = tf.io.read_file(path)
        if is_png:
            img = tf.image.decode_png(file_bytes, channels=3)
        else:
            img = tf.image.decode_jpeg(file_bytes, channels=3)

        # Scale to [0, 1] before resizing.
        img = tf.cast(img, tf.float32) / 255.0
        img = tf.image.resize(img, target_size)

        return img

    def decode_with_labels(path, label):
        return decode(path), label

    return decode_with_labels if with_labels else decode


def build_augmenter(with_labels=True):
    """Build a function that applies random flips to an image.

    The image is randomly flipped left/right and then up/down. When
    ``with_labels`` is true the returned function takes ``(img, label)`` and
    passes the label through untouched; otherwise it takes ``img`` only.
    """
    def _random_flips(img):
        flipped = tf.image.random_flip_left_right(img)
        return tf.image.random_flip_up_down(flipped)

    if not with_labels:
        return _random_flips

    def _random_flips_keep_label(img, label):
        return _random_flips(img), label

    return _random_flips_keep_label


def build_dataset(paths, labels=None, bsize=32, cache=True,
                  decode_fn=None, augment_fn=None,
                  augment=True, repeat=True, shuffle=1024,
                  cache_dir=""):
    """Assemble a batched ``tf.data`` input pipeline from image paths.

    Pipeline order: decode -> cache -> augment -> shuffle -> repeat ->
    batch -> prefetch.

    Args:
        paths: Image file paths.
        labels: Optional labels aligned with ``paths``. When given, the
            dataset yields ``(image, label)`` pairs, otherwise images only.
        bsize: Batch size.
        cache: Whether to cache decoded images.
        decode_fn: Function mapping a dataset element to a decoded element.
            Defaults to ``build_decoder(labels is not None)``.
        augment_fn: Augmentation function. Defaults to
            ``build_augmenter(labels is not None)``.
        augment: Whether to apply ``augment_fn``.
        repeat: Whether to repeat the dataset indefinitely.
        shuffle: Shuffle buffer size; a falsy value disables shuffling.
        cache_dir: Location handed to ``Dataset.cache``. An empty string
            caches in memory. Use a distinct value per dataset: two datasets
            pointing at the same location share the same cache files.

    Returns:
        The resulting ``tf.data.Dataset``.
    """
    has_labels = labels is not None

    # Gate on the same truthiness test that decides whether cache() is
    # applied below, so the directory always exists when it is needed.
    if cache and cache_dir != "":
        os.makedirs(cache_dir, exist_ok=True)

    if decode_fn is None:
        decode_fn = build_decoder(has_labels)

    if augment_fn is None:
        augment_fn = build_augmenter(has_labels)

    AUTO = tf.data.experimental.AUTOTUNE
    slices = (paths, labels) if has_labels else paths

    dset = tf.data.Dataset.from_tensor_slices(slices)
    dset = dset.map(decode_fn, num_parallel_calls=AUTO)
    if cache:
        dset = dset.cache(cache_dir)
    if augment:
        dset = dset.map(augment_fn, num_parallel_calls=AUTO)
    # Shuffle before repeating so that every sample of one epoch is emitted
    # before any sample of the next; repeat-then-shuffle mixes epochs.
    if shuffle:
        dset = dset.shuffle(shuffle)
    if repeat:
        dset = dset.repeat()
    dset = dset.batch(bsize).prefetch(AUTO)

    return dset

## Variables and configurations

In [4]:
COMPETITION_NAME = "ranzcr-clip-catheter-line-classification"
strategy = auto_select_accelerator()
# Global batch size: 16 samples for each replica of the selected strategy.
BATCH_SIZE = strategy.num_replicas_in_sync * 16
# Disabled: Kaggle GCS path lookup. This notebook reads the data from a local directory instead.
#GCS_DS_PATH = KaggleDatasets().get_gcs_path(COMPETITION_NAME)

Running on GPU
Running on 1 replicas


## Preparing dataset

### Loading and preprocessing CSVs

In [5]:
########################### GPU ##########################
# Root folder of the competition data. Set the RANZCR_DATA_DIR environment
# variable to run the notebook on another machine; the default keeps the
# original local path. Joining with "" guarantees a trailing separator so the
# string concatenations below keep working for either source.
load_dir = os.path.join(
    os.environ.get(
        "RANZCR_DATA_DIR",
        f"C:/Users/srika/Dropbox/PC/Downloads/{COMPETITION_NAME}/",
    ),
    "",
)

# Training metadata: one row per study, with its image stored as <StudyInstanceUID>.jpg
df = pd.read_csv(load_dir + 'train.csv')
paths = load_dir + "train/" + df['StudyInstanceUID'] + '.jpg'

# The sample submission lists the test studies and defines the label columns
sub_df = pd.read_csv(load_dir + 'sample_submission.csv')
test_paths = load_dir + "test/" + sub_df['StudyInstanceUID'] + '.jpg'

# Get the multi-labels (every submission column after the first, which is the study id)
label_cols = sub_df.columns[1:]
labels = df[label_cols].values

In [7]:

# Number of training image paths (one per row of train.csv)
len(paths)

30083

In [8]:
# Hold out 20% of the samples for validation; the fixed seed keeps the split reproducible
train_paths, valid_paths, train_labels, valid_labels = train_test_split(
    paths,
    labels,
    random_state=42,
    test_size=0.2,
)

In [9]:
# Candidate square input resolutions; the first one is used below.
# NOTE(review): the original comment said "index i corresponds to b-i"
# (EfficientNet B0..B7), but the tuple has nine entries and the model trained
# in this notebook is Xception - confirm whether that mapping is still intended.
IMSIZES = (224, 240, 260, 300, 320,340,480, 528, 600)
size = IMSIZES[0]

decoder = build_decoder(with_labels=True, target_size=(size, size))
test_decoder = build_decoder(with_labels=False, target_size=(size, size))

# On-disk cache root. It is derived from load_dir: the previous literal
# contained "{COMPETITION_NAME}" without an f-string prefix, so the braces
# ended up verbatim in the path.
CACHE_ROOT = load_dir + "tf_cache/"

# Build the tensorflow datasets. Training and validation get separate cache
# locations, because sharing one would make both datasets use the same
# cache files.
dtrain = build_dataset(
    train_paths, train_labels, bsize=BATCH_SIZE,
    cache_dir=CACHE_ROOT + "train", decode_fn=decoder
)

dvalid = build_dataset(
    valid_paths, valid_labels, bsize=BATCH_SIZE,
    repeat=False, shuffle=False, augment=False,
    cache_dir=CACHE_ROOT + "valid", decode_fn=decoder
)

# The test set is read once, in order, without caching or augmentation
dtest = build_dataset(
    test_paths, bsize=BATCH_SIZE, repeat=False,
    shuffle=False, augment=False, cache=False,
    decode_fn=test_decoder
)

In [10]:
# Side length (in pixels) of the square images fed to the model
size

224

## Modeling

In [15]:
# One sigmoid output per label column (multi-label classification)
n_labels = labels.shape[1]

with strategy.scope():
    # ImageNet-pretrained Xception backbone, global average pooling, then an
    # independent sigmoid probability for each label.
    # NOTE(review): the decoder scales pixels to [0, 1]; the Keras docs for
    # Xception describe inputs preprocessed to [-1, 1] - confirm whether the
    # input scaling should be aligned with the pretrained weights.
    model_Xception = tf.keras.Sequential([
        tf.keras.applications.Xception(
            input_shape=(size, size, 3),
            weights='imagenet',
            include_top=False,
        ),
        tf.keras.layers.GlobalAveragePooling2D(),
        tf.keras.layers.Dense(n_labels, activation='sigmoid')
    ])
    model_Xception.compile(
        optimizer='adam',
        loss='binary_crossentropy',
        # Pin the metric name. Without it, re-running this cell in the same
        # kernel makes Keras name the metric "auc_1", "auc_2", ..., and the
        # callbacks monitoring "val_auc" no longer find it.
        metrics=[tf.keras.metrics.AUC(multi_label=True, name='auc')])
    model_Xception.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 xception (Functional)       (None, 7, 7, 2048)        20861480  
                                                                 
 global_average_pooling2d_1   (None, 2048)             0         
 (GlobalAveragePooling2D)                                        
                                                                 
 dense_1 (Dense)             (None, 11)                22539     
                                                                 
Total params: 20,884,019
Trainable params: 20,829,491
Non-trainable params: 54,528
_________________________________________________________________


In [13]:
# The training dataset repeats forever, so an epoch is defined as the number
# of full batches in the training split.
steps_per_epoch = len(train_paths) // BATCH_SIZE

# Keep only the weights with the highest validation AUC seen so far
checkpoint = tf.keras.callbacks.ModelCheckpoint(
    filepath='modelcat.h5',
    monitor='val_auc',
    mode='max',
    save_best_only=True,
)

# Lower the learning rate when validation AUC has not improved for 3 epochs
lr_reducer = tf.keras.callbacks.ReduceLROnPlateau(
    monitor="val_auc",
    mode='max',
    patience=3,
    min_lr=1e-6,
)

In [20]:
# Train for 5 epochs, validating on the held-out split after each one
history = model_Xception.fit(
    x=dtrain,
    validation_data=dvalid,
    steps_per_epoch=steps_per_epoch,
    epochs=5,
    callbacks=[checkpoint, lr_reducer],
    verbose=1,
)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [21]:
# Save the model as it stands after the last epoch (the ModelCheckpoint callback writes 'modelcat.h5' separately)
model_Xception.save('Xception_attention_full_data3.h5')

In [22]:
import numpy as np

# Step 3: Predict using the model
def predict_with_model(model, dataset):
    """Run ``model`` over every batch of ``dataset`` and stack the outputs.

    Args:
        model: Object exposing ``predict_on_batch(batch)`` (preferred) or
            ``predict(batch)``, each returning an array-like per batch.
        dataset: Iterable yielding input batches.

    Returns:
        A NumPy array with the per-batch predictions concatenated along the
        first axis, or an empty array when ``dataset`` yields nothing.
    """
    # ``Model.predict`` is meant for whole datasets, not for being called
    # once per batch inside a loop. ``predict_on_batch`` is the single-batch
    # entry point; fall back to ``predict`` for objects that lack it.
    predict_batch = getattr(model, "predict_on_batch", None) or model.predict

    batch_outputs = [np.asarray(predict_batch(images)) for images in dataset]
    if not batch_outputs:
        return np.array([])

    # One concatenation instead of growing a Python list row by row.
    return np.concatenate(batch_outputs, axis=0)

# Run the trained model over the test dataset
predictions = predict_with_model(model_Xception, dtest)

# 'predictions' holds the sigmoid outputs of the model: one probability per label for each test image.
# These probabilities can be processed further (e.g. thresholded) to obtain the final predicted labels.






In [23]:
# Inspect the predicted probabilities (NumPy abbreviates the display of large arrays)
predictions

array([[9.1900546e-03, 5.8893460e-01, 2.2363214e-01, ..., 1.1929734e-01,
        9.9552274e-01, 1.0000000e+00],
       [1.0320927e-17, 2.4848697e-13, 3.4848852e-13, ..., 5.0613908e-03,
        9.9920851e-01, 4.2300612e-12],
       [7.7792578e-10, 1.5243602e-07, 2.9104194e-06, ..., 1.2869105e-01,
        9.0024972e-01, 5.1007063e-08],
       ...,
       [1.2526871e-10, 3.6398564e-08, 1.5892835e-08, ..., 3.9421925e-03,
        9.9835950e-01, 6.7103101e-10],
       [3.3583422e-10, 1.7522881e-08, 3.9405879e-08, ..., 6.3838351e-01,
        1.4633629e-01, 1.7702591e-06],
       [2.4362098e-08, 3.0758048e-07, 5.4771458e-08, ..., 3.9290449e-01,
        6.7803639e-01, 2.1696319e-03]], dtype=float32)

In [24]:
import pandas as pd 
import numpy as np
import os
import tensorflow as tf


In [28]:
# Launch the Streamlit app defined in prod2.py via a shell command.
# NOTE(review): the recorded output of this cell is a traceback raised while
# importing streamlit, so the app did not start in this run - check the
# streamlit installation in this environment.
!streamlit run prod2.py

Traceback (most recent call last):
  File "C:\Users\srika\anaconda3\lib\runpy.py", line 197, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "C:\Users\srika\anaconda3\lib\runpy.py", line 87, in _run_code
    exec(code, run_globals)
  File "C:\Users\srika\anaconda3\Scripts\streamlit.exe\__main__.py", line 4, in <module>
  File "C:\Users\srika\anaconda3\lib\site-packages\streamlit\__init__.py", line 55, in <module>
    from streamlit.delta_generator import DeltaGenerator as _DeltaGenerator
  File "C:\Users\srika\anaconda3\lib\site-packages\streamlit\delta_generator.py", line 36, in <module>
    from streamlit import config, cursor, env_util, logger, runtime, type_util, util
  File "C:\Users\srika\anaconda3\lib\site-packages\streamlit\cursor.py", line 18, in <module>
    from streamlit.runtime.scriptrunner import get_script_run_ctx
  File "C:\Users\srika\anaconda3\lib\site-packages\streamlit\runtime\__init__.py", line 16, in <module>
    from streamlit.runtime