In [1]:
# Enable IPython's autoreload extension in mode 2, so imported modules are
# reloaded before each cell execution (handy while editing the xray package).
%load_ext autoreload
%autoreload 2

In [2]:
import os
from glob import glob

import numpy as np
import pandas as pd

In [3]:
from xray import data, trainer, utils

In [5]:
# Label file name and the (relative) dataset folder that contains it.
csv_file = "sample_labels.csv"
path_to_csv = "../../raw_data/sample-data/"

In [11]:
# Build the full path to the label CSV, then load it through the project helper.
labels_csv = os.path.join(path_to_csv, csv_file)
df = data.get_data(labels_csv)

In [12]:
# Metadata columns that are not referenced anywhere later in this notebook.
unused_columns = [
    "Follow-up #",
    "Patient Age",
    "Patient Gender",
    "View Position",
    "OriginalImagePixelSpacing_x",
    "OriginalImagePixelSpacing_y",
    "OriginalImageWidth",
    "OriginalImageHeight",
]

# errors="ignore" lets the cell be re-run after the columns are already gone.
df.drop(columns=unused_columns, errors="ignore", inplace=True)

In [13]:
# Run the project helper utils.get_paths on df with the dataset folder (verbose=0).
# NOTE(review): presumably this adds the `path` column to df in place (the
# column shows up in the df.head() output of the next cell) — confirm against
# utils.get_paths.
utils.get_paths(df, path_to_csv, verbose=0)

In [15]:
# Turn the pipe-separated "Finding Labels" string into a list of labels per
# row. The vectorised .str accessor replaces a per-row Python lambda and
# leaves missing values as NaN instead of raising AttributeError.
# A single-character pattern is always treated as a literal, so "|" is not
# interpreted as a regular-expression alternation.
df["Fixed_Labels"] = df["Finding Labels"].str.split("|")
df.head(3)

Unnamed: 0,Image Index,Finding Labels,Patient ID,path,Fixed_Labels
0,00000013_005.png,Emphysema|Infiltration|Pleural_Thickening|Pneu...,13,../../raw_data/sample-data/images/multilabel/0...,"[Emphysema, Infiltration, Pleural_Thickening, ..."
1,00000013_026.png,Cardiomegaly|Emphysema,13,../../raw_data/sample-data/images/multilabel/0...,"[Cardiomegaly, Emphysema]"
2,00000017_001.png,No Finding,17,../../raw_data/sample-data/images/No Finding/0...,[No Finding]


In [18]:
# Keep relative paths: retain only the last three "/"-separated components
# of every entry in the `path` column.
df["path"] = df["path"].map(
    lambda full_path: "/".join(full_path.split("/")[-3:])
)

In [19]:
# Keep only the rows whose label string is something other than "No Finding".
has_finding = df["Finding Labels"] != "No Finding"
df_sick = df[has_finding]

In [22]:
# Split the rows with findings into train / validation / test frames,
# filtering on "Patient ID".
# NOTE(review): presumably this keeps each patient inside a single split —
# confirm in data.split_df. The three fractions sum to 0.95, not 1.0; confirm
# how the helper treats the remainder.
ds_train, ds_val, ds_test = data.split_df(
    dataset=df_sick,
    column_to_filter_by="Patient ID",
    train_val_test=(0.65, 0.15, 0.15),
    total_filter=1,
)

# Report the shape of each split.
for label, subset in (("train :", ds_train), ("val :", ds_val), ("test :", ds_test)):
    print(label, subset.shape)

train : (1670, 5)
val : (396, 5)
test : (374, 5)


In [24]:
# Two-element image size; reused below as the generators' `target_size` and
# as the "input_shape" entry of the CNN settings.
input_shape = (224, 224)

In [25]:
# Settings common to the training and validation generators; only the
# label frame differs between the two calls.
shared_generator_options = dict(
    img_path=path_to_csv,
    index_col="path",
    labels_col="Fixed_Labels",
    train_set_batch_size=32,
    target_size=(input_shape[0], input_shape[1]),
    binary_class=False,
    test_set=False,
    data_augment=False,
)

gen_train = data.build_generator(labels_df=ds_train, **shared_generator_options)

gen_val = data.build_generator(labels_df=ds_val, **shared_generator_options)

Found 1670 validated image filenames belonging to 14 classes.
Found 396 validated image filenames belonging to 14 classes.


In [79]:
# Generator over the test split: same settings as the train/validation
# generators except that test_set is True.
test_generator_options = {
    "img_path": path_to_csv,
    "labels_df": ds_test,
    "index_col": "path",
    "labels_col": "Fixed_Labels",
    "train_set_batch_size": 32,
    "target_size": (input_shape[0], input_shape[1]),
    "binary_class": False,
    "test_set": True,
    "data_augment": False,
}
gen_test = data.build_generator(**test_generator_options)


Found 374 validated image filenames belonging to 14 classes.


In [28]:
# Number of label classes known to the training generator.
# Read it from `gen_train` directly rather than through `model.gen_train`:
# `model` is not created until a later cell, so going through it raises a
# NameError when the notebook is run top to bottom on a fresh kernel.
total_classes = len(gen_train.class_indices)
total_classes

14

In [29]:
# Wrap the training and validation generators in the project's Trainer,
# configured with the "multilabel" category type.
model = trainer.Trainer(
    gen_train=gen_train,
    gen_val=gen_val,
    category_type="multilabel",
)

In [35]:
# Architecture settings, later unpacked as keyword arguments into
# model.build_cnn.
cnn_arch = dict(
    input_shape=input_shape,
    output_shape=total_classes,
    output_activation="sigmoid",
    dense_layer_geometry=(1024, 512, 256),
    transfer_model="VGG16",
    dense_layer_activation="relu",
    dropout_layers=False,
)

In [55]:
# Build the network from the settings collected in cnn_arch.
model.build_cnn(**cnn_arch)

In [60]:
# Compile the model using the Trainer's defaults (no arguments are passed).
model.compile_model()

In [61]:
# Display the Trainer's experiment_name attribute.
model.experiment_name

'[AR] [BS AS] [xray-diagnosis]_vgg16_                                        2021-10-17_11:36:13.248180'

In [85]:
# Train for a single epoch and keep the returned history object.
# NOTE(review): the recorded output of this cell ends with
# "TypeError: count() takes exactly one argument (0 given)" after the
# checkpoint is saved. The cause is not visible in this notebook —
# investigate Trainer.fit_model / its callbacks before relying on a full run.
history = model.fit_model(epochs=1)


Epoch 00001: val_loss improved from inf to 0.30292, saving model to best_weights.hdf5


TypeError: count() takes exactly one argument (0 given)

In [86]:
# Show the per-epoch metrics recorded during training.
# NOTE(review): 'accuracy' and 'val_accuracy' are 0.0 in the recorded output.
# The compiled metric is keras.metrics.Accuracy (see the metric list further
# down), which counts exact prediction == label matches, so it is likely not
# meaningful for sigmoid outputs — consider a thresholded metric such as
# BinaryAccuracy in Trainer.compile_model.
history.history

{'loss': [0.28793010115623474],
 'accuracy': [0.0],
 'precision': [0.5616438388824463],
 'recall': [0.06441476941108704],
 'categorical_accuracy': [0.28143712878227234],
 'val_loss': [0.3029172718524933],
 'val_accuracy': [0.0],
 'val_precision': [0.46315789222717285],
 'val_recall': [0.13858267664909363],
 'val_categorical_accuracy': [0.2777777910232544],
 'lr': [1e-04]}

In [101]:
# Evaluate on the test generator with 4 workers and multiprocessing enabled.
# NOTE(review): model.pipeline looks like the underlying Keras model (its
# metrics are keras.metrics objects in the outputs below) — confirm in Trainer.
evalu = model.pipeline.evaluate(gen_test, workers=4, use_multiprocessing=True)



In [102]:
# Display the evaluation result.
# NOTE(review): presumably ordered as loss, accuracy, precision, recall,
# categorical_accuracy, matching the metric list shown for model.pipeline —
# confirm.
evalu

[0.3011270761489868,
 0.0,
 0.4204545319080353,
 0.12758620083332062,
 0.2513369023799896]

In [100]:
# List every metric object tracked by the underlying model (the loss tracker
# followed by the compiled metrics). The cell previously ended in a dangling
# "." left over from tab completion, which is a SyntaxError on re-run; the
# recorded output matches the `metrics` attribute.
model.pipeline.metrics

[<keras.metrics.Mean at 0x7f5977ca2700>,
 <keras.metrics.Accuracy at 0x7f5977d62040>,
 <keras.metrics.Precision at 0x7f5977c92f70>,
 <keras.metrics.Recall at 0x7f5901d0cca0>,
 <keras.metrics.CategoricalAccuracy at 0x7f5977c92f10>]

In [75]:
# List only the metrics passed at compile time (excludes the loss tracker,
# as the recorded output shows).
model.pipeline.compiled_metrics.metrics

[<keras.metrics.Accuracy at 0x7f5977d62040>,
 <keras.metrics.Precision at 0x7f5977c92f70>,
 <keras.metrics.Recall at 0x7f5901d0cca0>,
 <keras.metrics.CategoricalAccuracy at 0x7f5977c92f10>]

In [77]:
# List the metric objects that track the compiled loss. The trailing "."
# left over from tab completion was a SyntaxError on re-run and is removed.
model.pipeline.compiled_loss.metrics

[<keras.metrics.Mean at 0x7f5977ca2700>]

In [None]:
from tensorflow import image