In [1]:
from matplotlib import pyplot as plt
import pandas as pd
import numpy as np
import seaborn as sns
import os
import PIL
import tensorflow as tf
import tensorflow_datasets as tfds
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.python.keras.layers import Dense, Flatten
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam

import ssl
ssl._create_default_https_context = ssl._create_unverified_context

In [2]:
import pathlib

data_dir = r'C:\Users\joaov\OneDrive\Documentos\rsna-intracranial-hemorrhage-detection\balanced_dataset\train'
data_dir = pathlib.Path(data_dir)

In [3]:
image_count = len(list(data_dir.glob('*/*.png')))
print(f'All training data: {image_count}')

All training data: 151105


In [4]:
list_ds = tf.data.Dataset.list_files(str(data_dir/'*/*'), shuffle=False)
list_ds = list_ds.shuffle(image_count, reshuffle_each_iteration=False)

print(list_ds)

<_ShuffleDataset element_spec=TensorSpec(shape=(), dtype=tf.string, name=None)>


In [5]:
class_names = np.array(sorted([item.name for item in data_dir.glob('*')]))
print(class_names)

['with_hemorrhage' 'without_hemorrhage']


In [6]:
#val_size = int(image_count * 0.2)
def get_train_and_test_data(training_size = 1000):
    val_size = int(0.2*training_size)
    skip_size = int(image_count - training_size)

    print(f'Training size: {image_count - skip_size}')

    print(f'Validation size: {val_size}')

    train_ds = list_ds.skip(skip_size)
    val_ds = list_ds.take(val_size)
    
    return train_ds, val_ds


In [7]:
def process_datasets(train_ds, val_ds):
    def get_label(file_path):
      # Convert the path to a list of path components
      parts = tf.strings.split(file_path, os.path.sep)
      # The second to last is the class-directory
      one_hot = parts[-2] == class_names
      # Integer encode the label
      return tf.argmax(one_hot)
    def decode_img(img):
      # Convert the compressed string to a 3D uint8 tensor
      img = tf.io.decode_jpeg(img, channels=3)
      # Resize the image to the desired size
      return tf.image.resize(img, [img_height, img_width])
    
    def process_path(file_path):
      label = get_label(file_path)
      # Load the raw data from the file as a string
      img = tf.io.read_file(file_path)
      img = decode_img(img)
      return img, label
    # Set `num_parallel_calls` so multiple images are loaded/processed in parallel.
    AUTOTUNE = tf.data.AUTOTUNE

    img_height = 180
    img_width = 180
    batch_size=32

    train_ds = train_ds.map(process_path, num_parallel_calls=AUTOTUNE)
    val_ds = val_ds.map(process_path, num_parallel_calls=AUTOTUNE)
    
    def configure_for_performance(ds):
      ds = ds.cache()
      ds = ds.shuffle(buffer_size=1000)
      ds = ds.batch(batch_size)
      ds = ds.prefetch(buffer_size=AUTOTUNE)
      return ds

    
    return configure_for_performance(train_ds), configure_for_performance(val_ds)



In [8]:
def normalize(ds):
    normalization_layer = tf.keras.layers.Rescaling(1./255)

    normalized_ds = ds.map(lambda x, y: (normalization_layer(x), y))
    image_batch, labels_batch = next(iter(normalized_ds))
    first_image = image_batch[0]
    # Notice the pixel values are now in `[0,1]`.
    print(np.min(first_image), np.max(first_image))
    
    return normalized_ds

In [9]:
num_classes = 1

resnet_model = Sequential()
pretrained_model = tf.keras.applications.ResNet50(
    include_top=False,
    input_shape=(180,180,3),
    pooling='avg',
    classes=num_classes,
    weights='imagenet'
)

for layer in pretrained_model.layers:
  layer.trainable=False

resnet_model.add(pretrained_model)
resnet_model.add(Flatten())
resnet_model.add(Dense(512, activation='relu'))
resnet_model.add(Dense(num_classes, activation='sigmoid'))

In [10]:
resnet_model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 resnet50 (Functional)       (None, 2048)              23587712  
                                                                 
 module_wrapper (ModuleWrapp  (None, 2048)             0         
 er)                                                             
                                                                 
 module_wrapper_1 (ModuleWra  (None, 512)              1049088   
 pper)                                                           
                                                                 
 module_wrapper_2 (ModuleWra  (None, 1)                513       
 pper)                                                           
                                                                 
Total params: 24,637,313
Trainable params: 1,049,601
Non-trainable params: 23,587,712
____________________________________

In [11]:
def compile_model(learning_rate=0.0005):
    resnet_model.compile(
      optimizer=keras.optimizers.Adam(learning_rate=learning_rate),
      loss=tf.losses.BinaryCrossentropy(),
      metrics=['accuracy', tf.keras.metrics.Precision(), tf.keras.metrics.Recall()])
    

In [12]:
def generate_train_test_datasets(training_size=2000):

    train_ds, val_ds = get_train_and_test_data(training_size = training_size)
    train_ds, val_ds = process_datasets(train_ds, val_ds)
    normalized_train_ds = normalize(train_ds)
    normalized_val_ds = normalize(val_ds)
    
    return normalized_train_ds, normalized_val_ds

In [13]:
def fit_model(epochs, train_ds, val_ds):
    history = resnet_model.fit(
        train_ds,
        validation_data=val_ds,
        epochs=epochs
    )
    return history

In [14]:
learning_rates = [0.001]

histories = []
train_ds, val_ds = generate_train_test_datasets(training_size=15_000)

c = 0
for learning_rate in learning_rates:
    c += 1
    print(f'modelo {c}, learning rate = {learning_rate}')
    compile_model(learning_rate)
    history = fit_model(30, train_ds, val_ds)
    df = pd.DataFrame(history.history)
    df.to_csv(f'C:\\Users\\joaov\\Downloads\\hist_s3_{c}.csv')
    histories.append(history)

Training size: 15000
Validation size: 3000
0.0 0.03529412
0.0 1.0
modelo 1, learning rate = 0.001
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
 80/469 [====>.........................] - ETA: 45:14 - loss: 0.6113 - accuracy: 0.6313 - precision: 0.6199 - recall: 0.6667

KeyboardInterrupt: 

In [None]:
d = history.history

In [None]:
df_1 = pd.DataFrame(d)
df_1.to_csv('C:\\Users\\joaov\\Downloads\\hist_first_s3_{c}.csv')

In [None]:
def plot_loss(history):
    plt.plot(history.history['loss'])
    plt.plot(history.history['val_loss'])
    plt.title('model loss')
    plt.ylabel('loss')
    plt.xlabel('epoch')
    plt.legend(['train', 'test'], loc='upper left')
    plt.show()

for history in histories:
    print(history)
    plot_loss(history)
    