In [1]:
#jun 25

import os
import numpy as np
import pandas as pd
import cv2
import tensorflow as tf
from tensorflow.keras.layers import Input, Flatten, Dense, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
from tensorflow.keras.metrics import Precision, Recall
from sklearn.model_selection import train_test_split
from tensorflow.keras import backend as K

# TensorFlow GPU configuration
physical_devices = tf.config.list_physical_devices('GPU')
if len(physical_devices) > 0:
    tf.config.experimental.set_memory_growth(physical_devices[0], True)
    print("Using GPU:", physical_devices[0])
else:
    print("No GPU found, using CPU")

# Set up paths and categories
base_data_dir = '/blue/srampazzi/vi.gade/cov/covid'
categories = ['COVID', 'Lung_Opacity', 'Normal', 'Viral_Pneumonia']

# Load metadata and preprocess
all_metadata = pd.DataFrame()
for category in categories:
    path = f'{base_data_dir}/{category}.metadata.xlsx'
    df = pd.read_excel(path, usecols=['FILE NAME', 'FORMAT', 'SIZE'])
    df['label'] = category
    df['image_path'] = df['FILE NAME'].apply(lambda x: f'{base_data_dir}/{category}/images/{x}.png')
    df['mask_path'] = df['FILE NAME'].apply(lambda x: f'{base_data_dir}/{category}/masks/{x}.png')
    df = df[df['image_path'].apply(os.path.exists)]  # Ensure file exists
    all_metadata = pd.concat([all_metadata, df], ignore_index=True)

# Data splitting
train_df, temp_df = train_test_split(all_metadata, test_size=0.4, random_state=42)
val_df, test_df = train_test_split(temp_df, test_size=0.5, random_state=42)

# Data augmentation
datagen = ImageDataGenerator(
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

# Function to apply mask
def apply_mask(image, mask):
    mask = cv2.resize(mask, (image.shape[1], image.shape[0]))  # Resize mask to match image size
    return cv2.bitwise_and(image, image, mask=mask)

# Data generator function with data augmentation and masks
def data_generator_augmented_with_masks(df, batch_size=32):
    while True:
        batch_paths = df.sample(n=batch_size)
        batch_input = []
        batch_output = []
        for _, row in batch_paths.iterrows():
            image = cv2.imread(row['image_path'], cv2.IMREAD_COLOR)
            mask = cv2.imread(row['mask_path'], cv2.IMREAD_GRAYSCALE)  # Load the mask
            if mask is not None:  # Ensure mask is loaded correctly
                masked_image = apply_mask(image, mask)  # Apply the mask to the image
            else:
                masked_image = image
            image = cv2.resize(masked_image, (256, 256))
            image = image / 255.0
            image = datagen.random_transform(image)
            batch_input.append(image)
            batch_output.append(tf.keras.utils.to_categorical(categories.index(row['label']), num_classes=len(categories)))
        batch_x = np.array(batch_input)
        batch_y = np.array(batch_output)
        yield batch_x, batch_y

# Setup data generators with data augmentation and masks
train_gen_augmented = data_generator_augmented_with_masks(train_df, batch_size=32)
val_gen_augmented = data_generator_augmented_with_masks(val_df, batch_size=32)
test_gen_augmented = data_generator_augmented_with_masks(test_df, batch_size=32)

# Model building with increased regularization
base_model = ResNet50(weights='imagenet', include_top=False, input_tensor=Input(shape=(256, 256, 3)))
for layer in base_model.layers[-10:]:
    layer.trainable = True  # Fine-tune the last 10 layers

# Add custom layers with L1/L2 regularization
regularizer = tf.keras.regularizers.l1_l2(l2=0.5)
x = Flatten()(base_model.output)
x = Dense(1024, activation='relu', kernel_regularizer=regularizer)(x)
x = Dropout(0.5)(x)
predictions = Dense(len(categories), activation='softmax', kernel_regularizer=regularizer)(x)
model = Model(inputs=base_model.input, outputs=predictions)

# Compile the model with custom metrics
model.compile(optimizer=Adam(learning_rate=0.0001),
              loss='categorical_crossentropy',
              metrics=['accuracy', Precision(), Recall()])

# Callbacks for early stopping, model checkpoint, and learning rate reduction
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=3, min_lr=0.00001)
model_checkpoint = ModelCheckpoint('best_modelj25.h5', save_best_only=True, monitor='val_loss')

# Model training with data augmentation and regularization
history_augmented = model.fit(
    train_gen_augmented,
    steps_per_epoch=len(train_df) // 32,
    validation_data=val_gen_augmented,
    validation_steps=len(val_df) // 32,
    epochs=12,
    callbacks=[early_stopping, reduce_lr, model_checkpoint]
)

# Ensure model is in evaluation mode during prediction
K.set_learning_phase(0)

# Evaluate the model on the test set with augmentation
test_results_augmented = model.evaluate(test_gen_augmented, steps=len(test_df) // 32)
print(f"Test accuracy with augmentation and regularization: {test_results_augmented[1]*100:.2f}%")

# Save the model in TensorFlow SavedModel format
model.save('final_modelj25')


Using GPU: PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')


2024-06-25 00:28:55.792304: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2024-06-25 00:28:56.621435: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1525] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 78902 MB memory:  -> device: 0, name: NVIDIA A100-SXM4-80GB, pci bus id: 0000:87:00.0, compute capability: 8.0


Epoch 1/12


2024-06-25 00:29:03.256098: I tensorflow/stream_executor/cuda/cuda_dnn.cc:366] Loaded cuDNN version 8201
2024-06-25 00:29:05.127090: I tensorflow/stream_executor/cuda/cuda_blas.cc:1774] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.




  layer_config = serialize_layer_fn(layer)


Epoch 2/12
Epoch 3/12
Epoch 4/12
Epoch 5/12
Epoch 6/12
Epoch 7/12
Epoch 8/12
Epoch 9/12
Epoch 10/12
Epoch 11/12
Epoch 12/12




Test accuracy with augmentation and regularization: 94.39%


2024-06-25 00:53:00.832516: W tensorflow/python/util/util.cc:368] Sets are not currently considered sequences, but this may change in the future, so consider avoiding using them.


INFO:tensorflow:Assets written to: final_modelj25/assets


  layer_config = serialize_layer_fn(layer)
  return generic_utils.serialize_keras_object(obj)


In [2]:
import os
import numpy as np
import pandas as pd
import cv2
import tensorflow as tf
from tensorflow.keras.layers import Input, Flatten, Dense, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
from tensorflow.keras.metrics import Precision, Recall
from sklearn.model_selection import train_test_split

# TensorFlow GPU configuration
physical_devices = tf.config.list_physical_devices('GPU')
if len(physical_devices) > 0:
    tf.config.experimental.set_memory_growth(physical_devices[0], True)
    print("Using GPU:", physical_devices[0])
else:
    print("No GPU found, using CPU")

# Set up paths and categories
base_data_dir = '/blue/srampazzi/vi.gade/cov/covid'
categories = ['COVID', 'Lung_Opacity', 'Normal', 'Viral_Pneumonia']

# Load metadata and preprocess
all_metadata = pd.DataFrame()
for category in categories:
    path = f'{base_data_dir}/{category}.metadata.xlsx'
    df = pd.read_excel(path, usecols=['FILE NAME', 'FORMAT', 'SIZE'])
    df['label'] = category
    df['image_path'] = df['FILE NAME'].apply(lambda x: f'{base_data_dir}/{category}/images/{x}.png')
    df['mask_path'] = df['FILE NAME'].apply(lambda x: f'{base_data_dir}/{category}/masks/{x}.png')
    df = df[df['image_path'].apply(os.path.exists)]  # Ensure file exists
    all_metadata = pd.concat([all_metadata, df], ignore_index=True)

# Data splitting
train_df, temp_df = train_test_split(all_metadata, test_size=0.4, random_state=42)
val_df, test_df = train_test_split(temp_df, test_size=0.5, random_state=42)

# Data augmentation
datagen = ImageDataGenerator(
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

# Function to apply mask
def apply_mask(image, mask):
    mask = cv2.resize(mask, (image.shape[1], image.shape[0]))  # Resize mask to match image size
    return cv2.bitwise_and(image, image, mask=mask)

# Data generator function with data augmentation and masks
def data_generator_augmented_with_masks(df, batch_size=32):
    while True:
        batch_paths = df.sample(n=batch_size)
        batch_input = []
        batch_output = []
        for _, row in batch_paths.iterrows():
            image = cv2.imread(row['image_path'], cv2.IMREAD_COLOR)
            mask = cv2.imread(row['mask_path'], cv2.IMREAD_GRAYSCALE)  # Load the mask
            if mask is not None:  # Ensure mask is loaded correctly
                masked_image = apply_mask(image, mask)  # Apply the mask to the image
            else:
                masked_image = image
            image = cv2.resize(masked_image, (256, 256))
            image = image / 255.0
            image = datagen.random_transform(image)
            batch_input.append(image)
            batch_output.append(tf.keras.utils.to_categorical(categories.index(row['label']), num_classes=len(categories)))
        batch_x = np.array(batch_input)
        batch_y = np.array(batch_output)
        yield batch_x, batch_y

# Setup data generators with data augmentation and masks
train_gen_augmented = data_generator_augmented_with_masks(train_df, batch_size=32)
val_gen_augmented = data_generator_augmented_with_masks(val_df, batch_size=32)
test_gen_augmented = data_generator_augmented_with_masks(test_df, batch_size=32)

# Model building with increased regularization
base_model = ResNet50(weights='imagenet', include_top=False, input_tensor=Input(shape=(256, 256, 3)))
for layer in base_model.layers[-10:]:
    layer.trainable = True  # Fine-tune the last 10 layers

# Add custom layers with L1/L2 regularization
regularizer = tf.keras.regularizers.l1_l2(l2=0.5)
x = Flatten()(base_model.output)
x = Dense(1024, activation='relu', kernel_regularizer=regularizer)(x)
x = Dropout(0.5)(x)
predictions = Dense(len(categories), activation='softmax', kernel_regularizer=regularizer)(x)
model = Model(inputs=base_model.input, outputs=predictions)

# Compile the model with custom metrics
model.compile(optimizer=Adam(learning_rate=0.0001),
              loss='categorical_crossentropy',
              metrics=['accuracy', Precision(), Recall()])

# Callbacks for early stopping, model checkpoint, and learning rate reduction
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=3, min_lr=0.00001)
model_checkpoint = ModelCheckpoint('best_model.h5', save_best_only=True, monitor='val_loss')

# Model training with data augmentation and regularization
history_augmented = model.fit(
    train_gen_augmented,
    steps_per_epoch=len(train_df) // 32,
    validation_data=val_gen_augmented,
    validation_steps=len(val_df) // 32,
    epochs=12,
    callbacks=[early_stopping, reduce_lr, model_checkpoint]
)

# Evaluate the model on the test set with augmentation
test_results_augmented = model.evaluate(test_gen_augmented, steps=len(test_df) // 32)
print(f"Test accuracy with augmentation and regularization: {test_results_augmented[1]*100:.2f}%")

# Save the model in TensorFlow SavedModel format
model.save('final_model')

Using GPU: PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')
Epoch 1/12

  layer_config = serialize_layer_fn(layer)


Epoch 2/12
Epoch 3/12
Epoch 4/12
Epoch 5/12
Epoch 6/12
Epoch 7/12
Epoch 8/12
Epoch 9/12
Epoch 10/12
Epoch 11/12
Epoch 12/12
Test accuracy with augmentation and regularization: 90.40%


2024-06-19 02:56:46.673114: W tensorflow/python/util/util.cc:368] Sets are not currently considered sequences, but this may change in the future, so consider avoiding using them.


INFO:tensorflow:Assets written to: final_model/assets


  layer_config = serialize_layer_fn(layer)
  return generic_utils.serialize_keras_object(obj)
