# List of Functions

This document contains all the functions used in this repository across the different notebooks.

<b>Function to count the number of images inside the NORMAL and PNEUMONIA folders</b>

In [None]:
# Function that counts the X-rays in the NORMAL and PNEUMONIA folders of a given subset (train, test or val)

def number_of_xrays(xray_normal, xray_pneumo):
    """Print how many X-rays the NORMAL and PNEUMONIA folders contain.

    Every entry returned by ``os.listdir`` is counted; entries are not
    filtered by type or extension.

    Args:
        xray_normal: Path of the folder with X-rays classified as 'NORMAL'
            (no pneumonia).
        xray_pneumo: Path of the folder with X-rays classified as
            'PNEUMONIA' (the patient has pneumonia).

    Returns:
        None. The three counts are written to standard output.
    """
    # List each folder exactly once so the total always agrees with the two
    # per-class counts printed above it.
    normal_count = len(os.listdir(xray_normal))
    pneumo_count = len(os.listdir(xray_pneumo))

    print('Number of X-rays classified as NORMAL:', normal_count)
    print('Number of X-rays classified as PNEUMONIA:', pneumo_count)

    # NOTE(review): the label says 'Train' whichever subset is passed in;
    # the text is kept unchanged so existing notebook output stays the same.
    print('Total number of Train X-rays:', normal_count + pneumo_count)

<b>Function that randomly selects a pair of images, one from the NORMAL folder and one from the PNEUMONIA folder, then displays them using subplots</b>

In [None]:
def X_ray_QC(path, set_normal, set_pneumo):
    """Show one random NORMAL and one random PNEUMONIA X-ray side by side.

    The chosen file names are printed before the images are plotted.

    Args:
        path: No longer used. It previously bounded both random indices,
            which did not match the folders being indexed; it is kept so
            existing calls keep working.
        set_normal: Path of the folder with X-rays classified as 'NORMAL'.
        set_pneumo: Path of the folder with X-rays classified as
            'PNEUMONIA'.

    Raises:
        ValueError: If either folder contains no entries.
    """
    normal_files = os.listdir(set_normal)
    pneumo_files = os.listdir(set_pneumo)
    if not normal_files or not pneumo_files:
        raise ValueError('Both X-ray folders must contain at least one file')

    # Draw each index from the size of the folder it indexes into, so every
    # file can be picked and the index can never run past the listing.
    norm_xray = normal_files[np.random.randint(0, len(normal_files))]
    print('Normal xray file name:', norm_xray)

    pneumo_xray = pneumo_files[np.random.randint(0, len(pneumo_files))]
    print('Pneumo xray file name:', pneumo_xray)

    # os.path.join gives a valid path whether or not the folder argument
    # ends with a path separator.
    normal_load = Image.open(os.path.join(set_normal, norm_xray))
    pneumonia_load = Image.open(os.path.join(set_pneumo, pneumo_xray))

    # rcParams are defaults read when a figure is created, so the face colour
    # has to be set before plt.figure() for it to apply to this figure.
    plt.rcParams["figure.facecolor"] = "lightblue"
    figure = plt.figure(figsize=(14, 6))

    ax1 = figure.add_subplot(1, 2, 1)
    ax1.imshow(normal_load, cmap='gray')
    ax1.set_title('NORMAL', fontsize=14)
    ax1.axis('on')

    ax2 = figure.add_subplot(1, 2, 2)
    ax2.imshow(pneumonia_load, cmap='gray')
    ax2.set_title('PNEUMONIA', fontsize=14)
    ax2.axis('on')
    plt.show()

<b>Function that plots the number of NORMAL and PNEUMONIA images to check subset balance</b>

In [None]:
def scaling_check(data):
    """Print and plot how many rows carry each 'diagnosis' value.

    Args:
        data: Any of the three dataframes created for the subsets; it must
            expose a 'diagnosis' column supporting ``value_counts()``.
    """
    counts_per_class = data['diagnosis'].value_counts()
    print('Legend:')
    print(counts_per_class)

    class_values = counts_per_class.index

    plt.figure(figsize=(10, 6))
    sns.barplot(x=class_values, y=counts_per_class.values)
    plt.title('Diagnosis Balance Plot', fontsize=16)
    plt.xlabel('Diagnosis', fontsize=12)
    plt.ylabel('Number of Cases', fontsize=12)

    # The tick labels are fixed text; this assumes exactly two bars drawn in
    # NORMAL, PNEUMONIA order -- TODO confirm against the dataframes used.
    tick_positions = range(len(class_values))
    plt.xticks(tick_positions, ['NORMAL(0)', 'PNEUMONIA(1)'])
    plt.show()

In [None]:
def dir_file_count(directory):
    """Return the number of files found under ``directory``, recursively.

    Args:
        directory: Root folder handed to ``os.walk``.

    Returns:
        The summed length of the file list of every folder visited.
    """
    total_files = 0
    for _folder, _subfolders, file_names in os.walk(directory):
        total_files += len(file_names)
    return total_files

<b>Function to resize (re-scale) images from both folders at once and create test data and test labels lists, which will be used for the model evaluation</b>

In [None]:
def data_processing(img_dims, batch_size):
    """Load the test X-rays, resize and rescale them, and build their labels.

    Args:
        img_dims: Side length each image is resized to; images become
            ``(img_dims, img_dims)`` before the channels are stacked.
        batch_size: Not used inside this function; kept so existing calls
            keep working.

    Returns:
        The tuple ``(train_gen, test_gen, test_data, test_labels)``, where
        ``test_data`` is an array of the processed images and
        ``test_labels`` holds 0 for NORMAL and 1 for PNEUMONIA.

    NOTE(review): ``data_dir``, ``train_gen`` and ``test_gen`` are not
    defined in this function; they must already exist at module level or
    the call fails with NameError -- confirm where they are created.
    """
    test_data = []
    test_labels = []

    # The position in the list doubles as the label: NORMAL -> 0, PNEUMONIA -> 1.
    for label, cond in enumerate(['/NORMAL/', '/PNEUMONIA/']):
        folder = data_dir + 'test' + cond
        for file_name in os.listdir(folder):
            img = plt.imread(folder + file_name)
            img = cv2.resize(img, (img_dims, img_dims))
            # Stack the image three times along the last axis; assumes the
            # X-rays are single-channel -- TODO confirm.
            img = np.dstack([img, img, img])
            img = img.astype('float32') / 255
            test_data.append(img)
            test_labels.append(label)

    # Convert only after both folders have been read. Converting inside the
    # folder loop turned test_data into an ndarray after the NORMAL pass, and
    # ndarrays have no append(), so the PNEUMONIA pass failed.
    test_data = np.array(test_data)
    test_labels = np.array(test_labels)

    return (train_gen, test_gen, test_data, test_labels)

<b>Function to evaluate the model performance by plotting the loss, val_loss, acc, and val_acc</b>

In [None]:
def model_perf_vis(history):
    """Plot training/validation loss and accuracy against the epoch number.

    Two stacked charts are drawn: 'Model Loss' on top and 'Model Accuracy'
    below.

    Args:
        history: Object whose ``history`` attribute is a dict of per-epoch
            lists. It must contain 'loss' and 'val_loss', plus the accuracy
            under either 'acc'/'val_acc' or 'accuracy'/'val_accuracy'.

    Raises:
        KeyError: If one of the required entries is missing.
    """
    history_dict = history.history
    train_loss_values = history_dict['loss']
    val_loss_values = history_dict['val_loss']

    # The accuracy entry is stored as 'acc' or 'accuracy' depending on the
    # Keras version; accept either spelling.
    acc_key = 'acc' if 'acc' in history_dict else 'accuracy'
    train_accuracy = history_dict[acc_key]
    val_accuracy = history_dict['val_' + acc_key]

    # Epochs are numbered from 1 for display.
    epochs = list(range(1, len(val_loss_values) + 1))

    fig, (loss_ax, acc_ax) = plt.subplots(ncols=1, nrows=2, figsize=(7, 7))

    # Loss plot
    sns.lineplot(ax=loss_ax, x=epochs, y=train_loss_values, label='Training Loss')
    sns.lineplot(ax=loss_ax, x=epochs, y=val_loss_values, label='Validation Loss')
    loss_ax.set(xlabel='Epochs', ylabel='Loss')
    loss_ax.set_title('Model Loss', fontsize=20)
    # Pass the on/off flag positionally: its keyword was renamed from 'b' to
    # 'visible' in Matplotlib, so neither spelling works on every version.
    loss_ax.grid(True, which='major', axis='y')

    # Accuracy plot
    sns.lineplot(ax=acc_ax, x=epochs, y=train_accuracy, label='Training Accuracy')
    sns.lineplot(ax=acc_ax, x=epochs, y=val_accuracy, label='Validation Accuracy')
    acc_ax.set(xlabel='Epochs', ylabel='Accuracy')
    acc_ax.set_title('Model Accuracy', fontsize=20)
    acc_ax.grid(True, which='major', axis='y')

    plt.tight_layout()
    plt.show()

<b>Function to quickly fit the models with fixed parameters, including 20 epochs</b>

In [None]:
def model_fitting(model_N, history_N):
    """Compile and fit a model with fixed settings and return its history.

    The model is compiled with the Adam optimizer, categorical
    cross-entropy loss and the accuracy metric, then trained for 20 epochs
    with 100 steps per epoch and 50 validation steps.

    Args:
        model_N: Model exposing ``compile`` and ``fit_generator``.
        history_N: Ignored. Rebinding this parameter inside the function
            never reached the caller, so the history is returned instead;
            the parameter is kept so existing calls keep working.

    Returns:
        The object returned by ``model_N.fit_generator``.

    NOTE(review): ``optimizers``, ``train_generator`` and
    ``validation_generator`` are read from module level -- confirm they are
    defined before this is called.
    """
    optimizer = optimizers.Adam()
    loss = 'categorical_crossentropy'
    metrics = ['accuracy']
    epochs = 20
    steps_per_epoch = 100
    validation_steps = 50

    model_N.compile(optimizer, loss=loss, metrics=metrics)

    # NOTE(review): fit_generator is deprecated in recent TensorFlow/Keras
    # releases in favour of fit -- confirm against the installed version.
    history_N = model_N.fit_generator(train_generator,
                                      steps_per_epoch=steps_per_epoch,
                                      epochs=epochs,
                                      verbose=2,
                                      validation_data=validation_generator,
                                      validation_steps=validation_steps)

    # Hand the training history back so callers can plot or inspect it.
    return history_N

<b>Only used for README images:</b>

<b>Code used to display a grid of 2 rows x 5 columns of X-ray images to show on the README notebook</b>

In [None]:
# Take the first five image paths of each class: pneumonia (label 1) first,
# then normal (label 0), so the list lines up with the two rows of the grid.
samples = []
for label_value in (1, 0):
    class_rows = train_data[train_data['label'] == label_value]
    samples += class_rows['image'].iloc[:5].tolist()

# Draw the 2 x 5 grid: top row pneumonia, bottom row normal.
f, ax = plt.subplots(2, 5, figsize=(45, 15))
for i in range(10):
    row, col = divmod(i, 5)
    panel = ax[row, col]
    panel.imshow(imread(samples[i]), cmap='gray')
    panel.set_title("Pneumonia" if i < 5 else "Normal")
    panel.axis('on')
    panel.set_aspect('auto')
plt.show()