In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# for dirname, _, filenames in os.walk('/kaggle/input'):
#     for filename in filenames:
#         print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [2]:
import numpy as np
import pandas as pd

import os
from datetime import datetime

from tqdm import tqdm

from sklearn.metrics import accuracy_score, balanced_accuracy_score, f1_score, cohen_kappa_score, \
    top_k_accuracy_score, confusion_matrix, classification_report

import plotly.express as px
import plotly.subplots as sp

import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
from torchvision import models
from torch import flatten
import torchvision.transforms as transforms
from torch.utils.data import random_split

# Other_tools

In [3]:
def get_model_information(model):

    # Get and print information about the parameter number
    print()
    print("Model Summary")
    #txt_file.write("Model Summary\n")

    # Get all parameters of the model layer by layer (trainable or not)
    model_parameters = [layer for layer in model.parameters()]
    # Get the name of each layer
    layer_name = [child for child in model.children()]

    # Define the column name of the DataFrame
    column_name = ["Layer Name", "Number of Trainable Parameters", "Number of (non trainable) Parameters"]
    # Instantiate the table information with a DataFrame
    table_information = pd.DataFrame(columns=column_name)

    # Count the character number for each string element in the list "layer_name"
    character_counts = [len(str(string_element)) for string_element in layer_name]
    # Get the maximum character number
    max_character_number = max(character_counts)

    print("=" * (max_character_number + 2 + 30 + 2 + 36))
    #txt_file.write("=" * (max_character_number + 2 + 30 + 2 + 36))
    #txt_file.write("\n")

    # Initialize variables
    j = 0
    total_trainable_params = 0
    total_params = 0

    # print("\t" * 10)
    # For each layer
    for i in layer_name:

        # Initialize an empty list
        tmp_list = []

        # Set an exception if "i.biais" does not exist (there is no possible biais in the defined layer)
        try:

            # Get "i.biais" if it exists
            bias = (i.bias is not None)

            # If the defined biais in the layer is set to True
            if bias is True:

                # and if the parameters of the current layer require gradient (if trainable)
                if model_parameters[j].requires_grad is True:

                    # Then get the number of trainable parameters
                    trainable_params = model_parameters[j].numel() + model_parameters[j + 1].numel()
                    # Print information in the console
                    # print(str(i) + "\t" * 3 + str(trainable_params))
                    # Add information in "tmp_list"
                    tmp_list.append(str(i))
                    tmp_list.append(trainable_params)
                    tmp_list.append(0)

                    # Update the total number of trainable parameters
                    total_trainable_params += trainable_params

                else:

                    # Then get the number of parameters (non trainable)
                    params = model_parameters[j].numel() + model_parameters[j + 1].numel()
                    # Print information in the console
                    # print(str(i) + "\t" * 3 + str(params))
                    # Add information in "tmp_list"
                    tmp_list.append(str(i))
                    tmp_list.append(0)
                    tmp_list.append(params)

                    # Update the total number of parameters
                    total_params += params

                # Increment the counter
                j = j + 2

            else:  # if biais is false

                # and if the parameters of the current layer require gradient (if trainable)
                if model_parameters[j].requires_grad is True:

                    # Then get the number of trainable parameters
                    trainable_params = model_parameters[j].numel()
                    # Print information in the console
                    # print(str(i) + "\t" * 3 + str(trainable_params))
                    # Add information in "tmp_list"
                    tmp_list.append(str(i))
                    tmp_list.append(trainable_params)
                    tmp_list.append(0)

                    # Update the total number of trainable parameters
                    total_trainable_params += trainable_params

                else:

                    # Then get the number of parameters (non trainable)
                    params = model_parameters[j].numel()
                    # Print information in the console
                    # print(str(i) + "\t" * 3 + str(params))
                    # Add information in "tmp_list"
                    tmp_list.append(str(i))
                    tmp_list.append(0)
                    tmp_list.append(params)

                    # Update the total number of parameters
                    total_params += params

                # Increment the counter
                j = j + 1

        except:  # If there is no biais

            # Just print the name of the layer
            # print(str(i))
            # Add information in "tmp_list"
            tmp_list.append(str(i))
            tmp_list.append(0)
            tmp_list.append(0)

        # Update the DataFrame
        table_information.loc[len(table_information)] = tmp_list

    # Print the table of information
    print(table_information.to_string(index=False, justify="center"))
    #txt_file.write(table_information.to_string(index=False, justify="center"))
    #txt_file.write("\n")

    # Print the total number of trainable and non trainable parameters
    print("=" * (max_character_number + 2 + 30 + 2 + 36))
    # txt_file.write("=" * (max_character_number + 2 + 30 + 2 + 36))
    # txt_file.write("\n")
    print(f"Total")
    # txt_file.write(f"Total")
    # txt_file.write("\n")
    print(f"    Trainable Parameters: {total_trainable_params}")
    # txt_file.write(f"    Trainable Parameters: {total_trainable_params}")
    # txt_file.write("\n")
    print(f"    Non Trainable Parameters: {total_params}")
    # txt_file.write(f"    Non Trainable Parameters: {total_params}")
    # txt_file.write("\n")
    print("=" * (max_character_number + 2 + 30 + 2 + 36))
    # txt_file.write("=" * (max_character_number + 2 + 30 + 2 + 36))
    print()
    # txt_file.write("\n")

# Scores and graphs

In [4]:
# This function compute the accuracy for a mini-batch
def compute_accuracy(labels, outputs):

    # Transform the one-hot vectors (labels and outputs) into integers
    labels = labels.argmax(dim=1)
    outputs = outputs.argmax(dim=1)

    # Compute the accuracy of the current mini-batch
    corrects = (outputs == labels)
    accuracy = corrects.sum().float() / float(labels.size(0))

    return accuracy.item()

In [5]:
# Transform a one hot vector into an integer (index of the maximum value)
def vec_to_int(y_true, y_predicted):

    # Transform the vector of values (one-hot and probabilities) into integers
    y_true = np.argmax(y_true, axis=1)
    y_predicted = np.argmax(y_predicted, axis=1)

    return y_true, y_predicted

In [6]:
def model_performances(y_true, y_predicted, loss, my_score_df):
    # Initialize a list
    scores = []

    # Get the index of the maximum value in the vectors "y_true" and "y_predicted"
    y_int_true, y_int_predicted = vec_to_int(y_true, y_predicted)

    # Compute scores and add them to the list scores
    scores.extend([loss])
    scores.extend([accuracy_score(y_int_true, y_int_predicted)])
    scores.extend([balanced_accuracy_score(y_int_true, y_int_predicted)])
    scores.extend([f1_score(y_int_true, y_int_predicted, average="micro")])
    scores.extend([cohen_kappa_score(y_int_true, y_int_predicted)])
    # scores.extend([top_k_accuracy_score(y_int_true, y_predicted, k=2)])
    # scores.extend([top_k_accuracy_score(y_int_true, y_predicted, k=3)])
        
    # Ensure we add all expected columns, even if some metrics are not calculated
    while len(scores) < len(my_score_df.columns):
        scores.append(0.0)
    
    my_score_df.loc[len(my_score_df)] = scores

    return my_score_df


In [7]:
# This function is only to use on the test set
def show_compute_model_performances(y_true, y_predicted, loss, my_score_df, classes):
    # Initialize a list
    scores = []

    # Get the index of the maximum value in the vectors "y_true" and "y_predicted"
    y_int_true, y_int_predicted = vec_to_int(y_true, y_predicted)

    # Compute and store base metrics
    scores.extend([loss])
    
    # Accuracy
    accuracy = accuracy_score(y_int_true, y_int_predicted)
    print("Accuracy: " + str(accuracy))
    scores.extend([accuracy])
    
    # Balanced accuracy
    balanced_accuracy = balanced_accuracy_score(y_int_true, y_int_predicted)
    print("Balanced Accuracy: " + str(balanced_accuracy))
    scores.extend([balanced_accuracy])
    
    # F1-score
    f1 = f1_score(y_int_true, y_int_predicted, average="micro")
    print("F1-score: " + str(f1))
    scores.extend([f1])
    
    # Cohen Kappa
    kappa = cohen_kappa_score(y_int_true, y_int_predicted)
    print("Kappa: " + str(kappa))
    scores.extend([kappa])
    
    # Add zeros for top-k accuracies to match DataFrame columns
    # scores.extend([0.0, 0.0])  # For Top 2 and Top 3 accuracy columns

    # Confusion matrix and classification report
    if y_true.shape[1] <= 10:
        print(confusion_matrix(y_int_true, y_int_predicted))
    print(classification_report(y_int_true, y_int_predicted, target_names=classes))

    my_score_df.loc[len(my_score_df)] = scores

    return my_score_df

In [8]:
def create_score_df(training_epoch_scores, validation_epoch_scores, score_type):

    # Create a DataFrame for plotting the train "score_type"
    train_df = pd.DataFrame(columns=["Epochs", "Stage", score_type])
    # Create the vectors of values for the epochs and the stage of the training process
    epochs = np.arange(1, training_epoch_scores.shape[0] + 1, 1)
    stage = ["Train"] * training_epoch_scores.shape[0]
    # Fill the DataFrame for the train "score_type"
    train_df["Epochs"] = epochs
    train_df["Stage"] = stage
    train_df[score_type] = training_epoch_scores[score_type]

    # Create a DataFrame for plotting the validation "score_type"
    validation_df = pd.DataFrame(columns=["Epochs", "Stage", score_type])
    # Create the vector of values for the stage of the training process
    stage = ["Validation"] * training_epoch_scores.shape[0]
    # Fill the DataFrame for the validation "score_type"
    validation_df["Epochs"] = epochs
    validation_df["Stage"] = stage
    validation_df[score_type] = validation_epoch_scores[score_type]

    # Merge the two DataFrame
    score_df = pd.concat([train_df, validation_df])

    return score_df

In [9]:
def plot_score_graphs(training_epoch_scores, validation_epoch_scores):
    # List of scores to plot
    scores_to_plot = ["Loss", "Accuracy", "Balanced Accuracy", "F1-score", "Kappa"]

    # Create subplots: one row for each score type
    fig = sp.make_subplots(rows=len(scores_to_plot), cols=1, subplot_titles=scores_to_plot)

    # For each score to plot in the defined list
    for i, score_type in enumerate(scores_to_plot, start=1):
        # Create the DataFrame to be used with Plotly
        the_df = create_score_df(training_epoch_scores, validation_epoch_scores, score_type)

        # Plot the score evolution for the training and validation stages
        for trace in px.line(the_df, x="Epochs", y=score_type, color="Stage").data:
            fig.add_trace(trace, row=i, col=1)

    # Update layout
    fig.update_layout(height=300 * len(scores_to_plot), title_text="Training and Validation Scores")

    # Optionally, display the figure in a Jupyter notebook
    fig.show()

# Train process

In [10]:
# Compute the outputs of the model with specific inputs (and its corresponding labels to compute few performances)
def compute_model_outputs(inputs, labels, device, model, all_labels, all_outputs, loss_function):

    # Move the x data and y labels into the device chosen for the training
    inputs, labels = inputs.to(device), labels.to(device)

    # Compute the outputs of the network with the x data of the current mini-batch
    outputs = model(inputs)
    # Store the labels and outputs of the current mini-batch
    all_labels.extend(np.array(labels.cpu()))
    all_outputs.extend(np.array(outputs.detach().cpu()))

    # Compute the loss for each instance in the mini-batch
    loss = loss_function(outputs, labels)
    # Compute the accuracy of the current mini-batch
    accuracy = compute_accuracy(labels, outputs)

    return all_labels, all_outputs, loss, accuracy

In [11]:
# Train the neural network
def train_model(epoch_number, train_loader, validation_loader, model, optimizer, loss_function, device, results_path,
                my_folder_name):

    # Initialize a DataFrame where to store metrics
    training_epoch_scores = pd.DataFrame(columns=["Loss", "Accuracy", "Balanced Accuracy", "F1-score", "Kappa"])


    validation_epoch_scores = pd.DataFrame(columns=["Loss", "Accuracy", "Balanced Accuracy", "F1-score", "Kappa"])

    # Tell to your model that you are training it
    model.train()

    # For each epoch
    for epoch in range(epoch_number):

        # Initialize a mini-batch counter
        mini_batch_counter = 0

        # Initialize the loss and accuracy
        running_loss = 0.0
        running_accuracy = 0.0

        # Initialize two variables to store the outputs of the neural network and the labels (for the whole epoch)
        all_outputs = []
        all_labels = []

        # Assign the tqdm iterator to the variable "progress_epoch"
        with tqdm(train_loader, unit=" mini-batch") as progress_epoch:

            # For each mini-batch defined in the train loader through the variable "progress_epoch"
            for inputs, labels in progress_epoch:

                # Set the description of the progress bar
                progress_epoch.set_description(f"Epoch {epoch + 1}/{epoch_number}")

                # Compute the outputs of the model with specific inputs
                all_labels, all_outputs, loss, accuracy = compute_model_outputs(inputs, labels, device, model,
                                                                                all_labels, all_outputs, loss_function)

                # Update the weights and biais of the network
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                # Update the running loss
                running_loss += loss.item()
                # Update the running accuracy
                running_accuracy += accuracy

                # Display the updated loss and the accuracy
                progress_epoch.set_postfix(train_loss=running_loss / (mini_batch_counter + 1),
                                           train_accuracy=100. * (running_accuracy / (mini_batch_counter + 1)))

                # Increment the mini-batch counter
                mini_batch_counter += 1

        # Compute the performances of the training on the current epoch and store the scores
        training_epoch_scores = model_performances(np.array(all_labels), np.array(all_outputs),
                                                   running_loss / mini_batch_counter, training_epoch_scores)

        # Check performance of the model on the validation set after each training epoch
        validation_epoch_scores = validate_model(validation_loader, model, loss_function, device,
                                                 validation_epoch_scores)

    # Plot metrics
    plot_score_graphs(training_epoch_scores, validation_epoch_scores)

    return model

In [12]:
def validate_model(validation_loader, model, loss_function, device, validation_epoch_scores):

    # Tell to your model that your are evaluating it
    model.eval()

    # Initialize a mini-batch counter
    mini_batch_counter = 0

    # Initialize the loss and accuracy
    running_loss = 0.0
    running_accuracy = 0.0

    # Initialize two variables to store the outputs of the neural network and the labels (for the whole validation set
    # at the end of the current epoch)
    all_outputs = []
    all_labels = []

    # Assign the tqdm iterator to the variable "progress_validation"
    with tqdm(validation_loader, unit=" mini-batch") as progress_validation:

        # For each mini-batch defined in the validation loader through the variable "progress_validation"
        for inputs, labels in progress_validation:

            # Set the description of the progress bar
            progress_validation.set_description("               Validation step")

            # Compute the outputs of the model with specific inputs
            all_labels, all_outputs, loss, accuracy = compute_model_outputs(inputs, labels, device, model, all_labels,
                                                                            all_outputs, loss_function)

            # Update the running loss
            running_loss += loss.item()
            # Update the running accuracy
            running_accuracy += accuracy

            # Display the updated loss and the accuracy
            progress_validation.set_postfix(validation_loss=running_loss / (mini_batch_counter + 1),
                                            validation_accuracy=100. * (running_accuracy / (mini_batch_counter + 1)))

            # Increment the mini-batch counter
            mini_batch_counter += 1

    # Compute the performances on the validation set of the current epoch and store the scores
    validation_epoch_scores = model_performances(np.array(all_labels), np.array(all_outputs),
                                                 running_loss / mini_batch_counter, validation_epoch_scores)

    return validation_epoch_scores

# Test process

In [13]:
# Test the trained model with the test set
def test_model(test_loader, model, loss_function, device, classes):
    print()
    print()

    # Initialize DataFrame with correct columns
    test_scores = pd.DataFrame(columns=["Loss", "Accuracy", "Balanced Accuracy", "F1-score", "Kappa"])

    # Tell your model that you are evaluating it
    model.eval()

    # Initialize counters and storage
    mini_batch_counter = 0
    running_loss = 0.0
    running_accuracy = 0.0
    all_outputs = []
    all_labels = []

    with torch.no_grad():  # Add this to prevent gradient computation during testing
        with tqdm(test_loader, unit=" mini-batch") as progress_testing:
            for inputs, labels in progress_testing:
                progress_testing.set_description("Testing the training model")

                # Compute outputs
                all_labels, all_outputs, loss, accuracy = compute_model_outputs(inputs, labels, device, model,
                                                                             all_labels, all_outputs, loss_function)

                # Update running metrics
                running_loss += loss.item()
                running_accuracy += accuracy
                
                # Update progress bar
                progress_testing.set_postfix(
                    testing_loss=running_loss / (mini_batch_counter + 1),
                    testing_accuracy=100. * (running_accuracy / (mini_batch_counter + 1))
                )
                
                mini_batch_counter += 1

    # Compute final performances
    test_scores = show_compute_model_performances(np.array(all_labels), np.array(all_outputs),
                                                running_loss / mini_batch_counter, test_scores, classes)

    return test_scores

# User parameters

In [14]:
# Define the path of the dataset to use
dataset_path = ("/kaggle/input/chest-xray-pneumonia/chest_xray/")
train_path = dataset_path + "train/"
test_path = dataset_path + "test/"
val_path = dataset_path + "val/"

# Define the path where to save the results
results_path = ("/kaggle/working/results")


### For transfert learning
# Define the number of epochs of the model training
epoch_number_tl = 3

# Define the size of the mini-batch
batch_size = 32

# Define the learning rate
learning_rate = 0.001

### For our CNN
epoch_number_cnn = 15

### Create a folder and file to save results

In [15]:
# Get the date and time
now = datetime.now()
# Create the folder name
my_folder_name = now.strftime("%Y-%m-%d_%H" + "h" + "%M" + "min" + "%S" + "sec")
# Create the folder
os.makedirs(os.path.join(results_path, my_folder_name))
print("Result folder created")
# Print a message in the consoleprint("\nResult folder created")

# Create and open a txt file to store information about the model performances
# txt_file = open(os.path.join(results_path, my_folder_name, "Results.txt"), "a")

# Write information about the architecture used for classification
# txt_file.write("Model Information\n")
# txt_file.write("    - Model: ResNet50\n")
# txt_file.write("    - Task: Classification\n")
# txt_file.write("    - Type of training: Transfer learning\n\n")
# Write information about hyperparameters
# txt_file.write("Hyperparameters\n")
# txt_file.write("    - Epoch number: " + str(epoch_number) + "\n")
# txt_file.write("    - Batch size: " + str(batch_size) + "\n")
# txt_file.write("    - Learning rate: " + str(learning_rate) + "\n\n")

Result folder created


# Check GPU availability and create the network model

In [16]:
# Check if GPU is available and set the device
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

cuda:0


# Transfert Learning

## Load the dataset for transfert learning

In [17]:
""" Data Transformation """
# Define the transformations to apply to x data (follow ones applied for ResNet50_Weights.IMAGENET1K_V2)
transform = transforms.Compose([
    transforms.Grayscale(num_output_channels=3),  # Convert grayscale to RGB by duplicating the single channel
    transforms.Resize((232, 232)),
    transforms.RandomResizedCrop(224),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])

# Define the transformations to apply to y labels
target_transform = transforms.Compose(
    [transforms.Lambda(
        lambda y: torch.zeros(2, dtype=torch.float).scatter_(
            0, 
            torch.tensor(y),
            value=1)
        )
    ]
)

In [18]:
"""" Train, validation and test sets """
# Load the dataset by applying transformations
train_set = torchvision.datasets.ImageFolder(train_path , transform=transform, target_transform=target_transform)
test_set = torchvision.datasets.ImageFolder(test_path, transform=transform, target_transform=target_transform)
# val_set = torchvision.datasets.ImageFolder(val_path, transform=transform, target_transform=target_transform)

# Define the split ratio and split the dataset
train_ratio = 0.8  # 80% for training, 20% for validation
train_size = int(train_ratio * len(train_set))
val_size = len(train_set) - train_size

train_subset, val_subset = random_split(train_set, [train_size, val_size])


# Create the Python iterator for the datasets
train_loader = torch.utils.data.DataLoader(train_subset, batch_size=batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_set, batch_size=batch_size, shuffle=False)
val_loader = torch.utils.data.DataLoader(val_subset, batch_size=batch_size, shuffle=True)

"""ATTENTION"""
"""Ici l'algorithme fera de la classification binaire PNEUMONIA / NORMAL"""
"""Pour changer cela et spécifier VIRUS ou BACTERIA il faudra les différencier dans les dossiers."""

# Load the first batch of images from the train set
images, labels = next(iter(train_loader))

# Get the shape of the images
image_shape = list(images.data.shape)
# Get automatically the number of channels of images
image_channel = image_shape[1]

# Get the number of classes from the dataset
classes = test_set.classes
class_number = len(list(classes))

classes, class_number

(['NORMAL', 'PNEUMONIA'], 2)

## Load the ResNet on  GPU

In [19]:
### transfert learning
# Instantiate the model to exploit
model_tl = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V2)

# Replace the last fully connected layer to fit with the number of classes of the dataset
model_tl.fc = nn.Linear(model_tl.fc.in_features, class_number)

# Place the model into the GPU if available
model_tl = model_tl.to(device)

# Print information about the model
get_model_information(model_tl)

Downloading: "https://download.pytorch.org/models/resnet50-11ad3fa6.pth" to /root/.cache/torch/hub/checkpoints/resnet50-11ad3fa6.pth
100%|██████████| 97.8M/97.8M [00:00<00:00, 189MB/s]



Model Summary
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                         

## Set the loss function and optimizer

In [20]:
# Create the loss function
loss_function = nn.CrossEntropyLoss()
# Create the optimizer
optimizer = optim.SGD(model_tl.parameters(), lr=learning_rate)

## Train and Test the Transfert Learning

In [21]:
# Train the neural network
model_tl = train_model(epoch_number_tl, train_loader, val_loader, model_tl, optimizer, loss_function, device,
                    results_path, my_folder_name)
# Test the neural network
test_model(test_loader, model_tl, loss_function, device, classes)

Epoch 1/3: 100%|██████████| 131/131 [02:34<00:00,  1.18s/ mini-batch, train_accuracy=72.7, train_loss=0.585]
               Validation step: 100%|██████████| 33/33 [00:31<00:00,  1.04 mini-batch/s, validation_accuracy=74.9, validation_loss=0.533]
Epoch 2/3: 100%|██████████| 131/131 [02:04<00:00,  1.05 mini-batch/s, train_accuracy=74.8, train_loss=0.52]
               Validation step: 100%|██████████| 33/33 [00:25<00:00,  1.29 mini-batch/s, validation_accuracy=75.2, validation_loss=0.449]
Epoch 3/3: 100%|██████████| 131/131 [02:05<00:00,  1.04 mini-batch/s, train_accuracy=83.4, train_loss=0.351]
               Validation step: 100%|██████████| 33/33 [00:25<00:00,  1.30 mini-batch/s, validation_accuracy=89.4, validation_loss=0.323]






Testing the training model: 100%|██████████| 20/20 [00:19<00:00,  1.03 mini-batch/s, testing_accuracy=78.3, testing_loss=0.462]

Accuracy: 0.7788461538461539
Balanced Accuracy: 0.7247863247863248
F1-score: 0.7788461538461539
Kappa: 0.4879406307977736
[[119 115]
 [ 23 367]]
              precision    recall  f1-score   support

      NORMAL       0.84      0.51      0.63       234
   PNEUMONIA       0.76      0.94      0.84       390

    accuracy                           0.78       624
   macro avg       0.80      0.72      0.74       624
weighted avg       0.79      0.78      0.76       624






Unnamed: 0,Loss,Accuracy,Balanced Accuracy,F1-score,Kappa
0,0.462042,0.778846,0.724786,0.778846,0.487941


## Save the model

In [22]:
print("\nThe program are saving the trained model. Please wait ...")
torch.save(model_tl.state_dict(), os.path.join(results_path, my_folder_name, "my_TL.pth"))
print("\nModel saved")


The program are saving the trained model. Please wait ...

Model saved


# CNN

## Classe de notre CNN

In [23]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class CNNModel(nn.Module):
    def __init__(self, input_shape=(3, 224, 224)):
        super(CNNModel, self).__init__()
        self.conv1 = nn.Conv2d(input_shape[0], 32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(32, 32, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.dropout1 = nn.Dropout(0.25)

        self.conv3 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.conv4 = nn.Conv2d(64, 64, kernel_size=3, padding=1)
        self.dropout2 = nn.Dropout(0.25)

        self.conv5 = nn.Conv2d(64, 64, kernel_size=3, padding=1)
        self.conv6 = nn.Conv2d(64, 64, kernel_size=3, padding=1)
        self.dropout3 = nn.Dropout(0.4)

        self.conv7 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.conv8 = nn.Conv2d(128, 128, kernel_size=3, padding=1)
        self.dropout4 = nn.Dropout(0.4)

        self.conv9 = nn.Conv2d(128, 128, kernel_size=3, padding=1)
        self.conv10 = nn.Conv2d(128, 128, kernel_size=3, padding=1)
        self.dropout5 = nn.Dropout(0.4)

        self.fc1 = nn.Linear(128 * 7 * 7, 256)
        self.dropout6 = nn.Dropout(0.5)
        self.fc2 = nn.Linear(256, 2)

    def forward(self, x):
        x = self.pool(F.relu(self.conv2(F.relu(self.conv1(x)))))
        x = self.dropout1(x)

        x = self.pool(F.relu(self.conv4(F.relu(self.conv3(x)))))
        x = self.dropout2(x)

        x = self.pool(F.relu(self.conv6(F.relu(self.conv5(x)))))
        x = self.dropout3(x)

        x = self.pool(F.relu(self.conv8(F.relu(self.conv7(x)))))
        x = self.dropout4(x)

        x = self.pool(F.relu(self.conv10(F.relu(self.conv9(x)))))
        x = self.dropout5(x)

        x = x.view(-1, 128 * 7 * 7)
        x = F.relu(self.fc1(x))
        x = self.dropout6(x)
        x = self.fc2(x)
        return x

## Load the dataset for our CNN

In [24]:
""" Data Transformation """
# Define the transformations to apply to x data (follow ones applied for ResNet50_Weights.IMAGENET1K_V2)
transform_cnn = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
])

# Define the transformations to apply to y labels
target_transform_cnn = transforms.Compose(
    [transforms.Lambda(
        lambda y: torch.zeros(2, dtype=torch.float).scatter_(
            0, 
            torch.tensor(y),
            value=1)
        )
    ]
)

In [25]:
"""" Train, validation and test sets for CNN """
# Load the dataset by applying transformations
train_set_cnn = torchvision.datasets.ImageFolder(train_path , transform=transform_cnn, target_transform=target_transform_cnn)
test_set_cnn = torchvision.datasets.ImageFolder(test_path, transform=transform_cnn, target_transform=target_transform_cnn)
#val_set_cnn = torchvision.datasets.ImageFolder(val_path, transform=transform_cnn, target_transform=target_transform_cnn)

# Define the split ratio and split the dataset
train_ratio = 0.8  # 80% for training, 20% for validation
train_size = int(train_ratio * len(train_set_cnn))
val_size = len(train_set_cnn) - train_size

train_subset_cnn, val_subset_cnn = random_split(train_set_cnn, [train_size, val_size])


# Create the Python iterator for the datasets
train_loader_cnn = torch.utils.data.DataLoader(train_subset_cnn, batch_size=batch_size, shuffle=True)
test_loader_cnn = torch.utils.data.DataLoader(test_set_cnn, batch_size=batch_size, shuffle=False)
val_loader_cnn = torch.utils.data.DataLoader(val_subset_cnn, batch_size=batch_size, shuffle=True)

"""ATTENTION"""
"""Ici l'algorithme fera de la classification binaire PNEUMONIA / NORMAL"""
"""Pour changer cela et spécifier VIRUS ou BACTERIA il faudra les différencier dans les dossiers."""

# Load the first batch of images from the train set
images, labels = next(iter(train_loader_cnn))

# Get the shape of the images
image_shape = list(images.data.shape)
# Get automatically the number of channels of images
image_channel = image_shape[1]
print(image_channel)

input_shape = image_shape[1:]
print(input_shape)

# Get the number of classes from the dataset
classes = test_set_cnn.classes
class_number = len(list(classes))

classes, class_number

3
[3, 224, 224]


(['NORMAL', 'PNEUMONIA'], 2)

## Create our instance of CNN

In [26]:
### CNN
# Instantiate and move the model to GPU
model = CNNModel(input_shape).to(device)

# Print information about the model
get_model_information(model)


Model Summary
                                Layer Name                                  Number of Trainable Parameters  Number of (non trainable) Parameters
          Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))                 896                               0                  
         Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))                9248                               0                  
MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)                   0                               0                  
                                            Dropout(p=0.25, inplace=False)                   0                               0                  
         Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))               18496                               0                  
         Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))               36928                     

# Set the loss function and optimizer

In [27]:
# Create the loss function
loss_function = nn.CrossEntropyLoss()
# Create the optimizer
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

## Train and test our CNN

In [28]:
# Train the neural network
model = train_model(epoch_number_cnn, train_loader_cnn, val_loader_cnn, model, optimizer, loss_function, device,
                    results_path, my_folder_name)
# Test the neural network
test_model(test_loader_cnn, model, loss_function, device, classes)


Epoch 1/15: 100%|██████████| 131/131 [01:15<00:00,  1.74 mini-batch/s, train_accuracy=74.5, train_loss=0.58]
               Validation step: 100%|██████████| 33/33 [00:16<00:00,  2.03 mini-batch/s, validation_accuracy=73.2, validation_loss=0.582]
Epoch 2/15: 100%|██████████| 131/131 [01:15<00:00,  1.74 mini-batch/s, train_accuracy=74.5, train_loss=0.569]
               Validation step: 100%|██████████| 33/33 [00:16<00:00,  1.96 mini-batch/s, validation_accuracy=73.2, validation_loss=0.582]
Epoch 3/15: 100%|██████████| 131/131 [01:14<00:00,  1.75 mini-batch/s, train_accuracy=74.6, train_loss=0.569]
               Validation step: 100%|██████████| 33/33 [00:16<00:00,  1.95 mini-batch/s, validation_accuracy=73.4, validation_loss=0.591]
Epoch 4/15: 100%|██████████| 131/131 [01:15<00:00,  1.73 mini-batch/s, train_accuracy=74.5, train_loss=0.57]
               Validation step: 100%|██████████| 33/33 [00:16<00:00,  1.97 mini-batch/s, validation_accuracy=73.4, validation_loss=0.582]
Epoch 5/15





Testing the training model: 100%|██████████| 20/20 [00:09<00:00,  2.00 mini-batch/s, testing_accuracy=72.5, testing_loss=1.31]

Accuracy: 0.717948717948718
Balanced Accuracy: 0.6247863247863248
F1-score: 0.717948717948718
Kappa: 0.2931726907630522
[[ 59 175]
 [  1 389]]
              precision    recall  f1-score   support

      NORMAL       0.98      0.25      0.40       234
   PNEUMONIA       0.69      1.00      0.82       390

    accuracy                           0.72       624
   macro avg       0.84      0.62      0.61       624
weighted avg       0.80      0.72      0.66       624






Unnamed: 0,Loss,Accuracy,Balanced Accuracy,F1-score,Kappa
0,1.310066,0.717949,0.624786,0.717949,0.293173


In [29]:
print("\nThe program are saving the trained model. Please wait ...")
torch.save(model.state_dict(), os.path.join(results_path, my_folder_name, "my_CNN.pth"))
print("\nModel saved")


The program are saving the trained model. Please wait ...

Model saved
