In [12]:
import os
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
from torchvision import models
import numpy as np

### Load ResNet50 pre-trained model

In [13]:
# Download the ResNet50 model with its supervised pre-trained weights and print its architecture.
# NOTE(review): newer torchvision releases deprecate the `pretrained=` flag in favour of `weights=` —
# confirm against the installed torchvision version before changing it.
resnet50_model = models.resnet50(pretrained = True) # pretrained: If True, returns a model pre-trained on ImageNet.
print(resnet50_model)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [14]:
# Maps a human-readable ResNet stage name to the (negative) slice end used when
# truncating list(model.children()): "layer 1" -> -5, ..., "layer 4" -> -2.
layers_dict = {
    stage_name: slice_end
    for slice_end, stage_name in enumerate(("layer 1", "layer 2", "layer 3", "layer 4"), start = -5)
}

In [15]:
# Create a new model whose output is the output of the chosen intermediate layer of the resnet50 model.
# The child modules of resnet50_model are kept in order up to (not including) the slice end
# layers_dict["layer 4"]; with a negative slice end this drops the trailing child modules
# (presumably the average-pooling and fc head — the printed architecture above is truncated, TODO confirm).
resnet50_intermediate_layer_model = nn.Sequential(*list(resnet50_model.children())[:layers_dict["layer 4"]])

### Load ResNet50 MoCo model

In [16]:
# Load the MoCo pre-trained weights from disk and keep them in a dictionary.
# The directory where my checkpoints are saved.
checkpoints_directory = r"C:\Nikolaos Sintoris\Education\MEng CSE - UOI\Diploma Thesis\Checkpoints"

# map_location = "cpu": the models in this notebook run on the CPU, and without it a checkpoint
# that was saved from GPU tensors fails to load on a machine without CUDA.
# NOTE: torch.load unpickles the file — only load checkpoints that come from a trusted source.
moco_checkpoint = torch.load(os.path.join(checkpoints_directory, 'moco_v2_800ep_pretrain.pth.tar'), map_location = "cpu")
moco_checkpoint_dict = moco_checkpoint['state_dict']

# Rename the MoCo pre-trained keys in order to match the ResNet50 keys.
# The same prefix (including the trailing dot) is used both for the test and for the stripping,
# so a key is only renamed when exactly that prefix is removed from it.
moco_key_prefix = "module.encoder_q."
for k in list(moco_checkpoint_dict.keys()):
    # retain only encoder_q up to before the embedding layer
    if k.startswith(moco_key_prefix) and not k.startswith(moco_key_prefix + "fc"):
        # remove prefix
        moco_checkpoint_dict[k[len(moco_key_prefix):]] = moco_checkpoint_dict[k]
    # delete renamed or unused k
    del moco_checkpoint_dict[k]

# Create a ResNet50 model without pre-trained weights.
resnet50_moco_model = models.resnet50(pretrained = False)

# Load the pre-trained weights from MoCo.
# strict = False is needed because the fc head is intentionally left out, but it would also hide a
# broken key mapping, so the result is checked: only the fc parameters may be missing.
moco_load_result = resnet50_moco_model.load_state_dict(moco_checkpoint_dict, strict = False)
assert set(moco_load_result.missing_keys) == {"fc.weight", "fc.bias"}, moco_load_result.missing_keys
assert not moco_load_result.unexpected_keys, moco_load_result.unexpected_keys

# Keep the load report as the cell output.
moco_load_result

_IncompatibleKeys(missing_keys=['fc.weight', 'fc.bias'], unexpected_keys=[])

In [17]:
# Create a new model whose output is the output of the chosen intermediate layer of the resnet50 MoCo model.
# The child modules of resnet50_moco_model are kept in order up to (not including) the slice end
# layers_dict["layer 4"], i.e. the same truncation that is applied to the other ResNet50 models.
resnet50_moco_intermediate_layer_model = nn.Sequential(*list(resnet50_moco_model.children())[:layers_dict["layer 4"]])

### Load ResNet50 SimCLR model

In [18]:
# Load the SimCLR pre-trained weights from disk and keep them in a dictionary.
# The directory where my checkpoints are saved.
checkpoints_directory = r"C:\Nikolaos Sintoris\Education\MEng CSE - UOI\Diploma Thesis\Checkpoints"

# map_location = "cpu": the models in this notebook run on the CPU, and without it a checkpoint
# that was saved from GPU tensors fails to load on a machine without CUDA.
# NOTE: torch.load unpickles the file — only load checkpoints that come from a trusted source.
simclr_checkpoint = torch.load(os.path.join(checkpoints_directory, 'checkpoint_0040.pth.tar'), map_location = "cpu")
simclr_checkpoint_dict = simclr_checkpoint['state_dict']

# Rename the SimCLR pre-trained keys in order to match the ResNet50 keys.
# The same prefix (including the trailing dot) is used both for the test and for the stripping,
# so a key is only renamed when exactly that prefix is removed from it.
simclr_key_prefix = "backbone."
for k in list(simclr_checkpoint_dict.keys()):
    # retain only the backbone up to before its fc (projection) layer
    if k.startswith(simclr_key_prefix) and not k.startswith(simclr_key_prefix + "fc"):
        # remove prefix
        simclr_checkpoint_dict[k[len(simclr_key_prefix):]] = simclr_checkpoint_dict[k]
    # delete renamed or unused k
    del simclr_checkpoint_dict[k]

# Create a ResNet50 model without pre-trained weights.
resnet50_simclr_model = models.resnet50(pretrained = False)

# Load the pre-trained weights from SimCLR.
# strict = False is needed because the fc head is intentionally left out, but it would also hide a
# broken key mapping, so the result is checked: only the fc parameters may be missing.
simclr_load_result = resnet50_simclr_model.load_state_dict(simclr_checkpoint_dict, strict = False)
assert set(simclr_load_result.missing_keys) == {"fc.weight", "fc.bias"}, simclr_load_result.missing_keys
assert not simclr_load_result.unexpected_keys, simclr_load_result.unexpected_keys

# Keep the load report as the cell output.
simclr_load_result

_IncompatibleKeys(missing_keys=['fc.weight', 'fc.bias'], unexpected_keys=[])

In [19]:
# Create a new model whose output is the output of the chosen intermediate layer of the resnet50 SimCLR model.
# The child modules of resnet50_simclr_model are kept in order up to (not including) the slice end
# layers_dict["layer 4"], i.e. the same truncation that is applied to the other ResNet50 models.
resnet50_simclr_intermediate_layer_model = nn.Sequential(*list(resnet50_simclr_model.children())[:layers_dict["layer 4"]])

### Specify image transformations

In [20]:
# Image pre-processing that is passed to the ImageFolder datasets.
# transforms.Compose(): Composes several transforms together.
# transforms.ToTensor(): Convert a PIL Image or numpy.ndarray to tensor.
# Only ToTensor() is applied: the images are neither resized (no transforms.Resize()) nor
# normalized (no transforms.Normalize()), so every image reaches the models at its original size.
# NOTE(review): ImageNet pre-trained torchvision models are normally fed inputs normalized with the
# ImageNet mean/std — confirm that skipping the normalization here is intentional.
transform = transforms.Compose([transforms.ToTensor()])

### Store Embeddings

In [21]:
def averagePooling(my_tensor):
    """Global average pooling of a (channels, height, width) feature map.

    The pooling window covers the full (height, width) extent of the input. The previous
    version used a square window of side ``width``: that raised when height < width and
    silently averaged only the top ``width x width`` part when height > width. For square
    inputs the result is unchanged.

    Parameters
    ----------
    my_tensor : torch.Tensor
        Feature map whose last two dimensions are the spatial ones; the callers in this
        notebook pass a 3D (channels, height, width) tensor.

    Returns
    -------
    torch.Tensor
        For a 3D input, a (1, channels) tensor holding the spatial mean of every channel.
    """
    # Use the full spatial extent as the kernel so exactly one value per channel is produced.
    temp_tensor = nn.AvgPool2d((my_tensor.shape[-2], my_tensor.shape[-1]))(my_tensor)
    final_tensor = temp_tensor[:, :, -1] # Drop the last (size 1) dimension: 3D -> 2D tensor.
    return final_tensor.T # Transpose the tensor in order to have a (row) vector.

def averagePoolingNonSquare(my_tensor):
    """Average-pool a feature map over its whole (height, width) extent.

    Parameters
    ----------
    my_tensor : torch.Tensor
        Feature map whose last two dimensions are the spatial ones; the callers in this
        notebook pass a 3D (channels, height, width) tensor.

    Returns
    -------
    torch.Tensor
        For a 3D input, a (1, channels) tensor with the spatial mean of every channel.
    """
    height, width = my_tensor.shape[-2:]
    # One pooling window that spans the complete spatial extent.
    pooled = nn.functional.avg_pool2d(my_tensor, kernel_size = (height, width))
    # Drop the last (size 1) dimension, then transpose to obtain a row vector.
    return pooled[:, :, -1].T

In [22]:
# A list that has the names of all cases: '\\case01' ... '\\case23'.
# Every name keeps its leading path separator because it is appended directly to a directory path.
cases_folder_names = ['\\case{:02d}'.format(case_number) for case_number in range(1, 24)]

# A dictionary that has all the possible categories.
labels_dict = {
    "0": 0,
    "1": 1,
    "2": 2
}

# Set the directory that all of my data is saved.
original_data_directory = r"C:\Nikolaos Sintoris\Education\MEng CSE - UOI\Diploma Thesis\Data\CNN_SuperPixelLOPODATA"

# The directories under which the embeddings of every model are stored (one sub-folder per case).
classifier_target_directory = r"C:\Nikolaos Sintoris\Education\MEng CSE - UOI\Diploma Thesis\Training Results\Layer 4\Embeddings\ResNet50 Classifier"
moco_target_directory = r"C:\Nikolaos Sintoris\Education\MEng CSE - UOI\Diploma Thesis\Training Results\Layer 4\Embeddings\ResNet50 MoCo"
simclr_target_directory = r"C:\Nikolaos Sintoris\Education\MEng CSE - UOI\Diploma Thesis\Training Results\Layer 4\Embeddings\ResNet50 SimCLR"

# The arrays that are stored for every case and the csv file each of them is written to.
case_data = ["training_data", "actual_training_labels", "validation_data", "actual_validation_labels", "test_data", "actual_test_labels"]

case_data_csv_dict = {
    "training_data": "\\training_data.csv",
    "actual_training_labels": "\\actual_training_labels.csv",
    "validation_data": "\\validation_data.csv",
    "actual_validation_labels": "\\actual_validation_labels.csv",
    "test_data": "\\test_data.csv",
    "actual_test_labels": "\\actual_test_labels.csv"
}

# Length of one stored embedding vector (one value per channel of the intermediate layer output).
embedding_dim = 2048

# Seed of the shuffle that decides which training images end up in the validation split.
# Without a fixed seed every run of this cell produces a different train/validation split.
split_seed = 0


def _extract_embeddings(image):
    """Return the pooled intermediate-layer embeddings of one image for the three models.

    Parameters
    ----------
    image : torch.Tensor
        A batch as produced by the DataLoaders below (batch_size = 1).

    Returns
    -------
    tuple of torch.Tensor
        (classifier_output, moco_output, simclr_output). Each one is the result of
        averagePoolingNonSquare() on the last sample of the corresponding model output.
    """
    intermediate_layer_models = (
        resnet50_intermediate_layer_model,
        resnet50_moco_intermediate_layer_model,
        resnet50_simclr_intermediate_layer_model,
    )
    pooled_outputs = []
    with torch.no_grad():
        for intermediate_layer_model in intermediate_layer_models:
            # Take the output and convert it from 4D to 3D tensor (last sample of the batch).
            feature_maps = intermediate_layer_model(image)[-1, :, :, :]
            # The same full-extent pooling is used for all three models: the square-kernel
            # averagePooling() is only correct when the feature map is square.
            pooled_outputs.append(averagePoolingNonSquare(feature_maps))
    return tuple(pooled_outputs)


# The models are only used for inference; switching to eval mode once is enough for all cases.
resnet50_intermediate_layer_model.eval()
resnet50_moco_intermediate_layer_model.eval()
resnet50_simclr_intermediate_layer_model.eval()

for case_name in cases_folder_names:
    print(case_name)

    #===============================================================
    #==================== Create Datasets ==========================
    # Complete train and test dataset.
    current_case_directory = original_data_directory + case_name
    train_directory = current_case_directory + '\\TRAIN'
    test_directory = current_case_directory + '\\TEST'

    training_dataset = torchvision.datasets.ImageFolder(root = train_directory , transform = transform)
    test_dataset = torchvision.datasets.ImageFolder(root = test_directory , transform = transform)
    #===============================================================
    #===============================================================

    #==============================================================
    #==================== Number of Data ==========================
    # Save the number of training, validation and test data.
    number_of_training_data = len(training_dataset)
    number_of_test_data = len(test_dataset)

    # Validation data is 30% of training data.
    number_of_validation_data = number_of_training_data - round(number_of_training_data * 0.7)
    number_of_training_data = number_of_training_data - number_of_validation_data

    # Create DataLoaders that yield one image at a time.
    # shuffle = True for the training data: the shuffled order defines the random train/validation split.
    # shuffle = False for the test data: nothing depends on a random order, so the rows of the
    # stored test files follow the dataset order and are identical between runs.
    trainloader = torch.utils.data.DataLoader(training_dataset, batch_size = 1, shuffle = True)
    testloader = torch.utils.data.DataLoader(test_dataset, batch_size = 1, shuffle = False)

    print("Number of training data: ", number_of_training_data)
    print("Number of validation data: ", number_of_validation_data)
    print("Number of test data: ", number_of_test_data)
    #===============================================================
    #===============================================================

    #=================================================================================
    #==================== Training - Validation - Test Data ==========================
    # *_training_data: A torch tensor that has the output of the model from the training images.
    # actual_training_labels: A torch tensor that has the actual labels of the training images.
    # Same for validation and test.
    # The buffers start as zeros (instead of random values), so a row that is never written
    # cannot be mistaken for a real embedding.
    classifier_training_data = torch.zeros(number_of_training_data, embedding_dim, dtype = torch.float)
    classifier_validation_data = torch.zeros(number_of_validation_data, embedding_dim, dtype = torch.float)
    classifier_test_data = torch.zeros(number_of_test_data, embedding_dim, dtype = torch.float)

    moco_training_data = torch.zeros(number_of_training_data, embedding_dim, dtype = torch.float)
    moco_validation_data = torch.zeros(number_of_validation_data, embedding_dim, dtype = torch.float)
    moco_test_data = torch.zeros(number_of_test_data, embedding_dim, dtype = torch.float)

    simclr_training_data = torch.zeros(number_of_training_data, embedding_dim, dtype = torch.float)
    simclr_validation_data = torch.zeros(number_of_validation_data, embedding_dim, dtype = torch.float)
    simclr_test_data = torch.zeros(number_of_test_data, embedding_dim, dtype = torch.float)

    actual_training_labels = torch.zeros(number_of_training_data, dtype = torch.long)
    actual_validation_labels = torch.zeros(number_of_validation_data, dtype = torch.long)
    actual_test_labels = torch.zeros(number_of_test_data, dtype = torch.long)

    # Seed right before the shuffled pass so the split of every case is reproducible on its own.
    torch.manual_seed(split_seed)

    train_index = 0
    validation_index = 0
    for image, label in trainloader:
        # Take the outputs of the three models after average pooling.
        classifier_output, moco_output, simclr_output = _extract_embeddings(image)
        current_label = labels_dict[str(label.item())]

        # The first 70% of the shuffled training images are stored as training data,
        # the remaining 30% are stored as validation data.
        if train_index < number_of_training_data:
            classifier_training_data[train_index] = classifier_output
            moco_training_data[train_index] = moco_output
            simclr_training_data[train_index] = simclr_output
            actual_training_labels[train_index] = current_label
            train_index = train_index + 1
        else:
            classifier_validation_data[validation_index] = classifier_output
            moco_validation_data[validation_index] = moco_output
            simclr_validation_data[validation_index] = simclr_output
            actual_validation_labels[validation_index] = current_label
            validation_index = validation_index + 1

    test_index = 0
    for image, label in testloader:
        # Take the outputs of the three models after average pooling and store them as test data.
        classifier_output, moco_output, simclr_output = _extract_embeddings(image)

        classifier_test_data[test_index] = classifier_output
        moco_test_data[test_index] = moco_output
        simclr_test_data[test_index] = simclr_output
        actual_test_labels[test_index] = labels_dict[str(label.item())]

        test_index = test_index + 1

    # Print the shapes of everything that was collected, one report per model.
    collected_data_per_model = (
        ("ResNet50-Classifier", classifier_training_data, classifier_validation_data, classifier_test_data),
        ("ResNet50-MoCo", moco_training_data, moco_validation_data, moco_test_data),
        ("ResNet50-SimCLR", simclr_training_data, simclr_validation_data, simclr_test_data),
    )
    for model_title, model_training_data, model_validation_data, model_test_data in collected_data_per_model:
        print(model_title)
        print("Train Data: ")
        print("\tTraining data shape: ", model_training_data.shape)
        print("\tTraining labels shape: ", actual_training_labels.shape)

        print("Validation Data: ")
        print("\tValidation data shape: ", model_validation_data.shape)
        print("\tValidation labels shape: ", actual_validation_labels.shape)

        print("Test Data: ")
        print("\tTest data shape: ", model_test_data.shape)
        print("\tTest labels shape: ", actual_test_labels.shape)
    #=====================================================================
    #=====================================================================

    #=====================================================================
    #==================== Convert tensors to np ==========================
    # Convert tensors to numpy arrays in order to store them in csv files.
    # The label arrays are shared by the three models.
    actual_training_labels_np = actual_training_labels.numpy()
    actual_validation_labels_np = actual_validation_labels.numpy()
    actual_test_labels_np = actual_test_labels.numpy()

    classifier_case_data_np_dict = {
        "training_data": classifier_training_data.numpy(),
        "actual_training_labels": actual_training_labels_np,
        "validation_data": classifier_validation_data.numpy(),
        "actual_validation_labels": actual_validation_labels_np,
        "test_data": classifier_test_data.numpy(),
        "actual_test_labels": actual_test_labels_np
    }

    moco_case_data_np_dict = {
        "training_data": moco_training_data.numpy(),
        "actual_training_labels": actual_training_labels_np,
        "validation_data": moco_validation_data.numpy(),
        "actual_validation_labels": actual_validation_labels_np,
        "test_data": moco_test_data.numpy(),
        "actual_test_labels": actual_test_labels_np
    }

    simclr_case_data_np_dict = {
        "training_data": simclr_training_data.numpy(),
        "actual_training_labels": actual_training_labels_np,
        "validation_data": simclr_validation_data.numpy(),
        "actual_validation_labels": actual_validation_labels_np,
        "test_data": simclr_test_data.numpy(),
        "actual_test_labels": actual_test_labels_np
    }
    #=====================================================================
    #=====================================================================

    #============================================================================
    #==================== Store embeddings in csv files =========================
    # Store the embeddings in csv files, one folder per model and case.
    classifier_current_case_embeddings_directory = classifier_target_directory + case_name
    moco_current_case_embeddings_directory = moco_target_directory + case_name
    simclr_current_case_embeddings_directory = simclr_target_directory + case_name

    embeddings_to_store = (
        (classifier_current_case_embeddings_directory, classifier_case_data_np_dict),
        (moco_current_case_embeddings_directory, moco_case_data_np_dict),
        (simclr_current_case_embeddings_directory, simclr_case_data_np_dict),
    )
    for current_case_embeddings_directory, case_data_np_dict in embeddings_to_store:
        # exist_ok = True: create the folder when it is missing, do nothing when it already exists.
        os.makedirs(current_case_embeddings_directory, exist_ok = True)

        # Save every numpy array to a different csv file.
        for current_data in case_data:
            np.savetxt(current_case_embeddings_directory + case_data_csv_dict[current_data], case_data_np_dict[current_data], delimiter = ',')
    #=====================================================================
    #=====================================================================

\case01
Number of training data:  7526
Number of validation data:  3225
Number of test data:  813
ResNet50-Classifier
Train Data: 
	Training data shape:  torch.Size([7526, 2048])
	Training labels shape:  torch.Size([7526])
Validation Data: 
	Validation data shape:  torch.Size([3225, 2048])
	Validation labels shape:  torch.Size([3225])
Test Data: 
	Test data shape:  torch.Size([813, 2048])
	Test labels shape:  torch.Size([813])
ResNet50-MoCo
Train Data: 
	Training data shape:  torch.Size([7526, 2048])
	Training labels shape:  torch.Size([7526])
Validation Data: 
	Validation data shape:  torch.Size([3225, 2048])
	Validation labels shape:  torch.Size([3225])
Test Data: 
	Test data shape:  torch.Size([813, 2048])
	Test labels shape:  torch.Size([813])
ResNet50-SimCLR
Train Data: 
	Training data shape:  torch.Size([7526, 2048])
	Training labels shape:  torch.Size([7526])
Validation Data: 
	Validation data shape:  torch.Size([3225, 2048])
	Validation labels shape:  torch.Size([3225])
Test Da

\case09
Number of training data:  7437
Number of validation data:  3187
Number of test data:  1017
ResNet50-Classifier
Train Data: 
	Training data shape:  torch.Size([7437, 2048])
	Training labels shape:  torch.Size([7437])
Validation Data: 
	Validation data shape:  torch.Size([3187, 2048])
	Validation labels shape:  torch.Size([3187])
Test Data: 
	Test data shape:  torch.Size([1017, 2048])
	Test labels shape:  torch.Size([1017])
ResNet50-MoCo
Train Data: 
	Training data shape:  torch.Size([7437, 2048])
	Training labels shape:  torch.Size([7437])
Validation Data: 
	Validation data shape:  torch.Size([3187, 2048])
	Validation labels shape:  torch.Size([3187])
Test Data: 
	Test data shape:  torch.Size([1017, 2048])
	Test labels shape:  torch.Size([1017])
ResNet50-SimCLR
Train Data: 
	Training data shape:  torch.Size([7437, 2048])
	Training labels shape:  torch.Size([7437])
Validation Data: 
	Validation data shape:  torch.Size([3187, 2048])
	Validation labels shape:  torch.Size([3187])
Te

\case17
Number of training data:  7323
Number of validation data:  3139
Number of test data:  1289
ResNet50-Classifier
Train Data: 
	Training data shape:  torch.Size([7323, 2048])
	Training labels shape:  torch.Size([7323])
Validation Data: 
	Validation data shape:  torch.Size([3139, 2048])
	Validation labels shape:  torch.Size([3139])
Test Data: 
	Test data shape:  torch.Size([1289, 2048])
	Test labels shape:  torch.Size([1289])
ResNet50-MoCo
Train Data: 
	Training data shape:  torch.Size([7323, 2048])
	Training labels shape:  torch.Size([7323])
Validation Data: 
	Validation data shape:  torch.Size([3139, 2048])
	Validation labels shape:  torch.Size([3139])
Test Data: 
	Test data shape:  torch.Size([1289, 2048])
	Test labels shape:  torch.Size([1289])
ResNet50-SimCLR
Train Data: 
	Training data shape:  torch.Size([7323, 2048])
	Training labels shape:  torch.Size([7323])
Validation Data: 
	Validation data shape:  torch.Size([3139, 2048])
	Validation labels shape:  torch.Size([3139])
Te