### Image Ingestion

In [1]:
import os
import pandas as pd
import shutil

import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
from torch.utils.data import DataLoader, TensorDataset
import matplotlib.pyplot as plt

from collections import Counter
import torch.nn.functional as F

# Dataset layout: everything lives under `root_folder`. Both derived paths are
# built from it so that relocating the dataset only requires one change.
root_folder = 'Data_Full'
excel_file = os.path.join(root_folder, 'metadata.csv')  # CSV despite the variable name
image_folder = os.path.join(root_folder, 'images')
# train_folder = os.path.join(root_folder, 'train')
# validation_folder = os.path.join(root_folder, 'validation')
# test_folder = os.path.join(root_folder, 'test')

In [2]:
# Create the 'train', 'validation' and 'test' folders if they don't exist (currently disabled).
# os.makedirs(train_folder, exist_ok=True)
# os.makedirs(validation_folder, exist_ok=True)
# os.makedirs(test_folder, exist_ok=True)

In [3]:
# Read the metadata table; `excel_file` points at a CSV file despite its name.
df = pd.read_csv(excel_file)

In [4]:
# Keep only rows whose label is exactly 'benign' or 'malignant'; rows with a
# missing label or any other value are dropped.
has_label = df['benign_malignant'].notna()
is_binary_label = df['benign_malignant'].isin(['benign','malignant'])
df = df[has_label & is_binary_label]
df.head(5)

Unnamed: 0,isic_id,attribution,copyright_license,age_approx,anatom_site_general,benign_malignant,diagnosis,diagnosis_confirm_type,image_type,melanocytic,sex
0,ISIC_0000000,Anonymous,CC-0,55.0,anterior torso,benign,nevus,,dermoscopic,True,female
1,ISIC_0000001,Anonymous,CC-0,30.0,anterior torso,benign,nevus,,dermoscopic,True,female
2,ISIC_0000002,Anonymous,CC-0,60.0,upper extremity,malignant,melanoma,histopathology,dermoscopic,True,female
3,ISIC_0000004,Anonymous,CC-0,80.0,posterior torso,malignant,melanoma,histopathology,dermoscopic,True,male
4,ISIC_0000006,Anonymous,CC-0,25.0,posterior torso,benign,nevus,,dermoscopic,True,female


In [5]:
# Report how many distinct image ids remain after filtering.
n_unique_images = df['isic_id'].nunique()
print('Unique Images', n_unique_images)

# Use the first CUDA device when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')
device

Unique Images 8713


device(type='cuda', index=0)

In [6]:
# Copy (not move) every image into a per-class subfolder of `image_folder`,
# i.e. images/<label>/<isic_id>.jpg. The originals stay in place.

# Create each class folder once instead of once per row.
for label in df['benign_malignant'].unique():
    os.makedirs(os.path.join(image_folder, label), exist_ok=True)

for isic_id, label in zip(df['isic_id'], df['benign_malignant']):
    image_filename = isic_id + '.jpg'
    src_path = os.path.join(image_folder, image_filename)
    dst_path = os.path.join(image_folder, label, image_filename)
    # Skip files copied by a previous run so re-executing the cell is cheap.
    if not os.path.exists(dst_path):
        shutil.copy(src_path, dst_path)


In [7]:
from torchvision import transforms,datasets
from torch.utils.data import DataLoader
from torchsampler import ImbalancedDatasetSampler

# Per-image preprocessing and augmentation, applied in the listed order.
preprocessing_steps = [
    transforms.Resize((512,512)),       # bring every image to one fixed size
    transforms.RandomHorizontalFlip(),  # random left-right flip
    transforms.RandomRotation(15),      # random rotation, parameterised by 15
    transforms.ToTensor(),              # image -> tensor
    # Per-channel normalisation; these mean/std values are the ones commonly
    # quoted as ImageNet statistics.
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
]
transform = transforms.Compose(preprocessing_steps)

# A single dataset over the whole image folder (no train/test split is made here).
image_dataset = datasets.ImageFolder(image_folder,transform = transform)

# Sampler from torchsampler intended to counter class imbalance; presumably it
# weights samples inversely to their class frequency — see the torchsampler docs.
balanced_sampler = ImbalancedDatasetSampler(image_dataset)
image_loader = DataLoader(image_dataset, batch_size=1, sampler=balanced_sampler)


In [8]:
# Count the samples per integer class index of the ImageFolder dataset.
# NOTE(review): presumably 0 = benign and 1 = malignant (ImageFolder indexes
# class folders by sorted name) — confirm with image_dataset.class_to_idx.
print(dict(Counter(image_dataset.targets)))

{0: 7424, 1: 1289}


In [9]:
class Net(nn.Module):
    """Small CNN mapping a batch of RGB images to 120-dimensional vectors.

    Layers: conv(3->6, 5x5) -> ReLU -> maxpool(4) -> conv(6->16, 5x5) -> ReLU
    -> maxpool(4) -> flatten -> fc(14400->120) -> ReLU -> fc(120->120).

    ``fc1`` expects 14400 = 16 * 30 * 30 inputs, which is what the two
    conv/pool stages produce for 512x512 images
    (512 -> 508 -> 127 -> 123 -> 30). Other input sizes fail at ``fc1``.
    """

    def __init__(self):
        # Initialise nn.Module exactly once (it was previously called twice).
        super().__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.pool = nn.MaxPool2d(4, 4)  # shared by both conv stages
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(14400, 120)
        self.fc2 = nn.Linear(120, 120)

    def forward(self, x):
        """Return a ``(batch, 120)`` tensor for a ``(batch, 3, 512, 512)`` input."""
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = torch.flatten(x, 1)  # keep the batch dimension, flatten the rest
        x = F.relu(self.fc1(x))
        x = self.fc2(x)  # no activation on the final layer
        return x

# Instantiate the CNN and move it to `device`.
# NOTE(review): no training of `net` appears in the visible cells, so it acts
# as a randomly initialised feature extractor — confirm this is intended.
net = Net().to(device)

In [10]:
def random_image(num_points):
    """Draw one batch of images through the imbalance-aware sampler and embed it with ``net``.

    A new DataLoader and sampler are built on every call and only the first
    batch is consumed.

    Args:
        num_points: Number of images to draw (used as the loader batch size).

    Returns:
        Tuple ``(features, labels)``: the output of ``net`` for the batch
        (no autograd history attached) and the matching class labels, both
        on ``device``.

    Raises:
        RuntimeError: If the loader yields no batch at all.
    """
    loader = DataLoader(image_dataset, batch_size=num_points,
                        sampler=ImbalancedDatasetSampler(image_dataset))

    # Only the first batch is needed; fail loudly instead of returning None.
    batch = next(iter(loader), None)
    if batch is None:
        raise RuntimeError('random_image: the data loader produced no batch')

    inputs, labels = batch
    inputs = inputs.to(device)
    labels = labels.to(device)

    # The features are used as fixed contexts, so skip building an autograd
    # graph rather than building one and detaching afterwards.
    with torch.no_grad():
        features = net(inputs)
    return features, labels

In [11]:
# Smoke test: draw two samples and print the shapes of the returned
# feature tensor (`i`) and label tensor (`l`).
i,l = random_image(2)
print(i.shape)
print(l.shape)

torch.Size([2, 120])
torch.Size([2])


In [12]:
class FeedforwardNN(nn.Module):
    """One-hidden-layer ReLU network whose output is scaled by sqrt(hidden_size)."""

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        # Attribute names and creation order are kept stable: they determine
        # the state_dict keys and the order in which parameters are initialised.
        self.layer1 = nn.Linear(input_size, hidden_size) #bias = False?
        self.relu1 = nn.ReLU()
        self.output_layer = nn.Linear(hidden_size, output_size)
        # Constant factor applied to the network output.
        self.multiplier = np.sqrt(hidden_size)

    def forward(self, x):
        """Return ``output_layer(relu(layer1(x))) * multiplier``."""
        hidden = self.relu1(self.layer1(x))
        raw_output = self.output_layer(hidden)
        return raw_output*self.multiplier

# Network dimensions: 120 input features, 20 hidden units, one scalar output.
input_size, hidden_size, output_size = 120, 20, 1

# Seed first so that the random input and the weight initialisation below are reproducible.
torch.manual_seed(42)

# Random probe vector, then the network itself (created on the CPU).
synthetic_input = torch.rand(input_size)
model_1 = FeedforwardNN(input_size, hidden_size, output_size)

# One forward pass on the CPU as a shape check, before the model is moved.
output = model_1(synthetic_input)

# Move the model to the selected device.
model_1 = model_1.to(device)

# Show the architecture and the shape of the probe output.
print(model_1)
print("Output shape:", output.shape)

FeedforwardNN(
  (layer1): Linear(in_features=120, out_features=20, bias=True)
  (relu1): ReLU()
  (output_layer): Linear(in_features=20, out_features=1, bias=True)
)
Output shape: torch.Size([1])


In [13]:
# Snapshot the initial parameters of model_1.
# state_dict() returns tensors that share storage with the live parameters, so
# a later model_1.load_state_dict(...) would silently overwrite an un-cloned
# snapshot. Cloning keeps these values frozen at their initial state.
initial_weights = {name: tensor.detach().clone() for name, tensor in model_1.state_dict().items()}

In [14]:
# Count the entries of every weight tensor of model_1 (parameters whose name
# contains 'weight'; biases are excluded). `p` is read later by NeuralUCB.
p = 0
for name, param in model_1.named_parameters():
    if 'weight' in name:
        # numel() works for any tensor rank, not only 2-D weight matrices.
        p += param.numel()
        print(f'Layer: {name}, Size: {param.size()}')
print('Number of parameters:',p)

Layer: layer1.weight, Size: torch.Size([20, 120])
Layer: output_layer.weight, Size: torch.Size([1, 20])
Number of parameters: 2420


In [15]:
class CustomLoss(nn.Module):
    """Half of the summed squared error, divided by the number of target rows."""

    def __init__(self):
        super().__init__()

    def forward(self, predicted, target):
        """Return ``0.5 * sum((predicted - target)**2) / target.shape[0]``."""
        residual = predicted - target
        half_squared_error = 0.5*torch.sum(residual**2)
        n_rows = target.shape[0]
        return half_squared_error/n_rows

In [16]:
def train_nn(input_data,best_rewards,initial_weights,step_size, J = 20):
    """Train a fresh FeedforwardNN on the observed (context, reward) pairs.

    A new network is created, loaded with ``initial_weights`` and trained with
    mini-batch SGD (batch size 50, shuffled) on ``CustomLoss`` for ``J`` passes
    over the data. The loss of the last mini-batch is printed.

    Args:
        input_data: Tensor of contexts, one row per observation.
        best_rewards: Tensor of rewards, one entry per observation; reshaped
            to a column to match the network output.
        initial_weights: State dict of a FeedforwardNN, used as the starting
            point. The layer sizes are read from it.
        step_size: SGD learning rate.
        J: Number of passes (epochs) over the data.

    Returns:
        The state dict of the trained network.
    """
    # Read the layer sizes from the weights themselves instead of relying on
    # notebook-level globals that may have been changed by another cell.
    hidden_units, n_features = initial_weights['layer1.weight'].shape
    n_outputs = initial_weights['output_layer.weight'].shape[0]

    model_inside = FeedforwardNN(n_features, hidden_units, n_outputs).to(device)
    model_inside.load_state_dict(initial_weights)

    loss_function = CustomLoss()

    # TODO(review): the original note asks whether this should be full-batch
    # gradient descent rather than mini-batch SGD.
    optimizer = optim.SGD(model_inside.parameters(),lr = step_size)

    rewards = best_rewards.reshape(-1,1)
    dataset = TensorDataset(input_data,rewards)
    data_loader = DataLoader(dataset, batch_size=50, shuffle=True)

    # Stays None if no batch is ever processed (J == 0 or no data), so the
    # print below cannot hit an unbound name.
    loss = None
    for _ in range(J):
        for inputs,targets in data_loader:
            inputs, targets = inputs.to(device), targets.to(device)
            optimizer.zero_grad()
            output = model_inside(inputs)
            loss = loss_function(output, targets)
            loss.backward()
            optimizer.step()
    print('Loss',loss)

    print('--------------------------------------')

    return model_inside.state_dict()

In [17]:
def NeuralUCB(model,initial_weights, T, lamb = 1, step_size = 0.001):
    """Run a two-armed NeuralUCB loop on samples drawn by ``random_image``.

    Each round draws K = 2 contexts with their labels, scores every context
    with ``model`` plus a gradient-based exploration bonus, plays the arm with
    the highest score and receives that sample's label as reward. Every 50
    rounds the network is retrained from ``initial_weights`` on all contexts
    and rewards collected so far, and the new weights are loaded into ``model``.

    Args:
        model: FeedforwardNN used to score contexts; updated in place.
        initial_weights: State dict that every retraining starts from.
        T: Number of rounds.
        lamb: Scale of the initial matrix ``Z = lamb * I``.
        step_size: Learning rate passed to ``train_nn``.

    Returns:
        Tuple ``(model, regrets)``: the same model object, and a list with the
        regret of each round (largest label in the round minus the chosen one).
    """
    K = 2       # arms (contexts) drawn per round
    gamma = 1   # weight of the exploration bonus
    # Hidden-layer width, read from the model instead of a duplicated constant.
    m = model.layer1.out_features

    # Only weight tensors (not biases) enter the gradient feature vector; their
    # total size fixes the dimension of Z. Computed here rather than read from
    # a notebook-level global.
    weight_params = [param for name, param in model.named_parameters() if 'weight' in name]
    n_weights = sum(param.numel() for param in weight_params)
    Z = lamb * torch.eye(n_weights, device=device)

    best_contexts = []
    best_rewards = []
    regrets = []
    for t in range(T):
        synthetic_input,synthetic_label = random_image(K)

        # Z is constant within a round, so invert it once rather than per arm.
        Z_inv = torch.inverse(Z)

        all_grad = []
        ucbs = []
        for k in range(K):
            model.zero_grad()
            output = model(synthetic_input[k])
            # Gradient of the scalar output w.r.t. the weights; the graph is
            # not needed afterwards, so it is not retained.
            output.backward(torch.ones_like(output))
            g = torch.cat([param.grad.flatten() for param in weight_params],
                          dim = 0).reshape(-1,1).to(device)
            all_grad.append(g)

            exploration_reward = gamma*torch.sqrt((g.T@Z_inv@g)/m)
            ucb_a = output + exploration_reward
            ucbs.append(ucb_a.item())

        best_action = torch.argmax(torch.tensor(ucbs)).item()
        best_context = synthetic_input[best_action]
        best_g = all_grad[best_action]
        best_reward = synthetic_label[best_action].item()
        Z = Z + (best_g@best_g.T/m)

        best_contexts.append(best_context)
        best_rewards.append(best_reward)

        # Record this round's regret before reporting, so the printed
        # cumulative value includes the current round.
        regret = np.abs(best_reward-torch.max(synthetic_label).item())
        regrets.append(regret)

        if (t+1)%50 == 0:
            ### Train Neural Network
            # NOTE(review): the number of epochs (J) grows with t — confirm intended.
            new_weights = train_nn(torch.stack(best_contexts),
                                   torch.tensor(best_rewards),
                                   initial_weights,
                                   step_size = step_size,
                                   J = t+1)
            model.load_state_dict(new_weights)
            print(f'Training NN step {t+1}')
            print('Cumulative Regret: ' , np.sum(regrets))

    return model,regrets

### Image Data

In [None]:
# Run NeuralUCB for 1000 rounds. NeuralUCB returns the model object it was
# given, so model_1 is rebound to the same (now updated) module.
model_1,regrets_1 = NeuralUCB(model_1,initial_weights,1000)

Loss tensor(0.1413, device='cuda:0', grad_fn=<DivBackward0>)
--------------------------------------
Training NN step 50
Cumulative Regret:  15
Loss tensor(0.1269, device='cuda:0', grad_fn=<DivBackward0>)
--------------------------------------
Training NN step 100
Cumulative Regret:  29
Loss tensor(0.1218, device='cuda:0', grad_fn=<DivBackward0>)
--------------------------------------
Training NN step 150
Cumulative Regret:  41
Loss tensor(0.1227, device='cuda:0', grad_fn=<DivBackward0>)
--------------------------------------
Training NN step 200
Cumulative Regret:  55
Loss tensor(0.1227, device='cuda:0', grad_fn=<DivBackward0>)
--------------------------------------
Training NN step 250
Cumulative Regret:  70
Loss tensor(0.1114, device='cuda:0', grad_fn=<DivBackward0>)
--------------------------------------
Training NN step 300
Cumulative Regret:  84
Loss tensor(0.1106, device='cuda:0', grad_fn=<DivBackward0>)
--------------------------------------
Training NN step 350
Cumulative Regre

In [None]:
def plot_cumulative(data):
    """Plot the running total of ``data`` against its index and show the figure."""
    running_total = np.cumsum(data)
    plt.plot(running_total)
    plt.title('Cumulative Sum Plot')
    plt.xlabel('Index')
    plt.ylabel('Cumulative Sum')
    plt.grid(True)
    plt.show()

In [None]:
# Plot the running total of the per-round regrets returned by NeuralUCB.
plot_cumulative(regrets_1)

In [None]:
# Draw two fresh samples. Note that `image` holds the feature vectors that
# random_image computes with `net`, not raw pixels.
image,label = random_image(2)
label

In [None]:
# Score the two sampled feature vectors with model_1 (as left by the NeuralUCB run).
model_1(image)