In [2]:
import numpy as np
import pandas as pd
import os
import cv2
import matplotlib.pyplot as plt
import torch 
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader

from sklearn.metrics import precision_score, recall_score, confusion_matrix, f1_score

torch.manual_seed(5)

<torch._C.Generator at 0x19f62ffccb0>

# Import Images

In [4]:
image_dir_p1 = "C:/Users/Derp/Documents/CS 184A BioAi/Project/HAM10000_img/HAM10000_images_part_1"
image_dir_p2 = "C:/Users/Derp/Documents/CS 184A BioAi/Project/HAM10000_img/HAM10000_images_part_2"
image_dir_p3 = "C:/Users/Derp/Documents/CS 184A BioAi/Project/HAM10000_img/HAM10000_segmentations_lesion_tschandl/HAM10000_segmentations_lesion_tschandl"

image_list = []
name_list = []

seg_list = []
segname_list = []
#Load Images(450, 600, 3)
for images in os.listdir(image_dir_p1):
    image = os.path.join(image_dir_p1, images)
    image = cv2.imread(image)
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image = cv2.resize(image, (224, 224))

    name_list.append(images[:-4])
    image_list.append(image)

# #Load Images
for images in os.listdir(image_dir_p2):
    image = os.path.join(image_dir_p2, images)
    image = cv2.imread(image)
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image = cv2.resize(image, (224, 224))

    name_list.append(images[:-4])
    image_list.append(image)


#Load Segmentation Images
for images in os.listdir(image_dir_p3):
    image = os.path.join(image_dir_p3, images)
    image = cv2.imread(image)
    image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    image = cv2.resize(image, (224, 224))
    
    segname_list.append(images[:-17])
    seg_list.append(image)


# Classes

In [12]:
class Melanoma_Dataset(Dataset):
    def __init__(self, df, column_data, column_y, weights, device):
        super(Melanoma_Dataset, self).__init__()
        self.df = df
        self.column_data = column_data
        self.column_y = column_y
        self.weights= weights
        self.device = device

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        '''Return rows of panda columns'''

        cur_img = self.df.loc[idx, self.column_data]
        cur_img = torch.tensor(cur_img.tolist(), dtype=torch.float32)
        cur_img = cur_img.to(self.device)

        temp_list = np.zeros(7)
        temp_list[int(self.column_y[idx])] = 1
        cur_target = torch.tensor(temp_list, dtype=torch.float32)
        cur_target = cur_target.to(self.device)

        cur_weights = self.df.loc[idx, self.weights]
        cur_weights = torch.tensor(cur_weights.tolist(), dtype=torch.float32)
        cur_weights = cur_weights.to(self.device)


        return cur_img, cur_target, cur_weights

def transformToTensor(X):
    ret_list = []
    transform = transforms.Compose([
                transforms.ToTensor()
              ])
    for image in X:
      ret_list.append(transform(np.array(image)))

    return torch.tensor(np.array(ret_list), dtype=torch.float32, device ="cuda")


def weight_accuracy(predicted_y, true_y, weight):
    pred_y, tar_y, w = np.array(predicted_y), np.array(true_y), np.array(weight)
    return np.sum((pred_y == tar_y) * w.reshape(w.size)) / np.sum(w)

def num_accuracy(predicted_y, true_y):
    pred_y, tar_y = np.array(predicted_y), np.array(true_y)
    return np.sum((pred_y == tar_y)) / len(pred_y)




# Process images/data 

In [14]:
image_metadata = "C:/Users/Derp/Documents/CS 184A BioAi/Project/HAM10000_img/HAM10000_metadata"
image_metadata = pd.read_csv(image_metadata)
print(image_metadata.shape)

#(10015)
y = image_metadata[["image_id", "dx"]]

#(10015, 450, 600, 3)
image_dataframe = pd.DataFrame({'image_id': name_list, 'data':image_list})
seg_dataframe = pd.DataFrame({'image_id': segname_list, 'segdata': seg_list})

#Clear memory
name_list =[]
image_list =[]
segname_list =[]
seg_list = []

#Weights
temp = pd.Categorical(image_metadata["dx"])
values = temp.value_counts()

weight_list = []
label_list = []
for label in temp.categories:
    label_list.append(label)
    weight_list.append(10015/(7*values[label]))
weighted = pd.DataFrame({'dx': label_list, 'weights':weight_list})
print(weighted)

#Merge dataframes
fin_dataframe = pd.merge(image_dataframe, y)
fin_dataframe = pd.merge(fin_dataframe, seg_dataframe)
fin_dataframe = pd.merge(fin_dataframe, weighted)
print(fin_dataframe)

#Mask images
for index, row in fin_dataframe.iterrows():
    fin_dataframe.at[index, 'data'] = cv2.bitwise_and(row['data'], row['data'], mask=row['segdata'])


plt.figure()
plt.imshow(fin_dataframe.iloc[0]['data'])
plt.show()


FileNotFoundError: [Errno 2] No such file or directory: 'C:/Users/Derp/Documents/CS 184A BioAi/Project/HAM10000_img/HAM10000_metadata'

# Load into Dataloaders

In [306]:
#Shuffle Data
fin_dataframe = fin_dataframe.sample(frac=1,random_state=5).reset_index(drop=True)
print(fin_dataframe.shape)
all_weights = torch.tensor(np.unique(fin_dataframe['weights']), dtype=torch.float32)
print("All weights:", all_weights)

#80:20 Split
train_dataset = fin_dataframe.iloc[:int(.8*len(fin_dataframe ))]
valid_dataset = fin_dataframe.iloc[int(.8*len(fin_dataframe )):].reset_index(drop=True)

print(train_dataset.shape)
print(valid_dataset.shape)
print(pd.Categorical(valid_dataset["dx"]).unique())

train_dataset = Melanoma_Dataset(train_dataset, "data", pd.Categorical(train_dataset["dx"]).codes, "weights")
valid_dataset = Melanoma_Dataset(valid_dataset, "data", pd.Categorical(valid_dataset["dx"]).codes, "weights")

train_dataset = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=128, shuffle=True)
valid_dataset = torch.utils.data.DataLoader(dataset=valid_dataset, batch_size=128, shuffle=False)
X,Y, W = next(iter(train_dataset))
print(X.shape, Y.shape, W.shape)

print(len(valid_dataset))
X,Y, W = next(iter(valid_dataset))
print(X.shape, Y.shape, W.shape)

(10015, 5)
All weights: tensor([ 0.2134,  1.2855,  1.3018,  2.7835,  4.3753, 10.0755, 12.4410])
(8012, 5)
(2003, 5)
['df', 'nv', 'bkl', 'mel', 'vasc', 'akiec', 'bcc']
Categories (7, object): ['akiec', 'bcc', 'bkl', 'df', 'mel', 'nv', 'vasc']
torch.Size([128, 224, 224, 3]) torch.Size([128, 7]) torch.Size([128])
16
torch.Size([128, 224, 224, 3]) torch.Size([128, 7]) torch.Size([128])


# CNN Model

In [18]:
resnet50 = torchvision.models.resnet50(weights='DEFAULT')
resnet50.fc = nn.Linear(resnet50.fc.in_features, 7)

#Freeze the pre-trained layers
for parameter in resnet50.parameters():
    parameter.requires_grad = False

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(resnet50.parameters(), lr=0.0001)

#Unfreeze last few parameters
for parameter in resnet50.layer4.parameters():
    parameter.requires_grad = True

for parameter in resnet50.fc.parameters():
    parameter.requires_grad = True

conf_list = []
precise_list = []
recall_list = []
f1_list = []
weightacc_list = []
numacc_list = []


In [20]:
num_epochs = 15
total_step = len(train_dataset)
for epoch in range(num_epochs):
    resnet50.train()
    for i, (X, Y, W) in enumerate(train_dataset):
        # Forward pass
        X, Y, W = X.to(device), Y.to(device), W.to(device)

        X = transformToTensor(X.cpu().data)

        output = resnet50(X)
        criterion.weight = all_weights
        loss = criterion(output, Y)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'
               .format(epoch+1, num_epochs, i+1, total_step, loss.item()))

    y_pred_list = np.empty(0)
    y_target_list = np.empty(0)
    y_weight_list = np.empty(0)
    resnet50.eval()
    with torch.no_grad():
        for i, (X, Y, W) in enumerate(valid_dataset):
            X, Y, W = X.to(device), Y.to(device), W.to(device)

            X = transformToTensor(X.cpu().data)

            output = resnet50(X)

            y_pred_list = np.concatenate((y_pred_list, (np.argmax(np.array(output.cpu().data), axis = 1))), axis = 0)

            y_target_list = np.concatenate((y_target_list, (np.argmax(np.array(Y.cpu().data), axis=1))), axis= 0)

            y_weight_list = np.concatenate((y_weight_list, np.array(W.cpu().data)), axis=0)



    conf_list.append(confusion_matrix(y_pred_list,y_target_list))
    precise_list.append(precision_score(y_pred_list,y_target_list, average="weighted"))
    recall_list.append(recall_score(y_pred_list,y_target_list, average="weighted"))
    f1_list.append(f1_score(y_pred_list,y_target_list, average="weighted"))
    weightacc_list.append(weight_accuracy(y_pred_list,y_target_list, y_weight_list))
    numacc_list.append(num_accuracy(y_pred_list,y_target_list))

    print("Accuracy:", weight_accuracy(y_pred_list,y_target_list, y_weight_list))
    print("Num Accuracy:", num_accuracy(y_pred_list,y_target_list))


NameError: name 'train_dataset' is not defined

In [318]:

total_step = len(train_dataset)
for epoch in range(25):
    model.train()
    for i, (X, Y, W) in enumerate(train_dataset):
        # Forward pass
        output = model(X)
        Y = np.argmax(Y, axis=1)
    
        # W = W.reshape(len(X), 1)
        criterion.weight = all_weights
        loss = criterion(output, Y)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}' 
               .format(epoch+1, num_epochs, i+1, total_step, loss.item()))


    y_pred_list = np.empty(0)
    y_target_list = np.empty(0)
    y_weight_list = np.empty(0)
    model.eval()
    with torch.no_grad():
        for i, (X, Y, W) in enumerate(valid_dataset):
            output = model(X)
    
            y_pred_list = np.concatenate((y_pred_list, (np.argmax(output, axis = 1))), axis = 0)
    
            y_target_list = np.concatenate((y_target_list, (np.argmax(Y, axis=1))), axis= 0)
    
            y_weight_list = np.concatenate((y_weight_list, W), axis=0)
    
    
    
    conf_list.append(confusion_matrix(y_pred_list,y_target_list))
    precise_list.append(precision_score(y_pred_list,y_target_list, average="weighted"))
    recall_list.append(recall_score(y_pred_list,y_target_list, average="weighted"))
    f1_list.append(f1_score(y_pred_list,y_target_list, average="weighted"))
    weightacc_list.append(weight_accuracy(y_pred_list,y_target_list, y_weight_list))
    numacc_list.append(num_accuracy(y_pred_list,y_target_list))
    
    total_epochs += num_epochs
    print("Accuracy:", weight_accuracy(y_pred_list,y_target_list, y_weight_list))
    print("Num Accuracy:", num_accuracy(y_pred_list,y_target_list))
    if (epoch+1 in [1, 3, 5, 10, 25]):
        torch.save(model.state_dict(), "C:/Users/Derp/Documents/CS 184A BioAi/Project/HAM10000_img/cnn_parameters_epoch{}.pth".format(epoch+1))

np.save('C:/Users/Derp/Documents/CS 184A BioAi/Project/HAM10000_img/cnn_conf_list.npy', np.array(conf_list))
np.save('C:/Users/Derp/Documents/CS 184A BioAi/Project/HAM10000_img/cnn_precise_list.npy', np.array(precise_list))
np.save('C:/Users/Derp/Documents/CS 184A BioAi/Project/HAM10000_img/cnn_recall_list.npy', np.array(recall_list))
np.save('C:/Users/Derp/Documents/CS 184A BioAi/Project/HAM10000_img/cnn_f1_list.npy', np.array(f1_list))
np.save('C:/Users/Derp/Documents/CS 184A BioAi/Project/HAM10000_img/cnn_weightacc_list.npy', np.array(weightacc_list))
np.save('C:/Users/Derp/Documents/CS 184A BioAi/Project/HAM10000_img/cnn_numacc_list.npy', np.array(numacc_list))

Epoch [1/1], Step [1/63], Loss: 24.9255
Epoch [1/1], Step [2/63], Loss: 82.8100
Epoch [1/1], Step [3/63], Loss: 66.6724
Epoch [1/1], Step [4/63], Loss: 125.3123
Epoch [1/1], Step [5/63], Loss: 79.3465
Epoch [1/1], Step [6/63], Loss: 68.4869
Epoch [1/1], Step [7/63], Loss: 117.9308
Epoch [1/1], Step [8/63], Loss: 80.8077
Epoch [1/1], Step [9/63], Loss: 89.5297
Epoch [1/1], Step [10/63], Loss: 50.9546
Epoch [1/1], Step [11/63], Loss: 48.4022
Epoch [1/1], Step [12/63], Loss: 51.8792
Epoch [1/1], Step [13/63], Loss: 24.2481
Epoch [1/1], Step [14/63], Loss: 19.6169
Epoch [1/1], Step [15/63], Loss: 17.2331
Epoch [1/1], Step [16/63], Loss: 13.1091
Epoch [1/1], Step [17/63], Loss: 8.7620
Epoch [1/1], Step [18/63], Loss: 11.1857
Epoch [1/1], Step [19/63], Loss: 5.3491
Epoch [1/1], Step [20/63], Loss: 4.4548
Epoch [1/1], Step [21/63], Loss: 3.9092
Epoch [1/1], Step [22/63], Loss: 2.7961
Epoch [1/1], Step [23/63], Loss: 2.5295
Epoch [1/1], Step [24/63], Loss: 3.3422
Epoch [1/1], Step [25/63], Los

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Accuracy: 0.15293166642815922
Num Accuracy: 0.6280579131303046
Epoch [2/1], Step [1/63], Loss: 0.5931
Epoch [2/1], Step [2/63], Loss: 0.7842
Epoch [2/1], Step [3/63], Loss: 0.5912
Epoch [2/1], Step [4/63], Loss: 0.5022
Epoch [2/1], Step [5/63], Loss: 0.4862
Epoch [2/1], Step [6/63], Loss: 0.6970
Epoch [2/1], Step [7/63], Loss: 0.5329
Epoch [2/1], Step [8/63], Loss: 0.5751
Epoch [2/1], Step [9/63], Loss: 0.5537
Epoch [2/1], Step [10/63], Loss: 0.4612
Epoch [2/1], Step [11/63], Loss: 0.4783
Epoch [2/1], Step [12/63], Loss: 0.4080
Epoch [2/1], Step [13/63], Loss: 0.6936
Epoch [2/1], Step [14/63], Loss: 0.5996
Epoch [2/1], Step [15/63], Loss: 0.5001
Epoch [2/1], Step [16/63], Loss: 0.4534
Epoch [2/1], Step [17/63], Loss: 0.5137
Epoch [2/1], Step [18/63], Loss: 0.5149
Epoch [2/1], Step [19/63], Loss: 0.3783
Epoch [2/1], Step [20/63], Loss: 0.4799
Epoch [2/1], Step [21/63], Loss: 0.5594
Epoch [2/1], Step [22/63], Loss: 0.6630
Epoch [2/1], Step [23/63], Loss: 0.5351
Epoch [2/1], Step [24/63],