In [1]:
import os
import random

import scipy.io
import numpy as np 

from PIL import Image
from skimage import io, color
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.data as data_utils

import torchvision.transforms as transforms
import torchvision.models as models

import torch_geometric.utils as pyg_ut
from torch_geometric.nn import DenseSAGEConv, dense_diff_pool

import python_data.utils_augmentation as utils_augmentation


from torch.autograd import Variable
from dataloader import MnistBags
from chamferdist import ChamferDistance


In [3]:
import torch

batch_size = 2
num_classes = 11

loss_fn = torch.nn.BCELoss()

outputs_before_sigmoid = torch.randn(batch_size, num_classes)
sigmoid_outputs = torch.sigmoid(outputs_before_sigmoid)
target_classes = torch.randint(0, 2, (batch_size, num_classes), dtype=torch.float)  # randints in [0, 2).

loss = loss_fn(sigmoid_outputs, target_classes)

# alternatively, use BCE with logits, on outputs before sigmoid.
loss_fn_2 = torch.nn.BCEWithLogitsLoss()
loss2 = loss_fn_2(outputs_before_sigmoid, target_classes)
assert loss == loss2

AssertionError: 

In [11]:
loss

tensor(0.7547)

In [2]:
class ColonCancerBagsCross(data_utils.Dataset):
    def __init__(self, path, train_val_idxs=None, test_idxs=None, train=True, shuffle_bag=False, data_augmentation=False, loc_info=False):
        self.path = path
        self.train_val_idxs = train_val_idxs
        self.test_idxs = test_idxs
        self.train = train
        self.shuffle_bag = shuffle_bag
        self.data_augmentation = data_augmentation
        self.location_info = loc_info

        self.data_augmentation_img_transform = transforms.Compose([utils_augmentation.RandomHEStain(),
                                                                   utils_augmentation.HistoNormalize(),
                                                                   utils_augmentation.RandomRotate(),
                                                                   utils_augmentation.RandomVerticalFlip(),
                                                                   transforms.RandomHorizontalFlip(),
                                                                   transforms.ToTensor(),
                                                                   transforms.Normalize((0.5, 0.5, 0.5),
                                                                                        (0.5, 0.5, 0.5))])

        self.normalize_to_tensor_transform = transforms.Compose([
                                                                 utils_augmentation.HistoNormalize(),
                                                                 transforms.ToTensor(),
                                                                 transforms.Normalize((0.5, 0.5, 0.5),
                                                                                      (0.5, 0.5, 0.5))
                                                                    ])

        self.dir_list_train, self.dir_list_test = self.split_dir_list(self.path, self.train_val_idxs, self.test_idxs)
        if self.train:
            self.bag_list_train, self.labels_list_train = self.create_bags(self.dir_list_train)
        else:
            self.bag_list_test, self.labels_list_test = self.create_bags(self.dir_list_test)
            

    @staticmethod
    def split_dir_list(path, train_val_idxs, test_idxs):

        dirs = [x[0] for x in os.walk(path)]
        dirs.pop(0)
        dirs.sort()

        dir_list_train = [dirs[i] for i in train_val_idxs]
        dir_list_test = [dirs[i] for i in test_idxs]
  
        return dir_list_train, dir_list_test

    def create_bags(self, dir_list):
        bag_list = []
        labels_list = []
        for dir in dir_list:
            # Get image name
            img_name = dir.split('\\')[-1]

            # bmp to pillow
            img_dir = dir + '\\' + img_name + '.bmp'
            img = io.imread(img_dir)
            if img.shape[2] == 4:
                img = color.rgba2rgb(img)

            if self.location_info:
                xs = np.arange(0, 500)
                xs = np.asarray([xs for i in range(500)])
                ys = xs.transpose()
                img = np.dstack((img, xs, ys))
            
            cropped_cells = []
            labels = []
            # crop cells
            for label, cell_type in enumerate(['epithelial', 'fibroblast', 'inflammatory', 'others']):
                dir_cell = dir + '/' + img_name + '_' + cell_type + '.mat'
                with open(dir_cell, 'rb') as f:
                    mat_cell = scipy.io.loadmat(f)
                
                for (x,y) in mat_cell['detection']:
                    x = np.round(x)
                    y = np.round(y)

                    if self.data_augmentation:
                        x = x + np.round(np.random.normal(0, 3, 1))
                        y = y + np.round(np.random.normal(0, 3, 1))

                    if x < 13:
                        x_start = 0
                        x_end = 27
                    elif x > 500 - 14:
                        x_start = 500 - 27
                        x_end = 500
                    else:
                        x_start = x - 13
                        x_end = x + 14

                    if y < 13:
                        y_start = 0
                        y_end = 27
                    elif y > 500 - 14:
                        y_start = 500 - 27
                        y_end = 500
                    else:
                        y_start = y - 13
                        y_end = y + 14

                    cropped_cells.append(img[int(y_start):int(y_end), int(x_start):int(x_end)])
                    labels.append(label)

                # generate bag
                bag = cropped_cells

            # store single cell labels
            labels = np.array(labels)

            # shuffle
            if self.shuffle_bag:
                zip_bag_labels = list(zip(bag, labels))
                random.shuffle(zip_bag_labels)
                bag, labels = zip(*zip_bag_labels)

            # append every bag two times if training
            if self.train:
                for _ in [0,1]:
                    bag_list.append(bag)
                    labels_list.append(labels)
            else:
                bag_list.append(bag)
                labels_list.append(labels)

            # bag_list.append(bag)
            # labels_list.append(labels)

        return bag_list, labels_list

    def transform_and_data_augmentation(self, bag):
        if self.data_augmentation:
            img_transform = self.data_augmentation_img_transform
        else:
            img_transform = self.normalize_to_tensor_transform

        bag_tensors = []
        for img in bag:
            if self.location_info:
                bag_tensors.append(torch.cat(
                    (img_transform(img[:, :, :3]), 
                    torch.from_numpy(img[:, :, 3:].astype(float).transpose((2, 0, 1))).float(),
)))
            else:
                bag_tensors.append(img_transform(img))
        
        return torch.stack(bag_tensors)

    def __len__(self):
        if self.train:
            return len(self.labels_list_train)
        else:
            return len(self.labels_list_test)

    def __getitem__(self, index):
        if self.train:
#             print(self.dir_list_train[index])
            bag = self.bag_list_train[index]
            bag_lbls = np.array([1.0 if cat in self.labels_list_train[index] else 0.0 for cat in range(4)])
            label = [bag_lbls, self.labels_list_train[index]]
#             label = [max(self.labels_list_train[index]), self.labels_list_train[index]]
        else:
            bag = self.bag_list_test[index]
            bag_lbls = np.array([1.0 if cat in self.labels_list_test[index] else 0.0 for cat in range(4)])
            label = [bag_lbls, self.labels_list_test[index]]
#             label = [max(self.labels_list_test[index]), self.labels_list_test[index]]

        return self.transform_and_data_augmentation(bag), label


In [3]:
ds = ColonCancerBagsCross(path='C:\\Users\\ivank\\UJ\\Computer Vision\\Final Project\\MIL_wiht_GNN\\python_data\\ColonCancer\\', train_val_idxs=range(100), test_idxs=[], loc_info=False)

In [4]:
print("Image")
print(ds[0][0][0].shape)  # image ds[bag][value | lable][first-last]
print(ds[0][0][1].shape)
print(ds[0][0][19].shape)
print("dddddd")
print(ds[0][1][0], ds[0][1][0].shape)
print(ds[0][1][1], ds[0][1][1].shape)

len(ds)

Image
torch.Size([3, 27, 27])
torch.Size([3, 27, 27])
torch.Size([3, 27, 27])
dddddd
[0. 1. 1. 1.] (4,)
[1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 3 3] (20,)


200

In [5]:
L = []
L.extend(ds[0:5][0])

ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()

In [5]:
def load_train_test_val(ds):
    N = len(ds)
    train = []
    test = []
    val = []
    
    step = N * 2 // 3
    for i in range(0, 10):
        train.append((ds[i][0], ds[i][1][0]))

    for i in range(10, 15):
        val.append((ds[i][0], ds[i][1][0]))
    
    for i in range(15, 25):
        test.append((ds[i][0], ds[i][1][0]))
    
    return train, test, val


In [6]:
train_loader, test_loader, val_loader = load_train_test_val(ds)

In [7]:
train_loader[0][1].shape

(4,)

In [8]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

scaler = transforms.Scale((224, 224))
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])
to_tensor = transforms.ToTensor()

def get_vector(model, layer, image):

    # 2. Create a PyTorch Variable with the transformed image
    t_img = Variable(normalize(scaler(image)).unsqueeze(0))

    # 3. Create a vector of zeros that will hold our feature vector
    #    The 'avgpool' layer has an output size of 512
    my_embedding = torch.zeros(512)
    # 4. Define a function that will copy the output of a layer
    def copy_data(m, i, o):
        my_embedding.copy_(o.data.reshape(o.data.size(1)))
    # 5. Attach that function to our selected layer
    h = layer.register_forward_hook(copy_data)
    # 6. Run the model on our transformed image
    model(t_img)
    # 7. Detach our copy function from the layer
    h.remove()
    # 8. Return the feature vector
    return my_embedding


class GNN(torch.nn.Module):
    def __init__(self, in_channels, out_channels,
                 normalize=False, add_loop=False, lin=True):
        super(GNN, self).__init__()

        self.conv1 = DenseSAGEConv(in_channels, out_channels, normalize)
        self.bn1 = torch.nn.BatchNorm1d(out_channels)
 
        if lin is True:
            self.lin = torch.nn.Linear(out_channels,  out_channels)
        else:
            self.lin = None

    def bn(self, i, x):
        batch_size, num_nodes, num_channels = x.size()

        x = x.view(-1, num_channels)
        x = getattr(self, 'bn{}'.format(i))(x)
        x = x.view(batch_size, num_nodes, num_channels)
        return x

    def forward(self, x, adj):
        # batch_size, num_nodes, in_channels = x.size()

        x = self.bn(1, F.leaky_relu(self.conv1(x, adj), negative_slope=0.01))

        if self.lin is not None:
            x = F.leaky_relu(self.lin(x), negative_slope=0.01)

        return x


class Net(torch.nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.L = 512
        self.C = 2
        self.CS = 4 # number of classes
        
        self.n = 0.5 # 0 - no-edges; infinity - fully-conected graph
        self.n_step = 0.01 # inrement n if not enoght items
        self.num_adj_parm = 0.1 # this parameter is used to define min graph adjecment len. num_adj_parm * len(bag). 0 - disable
        
        self.feature_extractor_part1 = nn.Sequential(
            nn.Conv2d(3, 20, kernel_size=5),
            nn.ReLU(),
            nn.MaxPool2d(2, stride=2),
            nn.Conv2d(20, 50, kernel_size=5),
            nn.ReLU(),
            nn.MaxPool2d(2, stride=2)
        )

        self.feature_extractor_part2 = nn.Sequential(
            nn.Linear(50 * 4 * 4, self.L),
            nn.ReLU(),
        )

        self.gnn1_pool = GNN(self.L, self.C)
        self.gnn1_embed = GNN(self.L, self.L, lin=False)


        self.gnn3_embed = GNN(self.L, self.L, lin=False)

        input_layers = int(self.L)
        hidden_layers = int(self.L / 2)
        self.lin1 = torch.nn.Linear(input_layers, hidden_layers)
        self.lin2 = torch.nn.Linear(hidden_layers, self.CS)
        
        # Load the pretrained model
        self.feature_model = models.resnet18(pretrained=True)
        # Use the model object to select the desired layer
        self.feature_layer = self.feature_model._modules.get('avgpool')
        # Set model to evaluation mode
        self.feature_model.eval()   

    def forward(self, x):
        x = x.squeeze(0) # [9, 1, 28, 28]

        H = torch.stack([get_vector(self.feature_model, self.feature_layer, img) for img in x]).cuda()

        x, E_idx = self.convert_bag_to_graph_(H, self.n) # nodes [9, 500], E_idx [2, A]
        adj = pyg_ut.to_dense_adj(E_idx.cuda(), max_num_nodes=x.shape[0])

        s = self.gnn1_pool(x, adj)
        x = self.gnn1_embed(x, adj)
        
        x, adj, l1, e1 = dense_diff_pool(x, adj, s)
        
        x = self.gnn3_embed(x, adj)

        x = x.mean(dim=1)
        x = F.relu(self.lin1(x))
        x = self.lin2(x)
        print(F.log_softmax(x, dim=-1).shape, F.log_softmax(x, dim=-1))
        return F.log_softmax(x, dim=-1), l1 , e1 
    
    # GNN methods
    def convert_bag_to_graph_(self, bag, N):
        edge_index = []
        chamferDist = ChamferDistance()
        cos = nn.CosineSimilarity(dim=1, eps=1e-6)
        for cur_i, cur_node in enumerate(bag):
            for alt_i, alt_node in enumerate(bag):
                # print(cos(cur_node.unsqueeze(0), alt_node.unsqueeze(0)))
                # if cur_i != alt_i and self.euclidean_distance_(cur_node, alt_node) < N:
                if cur_i != alt_i and cos(cur_node.unsqueeze(0), alt_node.unsqueeze(0)) > N:
                # if cur_i != alt_i and chamferDist(cur_node.view(1, 1, -1), alt_node.view(1, 1, -1)) < N:
                    edge_index.append(torch.tensor([cur_i, alt_i]).cuda())
                    
        if len(edge_index) < self.num_adj_parm * bag.shape[0]:
            print(f"INFO: get number of adjecment {len(edge_index)}, min len is {self.num_adj_parm * bag.shape[0]}")
            return self.convert_bag_to_graph_(bag, N = (N - self.n_step))
        
        return bag, torch.stack(edge_index).transpose(1, 0)


    def euclidean_distance_(self, X, Y):
        return torch.sqrt(torch.dot(X, X) - 2 * torch.dot(X, Y) + torch.dot(Y, Y))





In [9]:
torch.cuda.is_available()

True

In [10]:
model = Net().to(device)

optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
criterion = nn.BCELoss()

In [12]:


def train(epoch):
    model.train()
    loss_all = 0
    correct = 0
    
    for batch_idx, (data, target) in enumerate(train_loader):
        print(batch_idx, data.shape)
        target = torch.tensor(target)
        if torch.cuda.is_available():
            data, target = data.cuda(), target.cuda()
            
        optimizer.zero_grad()
        output, l, _ = model(data)
        #pred = model(data)[0].max(dim=1)[1]
        
        print(output.squeeze())
        print(target.float())
        loss = criterion(output.squeeze(), target.float()) 
        loss.backward()
        loss_all += target.size(0) * loss.item()
        
        optimizer.step()
        
        #correct += pred.eq(target.view(-1)).sum().item()
    return loss_all / len(train_loader), 0


@torch.no_grad()
def test(loader):
    model.eval()
    correct = 0

    for batch_idx, (data, label) in enumerate(loader):
        data = data.to(device)
        bag_label = label[0]
        if torch.cuda.is_available():
            data, bag_label = data.cuda(), bag_label.cuda()
            
        if bag_label:
            target = torch.tensor([1.], dtype=torch.long).cuda()
        else:
            target = torch.tensor([0.], dtype=torch.long).cuda()
            
        pred = model(data)[0].max(dim=1)[1]
        
        correct += pred.eq(target.view(-1)).sum().item()
    return correct / len(loader.dataset)




best_val_acc = test_acc = 0
for epoch in range(1, 300):
    train_loss, train_acc = train(epoch)
    val_acc = test(val_loader)
    if val_acc > best_val_acc:
        test_acc = test(test_loader)
        best_val_acc = val_acc
    print('Epoch: {:03d}, Train Loss: {:.7f}, Train acc: {:.7f}, '
          'Val Acc: {:.7f}, Test Acc: {:.7f}'.format(epoch, train_loss, train_acc,
                                                     val_acc, test_acc))

0 torch.Size([20, 3, 27, 27])
torch.Size([1, 4]) tensor([[-1.3215, -1.4016, -1.4178, -1.4073]], device='cuda:0',
       grad_fn=<LogSoftmaxBackward>)
tensor([-1.3215, -1.4016, -1.4178, -1.4073], device='cuda:0',
       grad_fn=<SqueezeBackward0>)
tensor([0., 1., 1., 1.], device='cuda:0')


RuntimeError: CUDA error: CUBLAS_STATUS_ALLOC_FAILED when calling `cublasCreate(handle)`

In [18]:
a = torch.tensor([-1.3215, -1.4016, -1.4178, -1.4073])

RuntimeError: CUDA error: device-side assert triggered