In [1]:
#Training 
#Custom Batch Generator:
import pandas as pd
import numpy as np 
from tqdm import tqdm
import torchvision
from glob import glob
import os
import matplotlib.pyplot as plt
from torchvision import transforms
from torchvision import models
import torch
from torch.autograd import Variable
import torch.nn as nn
from torch.optim import lr_scheduler
from torch import optim
from torchvision.utils import make_grid
import time
from torch.utils.data import Dataset
from PIL import Image

import copy
%matplotlib inline

In [2]:
df = pd.read_csv("/home/single1/BACKUP/SamHUyen/mammo/huyen/csv_singleview.csv")
#path of image
df["path"] = "/home/single1/BACKUP/SamHUyen/multi_view_mammo_classification/crop-images/crop_images/" + df["image_id"] + ".png"

In [3]:
#CROP IMAGE
#Data processing
df = df.drop([2438, 18074,5642,7422,9305,9310,9824,10483,11724,13368])

In [4]:
#Dataloader
class MultiLabelMammo(Dataset):
    
    def __init__(self, dataframe, transform = None):
        
        self.dataframe = dataframe
        self.imagespath = dataframe.path.values
        self.transform = transform
        self.labels = dataframe.multilabel.values.tolist()
        
        
    def __len__(self):
        return len(self.dataframe)
    
    
    def __getitem__(self, index):
        
        image = Image.open(self.imagespath[index]).convert('RGB')
        label = self.labels[index]
        label = label.strip('][').split(', ')
        label = np.array(label).astype('float')
        sample = {'image': image, 'label': label}
        if self.transform:
            image = self.transform(sample['image'])
            sample = {'image': image, 'label': label}
        
        return sample

In [5]:
#transform images size from (3518, 2800, 3) to (1759,1400,3)
tfms = transforms.Compose([transforms.Resize((512, 512)),
                           transforms.ToTensor()])

In [6]:
#fold: ['train', 'valid', 'holdout']
train_dl = MultiLabelMammo(df[df["fold"]=="train"], transform = tfms) 
val_dl = MultiLabelMammo(df[df["fold"]=="valid"], transform = tfms)
# holdout_dl = MultiLabelMammo(df[df["fold"]=="valid"], transform = tfms)

In [7]:
train_dataloader = torch.utils.data.DataLoader(train_dl, shuffle = True, batch_size = 4, num_workers = 3)
val_dataloader = torch.utils.data.DataLoader(val_dl, shuffle = True, batch_size = 1, num_workers = 3)

In [8]:
device = torch.device("cuda" if torch.cuda.is_available() 
                                  else "cpu")

In [9]:
res_mod = models.resnet34(pretrained=True)

num_ftrs = res_mod.fc.in_features
res_mod.fc = nn.Linear(num_ftrs, 10)


In [10]:
for name, child in res_mod.named_children():
    print(name)

conv1
bn1
relu
maxpool
layer1
layer2
layer3
layer4
avgpool
fc


In [11]:
res_mod = res_mod.to(device)
criterion = nn.CrossEntropyLoss()

# Observe that all parameters are being optimized
optimizer_ft = optim.SGD(res_mod.parameters(), lr=0.001, momentum=0.9)

# Decay LR by a factor of 0.1 every 10 epochs
# exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=20, gamma=0.1)

In [62]:
def train_model(model, criterion, optimizer, num_epochs=10):
    since = time.time()

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            if phase == 'train':
                # scheduler.step()
                model.train()  # Set model to training mode
            else:
                model.eval()   # Set model to evaluate mode

            current_loss = 0.0
            current_corrects = 0

            # Here's where the training happens
            print('Iterating through data...')


            for i, data in enumerate(tqdm(globals()[f'{phase}_dataloader'])):
                
                inputs = data['image'].to(device)
                labels = data['label'].to(device)
                labels = labels.long()
                print("\n labels ", labels)
                #labels = torch.max(labels, 1)[1]

                # We need to zero the gradients, don't forget it
                optimizer.zero_grad()

                # Time to carry out the forward training poss
                # We only need to log the loss stats if we are in training phase
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs)
                    print("outputs: ", outputs)
                    #_, preds = torch.max(outputs, 1)
                    preds = convert(outputs)
                    loss = criterion(preds, labels) #outputs

                    # backward + optimize only if in training phase
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                # We want variables to hold the loss statistics
                current_loss += loss.item() * inputs.size(0)
                current_corrects += torch.sum(preds == labels.data)
                print("\n preds ", preds)
                print("\n true values ", labels.data)
                print(" current_corrects ", current_corrects)

            epoch_loss = current_loss / (13488 if phase == 'train' else 2888)
            epoch_acc = current_corrects.double() / (13488 if phase == 'train' else 2888)

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(
                phase, epoch_loss, epoch_acc))

            # Make a copy of the model if the accuracy on the validation set has improved
            if phase == 'val':
                if epoch_acc > best_acc:
                    best_acc = epoch_acc
                    best_model_wts = copy.deepcopy(model.state_dict())
                    torch.save(best_model_wts, '/home/single1/BACKUP/SamHUyen/mammo/sam/singleview_sam/model/BEST_redmodTest.pt')            
            
                torch.save(model.state_dict(), '/home/single1/BACKUP/SamHUyen/mammo/sam/singleview_sam/model/redmod_test.pt')

        print()

    time_since = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_since // 60, time_since % 60))
    print('Best val Acc: {:4f}'.format(best_acc))

    # Now we'll load in the best model weights and return it
    # model.load_state_dict(best_model_wts)
    

In [13]:
from PIL import ImageFile
ImageFile.LOAD_TRUNCATED_IMAGES = True
train_model(res_mod, criterion, optimizer_ft, num_epochs=20)

  0%|          | 0/3370 [00:00<?, ?it/s]Epoch 0/19
----------
Iterating through data...
100%|██████████| 3370/3370 [02:45<00:00, 20.40it/s]
  0%|          | 0/2888 [00:00<?, ?it/s]train Loss: 0.8830 Acc: 0.7055
Iterating through data...
100%|██████████| 2888/2888 [00:29<00:00, 98.42it/s]
  0%|          | 0/3370 [00:00<?, ?it/s]val Loss: 0.8392 Acc: 0.7497

Epoch 1/19
----------
Iterating through data...
100%|██████████| 3370/3370 [02:45<00:00, 20.35it/s]
  0%|          | 0/2888 [00:00<?, ?it/s]train Loss: 0.7874 Acc: 0.7522
Iterating through data...
100%|██████████| 2888/2888 [00:28<00:00, 101.36it/s]
val Loss: 0.7849 Acc: 0.7628
  0%|          | 0/3370 [00:00<?, ?it/s]
Epoch 2/19
----------
Iterating through data...
100%|██████████| 3370/3370 [02:46<00:00, 20.26it/s]
  0%|          | 0/2888 [00:00<?, ?it/s]train Loss: 0.7540 Acc: 0.7591
Iterating through data...
100%|██████████| 2888/2888 [00:28<00:00, 100.03it/s]
val Loss: 0.7394 Acc: 0.7666
  0%|          | 0/3370 [00:00<?, ?it/s]
E

In [64]:
from PIL import ImageFile
ImageFile.LOAD_TRUNCATED_IMAGES = True
train_model(res_mod, criterion, optimizer_ft, num_epochs=20)

RuntimeError: CUDA out of memory. Tried to allocate 20.00 MiB (GPU 0; 10.75 GiB total capacity; 1.17 GiB already allocated; 31.50 MiB free; 1.19 GiB reserved in total by PyTorch)

In [20]:
 third_tensor = torch.cat((torch.tensor([0,1,0]), torch.tensor([1,1,0])), 0)
 third_tensor

tensor([0, 1, 0, 1, 1, 0])

In [26]:
labels = torch.tensor([[0, 0, 1, 0, 0, 0, 0, 0, 1, 0],
        [0, 1, 0, 0, 0, 0, 0, 0, 0, 1],
        [0, 0, 1, 0, 0, 0, 0, 0, 1, 0],
        [0, 0, 1, 0, 0, 0, 0, 0, 0, 1]], device='cuda:0')

In [27]:
labels

tensor([[0, 0, 1, 0, 0, 0, 0, 0, 1, 0],
        [0, 1, 0, 0, 0, 0, 0, 0, 0, 1],
        [0, 0, 1, 0, 0, 0, 0, 0, 1, 0],
        [0, 0, 1, 0, 0, 0, 0, 0, 0, 1]], device='cuda:0')

In [42]:
outputs=  torch.tensor([[ 3.3211, -5.3717, 12.0751,  4.2464,  5.3956, -0.3999, -4.8750, -5.1574,
         -5.2155, -5.0423],
        [ 2.3364, 13.5524,  6.9783,  2.3466,  0.7610, -4.3412, -5.7815, -5.6116,
         -5.8110, -5.7357],
        [ 2.4455, 12.6521,  6.3002,  2.1030,  0.9366, -3.8087, -5.5127, -5.3128,
         -5.4292, -5.4021],
        [ 2.6582, 10.8746,  4.4139,  4.2448,  0.6733, -3.1573, -5.2087, -5.1427,
         -5.2411, -5.2520]], device='cuda:0')

In [51]:
outputs[:,6:]


tensor([[-4.8750, -5.1574, -5.2155, -5.0423],
        [-5.7815, -5.6116, -5.8110, -5.7357],
        [-5.5127, -5.3128, -5.4292, -5.4021],
        [-5.2087, -5.1427, -5.2411, -5.2520]], device='cuda:0')

In [59]:
def convert(outputs):
    birads = torch.max(outputs[:,:6],1)[1] #get maximmum indices of birad
    density = torch.max(outputs[:,6:],1)[1]
    result = torch.zeros((4,10))
    for ind in range(0,4):
        result[ind,birads[ind]] = 1
        result[ind,6 + density[ind]] = 1
    return result
#print(birads)
#print(density)
#print(result)

In [65]:
len(outputs)

4