In [14]:
from __future__ import print_function, division

import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
from torch.autograd import Variable
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt
import time
import os
import copy
import pandas as pd
import cv2
import torch.utils.data as data
from glob import glob
from random import shuffle
from PIL import Image
import random
from tqdm import tqdm
from pred import predprob
from skimage import io 

In [15]:
import sys

# Local checkout of Synchronized-BatchNorm-PyTorch; it must be on sys.path
# before `sync_batchnorm` is imported in a later cell.
SYNC_BN_DIR = '/home/yuyue/yuyue/Synchronized-BatchNorm-PyTorch-master'

# `paths` is an alias of sys.path (not a list of image files). It is kept
# because other cells may read this global.
paths = sys.path
# Guarded so that re-running the cell does not stack duplicate entries.
if SYNC_BN_DIR not in sys.path:
    sys.path.append(SYNC_BN_DIR)

In [16]:
# Expose only GPUs 2 and 3 to this process. This is set before the first
# torch.cuda call in the notebook (presumably so that it takes effect).
os.environ["CUDA_VISIBLE_DEVICES"] = "2,3"

In [17]:
# Every split is resized to the same spatial size and normalised with the
# same per-channel mean / std; only 'train' adds random augmentation.
_INPUT_SIZE = (512, 512)
_CHANNEL_MEAN = [0.485, 0.456, 0.406]
_CHANNEL_STD = [0.229, 0.224, 0.225]


def _build_transform(augment):
    """Return the preprocessing pipeline; `augment` inserts random flips and colour jitter."""
    steps = [transforms.Resize(_INPUT_SIZE)]
    if augment:
        steps.append(transforms.RandomHorizontalFlip())   # horizontal flip
        steps.append(transforms.RandomVerticalFlip())     # vertical flip
        steps.append(transforms.ColorJitter(0.2, 0.2, 0.2, 0.04))
    steps.append(transforms.ToTensor())
    steps.append(transforms.Normalize(_CHANNEL_MEAN, _CHANNEL_STD))  # mean, std
    return transforms.Compose(steps)


data_transforms = {
    'train': _build_transform(augment=True),
    'val': _build_transform(augment=False),
    'test': _build_transform(augment=False),
}

In [18]:
# Label tables for the training and validation splits (columns shown by
# df.head(): slide, rid, y).
df_train = pd.read_csv('/data/Pathology/SPIE/training_set/breastpathq/datasets/train_labels.csv')
df_val = pd.read_csv('/data/Pathology/SPIE/training_set/breastpathq/datasets/val_labels.csv')
# DataFrame.append was removed in pandas 2.0. pd.concat with default arguments
# stacks the rows and keeps each frame's original index, exactly as append did.
df = pd.concat([df_train, df_val])

In [19]:
# Peek at the first rows of the combined label table.
df.head()

Unnamed: 0,slide,rid,y
0,99861,1,0.4
1,99861,2,0.4
2,99861,3,0.15
3,99861,4,0.1
4,99861,5,0.07


In [20]:
def make_dataset(paths, extension, df):
    """Pair every image path with its label and return the pairs in random order.

    File names are expected to look like ``<slide>_<rid>.<extension>``; the
    two integers are looked up in the ``slide`` / ``rid`` columns of ``df``
    and the matching ``y`` value becomes the target.

    Args:
        paths: iterable of file paths (strings).
        extension: file extension without the leading dot, e.g. ``'tif'``.
        df: DataFrame with at least the columns ``slide``, ``rid`` and ``y``.

    Returns:
        A list of ``[path, float(score)]`` entries, shuffled in place with
        ``random.shuffle``.

    Raises:
        IndexError: if a file has no matching ``(slide, rid)`` row in ``df``.
        ValueError: if the slide / rid parts of a file name are not integers.
    """
    suffix = '.' + extension

    # Build the lookup once instead of scanning the whole frame per file.
    # setdefault keeps the FIRST row for a duplicated key, which is what the
    # previous `.tolist()[0]` selected.
    score_by_key = {}
    for slide, rid, y in zip(df['slide'], df['rid'], df['y']):
        score_by_key.setdefault((slide, rid), y)

    images = []
    for p in paths:
        # Match the real file suffix; a substring test would also accept
        # e.g. "/dir.tif/1_2.png".
        if not p.endswith(suffix):
            continue
        parts = os.path.basename(p).split('_')
        slide = int(parts[0])
        rid = int(parts[1].split('.')[0])
        score = score_by_key.get((slide, rid))
        if score is None:
            raise IndexError(
                "no label for slide=%d rid=%d (file %s)" % (slide, rid, p))
        images.append([p, float(score)])
    shuffle(images)
    return images

In [21]:
class SPIE_dataset(data.Dataset):
    """Dataset of (image, score) pairs built from a list of image paths.

    Args:
        dirs: iterable of image file paths (despite the name, these are files).
        loader: callable mapping a path to an array accepted by
            ``Image.fromarray``.
        extension: file extension without the dot, forwarded to
            ``make_dataset``.
        transform: optional callable applied to the PIL image.
        train: when True ``__getitem__`` returns ``(sample, target)``;
            otherwise it also returns the file path.
        labels: optional label DataFrame forwarded to ``make_dataset``. When
            omitted, the notebook-level ``df`` is used, as before.

    Raises:
        RuntimeError: if no labeled file is found.
    """

    def __init__(self, dirs, loader, extension, transform=None, train=True, labels=None):
        # Fall back to the notebook-level frame so existing calls keep working.
        label_frame = df if labels is None else labels
        self.samples = make_dataset(dirs, extension, label_frame)
        if len(self.samples) == 0:
            # The one-element tuple keeps the message valid when `dirs` is
            # itself a tuple.
            raise RuntimeError("no files in %s" % (dirs,))
        self.loader = loader
        self.transform = transform
        self.train = train

    def __getitem__(self, index):
        path, target = self.samples[index]
        sample = Image.fromarray(self.loader(path))
        if self.transform:
            sample = self.transform(sample)
        if self.train:
            return sample, target
        return sample, target, path

    def __len__(self):
        return len(self.samples)

In [22]:
# Gather the .tif files of the training and validation splits.
DATASET_ROOT = "/data/Pathology/SPIE/training_set/breastpathq/datasets"
train_samples = glob(DATASET_ROOT + "/train/*.tif")
val_samples = glob(DATASET_ROOT + "/validation/*.tif")

In [10]:
# One dataset per split; both return (sample, target) because train=True is
# the SPIE_dataset default.
train_dataset = SPIE_dataset(train_samples, io.imread, 'tif', transform=data_transforms['train'])
val_dataset = SPIE_dataset(val_samples, io.imread, 'tif', transform=data_transforms['val'])
image_datasets = {'train': train_dataset, 'val': val_dataset}

dataloaders = {
    "train": torch.utils.data.DataLoader(
        image_datasets["train"], batch_size=32, shuffle=True, num_workers=16),
    # Validation is NOT shuffled: the training loss contains a term computed
    # across the batch, so with shuffling the validation loss changes with the
    # random batch composition and is not comparable from epoch to epoch.
    "val": torch.utils.data.DataLoader(
        image_datasets["val"], batch_size=8, shuffle=False, num_workers=4),
}
dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'val']}

use_gpu = torch.cuda.is_available()
print(use_gpu)

True


In [11]:
def save_model(model, epoch, save_dir="/data/yuyue/SPIE/model_weight/",
               filename="densenet169_512_0625_cosine_stage2.pth"):
    """Write the model's state dict to ``save_dir/filename``.

    Args:
        model: an ``nn.Module``. If it exposes a ``.module`` attribute (as a
            DataParallel wrapper does), the wrapped module's state dict is
            saved so the keys carry no ``module.`` prefix.
        epoch: accepted for call compatibility; not used, so each call
            overwrites the same file.
        save_dir: destination directory, created if missing.
        filename: name of the checkpoint file inside ``save_dir``.
    """
    # exist_ok avoids the check-then-create race of os.path.exists + makedirs.
    os.makedirs(save_dir, exist_ok=True)
    # A plain (unwrapped) model has no `.module`; fall back to the model itself.
    unwrapped = getattr(model, 'module', model)
    pth = os.path.join(save_dir, filename)
    torch.save(unwrapped.state_dict(), pth)

def train_model(model, criterion1,criterion2, optimizer, scheduler, num_epochs=25):
    """Train and validate ``model`` and checkpoint the best validation epoch.

    Reads the notebook-level ``dataloaders``, ``dataset_sizes`` and
    ``use_gpu``, and calls the notebook-level ``predprob`` and ``save_model``.

    Per batch, the two-logit output goes through a softmax over the class
    dimension and the loss is ``criterion1(probs, [y, 1 - y]) + 10 *
    criterion2(...)``, where the second term compares softmax-over-the-batch
    distributions of the column-0 prediction and the labels.

    Args:
        model: network producing two outputs per sample; wrapped in
            ``nn.DataParallel`` unless it already is one.
        criterion1: loss applied to ``(probs, target_distribution)``.
        criterion2: loss applied to ``(log batch distribution of predictions,
            batch distribution of labels)``.
        optimizer: optimizer over the model parameters.
        scheduler: learning-rate scheduler, stepped once per epoch.
        num_epochs: number of epochs to run.

    Returns:
        The DataParallel-wrapped model in its final state. The best weights
        are written through ``save_model`` and are not reloaded here.
    """
    since = time.time()
    if not isinstance(model, nn.DataParallel):
        model = nn.DataParallel(model)
    best_loss = float('inf')
    best_epoch = 0

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and a validation phase.
        for phase in ['train', 'val']:
            is_train = phase == 'train'
            model.train(is_train)

            running_loss = 0.0
            epoch_labels = []
            epoch_outputs = []

            for inputs, labels in dataloaders[phase]:
                if use_gpu:
                    inputs = inputs.cuda()
                    labels = labels.cuda()

                # Batch-level label distribution: softmax in the labels'
                # incoming dtype, then cast, as before.
                rank_target = torch.softmax(labels, dim=0).float()
                labels = labels.float()
                # Two-column regression target [y, 1 - y], created on the same
                # device as the labels so CPU-only runs work too.
                target_dist = torch.stack((labels, 1 - labels), dim=1)

                optimizer.zero_grad()

                # No autograd graph is built during validation.
                with torch.set_grad_enabled(is_train):
                    probs = torch.softmax(model(inputs), dim=1)
                    loss1 = criterion1(probs, target_dist)
                    # log_softmax is the numerically stable form of
                    # log(softmax(.)) over the batch dimension.
                    log_rank_pred = torch.log_softmax(probs[:, 0], dim=0)
                    loss2 = criterion2(log_rank_pred, rank_target)
                    loss = loss1 + 10 * loss2

                    if is_train:
                        loss.backward()
                        optimizer.step()

                # One device-to-host transfer per batch instead of one per sample.
                epoch_labels.extend(labels.detach().cpu().tolist())
                epoch_outputs.extend(probs[:, 0].detach().cpu().tolist())
                running_loss += loss.item() * inputs.size(0)

            if is_train:
                # Step the schedule AFTER the epoch's optimizer steps; stepping
                # first skips the initial learning-rate value (PyTorch >= 1.1).
                scheduler.step()

            p_k = predprob(np.array(epoch_labels), np.array(epoch_outputs))
            epoch_loss = running_loss / dataset_sizes[phase]

            print('{} Loss: {:.4f}'.format(
                phase, epoch_loss))
            print('p_k = ',p_k)

            if phase == 'val' and epoch_loss < best_loss:
                best_loss = epoch_loss
                best_epoch = epoch
                save_model(model, epoch)

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    # The epoch index is an integer; '{:4f}' printed it as e.g. '3.000000'.
    print('Best val epoch: {:d}'.format(best_epoch))

    return model

In [12]:
from torch.nn import DataParallel
from densenet import densenet169
from sync_batchnorm import convert_model

# DenseNet-169 backbone with the classifier replaced by a 2-unit linear head.
model_ft = densenet169(pretrained=True)
num_ftrs = model_ft.classifier.in_features
model_ft.classifier = nn.Linear(num_ftrs, 2)
# convert_model comes from the Synchronized-BatchNorm-PyTorch checkout added
# to sys.path; presumably it swaps BatchNorm layers for synchronized ones.
model_ft = convert_model(model_ft)

# Warm-start from the earlier checkpoint. map_location='cpu' makes the load
# independent of the GPU the tensors were saved from; load_state_dict then
# copies them into the model, which is moved to the GPU below.
model_ft.load_state_dict(
    torch.load("/data/yuyue/SPIE/model_weight/densenet169_512_0625_cosine.pth",
               map_location='cpu'))

if use_gpu:
    model_ft = model_ft.cuda()

criterion1 = nn.MSELoss()
# 'batchmean' is the reduction the PyTorch docs recommend for KLDivLoss. For
# the 1-D per-batch input this notebook feeds it, it equals the old default
# ('mean'), so it only silences the deprecation warning.
criterion2 = nn.KLDivLoss(reduction='batchmean')

# All parameters are optimized. Remember to adjust the learning rate when the
# training stage changes.
optimizer_ft = optim.Adam(model_ft.parameters(), lr=1e-5,weight_decay=1e-4)
# Cosine annealing from the base LR down to eta_min over T_max=10 scheduler steps.
exp_lr_scheduler = lr_scheduler.CosineAnnealingLR(optimizer_ft, T_max=10, eta_min=0, last_epoch=-1)

  nn.init.kaiming_normal(m.weight.data)


In [13]:
# Launch the fine-tuning run; the returned model is the DataParallel-wrapped network.
model_ft = train_model(
    model=model_ft,
    criterion1=criterion1,
    criterion2=criterion2,
    optimizer=optimizer_ft,
    scheduler=exp_lr_scheduler,
    num_epochs=30,
)

Epoch 0/29
----------




train Loss: 0.0061
p_k =  0.9555336185406734
val Loss: 0.0289
p_k =  0.8972234520270698
Epoch 1/29
----------
train Loss: 0.0061
p_k =  0.9583960852233417
val Loss: 0.0439
p_k =  0.8701536904686611
Epoch 2/29
----------
train Loss: 0.0052
p_k =  0.9620731498781829
val Loss: 0.0373
p_k =  0.8786288027322751
Epoch 3/29
----------
train Loss: 0.0054
p_k =  0.9606673114423365
val Loss: 0.0260
p_k =  0.9019669850104357
Epoch 4/29
----------
train Loss: 0.0052
p_k =  0.9610589224559207
val Loss: 0.0332
p_k =  0.890203023211688
Epoch 5/29
----------
train Loss: 0.0050
p_k =  0.9622919970945876
val Loss: 0.0368
p_k =  0.8836885712478655
Epoch 6/29
----------
train Loss: 0.0050
p_k =  0.9633827033827975
val Loss: 0.0359
p_k =  0.8867244323572196
Epoch 7/29
----------
train Loss: 0.0051
p_k =  0.9635999818019518
val Loss: 0.0362
p_k =  0.883372335715641
Epoch 8/29
----------
train Loss: 0.0053
p_k =  0.963020703417311
val Loss: 0.0378
p_k =  0.8797672506482828
Epoch 9/29
----------
train Loss: 0

In [14]:
def infer(model, use_gpu=True):
    """Run ``model`` over the notebook-level ``dataloaders['val']``.

    Args:
        model: network to evaluate; it is switched to eval mode.
        use_gpu: move each input batch to the GPU before the forward pass.

    Returns:
        ``(outputs, labels)`` as NumPy arrays: ``outputs`` has one row per
        sample holding the element-wise sigmoid of the model output, and
        ``labels`` holds the corresponding targets.
    """
    model.eval()
    running_labels = []
    running_outputs = []
    # Inference only: skip building autograd graphs.
    with torch.no_grad():
        for inputs, labels in dataloaders['val']:
            if use_gpu:
                inputs = inputs.cuda()
            # NOTE(review): train_model applies a softmax over the two outputs,
            # while this applies an element-wise sigmoid. Confirm which one
            # matches how the loaded checkpoint was trained.
            probs = torch.sigmoid(model(inputs))
            running_outputs.extend(probs.cpu().numpy())
            running_labels.extend(labels.cpu().numpy())

    return np.array(running_outputs), np.array(running_labels)

In [15]:
from densenet import densenet169
from sync_batchnorm import convert_model

# Rebuild the same architecture as for training: DenseNet-169 with a 2-unit
# head, passed through convert_model.
model_infer = densenet169(pretrained=False)
num_ftrs = model_infer.classifier.in_features
model_infer.classifier = nn.Linear(num_ftrs, 2)
model_infer = convert_model(model_infer)
# NOTE(review): this restores densenet169_512_0625_cosine.pth, not the
# *_stage2.pth file that save_model writes by default. Confirm that this is
# the checkpoint meant to be evaluated.
# map_location='cpu' keeps the load independent of the saving device.
model_infer.load_state_dict(
    torch.load('/data/yuyue/SPIE/model_weight/densenet169_512_0625_cosine.pth',
               map_location='cpu'))
print("model loaded")
model_infer.eval()
# Only move to the GPU when one is available; the previous unconditional
# .cuda() call failed on CPU-only machines before this check was reached.
use_gpu = torch.cuda.is_available()
if use_gpu:
    model_infer = model_infer.cuda()
# Alias kept for backward compatibility with the earlier `model` global.
model = model_infer

model loaded


In [16]:
# Score the validation set; use_gpu is left at infer()'s default (True).
outputs, labels = infer(model_infer)

In [17]:
# Display the per-sample outputs returned by infer(): one row per sample, two columns.
outputs

array([[0.08737666, 0.9717301 ],
       [0.5018973 , 0.5456253 ],
       [0.46613047, 0.57655185],
       [0.43398836, 0.5995543 ],
       [0.26693392, 0.7436324 ],
       [0.59045875, 0.4616059 ],
       [0.39582422, 0.6377699 ],
       [0.58409834, 0.46619937],
       [0.5177472 , 0.5235158 ],
       [0.4744006 , 0.57134223],
       [0.4526722 , 0.5737431 ],
       [0.29722965, 0.7211711 ],
       [0.2832683 , 0.75427204],
       [0.20744741, 0.81327325],
       [0.31640187, 0.7140181 ],
       [0.45058283, 0.5920787 ],
       [0.49940228, 0.54769415],
       [0.6863106 , 0.37225145],
       [0.46591762, 0.57458323],
       [0.7555738 , 0.29080614],
       [0.8396333 , 0.2084296 ],
       [0.47425613, 0.5688702 ],
       [0.5310754 , 0.52051204],
       [0.20148595, 0.80494434],
       [0.46299163, 0.57899636],
       [0.29405844, 0.74812305],
       [0.65638644, 0.39570048],
       [0.43461722, 0.63963205],
       [0.5782485 , 0.46150598],
       [0.59195423, 0.4629021 ],
       [0.

In [18]:
# Display the ground-truth scores returned by infer(), in the same order as `outputs`.
labels

array([0.  , 0.4 , 0.5 , 0.35, 0.02, 0.7 , 0.35, 0.6 , 0.55, 0.5 , 0.5 ,
       0.05, 0.  , 0.  , 0.2 , 0.2 , 0.7 , 0.8 , 0.15, 0.8 , 0.95, 0.4 ,
       0.7 , 0.1 , 0.4 , 0.2 , 0.8 , 0.45, 0.7 , 0.5 , 0.5 , 0.  , 0.05,
       0.4 , 0.5 , 0.1 , 0.5 , 0.4 , 0.  , 0.9 , 0.9 , 0.2 , 0.05, 0.55,
       0.6 , 0.02, 0.8 , 0.  , 0.2 , 0.9 , 0.7 , 0.2 , 0.  , 0.7 , 0.  ,
       0.  , 0.2 , 0.65, 0.8 , 0.15, 0.1 , 0.  , 0.  , 0.  , 0.03, 0.  ,
       0.9 , 0.7 , 0.5 , 0.85, 0.6 , 0.35, 0.2 , 0.65, 0.6 , 0.03, 0.8 ,
       0.6 , 0.05, 0.  , 0.6 , 0.15, 0.5 , 0.05, 0.9 , 0.15, 0.95, 0.  ,
       0.15, 0.9 , 0.4 , 0.2 , 0.03, 0.15, 0.1 , 0.  , 0.8 , 0.25, 0.02,
       0.  , 0.5 , 0.5 , 0.5 , 0.4 , 0.15, 0.  , 0.15, 0.25, 0.15, 0.25,
       0.6 , 0.  , 0.1 , 0.  , 0.95, 0.2 , 0.  , 0.65, 0.7 , 0.4 , 0.03,
       0.4 , 0.1 , 0.  , 0.3 , 0.05, 0.3 , 0.2 , 0.15, 0.35, 0.7 , 1.  ,
       0.  , 0.5 , 0.5 , 0.07, 0.55, 0.2 , 0.5 , 0.4 , 0.  , 0.5 , 0.02,
       0.  , 0.5 , 0.  , 0.65, 0.1 , 0.2 , 0.7 , 0.

In [19]:
from pred import predprob

In [20]:
# predprob is imported from the project-local `pred` module; presumably it is
# the prediction-probability (p_k) ranking metric. TODO confirm.
# Only column 0 is scored, the same column train_model collects per sample.
predprob(labels,outputs[:,0])

0.9193599392827778