In [1]:
from __future__ import print_function, division

import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
from torch.autograd import Variable
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt
import time
import os
import copy
import pandas as pd
import cv2
import torch.utils.data as data
from glob import glob
from random import shuffle
from PIL import Image
import random
from tqdm import tqdm
from pred import predprob
from skimage import io 

In [2]:
import sys

# Checkout of Synchronized-BatchNorm-PyTorch; presumably this is where the
# `sync_batchnorm` package imported by the model-loading cells lives.
SYNC_BN_DIR = '/home/yuyue/yuyue/Synchronized-BatchNorm-PyTorch-master'

# `paths` is an alias of the live module search path. It is kept as a global
# because other cells may refer to it by name.
paths = sys.path

# Only add the directory once so that re-running this cell does not keep
# growing sys.path with duplicate entries.
if SYNC_BN_DIR not in paths:
    paths.append(SYNC_BN_DIR)

In [3]:
# Restrict this process to GPUs 2 and 3.
# NOTE(review): the variable is only honoured if it is set before CUDA is
# initialised in this process - keep this cell ahead of any `.cuda()` call.
VISIBLE_GPUS = "2,3"
os.environ["CUDA_VISIBLE_DEVICES"] = VISIBLE_GPUS

In [4]:
# Every split resizes to the same spatial size and normalises with the same
# per-channel mean / std (these match the widely used ImageNet statistics).
_INPUT_SIZE = (512, 512)
_NORM_MEAN = [0.485, 0.456, 0.406]
_NORM_STD = [0.229, 0.224, 0.225]


def _deterministic_pipeline():
    """Return a fresh resize -> tensor -> normalise pipeline (no augmentation)."""
    return transforms.Compose([
        transforms.Resize(_INPUT_SIZE),
        transforms.ToTensor(),
        transforms.Normalize(_NORM_MEAN, _NORM_STD),
    ])


# Training additionally applies random flips and a mild colour jitter before
# the image is converted to a tensor.
_train_steps = [
    transforms.Resize(_INPUT_SIZE),
    transforms.RandomHorizontalFlip(),
    transforms.RandomVerticalFlip(),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.04),
    transforms.ToTensor(),
    transforms.Normalize(_NORM_MEAN, _NORM_STD),
]

data_transforms = {
    'train': transforms.Compose(_train_steps),
    'val': _deterministic_pipeline(),
    'test': _deterministic_pipeline(),
}

In [5]:
# Label tables for the training and validation splits.
df_train = pd.read_csv('/data/Pathology/SPIE/training_set/breastpathq/datasets/train_labels.csv')
df_val = pd.read_csv('/data/Pathology/SPIE/training_set/breastpathq/datasets/val_labels.csv')

# Single lookup table covering both splits. `DataFrame.append` was deprecated
# in pandas 1.4 and removed in 2.0; `pd.concat` with default arguments yields
# the same rows in the same order and keeps each frame's original index.
df = pd.concat([df_train, df_val])

In [6]:
def make_dataset(paths, extension, df):
    """Pair every image path that has the requested extension with its label.

    The file name (last path component) is parsed as ``<slide>_<rid>.<ext>``:
    the text before the first underscore is the slide id and the text between
    the first underscore and the following dot is the region id.

    Args:
        paths: iterable of file paths (strings).
        extension: file extension without the leading dot, e.g. ``'tif'``.
        df: DataFrame with columns ``slide``, ``rid`` and ``y``.

    Returns:
        A list of ``[path, float(score)]`` pairs in random order (shuffled
        with ``random.shuffle``). When several rows match, the first is used.

    Raises:
        ValueError: if the slide or region id in a file name is not an integer.
        IndexError: if a file name has no underscore, or if ``df`` has no row
            for the parsed ``(slide, rid)`` pair.
    """
    suffix = '.' + extension
    images = []
    for p in paths:
        # Match on the real file suffix. A substring test would also accept
        # e.g. '.tiff' files or any file below a directory named 'x.tif'.
        if not p.endswith(suffix):
            continue

        name_parts = os.path.basename(p).split('_')
        slide = int(name_parts[0])
        rid = int(name_parts[1].split('.')[0])

        matches = df.loc[(df['slide'] == slide) & (df['rid'] == rid), 'y']
        if len(matches) == 0:
            # Same exception type as the bare `[0]` lookup would raise, but
            # with enough context to find the offending file.
            raise IndexError(
                "no label found for slide=%d rid=%d (file %s)" % (slide, rid, p))
        images.append([p, float(matches.iloc[0])])

    shuffle(images)
    return images

In [7]:
class SPIE_dataset(data.Dataset):
    """Dataset of (image, score) pairs built from a list of image paths.

    Args:
        dirs: iterable of image file paths; forwarded to ``make_dataset``.
        loader: callable taking a path and returning an array that
            ``PIL.Image.fromarray`` accepts.
        extension: file extension (without dot) used to filter ``dirs``.
        transform: optional callable applied to the PIL image.
        train: if True, items are ``(sample, target)``; otherwise the source
            path is appended: ``(sample, target, path)``.
        labels: label table handed to ``make_dataset``. Defaults to the
            notebook-level ``df`` so existing calls keep working.

    Raises:
        RuntimeError: if no path in ``dirs`` yields a sample.
    """

    def __init__(self, dirs, loader, extension, transform=None, train=True, labels=None):
        if labels is None:
            # Backward-compatible fallback to the global label table.
            labels = df
        self.samples = make_dataset(dirs, extension, labels)
        if len(self.samples) == 0:
            # Wrap in a 1-tuple so a tuple-valued `dirs` is formatted as a
            # whole instead of being unpacked as format arguments.
            raise RuntimeError("no files in %s" % (dirs,))
        self.loader = loader
        self.transform = transform
        self.train = train

    def __getitem__(self, index):
        """Load, convert to PIL and (optionally) transform the sample at ``index``."""
        path, target = self.samples[index]
        sample = Image.fromarray(self.loader(path))
        if self.transform is not None:
            sample = self.transform(sample)
        if self.train:
            return sample, target
        return sample, target, path

    def __len__(self):
        """Number of (path, score) samples."""
        return len(self.samples)

In [8]:
# Collect the .tif files of the training and validation splits.
_DATASET_ROOT = "/data/Pathology/SPIE/training_set/breastpathq/datasets"
train_samples = glob(_DATASET_ROOT + "/train/*.tif")
val_samples = glob(_DATASET_ROOT + "/validation/*.tif")

In [9]:
# One dataset per split, each with its own transform pipeline.
train_dataset = SPIE_dataset(
    train_samples, io.imread, 'tif', transform=data_transforms['train'])
val_dataset = SPIE_dataset(
    val_samples, io.imread, 'tif', transform=data_transforms['val'])
image_datasets = {'train': train_dataset, 'val': val_dataset}

# (batch_size, num_workers) used for each split's DataLoader.
_loader_settings = {'train': (16, 16), 'val': (4, 4)}

dataloaders = {}
for _split in ('train', 'val'):
    _batch_size, _num_workers = _loader_settings[_split]
    dataloaders[_split] = torch.utils.data.DataLoader(
        image_datasets[_split],
        batch_size=_batch_size,
        shuffle=True,
        num_workers=_num_workers,
    )

dataset_sizes = {_name: len(_ds) for _name, _ds in image_datasets.items()}

# Report whether a CUDA device is available to this process.
use_gpu = torch.cuda.is_available()
print(use_gpu)

True


In [27]:
def infer(model_2classes, model_regression, use_gpu=True, loader=None):
    """Run the two-stage pipeline (classifier, then regressor) over a loader.

    For every sample the 2-class model is evaluated first. Samples predicted
    as class 1 are passed to the regression model and scored with
    ``sigmoid(output[:, 0])``; all other samples get a score of exactly 0.

    Args:
        model_2classes: module returning per-class scores of shape (batch, 2).
        model_regression: module whose first output column is the raw
            (pre-sigmoid) regression value.
        use_gpu: if True, inputs are moved to CUDA before the forward passes
            (the models are expected to already live on the same device).
        loader: iterable of batches whose first two items are
            ``(inputs, labels)``; extra items (e.g. paths) are ignored.
            Defaults to the notebook-level ``dataloaders['val']``.

    Returns:
        ``(outputs, labels)`` as two numpy arrays, in loader order.
    """
    if loader is None:
        loader = dataloaders['val']

    model_2classes.eval()
    model_regression.eval()

    running_outputs = []
    running_labels = []

    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        for batch in loader:
            inputs, labels = batch[0], batch[1]
            if use_gpu:
                inputs = inputs.cuda()

            # Index of the highest-scoring class for each sample.
            preds = torch.max(model_2classes(inputs), 1)[1]
            positive = preds == 1

            # Scores start at 0 (on the CPU, regardless of `use_gpu`); only
            # the positively classified samples are sent to the regressor,
            # in a single batched forward pass.
            scores = torch.zeros(inputs.size(0))
            if positive.any():
                regressed = model_regression(inputs[positive])
                scores[positive.cpu()] = torch.sigmoid(regressed[:, 0]).cpu().float()

            running_outputs.extend(scores.numpy())
            running_labels.extend(labels.cpu().numpy())

    return np.array(running_outputs), np.array(running_labels)

In [30]:
from densenet import densenet169
from sync_batchnorm import convert_model

# Regression network: DenseNet-169 whose classifier is replaced by a single
# linear output (the caller applies a sigmoid to it).
model_regression = densenet169(pretrained=False)
num_ftrs = model_regression.classifier.in_features
model_regression.classifier = nn.Linear(num_ftrs, 1)

# The model is converted before the weights are loaded, so the checkpoint is
# presumably keyed for the converted (synchronized batch-norm) layout.
model_regression = convert_model(model_regression)

# map_location='cpu' lets the checkpoint load on a CPU-only machine or when
# the GPU it was saved from is not visible; the model is moved to CUDA below.
# NOTE: torch.load unpickles the file - only load checkpoints you trust.
regression_state = torch.load(
    '/data/yuyue/SPIE/model_weight/densenet169_512_0626_sigmoid_1.pth',
    map_location='cpu')
model_regression.load_state_dict(regression_state)
print("model loaded")

model_regression.eval()
use_gpu = torch.cuda.is_available()
if use_gpu:
    model_regression = model_regression.cuda()

model loaded


In [19]:
from densenet import densenet169
from sync_batchnorm import convert_model

# Binary classifier: DenseNet-169 whose classifier is replaced by a
# two-output linear layer.
model_2classes = densenet169(pretrained=False)
num_ftrs = model_2classes.classifier.in_features
model_2classes.classifier = nn.Linear(num_ftrs, 2)

# The model is converted before the weights are loaded, so the checkpoint is
# presumably keyed for the converted (synchronized batch-norm) layout.
model_2classes = convert_model(model_2classes)

# map_location='cpu' lets the checkpoint load on a CPU-only machine or when
# the GPU it was saved from is not visible; the model is moved to CUDA below.
# NOTE: torch.load unpickles the file - only load checkpoints you trust.
classifier_state = torch.load(
    '/data/yuyue/SPIE/model_weight/2-classes_python2.pth',
    map_location='cpu')
model_2classes.load_state_dict(classifier_state)
print("model loaded")

model_2classes.eval()
use_gpu = torch.cuda.is_available()
if use_gpu:
    model_2classes = model_2classes.cuda()

model loaded


In [31]:
# Score the validation set with the classifier + regressor pipeline. Pass the
# `use_gpu` flag computed when the models were loaded instead of relying on
# infer's default of True, so inputs go to the same device as the models.
outputs, labels = infer(model_2classes, model_regression, use_gpu=use_gpu)

In [32]:
# `predprob` is already imported from the local `pred` module in the first
# cell; it is called with (labels, outputs) and its result is displayed.
val_score = predprob(labels, outputs)
val_score

0.9229650243501359