In [5]:
import glob,pylab,pandas as pd
import pydicom,numpy as np
import random
import json
import time
import copy
import torchvision
import sys
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
from torch.autograd import Variable
from PIL import Image, ImageDraw, ImageFont
import matplotlib.pyplot as plt
from matplotlib import patches,patheffects

from sklearn.model_selection import train_test_split
from torchvision import datasets
from torchvision import transforms
from torch.utils.data import Dataset,DataLoader
from torch.optim import lr_scheduler
from pathlib import Path

PATH = Path("/data/krf/dataset")

class CDataset(Dataset):
    def __init__(self,ds,img_dir,class_df = None,transform=None,ext=None):
        self.ds = ds
        self.img_dir = img_dir
        self.class_df = class_df
        self.ext = ext or '.dcm'
        self.transform = transforms.Compose(transform) if transform else None
    
    def __len__(self):
        return len(self.ds)
    
    def read_dicom_image(self,loc):
        img_arr = pydicom.read_file(loc.as_posix()).pixel_array
        img_arr = img_arr/img_arr.max()
        img_arr = (255*img_arr).clip(0,255).astype(np.uint8)
        img_arr = Image.fromarray(img_arr).convert('RGB')
        return img_arr
    
    def __getitem__(self,i):
        img = self.read_dicom_image(self.ds[i])
        if self.transform:
            img = self.transform(img)
        patientId = self.ds[i].name.split('.')[0]
        #kls = self.class_df[self.class_df['patientId'] == patientId]
        return img,patientId#,kls.iloc[0].Target
    

#img_dir = PATH/'stage_1_train_images'
#sample = random.sample(list(img_dir.iterdir()),100)
#sample = list(img_dir.iterdir())

# train,test = train_test_split(sample)

# transform = [transforms.Resize(224),transforms.RandomHorizontalFlip(),transforms.ToTensor()]
# train_ds = CDataset(train,img_dir,transform=transform)
# test_ds = CDataset(test,img_dir,transform=transform)


# batch_size=128
# sz=224
# train_dl = DataLoader(train_ds,batch_size = batch_size)
# test_dl = DataLoader(test_ds,batch_size=batch_size)


def show_img(im,figsize=None,ax=None):
    if not ax:
        fig,ax = plt.subplots(figsize = figsize)
    ax.imshow(im)
    ax.get_xaxis().set_visible(False)
    ax.get_yaxis().set_visible(False)
    return ax

def draw_outline(o, lw):
    o.set_path_effects([patheffects.Stroke(
        linewidth=lw,foreground='black'),patheffects.Normal()])
    
def draw_rect(ax,b):
    patch = ax.add_patch(patches.Rectangle(b[:2], *b[-2:],fill=False,edgecolor='white',lw=2))
    draw_outline(patch,4)

def draw_text(ax,xy,txt,sz=14):
    text = ax.text(*xy,txt,verticalalignment='top',color='white',fontsize=sz,weight='bold')
    draw_outline(text,1)
    
    
#image,klass = next(iter(train_dl))
#fig,axes = plt.subplots(1,4,figsize=(12,2))
#for i,ax in enumerate(axes.flat):
#    image,klass
#    ima=image[i][0]
#    b = klass[i]
#    ax = show_img(ima,ax=ax)
#    draw_text(ax,(0,0),b)
    
#plt.tight_layout()

use_gpu = torch.cuda.is_available()
print("Use gpu = {}".format(use_gpu))

#dataloaders = {'train':train_dl,'val':test_dl}

device = torch.cuda.set_device(0)


model_ft = torchvision.models.resnet18(pretrained=False)#True)
#修改分类数
num_ftrs = model_ft.fc.in_features
model_ft.fc = nn.Linear(num_ftrs,2)

criterion = nn.CrossEntropyLoss()
model_ft = model_ft.cuda()

optimizer_ft = optim.Adam(model_ft.parameters())

exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft,step_size=7,gamma=0.1)
#since = time.time()
#print("Start training")
##model_ft = train_model(model_ft,criterion,optimizer_ft,exp_lr_scheduler,num_epochs=20)
# Save the model checkpoint
#torch.save(model_ft.state_dict(), 'resnet.ckpt')



Use gpu = True


In [6]:
model_ft.load_state_dict(torch.load('resnet.ckpt'))

In [4]:
img_dir = PATH/'stage_1_test_images'
#img_dir = PATH/'stage_1_test_images'
#sample = random.sample(list(img_dir.iterdir()),100)
sample = list(img_dir.iterdir())

transform = [transforms.Resize(224),transforms.RandomHorizontalFlip(),transforms.ToTensor()]
test_ds = CDataset(sample,img_dir,transform=transform)

batch_size=1
sz=224
test_dl = DataLoader(test_ds,batch_size=batch_size)


In [5]:
count = 0
num = 1000
model_ft.eval()
labelFile = 'stage_1_test_labels.csv'
result = []
from tqdm import tqdm
with open(labelFile, 'w') as file:
    file.write("patientId,Target\n")
    
    for data in tqdm(test_dl):
        inputs,patientId = data
        #model_ft.train(False)
        if use_gpu:
            inputs = Variable(inputs.cuda(),requires_grad=True)
        else:
            inputs = Variable(inputs)

        # zero the parameter gradients
        #optimizer.zero_grad()
        #print("inputs:",inputs)
        #print("patientId:",patientId)
        #forward
        outputs = model_ft(inputs)
        #print("outputs",outputs)
        _,preds = torch.max(outputs.data,1)
        if preds == 0:
            #print(0)
            result.append(0)
            file.write(patientId[0]+",0\n")
        else:
            #print(1)
            result.append(1)
            file.write(patientId[0]+",1\n")
        count+=1
        if count>=num:
            break
print(sum(result))

100%|█████████▉| 999/1000 [02:17<00:00,  7.47it/s]

152


In [8]:
##TEST ON TRAINING SET
train_bb_df = pd.read_csv(PATH/'stage_1_train_labels.csv')
#print(train_bb_df.head())

train_bb_df['duplicate'] = train_bb_df.duplicated(['patientId'],keep=False)
#print(train_bb_df[train_bb_df['duplicate']].head())

detailed_df = pd.read_csv(PATH/'stage_1_detailed_class_info.csv')
# merge two df
class_df = train_bb_df.merge(detailed_df,on="patientId")

csv_df = class_df.filter(['patientId','Target'],)
csv_df = csv_df.set_index('patientId',)
#class_df.head(10)
#print(csv_df.head(10))
class TDataset(Dataset):
    def __init__(self,ds,img_dir,class_df,transform=None,ext=None):
        self.ds = ds
        self.img_dir = img_dir
        self.class_df = class_df
        self.ext = ext or '.dcm'
        self.transform = transforms.Compose(transform) if transform else None
    
    def __len__(self):
        return len(self.ds)
    
    def read_dicom_image(self,loc):
        img_arr = pydicom.read_file(loc.as_posix()).pixel_array
        img_arr = img_arr/img_arr.max()
        img_arr = (255*img_arr).clip(0,255).astype(np.uint8)
        img_arr = Image.fromarray(img_arr).convert('RGB')
        return img_arr
    
    def __getitem__(self,i):
        img = self.read_dicom_image(self.ds[i])
        if self.transform:
            img = self.transform(img)
        patientId = self.ds[i].name.split('.')[0]
        kls = self.class_df[self.class_df['patientId'] == patientId]
        return img,kls.iloc[0].Target

img_dir = PATH/'stage_1_train_images'
sample = random.sample(list(img_dir.iterdir()),1000)
#sample = list(img_dir.iterdir())

transform = [transforms.Resize(224),transforms.RandomHorizontalFlip(),transforms.ToTensor()]
train_ds = TDataset(sample,img_dir,class_df,transform=transform)

batch_size=100
sz=224
train_dl = DataLoader(train_ds,batch_size = batch_size)


count = 0
num = 100
model_ft.eval()
#labelFile = 'stage_1_train_labels_pred.csv'
result = []
from tqdm import tqdm
running_corrects = 0
for data in tqdm(train_dl):
    inputs,labels = data
    #print(data)
    #print(labels)
    #model_ft.train(False)
    if use_gpu:
        inputs = Variable(inputs.cuda(),requires_grad=True)
        labels = Variable(labels.cuda())
    else:
        inputs,labels = Variable(inputs),Variable(labels)
        
        #print("inputs:",inputs)
        #print("patientId:",patientId)
        #forward
    outputs = model_ft(inputs)
    print("outputs",outputs)
    _,preds = torch.max(outputs.data,1)
    running_corrects += torch.sum(preds == labels.data)
    
print(running_corrects)
print(sum(result))

 10%|█         | 1/10 [00:11<01:39, 11.03s/it]

outputs tensor([[ 1.5902, -2.1026],
        [ 3.0603, -4.1227],
        [ 1.9850, -2.4862],
        [ 2.1885, -2.9284],
        [ 0.2405, -0.3085],
        [ 1.8111, -2.2775],
        [ 2.8367, -3.8959],
        [ 1.2188, -1.2609],
        [-0.2324,  0.1261],
        [ 0.0185, -0.0909],
        [ 1.2830, -1.3361],
        [ 0.6991, -0.6764],
        [-0.8352,  0.6323],
        [ 0.7706, -0.7487],
        [ 0.2257, -0.3461],
        [ 1.6826, -2.1241],
        [-0.5665,  0.3471],
        [ 1.3765, -1.6565],
        [ 1.1685, -1.3228],
        [ 0.6623, -0.6704],
        [ 1.1871, -1.3583],
        [-0.1378,  0.0364],
        [-0.4197,  0.2057],
        [ 2.1919, -2.8517],
        [ 1.0863, -1.2385],
        [ 1.8673, -2.3233],
        [ 2.5181, -3.4067],
        [ 2.2764, -2.9767],
        [ 0.4023, -0.4679],
        [-0.3053,  0.1954],
        [ 1.0553, -1.2657],
        [ 2.5900, -3.4873],
        [-1.3277,  0.9893],
        [ 0.8195, -0.8008],
        [ 0.3786, -0.3581],
        [ 2.

 20%|██        | 2/10 [00:22<01:28, 11.05s/it]

outputs tensor([[ 1.2461, -1.6586],
        [ 1.8407, -2.3726],
        [ 2.1564, -2.8685],
        [ 2.2942, -3.0692],
        [ 0.4433, -0.3971],
        [ 0.6906, -0.7788],
        [ 0.4847, -0.4590],
        [ 2.7300, -3.6977],
        [ 1.1309, -1.2785],
        [ 1.4199, -1.6733],
        [ 2.7392, -3.7494],
        [ 0.7432, -0.7704],
        [ 1.8000, -2.2959],
        [ 0.6626, -0.7220],
        [ 1.1267, -1.0788],
        [ 2.4974, -3.2861],
        [ 0.6489, -0.5462],
        [ 3.0559, -4.1068],
        [ 2.4857, -3.3277],
        [ 0.4363, -0.4195],
        [ 1.1225, -1.2875],
        [-0.0638, -0.0541],
        [ 2.4853, -3.2911],
        [-0.4710,  0.2978],
        [-0.4463,  0.2800],
        [ 1.3242, -1.5505],
        [ 1.3158, -1.6261],
        [ 1.0739, -1.1457],
        [ 0.3389, -0.4151],
        [ 2.7528, -3.7109],
        [-0.1324,  0.0346],
        [ 2.0380, -2.6210],
        [ 0.5750, -0.5291],
        [ 2.2826, -2.9700],
        [-1.0582,  0.7712],
        [ 0.

 30%|███       | 3/10 [00:31<01:14, 10.62s/it]

outputs tensor([[ 2.2641, -2.9648],
        [ 0.3225, -0.2882],
        [-0.2564,  0.1499],
        [ 0.8031, -0.7791],
        [ 3.0736, -4.1507],
        [ 2.4077, -3.1306],
        [ 0.4782, -0.4569],
        [-0.3371,  0.2179],
        [ 0.4491, -0.5023],
        [ 0.6314, -0.6031],
        [ 0.0579, -0.1436],
        [ 2.0941, -2.5582],
        [ 0.1662, -0.1976],
        [ 2.5685, -3.4927],
        [ 2.1848, -2.8310],
        [ 1.2910, -1.4311],
        [ 1.5855, -1.8763],
        [-0.7748,  0.5455],
        [ 1.8347, -2.2619],
        [ 0.2266, -0.2821],
        [-0.8100,  0.5100],
        [ 2.0417, -2.7166],
        [ 1.9995, -2.6813],
        [ 1.8904, -2.3447],
        [ 0.4850, -0.4961],
        [ 2.3961, -3.1894],
        [ 2.0137, -2.4980],
        [-0.1263, -0.0277],
        [ 2.5696, -3.5293],
        [ 2.5624, -3.3888],
        [ 0.1906, -0.2714],
        [ 0.2195, -0.2257],
        [ 1.2517, -1.4350],
        [ 1.9733, -2.6142],
        [ 0.1009, -0.1606],
        [ 1.

 40%|████      | 4/10 [00:41<01:02, 10.33s/it]

outputs tensor([[ 0.6950, -0.7368],
        [-0.1979, -0.0099],
        [-0.5011,  0.3427],
        [ 1.2138, -1.2812],
        [ 2.2319, -2.9284],
        [ 0.3819, -0.4024],
        [ 1.5203, -1.8960],
        [ 0.8195, -0.8077],
        [-0.4784,  0.3078],
        [ 3.2152, -4.4585],
        [ 3.0252, -4.0400],
        [ 1.4589, -1.7299],
        [-0.4371,  0.2572],
        [-0.5457,  0.3347],
        [ 2.2999, -3.0453],
        [ 0.8890, -0.9257],
        [ 3.2563, -4.4355],
        [ 1.5470, -1.9402],
        [ 0.7710, -0.8087],
        [-0.4499,  0.2852],
        [-0.0898, -0.0240],
        [ 0.6763, -0.6665],
        [ 0.8607, -0.8624],
        [ 0.8096, -0.9122],
        [-0.2170,  0.0889],
        [ 2.5334, -3.4679],
        [-0.2550,  0.0488],
        [ 3.1941, -4.4068],
        [-0.2574,  0.1553],
        [ 0.0881, -0.0865],
        [ 0.9769, -1.0817],
        [ 0.6048, -0.6712],
        [ 1.5756, -1.7580],
        [ 0.4051, -0.3444],
        [-0.5869,  0.3890],
        [ 2.

 50%|█████     | 5/10 [00:51<00:50, 10.17s/it]

outputs tensor([[ 1.5809, -1.8205],
        [ 0.9053, -0.8965],
        [-0.7465,  0.5216],
        [ 0.3023, -0.3269],
        [ 0.9314, -0.9173],
        [ 3.0611, -4.1816],
        [ 1.6467, -2.0851],
        [ 3.0593, -4.2112],
        [ 0.0958, -0.1782],
        [ 2.1601, -2.8061],
        [ 2.1919, -2.7956],
        [ 0.5249, -0.5990],
        [ 1.1484, -1.3554],
        [ 3.0651, -4.1138],
        [ 1.6661, -2.1306],
        [ 0.9313, -0.8820],
        [ 0.6842, -0.6384],
        [ 0.9097, -0.9557],
        [ 0.7719, -0.8406],
        [ 1.1499, -1.3528],
        [ 1.7006, -2.0670],
        [-0.7157,  0.4928],
        [ 0.3856, -0.3822],
        [ 0.4429, -0.4849],
        [ 0.0896, -0.1218],
        [ 2.4318, -3.2813],
        [ 3.2493, -4.5484],
        [ 0.2568, -0.3236],
        [-0.4480,  0.2481],
        [ 2.3294, -3.1027],
        [ 0.5057, -0.5203],
        [ 3.2368, -4.4782],
        [ 0.6505, -0.6001],
        [ 0.5776, -0.5609],
        [ 2.2334, -2.9504],
        [ 1.

 60%|██████    | 6/10 [01:00<00:39,  9.93s/it]

outputs tensor([[ 0.2979, -0.2972],
        [ 1.8362, -2.4582],
        [ 0.7366, -0.6694],
        [ 2.0085, -2.5937],
        [ 1.2115, -1.4248],
        [-0.4753,  0.3163],
        [ 1.3525, -1.5907],
        [ 0.4667, -0.4482],
        [ 0.6078, -0.6428],
        [ 0.1348, -0.1804],
        [ 1.0549, -1.1902],
        [ 1.9076, -2.3544],
        [ 0.5685, -0.5424],
        [ 0.4380, -0.4001],
        [ 1.4051, -1.7126],
        [-1.1560,  0.7963],
        [ 1.0088, -1.0822],
        [ 1.8775, -2.2720],
        [ 0.6222, -0.6829],
        [ 2.1658, -2.8628],
        [ 0.0203, -0.0838],
        [-0.1734,  0.0512],
        [ 0.8452, -0.8364],
        [ 1.2078, -1.5301],
        [ 1.1717, -1.3945],
        [ 3.4056, -4.7133],
        [ 2.3565, -3.1401],
        [ 0.8513, -0.8634],
        [ 1.0493, -1.1757],
        [ 1.7125, -2.1639],
        [-0.0559, -0.0052],
        [ 1.6108, -2.0781],
        [ 2.3302, -3.0435],
        [ 0.3241, -0.3549],
        [ 1.8944, -2.3848],
        [-1.

 70%|███████   | 7/10 [01:10<00:29,  9.81s/it]

outputs tensor([[ 1.5764, -1.8678],
        [ 0.7884, -0.7653],
        [ 1.0946, -1.2014],
        [ 0.9841, -1.1151],
        [ 0.8048, -0.8157],
        [ 2.9103, -3.9958],
        [ 2.6431, -3.5652],
        [-0.8306,  0.5562],
        [ 2.1635, -2.9062],
        [ 1.1408, -1.3023],
        [ 0.3835, -0.3382],
        [ 1.2449, -1.4753],
        [-1.6652,  1.2806],
        [-0.5448,  0.3544],
        [ 2.0320, -2.5403],
        [ 1.1742, -1.4319],
        [ 0.4299, -0.4754],
        [ 0.8618, -0.9934],
        [ 0.9794, -0.9085],
        [ 1.0610, -1.0854],
        [ 2.9525, -4.0475],
        [ 2.3091, -2.9513],
        [ 1.7425, -2.2298],
        [ 2.3331, -3.1485],
        [-0.4753,  0.3177],
        [ 0.7743, -0.7433],
        [ 1.6207, -1.8629],
        [-0.8605,  0.6327],
        [ 2.7377, -3.6481],
        [-0.6318,  0.4331],
        [ 1.8320, -2.2750],
        [ 0.1291, -0.2250],
        [ 1.1375, -1.2836],
        [ 1.6750, -2.0954],
        [ 1.8823, -2.4532],
        [ 2.

 80%|████████  | 8/10 [01:19<00:19,  9.74s/it]

outputs tensor([[ 1.7734, -2.2650],
        [ 0.3010, -0.3837],
        [ 0.1356, -0.1633],
        [ 1.2101, -1.4154],
        [ 1.1278, -1.2951],
        [ 2.0057, -2.6313],
        [ 0.1439, -0.2733],
        [ 1.5770, -1.9923],
        [ 2.5554, -3.4490],
        [-0.7681,  0.4918],
        [ 0.0371, -0.1169],
        [ 2.5704, -3.3979],
        [ 0.8508, -0.7866],
        [-0.6235,  0.4276],
        [-0.6557,  0.4500],
        [ 1.0544, -1.2351],
        [ 2.2419, -3.0295],
        [ 0.4626, -0.4985],
        [ 2.2671, -2.8204],
        [ 0.1216, -0.1768],
        [-0.2478,  0.1453],
        [ 1.7012, -2.1624],
        [ 1.3710, -1.7087],
        [ 0.4921, -0.5397],
        [ 2.2353, -2.9940],
        [ 1.4598, -1.7378],
        [-0.1120,  0.0088],
        [ 0.2900, -0.2933],
        [ 2.7308, -3.6915],
        [ 0.6681, -0.6338],
        [ 0.5476, -0.5655],
        [-0.0182, -0.0431],
        [ 0.5176, -0.4665],
        [ 1.6296, -2.0139],
        [ 3.1602, -4.3098],
        [-0.

 90%|█████████ | 9/10 [01:29<00:09,  9.65s/it]

outputs tensor([[ 0.7624, -0.7806],
        [ 2.7317, -3.7674],
        [ 0.5661, -0.5445],
        [ 0.7426, -0.7577],
        [ 2.9704, -4.0422],
        [ 2.2752, -2.8934],
        [ 0.3784, -0.4633],
        [ 1.9228, -2.4718],
        [ 0.4228, -0.5304],
        [ 0.3027, -0.3272],
        [ 1.3219, -1.5156],
        [ 1.5127, -1.8710],
        [ 3.0133, -4.2220],
        [ 0.9910, -1.0093],
        [ 1.7529, -2.3256],
        [ 2.1924, -2.8709],
        [-0.2434,  0.1381],
        [ 0.5695, -0.5498],
        [ 0.2997, -0.3182],
        [ 1.5938, -2.0327],
        [ 3.1337, -4.2430],
        [ 0.2530, -0.2545],
        [ 3.2709, -4.4012],
        [ 0.3689, -0.3225],
        [ 2.4414, -3.3548],
        [ 1.1888, -1.3539],
        [ 0.3530, -0.3665],
        [-0.4493,  0.1955],
        [ 1.4870, -1.7547],
        [ 0.2208, -0.2605],
        [ 3.1610, -4.3177],
        [ 0.8231, -0.8268],
        [ 0.5706, -0.5776],
        [ 0.3683, -0.4241],
        [ 0.9461, -0.8789],
        [-0.

100%|██████████| 10/10 [01:39<00:00,  9.75s/it]

outputs tensor([[ 2.2647, -3.0695],
        [ 2.8377, -3.8611],
        [ 0.5252, -0.4887],
        [ 1.7487, -2.0800],
        [ 0.9887, -1.1579],
        [ 1.6935, -2.2079],
        [ 2.3077, -3.0253],
        [ 0.5529, -0.5719],
        [ 2.4633, -3.2515],
        [ 0.8945, -0.9497],
        [ 0.1634, -0.1859],
        [ 2.3993, -3.2378],
        [ 2.3165, -3.0290],
        [-0.9527,  0.6613],
        [ 2.0630, -2.6144],
        [ 1.9488, -2.3595],
        [ 3.6641, -5.0892],
        [ 0.6562, -0.6812],
        [ 0.7984, -0.8314],
        [ 0.7396, -0.7030],
        [ 2.9053, -3.9379],
        [-0.8917,  0.6697],
        [-0.9252,  0.6796],
        [-0.5458,  0.3601],
        [ 1.7670, -2.1598],
        [ 0.8765, -0.7794],
        [ 0.4709, -0.4213],
        [ 0.9001, -1.0037],
        [ 0.8945, -1.0031],
        [ 0.8422, -0.8223],
        [ 0.9876, -1.0368],
        [ 1.6426, -2.0510],
        [ 0.4531, -0.5284],
        [ 3.5111, -4.8134],
        [ 2.3846, -3.0691],
        [ 1.


