In [7]:
import os
import torch
import matplotlib.pyplot as plt
import numpy as np
import torch.nn as nn
from lung_segmentation_network import UNet
from skimage import morphology
import cv2

In [None]:
os.environ['CUDA_VISIBLE_DEVICES']='2'

In [None]:
import glob
CT_images = glob.glob('/data2/Cpeng/CT_images_lung_segmentation/*.jpg')
print(len(CT_images))
# lung_masks = glob.glob('/data2/Cpeng/lung_masks/*.jpg')
# print(len(lung_masks))  #3520 original

In [4]:
# image = cv2.imread(CT_images[300],0)
# plt.imshow(image)
# print(image.shape)
# print(image.max())
# print(image.min())

# mask = cv2.imread(lung_masks[300],0)
# plt.figure()
# plt.imshow(mask)
# print(mask.shape)
# print(mask.max())
# print(mask.min())

In [5]:
from sklearn.model_selection import train_test_split
train_X,test_X,_,_ = train_test_split(CT_images,CT_images,test_size = 0.01, random_state = 42)
print(len(train_X))

4197


In [6]:
import cv2
from torch.utils.data import Dataset
class Lung_segmentation(Dataset):
    def __init__(self, file_path_list, phase='train'):
        self.path_list = file_path_list
        self.phase = phase
    
    def __getitem__(self, index):
        
        image, label= self.load_data(self.path_list[index])
        image, label = self.process_data(image, label)
        
        return image, label
        
    def load_data(self, file_path):
        image = cv2.imread(file_path,0)
        label_path = os.path.join('/data2/Cpeng/lung_masks/',file_path.split('/')[-1])
        label = cv2.imread(label_path,0)
        
        image = cv2.resize(image, (256, 256), interpolation=cv2.INTER_CUBIC)
        image = image-image.min()
        image = (image/image.max()).astype(np.float32)
        label = cv2.resize(label, (256, 256), interpolation=cv2.INTER_CUBIC)
        if label.max()>0:
            label = (label/label.max()).astype(np.float32)
        else:
            label = label.astype(np.float32)
        return image, label
        
    def process_data(self, *args):
        return [item[np.newaxis, :, :].astype(np.float32) for item in args]
    
    def __len__(self):
        return len(self.path_list)

In [7]:
torch.backends.cudnn.benchmark = True
device = torch.device('cpu' if not torch.cuda.is_available() else 'cuda')
batch_size = 16
num_workers = 16

In [8]:
trainset = Lung_segmentation(file_path_list=train_X, phase='train')
train_loader = torch.utils.data.DataLoader(trainset, batch_size=batch_size, shuffle=True, num_workers=num_workers)

valset = Lung_segmentation(file_path_list=test_X, phase='test')
val_loader = torch.utils.data.DataLoader(valset, batch_size=batch_size, shuffle=True, num_workers=num_workers)

In [10]:
from dice_loss import *
segmentation_model = UNet(1,1).cuda()
checkpoint = torch.load('/data2/Cpeng/model_lung_segmentation/checkpoint_{}.pth.tar'.format(240), map_location="cuda:0")
segmentation_model.load_state_dict(checkpoint['state_dict'])
from torch import optim
from torch.optim.lr_scheduler import *
criterion = DiceLoss().cuda()
optimizer = torch.optim.Adam(segmentation_model.parameters(), lr=1e-3)
scheduler = StepLR(optimizer, 20, gamma=0.5)

In [7]:
print(segmentation_model)

In [12]:
def save_checkpoint(state, checkpoint='checkpoint', snapshot=1):
    if not os.path.exists(checkpoint): os.makedirs(checkpoint)
    if snapshot and state['epoch'] % snapshot == 0:
        torch.save(state, os.path.join(checkpoint, 'checkpoint_{}.pth.tar'.format(state['epoch'])))

In [13]:
import matplotlib.pyplot as plt
import scipy.io as sio
step=0
epoches=421

for epoch in range(241,epoches):
    print('epoch:' + str(epoch))
    scheduler.step()
    loss_train=0.0
    loss_val=0.0
    segmentation_model.train()
    for i,data in enumerate(train_loader):
        image, label = [item.to(device) for item in data]
        output = segmentation_model(image)
        loss_train=criterion(output,label.float())
        optimizer.zero_grad()
        loss_train.backward()
        optimizer.step()
    with torch.no_grad():
        segmentation_model.eval()
        for i,data in enumerate(val_loader):
            image, label = [item.to(device) for item in data]
            output= segmentation_model(image)
            loss_val=criterion(output,label.float())
    print('Train_Loss: {}'.format(loss_train))
    print('Val_Loss: {}'.format(loss_val))
#     writer.add_scalar('train_loss',loss_train, epoch)
#     writer.add_scalar('val_loss',loss_val, epoch)
    if epoch%10==0:
        save_checkpoint({
            'epoch': epoch,
            'state_dict': segmentation_model.state_dict(),
            }, checkpoint='/data2/Cpeng/model_lung_segmentation', snapshot=1)

epoch:241




Train_Loss: tensor([0.1184], device='cuda:0', grad_fn=<DivBackward0>)
Val_Loss: tensor([0.0319], device='cuda:0')
epoch:242
Train_Loss: tensor([0.0204], device='cuda:0', grad_fn=<DivBackward0>)
Val_Loss: tensor([0.0233], device='cuda:0')
epoch:243
Train_Loss: tensor([0.2199], device='cuda:0', grad_fn=<DivBackward0>)
Val_Loss: tensor([0.0275], device='cuda:0')
epoch:244
Train_Loss: tensor([0.0666], device='cuda:0', grad_fn=<DivBackward0>)
Val_Loss: tensor([0.0416], device='cuda:0')
epoch:245
Train_Loss: tensor([0.0197], device='cuda:0', grad_fn=<DivBackward0>)
Val_Loss: tensor([0.0252], device='cuda:0')
epoch:246
Train_Loss: tensor([0.0276], device='cuda:0', grad_fn=<DivBackward0>)
Val_Loss: tensor([0.0248], device='cuda:0')
epoch:247
Train_Loss: tensor([0.0388], device='cuda:0', grad_fn=<DivBackward0>)
Val_Loss: tensor([0.0256], device='cuda:0')
epoch:248
Train_Loss: tensor([0.0879], device='cuda:0', grad_fn=<DivBackward0>)
Val_Loss: tensor([0.0205], device='cuda:0')
epoch:249
Train_Lo

Train_Loss: tensor([0.2056], device='cuda:0', grad_fn=<DivBackward0>)
Val_Loss: tensor([0.0212], device='cuda:0')
epoch:309
Train_Loss: tensor([0.0270], device='cuda:0', grad_fn=<DivBackward0>)
Val_Loss: tensor([0.0191], device='cuda:0')
epoch:310
Train_Loss: tensor([0.0180], device='cuda:0', grad_fn=<DivBackward0>)
Val_Loss: tensor([0.0208], device='cuda:0')
epoch:311
Train_Loss: tensor([0.0079], device='cuda:0', grad_fn=<DivBackward0>)
Val_Loss: tensor([0.0201], device='cuda:0')
epoch:312
Train_Loss: tensor([0.0172], device='cuda:0', grad_fn=<DivBackward0>)
Val_Loss: tensor([0.0250], device='cuda:0')
epoch:313
Train_Loss: tensor([0.0131], device='cuda:0', grad_fn=<DivBackward0>)
Val_Loss: tensor([0.0239], device='cuda:0')
epoch:314
Train_Loss: tensor([0.0209], device='cuda:0', grad_fn=<DivBackward0>)
Val_Loss: tensor([0.0195], device='cuda:0')
epoch:315
Train_Loss: tensor([0.0213], device='cuda:0', grad_fn=<DivBackward0>)
Val_Loss: tensor([0.0203], device='cuda:0')
epoch:316
Train_Lo

KeyboardInterrupt: 

In [None]:
image_path = '/data2/Cpeng/results/'+str(10)+'_image.jpg'
cv2.imwrite(image_path,image)
mask_path = '/data2/Cpeng/results/'+str(10)+'_mask.jpg'
cv2.imwrite(mask_path,output)

In [21]:
images = glob.glob('/data2/Cpeng/NCP_raw_images/*')

In [3]:
image_path = images[180]
image=cv2.imread(image_path,0)
mask_path = '/data2/Cpeng/NCP_lung_masks/'+image_path.split('/')[-1]
mask = cv2.imread(mask_path,0)
plt.imshow(image)
plt.figure()
plt.imshow(mask)

In [None]:
save_image = '/data2/Cpeng/results/180_image.jpg'
cv2.imwrite(save_image,image)
save_mask = '/data2/Cpeng/results/180_mask.jpg'
cv2.imwrite(save_mask,mask)

In [6]:
lung_image = image*mask
plt.imshow(lung_image)
save_lung_image= '/data2/Cpeng/results/180_lung_image.jpg'
cv2.imwrite(save_lung_image,lung_image)

In [52]:
save_image = '/data2/Cpeng/results/180_image.jpg'
cv2.imwrite(save_image,image)
save_mask = '/data2/Cpeng/results/180_mask.jpg'
cv2.imwrite(save_mask,mask)

True

In [5]:
import scipy
mask[mask>0]=1
mt = scipy.ndimage.distance_transform_edt(mask)
plt.imshow(mt,cmap='gray')
save_map = '/data2/Cpeng/results/180_map.jpg'
cv2.imwrite(save_map,mt)

# dice计算

In [9]:
from dice_loss import *
torch.backends.cudnn.benchmark = True
device = torch.device('cpu' if not torch.cuda.is_available() else 'cuda')
segmentation_model = UNet(1,1).cuda()
test=[]
test.append(test_X[10])
print(test_X[10])
ts = Lung_segmentation(file_path_list=test,phase = 'test')
loader = torch.utils.data.DataLoader(ts, batch_size=8, shuffle=True, num_workers=8, drop_last=True)
valset = Lung_segmentation(file_path_list=test_X, phase='test')
val_loader = torch.utils.data.DataLoader(valset, batch_size=1, shuffle=True, num_workers=1)
checkpoint = torch.load('/data2/Cpeng/model_lung_segmentation/checkpoint_{}.pth.tar'.format(200), map_location="cuda:0")
segmentation_model.load_state_dict(checkpoint['state_dict'])
DS=0.0
num=0

/data2/Cpeng/CT_images_lung_segmentation/coronacases_004_116.jpg


In [4]:
segmentation_model.eval()
with torch.no_grad():
    for i, data in enumerate(val_loader):
        image, label = [item.to(device) for item in data]
        output = segmentation_model(image)
    #     output = torch.sigmoid(output)
    #     output[output>0.1]=1.0
    #     output[output<0.1]=0.0
    #     print(type(output))
    #     print(output)
        DS = DS + DiceScore()(output,label.float())
#         print(DS)
        num = num +1
print('Dice_Score:{}'.format(DS/num))
output=output[0,0,...].cpu().detach().numpy()
label=label[0,0,...].cpu().detach().numpy()
image=image[0,0,...].cpu().detach().numpy()
import matplotlib.pyplot as plt
plt.imshow(output,cmap='gray')
plt.figure()
output[output>0.1]=1
output[output<0.1]=0
plt.imshow(output,cmap='gray')
plt.figure()
plt.imshow(label)
plt.figure()
plt.imshow(image)

# DS: 0.1-0.9661 0.5-0.9669

# 模型推理

# NCP images

In [4]:
import cv2
from torch.utils.data import Dataset
class Lung_segmentation_1(Dataset):
    def __init__(self, file_path_list, phase='train'):
        self.path_list = file_path_list
        self.phase = phase
    
    def __getitem__(self, index):
        
        image= self.load_data(self.path_list[index])
        image = self.process_data(image)
        
        return image, self.path_list[index]
        
    def load_data(self, file_path):
        image = cv2.imread(file_path,0)
        image = cv2.resize(image, (256, 256), interpolation=cv2.INTER_CUBIC)
        image = image-image.min()
        image = (image/image.max()).astype(np.float32)
        
        return image
        
    def process_data(self, *args):
        return [item[np.newaxis, :, :].astype(np.float32) for item in args]
    
    def __len__(self):
        return len(self.path_list)
    


In [14]:
# import pandas as pd
# lesion_slices_dir = "/data2/Cpeng/COVID/"
# lesion_slices = pd.read_csv('/data2/Cpeng/COVID/lesions_slices.csv')
# NCP_slices = lesion_slices['imgpath'].tolist()[36894:58766]
# print(NCP_slices[0])
# print(len(NCP_slices))
# NCP_image_dir = '/data2/Cpeng/COVID/'
# test_image = cv2.imread(lesion_slices_dir+NCP_slices[0],0)
# print(test_image.shape)
# print(test_image.min())
# print(test_image.max())
# NCP_slices_raw = []
# for i in range(len(NCP_slices)):
#     if os.path.exists(lesion_slices_dir+NCP_slices[i]):
#         NCP_slices_raw.append(lesion_slices_dir+NCP_slices[i])
# print(len(NCP_slices_raw))

NCP/1010/2572/0013.JPG
(512, 512)
72
255
21310


In [16]:
# NCP_raw_images_file = '/data2/Cpeng/COVID/NCP_raw_images.csv'
# df = pd.DataFrame(NCP_slices_raw)
# df.to_csv(NCP_raw_images_file,index=None,header='imgpath')

In [5]:
import pandas as pd
NCP_slices = pd.read_csv('/data2/Cpeng/COVID/NCP_raw_images.csv')
NCP_slices_raw = NCP_slices['0'].tolist()

In [2]:
print(NCP_slices_raw)

In [1]:
#save image slices
images = glob.glob('/data2/Cpeng/NCP_raw_images/*')
print(len(images))
print(images[0])
for i in range(len(images)):
    image = cv2.imread(images[i],0)
    path_ = ('/').join(images[i].split('/')[-1].split('_')[0:3])
    save_path_ = os.path.join('/data2/Cpeng/NCP_raw_images_patients/',('/').join(images[i].split('/')[-1].split('_')[0:2]))
    print(save_path_)
    if not os.path.exists(save_path_):
        os.makedirs(save_path_)
    save_path = os.path.join('/data2/Cpeng/NCP_raw_images_patients/',('/').join(images[i].split('/')[-1].split('_')[0:3]))
#     print(save_path)
#     break
    cv2.imwrite(save_path,image)

In [7]:
torch.backends.cudnn.benchmark = True
device = torch.device('cpu' if not torch.cuda.is_available() else 'cuda')
segmentation_model = UNet(1,1).cuda()
checkpoint = torch.load('/data2/Cpeng/model_lung_segmentation/checkpoint_{}.pth.tar'.format(360), map_location="cuda:0")
segmentation_model.load_state_dict(checkpoint['state_dict'])
valset = Lung_segmentation_1(file_path_list=NCP_slices_raw, phase='test')
val_loader = torch.utils.data.DataLoader(valset, batch_size=40, shuffle=True, num_workers=4)

In [8]:
segmentation_model.eval()
COVID_lung_masks_dir = "/data2/Cpeng/NCP_lung_masks/"
COVID_images_dir = "/data2/Cpeng/NCP_raw_images/"
with torch.no_grad():
    for i, data in enumerate(val_loader):
        image,path= data
        image = image[0].cuda()
#         print(path)
        output = segmentation_model(image)
        output = torch.sigmoid(output)
        output[output>0.5]=1.0
        output[output<0.5]=0.0
        output=output.cpu().detach().numpy()
#         output = (output*255).astype(np.uint8)
        image=image.cpu().detach().numpy()
        for j in range(output.shape[0]):
            mask = output[j,0,...]
#             image_ = image[j,0,...]
#             plt.imshow(mask)
#             plt.figure()
            mask = morphology.closing(mask,morphology.square(5))
            mask[mask>0]=1.
            mask = mask*255
#             plt.imshow(mask)
#             plt.figure()
#             plt.imshow(image_)
            image_ = image[j,0,...]
            image_ = image_*255
            save_name_mask = os.path.join(COVID_lung_masks_dir,('_').join(path[j].split('/')[5:8]))
            save_name_image = os.path.join(COVID_images_dir,('_').join(path[j].split('/')[5:8]))
#             print(save_name)
#             break
#         break
            cv2.imwrite(save_name_mask,mask)
            cv2.imwrite(save_name_image,image_)

# CP images

In [9]:
#save image slices
images = glob.glob('/data2/Cpeng/CP_raw_images/*')
print(len(images))
print(images[0])
for i in range(len(images)):
    image = cv2.imread(images[i],0)
    path_ = ('/').join(images[i].split('/')[-1].split('_')[0:3])
    save_path_ = os.path.join('/data2/Cpeng/CP_raw_images_patients/',('/').join(images[i].split('/')[-1].split('_')[0:2]))
#     print(save_path_)
    if not os.path.exists(save_path_):
        os.makedirs(save_path_)
    save_path = os.path.join('/data2/Cpeng/CP_raw_images_patients/',('/').join(images[i].split('/')[-1].split('_')[0:3]))
#     print(save_path)
#     break
    cv2.imwrite(save_path,image)

In [9]:
import pandas as pd
lesion_slices_dir = "/data2/Cpeng/COVID/"
lesion_slices = pd.read_csv('/data2/Cpeng/COVID/lesions_slices.csv')
CP_slices = lesion_slices['imgpath'].tolist()[0:36894]
print(CP_slices[0])
print(len(CP_slices))
CP_image_dir = '/data2/Cpeng/COVID/'
test_image = cv2.imread(lesion_slices_dir+CP_slices[0],0)
print(test_image.shape)
print(test_image.min())
print(test_image.max())
CP_slices_raw = []
for i in range(len(CP_slices)):
    if os.path.exists(lesion_slices_dir+CP_slices[i]):
        CP_slices_raw.append(lesion_slices_dir+CP_slices[i])
print(len(CP_slices_raw))

CP_raw_images_file = '/data2/Cpeng/COVID/CP_raw_images.csv'
df = pd.DataFrame(CP_slices_raw)
df.to_csv(CP_raw_images_file,header='imgpath')

CP/3783/5727/0008.png
36894
(512, 512)
37
255
36781


In [17]:
print(CP_slices_raw[0])

/data2/Cpeng/COVID/CP/3783/5727/0008.png


In [10]:
import cv2
from torch.utils.data import Dataset
class Lung_segmentation_1(Dataset):
    def __init__(self, file_path_list, phase='train'):
        self.path_list = file_path_list
        self.phase = phase
    
    def __getitem__(self, index):
        
        image= self.load_data(self.path_list[index])
        image = self.process_data(image)
        
        return image, self.path_list[index]
        
    def load_data(self, file_path):
        image = cv2.imread(file_path,0)
        image = cv2.resize(image, (256, 256), interpolation=cv2.INTER_CUBIC)
        image = image-image.min()
        image = (image/image.max()).astype(np.float32)
        
        return image
        
    def process_data(self, *args):
        return [item[np.newaxis, :, :].astype(np.float32) for item in args]
    
    def __len__(self):
        return len(self.path_list)
    


In [26]:
torch.backends.cudnn.benchmark = True
device = torch.device('cpu' if not torch.cuda.is_available() else 'cuda')
segmentation_model = UNet(1,1).cuda()
checkpoint = torch.load('/data2/Cpeng/model_lung_segmentation/checkpoint_{}.pth.tar'.format(360), map_location="cuda:0")
segmentation_model.load_state_dict(checkpoint['state_dict'])
valset = Lung_segmentation_1(file_path_list=CP_slices_raw, phase='test')
val_loader = torch.utils.data.DataLoader(valset, batch_size=40, shuffle=True, num_workers=4)

In [28]:
segmentation_model.eval()
CP_lung_masks_dir = "/data2/Cpeng/CP_lung_masks/"
CP_images_dir = "/data2/Cpeng/CP_raw_images/"
with torch.no_grad():
    for i, data in enumerate(val_loader):
        image,path= data
        image = image[0].cuda()
#         print(path)
        output = segmentation_model(image)
        output = torch.sigmoid(output)
        output[output>0.5]=1.0
        output[output<0.5]=0.0
        output=output.cpu().detach().numpy()
#         output = (output*255).astype(np.uint8)
        image=image.cpu().detach().numpy()
        for j in range(output.shape[0]):
            mask = output[j,0,...]
#             image_ = image[j,0,...]
#             plt.imshow(mask)
#             plt.figure()
            mask = morphology.closing(mask,morphology.square(5))
            mask[mask>0]=1.
            mask = mask*255
#             plt.imshow(mask)
            
            image_ = image[j,0,...]
            image_ = image_*255
#             plt.figure()
#             plt.imshow(image_)
            save_name_mask = os.path.join(CP_lung_masks_dir,('_').join(path[j].split('/')[5:8]))
            save_name_image = os.path.join(CP_images_dir,('_').join(path[j].split('/')[5:8]))
#             print(save_name)
#             break
#         break
            cv2.imwrite(save_name_mask,mask)
            cv2.imwrite(save_name_image,image_)

# Normal_images

In [8]:
#save image slices
images = glob.glob('/data2/Cpeng/Normal_raw_images/*')
print(len(images))
print(images[0])
for i in range(len(images)):
    image = cv2.imread(images[i],0)
    path_ = ('/').join(images[i].split('/')[-1].split('_')[0:3])
    save_path_ = os.path.join('/data2/Cpeng/Normal_raw_images_patients/',('/').join(images[i].split('/')[-1].split('_')[0:2]))
#     print(save_path_)
    if not os.path.exists(save_path_):
        os.makedirs(save_path_)
    save_path = os.path.join('/data2/Cpeng/Normal_raw_images_patients/',('/').join(images[i].split('/')[-1].split('_')[0:3]))
#     print(save_path)
#     break
    cv2.imwrite(save_path,image)

In [4]:
Normal_slices_raw=glob.glob('/data2/Cpeng/COVID/Normal_only_raw_images/Normal/*/*/*')
print(len(Normal_slices_raw))

45758


In [5]:
import cv2
from torch.utils.data import Dataset
class Lung_segmentation_1(Dataset):
    def __init__(self, file_path_list, phase='train'):
        self.path_list = file_path_list
        self.phase = phase
    
    def __getitem__(self, index):
        
        image= self.load_data(self.path_list[index])
        image = self.process_data(image)
        
        return image, self.path_list[index]
        
    def load_data(self, file_path):
        image = cv2.imread(file_path,0)
        image = cv2.resize(image, (256, 256), interpolation=cv2.INTER_CUBIC)
        image = image-image.min()
        image = (image/image.max()).astype(np.float32)
        
        return image
        
    def process_data(self, *args):
        return [item[np.newaxis, :, :].astype(np.float32) for item in args]
    
    def __len__(self):
        return len(self.path_list)
    


In [6]:
torch.backends.cudnn.benchmark = True
device = torch.device('cpu' if not torch.cuda.is_available() else 'cuda')
segmentation_model = UNet(1,1).cuda()
checkpoint = torch.load('/data2/Cpeng/model_lung_segmentation/checkpoint_{}.pth.tar'.format(360), map_location="cuda:0")
segmentation_model.load_state_dict(checkpoint['state_dict'])
valset = Lung_segmentation_1(file_path_list=Normal_slices_raw, phase='test')
val_loader = torch.utils.data.DataLoader(valset, batch_size=40, shuffle=True, num_workers=16)

In [7]:
segmentation_model.eval()
Normal_lung_masks_dir = "/data2/Cpeng/Normal_lung_masks/"
Normal_images_dir = "/data2/Cpeng/Normal_raw_images/"
with torch.no_grad():
    for i, data in enumerate(val_loader):
        image,path= data
        image = image[0].cuda()
#         print(path)
        output = segmentation_model(image)
        output = torch.sigmoid(output)
        output[output>0.5]=1.0
        output[output<0.5]=0.0
        output=output.cpu().detach().numpy()
#         output = (output*255).astype(np.uint8)
        image=image.cpu().detach().numpy()
        for j in range(output.shape[0]):
            mask = output[j,0,...]
#             image_ = image[j,0,...]
#             plt.imshow(mask)
#             plt.figure()
            mask = morphology.closing(mask,morphology.square(5))
            mask[mask>0]=1.
            mask = mask*255
#             plt.imshow(mask)
            
            image_ = image[j,0,...]
            image_ = image_*255
#             plt.figure()
#             plt.imshow(image_)
            if mask.sum()>0:
                save_name_mask = os.path.join(Normal_lung_masks_dir,('_').join(path[j].split('/')[6:9]))
                save_name_image = os.path.join(Normal_images_dir,('_').join(path[j].split('/')[6:9]))
    #             print(save_name)
#             break
#         break
                cv2.imwrite(save_name_mask,mask)
                cv2.imwrite(save_name_image,image_)

# Re-split the dataset

In [14]:
NCP_images_patients = glob.glob('/data2/Cpeng/NCP_raw_images_patients/*/*')
CP_images_patients = glob.glob('/data2/Cpeng/CP_raw_images_patients/*/*')
Normal_images_patients = glob.glob('/data2/Cpeng/Normal_raw_images_patients/*/*')
print(len(NCP_images_patients))
print(len(CP_images_patients))
print(len(Normal_images_patients))

825
1043
472


In [15]:
print(NCP_images_patients[0])
index = '_'.join(patients_images[i].split('/')[4:7])

/data2/Cpeng/NCP_raw_images_patients/222/1588


In [22]:
from sklearn.model_selection import train_test_split
import tqdm
import glob
import os
NCP_images_patients = glob.glob('/data2/Cpeng/NCP_raw_images_patients/*/*')
CP_images_patients = glob.glob('/data2/Cpeng/CP_raw_images_patients/*/*')
Normal_images_patients = glob.glob('/data2/Cpeng/Normal_raw_images_patients/*/*')
NCP_train_patients, NCP_val_patients, _, _ = train_test_split(NCP_images_patients, NCP_images_patients, test_size=0.2, random_state=42)
CP_train_patients, CP_val_patients, _, _ = train_test_split(CP_images_patients, CP_images_patients, test_size=0.2, random_state=42)
Normal_train_patients, Normal_val_patients, _, _ = train_test_split(Normal_images_patients, Normal_images_patients, test_size=0.2, random_state=42)
NCP_train = []
NCP_val = []
CP_train = []
CP_val = []
Normal_train = []
Normal_val = []
############### For NCP
for i in tqdm.tqdm(range(len(NCP_train_patients))):
    patients_images = glob.glob(NCP_train_patients[i]+'/*')
    for j in range(len(patients_images)):
        index = '_'.join(patients_images[j].split('/')[4:7])
        path = os.path.join('/'.join(patients_images[j].split('/')[0:4]),index)
        NCP_train.append(path)
for i in tqdm.tqdm(range(len(NCP_val_patients))):
    patients_images = glob.glob(NCP_val_patients[i]+'/*')
    for j in range(len(patients_images)):
        index = '_'.join(patients_images[j].split('/')[4:7])
        path = os.path.join('/'.join(patients_images[j].split('/')[0:4]),index)
        NCP_val.append(path)
############### For CP
for i in tqdm.tqdm(range(len(CP_train_patients))):
    patients_images = glob.glob(CP_train_patients[i]+'/*')
    for j in range(len(patients_images)):
        index = '_'.join(patients_images[j].split('/')[4:7])
        path = os.path.join('/'.join(patients_images[j].split('/')[0:4]),index)
        CP_train.append(path)
for i in tqdm.tqdm(range(len(CP_val_patients))):
    patients_images = glob.glob(CP_val_patients[i]+'/*')
    for j in range(len(patients_images)):
        index = '_'.join(patients_images[j].split('/')[4:7])
        path = os.path.join('/'.join(patients_images[j].split('/')[0:4]),index)
        CP_val.append(path)
############### For Normal
for i in tqdm.tqdm(range(len(Normal_train_patients))):
    patients_images = glob.glob(Normal_train_patients[i]+'/*')
    for j in range(len(patients_images)):
        index = '_'.join(patients_images[j].split('/')[4:7])
        path = os.path.join('/'.join(patients_images[j].split('/')[0:4]),index)
        Normal_train.append(path)
for i in tqdm.tqdm(range(len(Normal_val_patients))):
    patients_images = glob.glob(Normal_val_patients[i]+'/*')
    for j in range(len(patients_images)):
        index = '_'.join(patients_images[j].split('/')[4:7])
        path = os.path.join('/'.join(patients_images[j].split('/')[0:4]),index)
        Normal_val.append(path)
print(len(NCP_train))
print(len(NCP_val))
print(len(CP_train))
print(len(CP_val))
print(len(Normal_train))
print(len(Normal_val))

100%|██████████| 660/660 [00:00<00:00, 5410.94it/s]
100%|██████████| 165/165 [00:00<00:00, 5094.33it/s]
100%|██████████| 834/834 [00:00<00:00, 4120.19it/s]
100%|██████████| 209/209 [00:00<00:00, 3806.12it/s]
100%|██████████| 377/377 [00:00<00:00, 1673.53it/s]
100%|██████████| 95/95 [00:00<00:00, 1708.87it/s]

16839
4471
28821
7960
34622
8540





In [23]:
print(NCP_train[0])

/data2/Cpeng/NCP_raw_images_patients/265_1677_0019.png
