In [100]:
#测试随机生成bbox，最后检查生成图片情况
from PIL import Image
from torchvision import transforms
from config import DATA_CONFIG
import random

def data_transforms(patch):
    data_aug = DATA_CONFIG['data_augmentation']
    patch_size = DATA_CONFIG['patch_size']
    mean,std = DATA_CONFIG['mean'],DATA_CONFIG['std']

    transform = transforms.Compose([
            transforms.RandomHorizontalFlip(),
            transforms.RandomVerticalFlip(),
            transforms.ColorJitter(
                brightness=data_aug['brightness'],
                contrast=data_aug['contrast'],
                saturation=data_aug['saturation'],
                hue=data_aug['hue']
            ),
            transforms.RandomResizedCrop(
                size=(patch_size, patch_size),
                scale=data_aug['scale'],
                ratio=data_aug['ratio']
            ),
            transforms.RandomAffine(
                degrees=data_aug['degrees'],
                translate=data_aug['translate']
            ),
            transforms.RandomGrayscale(0.2),
            transforms.ToTensor(),
            transforms.Normalize(mean, std),
        ])
    patch_1 = generate_patch(patch)
    patch_2 = generate_patch(patch)
    patch_1 = transform(patch_1)
    patch_2 = transform(patch_2)
    return patch_1,patch_2

def generate_patch(img, bbox):
    w, h = img.size
    i = 0
    # if i == 0 and len(bbox) == 4 :
    #     print("bbox:",bbox)
    # if len(bbox) != 4:
    #     print("bbox:",bbox)
    #     print("img_size:",img.size)
    x1, y1, x2, y2 = bbox
    # b_w = random.randint(32, 128)
    # b_h = random.randint(32, 128)
    # x1 = random.randint(10, w-b_w)
    # y1 = random.randint(10, h-b_h)
    # x2 = x1 + b_w
    # y2 = y1 + b_h
    b_w = bbox[2] - bbox[0]
    b_h = bbox[3] - bbox[1]

    x_space = 128 - b_w
    if x1 < w - b_w:
        l_shift = int(random.random() * min(x1, x_space))
        new_x1 = x1 - l_shift
        new_x2 = x2 + (x_space - l_shift)
    else:
        r_shift = int(random.random() * min(w - b_w, x_space))
        new_x1 = x1 - (x_space - r_shift)
        new_x2 = x2 + r_shift

    y_space = 128 - b_h
    if y1 < h - b_h:
        t_shift = int(random.random() * min(y1, y_space))
        new_y1 = y1 - t_shift
        new_y2 = y2 + (y_space - t_shift)
    else:
        d_shift = int(random.random() * min(h - b_h, y_space))
        new_y1 = y1 - (y_space - d_shift)
        new_y2 = y2 + d_shift

    patch = img.crop((new_x1, new_y1, new_x2, new_y2))
    return patch

def pil_loader(path):
    with open(path, 'rb') as f:
        img = Image.open(f)
        return img.convert('RGB')

path = '/dataset/evaluate/train/0/10_left.jpeg'
image = pil_loader(path)
for i in range(10):
    x1 = random.randint(60,452)
    x2 = random.randint(60,452)
    y1 = random.randint(60,452)
    y2 = random.randint(60,452)
    bbox = [x1,x2,y1,y2]
    img = generate_patch(image,bbox)
    patch_1,patch_2 = data_transforms(img)
    save_path = '/dataset/lesion_image/healthy/'+str(i)+path.split('/')[-1]
    patch_1.save(save_path,quality=95,subsampling=0)


In [26]:
#遍历pkl文件，给出lesion数量和lesion所属label
import pickle,re
import os

# data = pickle.load(open('/dataset/lesion_predictions_new/EyePACS_newpredictions_128_08.pkl','rb'))
data = pickle.load(open('./lesion_predictions/EyePACS_lesion_128.pkl','rb'))
dict1 = {'0':0,'1':0,'2':0,'3':0,'4':0}
dict2 = {'0':0,'1':0,'2':0,'3':0,'4':0}
lesion_train = {'0':0,'1':0,'2':0,'3':0,'4':0}
lesion_val = {'0':0,'1':0,'2':0,'3':0,'4':0}

for train_val in data:
    print(len(data[train_val]))
    for img_path,lesion in data[train_val].items():
        label = re.split('/|\\\\',img_path)[-2]
        if train_val == 'train':
            dict1[label] += 1
            lesion_train[label] += len(lesion)
        elif train_val == 'val':
            dict2[label] += 1
            lesion_val[label] += len(lesion)

datapath = '/dataset/evaluate'
train_class_num = {'0':0,'1':0,'2':0,'3':0,'4':0}
val_class_num = {'0':0,'1':0,'2':0,'3':0,'4':0}
for i in os.listdir(datapath):
    for j in os.listdir(os.path.join(datapath,i)):
        
        path = os.path.join(datapath,i)+'/'+j
        files = os.listdir(path)
        if i == 'train':
            train_class_num[j] = len(files)
        elif i == 'val':
            val_class_num[j] = len(files)

print('训练集全部图片：',train_class_num)
print('训练集所使用的图片',dict1)
print('验证集全部图片:',val_class_num)
print('验证集所使用的图片',dict2)

print('训练集所使用的patch分布',lesion_train)
print('验证集所使用的patch分布',lesion_val)

7281
2184
训练集全部图片： {'0': 25810, '1': 2443, '2': 5292, '3': 873, '4': 708}
训练集所使用的图片 {'0': 0, '1': 1411, '2': 4356, '3': 834, '4': 680}
验证集全部图片: {'0': 8130, '1': 720, '2': 1579, '3': 237, '4': 240}
验证集所使用的图片 {'0': 0, '1': 433, '2': 1292, '3': 228, '4': 231}
训练集所使用的patch分布 {'0': 0, '1': 3132, '2': 16757, '3': 7004, '4': 4627}
验证集所使用的patch分布 {'0': 0, '1': 937, '2': 4842, '3': 1779, '4': 1552}


In [12]:
data = pickle.load(open('./lesion_predictions/EyePACS_lesion_128.pkl','rb'))

flag = 0
for train_val in data:
    for img_path,lesion in data[train_val].items():
        if flag == 0:
            flag += 1
            print(lesion)
        break
    
import random
bbox = []
for i in range(10):
    x1 = random.randint(60,452)
    x2 = random.randint(60,452)
    y1 = random.randint(60,452)
    y2 = random.randint(60,452)
    bbox.append((x1,x2,y1,y2))
print(bbox)

[(247.48550347222223, 257.4816160159716, 275.11972656250003, 304.0486912156167), (275.38743489583334, 211.3676519520852, 295.60091145833337, 238.13007431233365), (386.54500868055555, 298.80018855368235, 399.4493923611111, 323.59571872227156)]
[(297, 196, 181, 82), (377, 138, 173, 197), (202, 393, 274, 236), (196, 140, 446, 155), (219, 159, 113, 291), (202, 445, 403, 440), (248, 348, 154, 246), (432, 142, 297, 429), (187, 119, 232, 252), (222, 91, 183, 152)]


In [18]:
#生成数据集
import pickle,re
import os
import random
data = pickle.load(open('/dataset/lesion_predictions_new/EyePACS_newpredictions_128_08.pkl','rb'))

dict1 = {'0':0,'1':0,'2':0,'3':0,'4':0}
dict2 = {'0':0,'1':0,'2':0,'3':0,'4':0}
lesion_train = {'0':0,'1':0,'2':0,'3':0,'4':0}
lesion_val = {'0':0,'1':0,'2':0,'3':0,'4':0}

lesion_dataset = {}
healthy_dataset = {}
healthy_picture_train = [] #记录误诊为健康图片的数组，加入到healthy_dataset中
healthy_picture_val = []
#分离lesion和healthy
for train_val in data:
    if train_val not in lesion_dataset.keys():
        lesion_dataset[train_val] = {}
    if train_val not in healthy_dataset.keys():
        healthy_dataset[train_val] = {}
    print(len(data[train_val]))
    for img_path,lesion in data[train_val].items():
        label = re.split('/|\\\\',img_path)[-2]
        img_picture = re.split('/|\\\\',img_path)[-1]
        if train_val == 'train':
            dict1[label] += 1
            lesion_train[label] += len(lesion)
            if label != '0':
                lesion_dataset[train_val][img_path] = lesion
            else:
                healthy_dataset[train_val][img_path] = lesion
                healthy_picture_train.append(img_picture)
        elif train_val == 'val':
            dict2[label] += 1
            lesion_val[label] += len(lesion)
            if label != '0':
                lesion_dataset[train_val][img_path] = lesion
            else:
                healthy_dataset[train_val][img_path] = lesion
                healthy_picture_val.append(img_picture)

datapath = '/dataset/evaluate' #从数据集健康的图中提取健康的patch
for train_val in os.listdir(datapath):
    for label in os.listdir(os.path.join(datapath,train_val)):
        path = os.path.join(datapath,train_val)+'/'+label
        for  img_files in os.listdir(path):
            if label == '0':
                if train_val == 'train':
                    if img_files not in healthy_picture_train:
                        #随机选取bbox,加入健康patch
                        bbox = []
                        for i in range(4):
                            x1 = random.randint(60,452)
                            x2 = random.randint(60,452)
                            y1 = random.randint(60,452)
                            y2 = random.randint(60,452)
                            bbox.append((x1,x2,y1,y2))
                        img_path = datapath + '/' + train_val + '/' + label + '/' + img_files
                        healthy_dataset[train_val][img_path] = bbox
                elif train_val == 'val':
                    if img_files not in healthy_picture_val:
                        #随机选取bbox,加入健康patch
                        bbox = []
                        for i in range(4):
                            x1 = random.randint(60,452)
                            x2 = random.randint(60,452)
                            y1 = random.randint(60,452)
                            y2 = random.randint(60,452)
                            bbox.append((x1,x2,y1,y2))
                        img_path = datapath + '/' + train_val + '/' + label + '/' + img_files
                        healthy_dataset[train_val][img_path] = bbox

print(len(healthy_picture_train),len(healthy_picture_val))
with open('./lesion_predictions/EyePACS_healthy_128.pkl', 'wb') as f:
    pickle.dump(healthy_dataset, f)
with open('./lesion_predictions/EyePACS_lesion_128.pkl', 'wb') as f2:
    pickle.dump(lesion_dataset,f2)


21578
6736
14297 4552


In [1]:
from torch.utils.data import DataLoader
import pickle
from tqdm import tqdm
from data import generate_dataset_from_pickle,data_transforms
from config import DATA_CONFIG

data_config = DATA_CONFIG
data_index = './lesion_predictions/EyePACS_128.pkl'

data_path = '/dataset/evaluate/'
data_transform = data_transforms(data_config)
train_dataset,val_dataset = generate_dataset_from_pickle(data_path,data_index,data_config,data_transform)

train_loader = DataLoader(
        train_dataset,
        batch_size=20,
        shuffle=True,
        num_workers=0,
        drop_last=True,
        pin_memory=True
    )

progress = tqdm(enumerate(train_loader))
#1290,364 iteration

for step,lesion in progress:
    X1,X2,H = lesion
    if step  == 0 :
        print(len(X1))
        for i in H:
            print(len(i))
    else:
        break

    



1it [00:00,  1.33it/s]

20
20
20
20


1it [00:01,  1.29s/it]


In [8]:
import pickle,re
import os

# data = pickle.load(open('/dataset/lesion_predictions_new/EyePACS_newpredictions_128_08.pkl','rb'))
data = pickle.load(open('./lesion_predictions/EyePACS_128.pkl','rb'))
normal_data = pickle.load(open('/dataset/lesion_predictions_new/EyePACS_newpredictions_128_08.pkl','rb'))
val = normal_data['val']
train = {}
for i,j in data.items():
    for train_val,lesion in data[i].items():
        if train_val == 'train':
            train[i] = lesion

new_data = {'train':train,'val':val}
with open('./lesion_predictions/EyePACS_128.pkl', 'wb') as f:
    pickle.dump(new_data, f)
