In [1]:
import io
import matplotlib.pyplot as plt
import time
import os
import copy
from glob import glob
import torch
from tqdm import tqdm
import warnings
import pandas as pd

import albumentations as A
from PIL import Image
import cv2
from albumentations.pytorch import ToTensorV2
warnings.simplefilter('ignore')


device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)
print(torch.cuda.get_device_name())

  from .autonotebook import tqdm as notebook_tqdm


cuda:0
NVIDIA GeForce RTX 3050 Laptop GPU


In [3]:
import torch
from torch.utils.data import Dataset

class AlzheimerDataset(Dataset):
    def __init__(self, images_filepaths, transform=None):
        self.images_filepaths = images_filepaths
        self.transform = transform

    def __len__(self):
        return len(self.images_filepaths)

    def __getitem__(self, idx):
        image_filepath = self.images_filepaths[idx]
        image = cv2.imread(image_filepath)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        if os.path.normpath(image_filepath).split(os.sep)[-2] == "Mild_Demented":
            label = 0
        elif os.path.normpath(image_filepath).split(os.sep)[-2] == "Moderate_Demented":
            label=1
        elif os.path.normpath(image_filepath).split(os.sep)[-2] == "Non_Demented":
            label=2
        else:
            label = 3
        if self.transform is not None:
            image = self.transform(image=image)["image"]

        return image, label

In [None]:
#pip install split-folders (记得安装)
import splitfolders

path='./Dataset'
splitfolders.ratio(path,ratio=(0.7,0.3,0))
# 拆分训练集,验证集，测试集

In [4]:
#将train，validation的数据保存到output文件夹
datasets={
        'train':[],
        'val':[]
    }
for phase in ['train','val']:
    l=[]
    for i in glob(f'./output/{phase}/**/*'):
        l.append(i)
    datasets[phase]=l

In [5]:
#对数据进行Augmentation处理
train_transform = A.Compose(
    [
        A.Resize(height=128, width=128),  # 调整图像大小为 128x128
        A.ShiftScaleRotate(shift_limit=0.05, scale_limit=0.05, rotate_limit=15, p=0.5),  # 随机平移、缩放和旋转
        A.RGBShift(r_shift_limit=15, g_shift_limit=15, b_shift_limit=15, p=0.5),  # 随机RGB通道偏移
        A.RandomBrightnessContrast(p=0.5),  # 随机亮度和对比度调整
        A.ColorJitter(),  # 随机色彩抖动
        A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),  # 标准化
        ToTensorV2(),  # 转换为PyTorch张量
    ]
)

original_transform = A.Compose(
    [
        A.Resize(128, 128),  # 调整图像大小为 128x128
        A.CenterCrop(height=128, width=128),  # 中心裁剪
        A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),  # 标准化
        ToTensorV2(),  # 转换为PyTorch张量
    ]
)

#创建实例
alb_dataset = AlzheimerDataset(images_filepaths=datasets['train'], transform=train_transform)
original_dataset=AlzheimerDataset(images_filepaths=datasets['train'], transform=original_transform)

dataset_sizes = {x: len(datasets[x]) for x in ['train', 'val']}
class_names = ['Mild_Demented','Moderate_Demented','Non_Demented','Very_Mild_Demented']
dataset_sizes

{'train': 4479, 'val': 0}

In [7]:
#保存augmentation处理后的数据于predata文件夹
import uuid
try:
    os.mkdir('./prepdata')
    os.mkdir('./prepdata/train')
    os.mkdir('./prepdata/train/Mild_Demented')
    os.mkdir('./prepdata/train/Moderate_Demented')
    os.mkdir('./prepdata/train/Non_Demented')
    os.mkdir('./prepdata/train/Very_Mild_Demented')
    
except:
    print('Files exist')

def OriginalSave(originalDataset,limit):
    s={0:'Mild_Demented',1:'Moderate_Demented',2:'Non_Demented',3:'Very_Mild_Demented'}
    originalDataset.transform = A.Compose([t for t in originalDataset.transform if not isinstance(t, (A.Normalize, ToTensorV2))])
    
    for idx in range(limit):
        image,label=originalDataset[idx]

        cv2.imwrite(f'./prepdata/{s[label]}/{str(uuid.uuid4())}.jpg',image)
OriginalSave(original_dataset, dataset_sizes['train'])

Files exist


In [7]:
# balance data
def AlbSave(albDataset,limit):
    s={0:'Mild_Demented',1:'Moderate_Demented',2:'Non_Demented',3:'Very_Mild_Demented'}
    sizes={'Mild_Demented':896,'Moderate_Demented':64,'Non_Demented':3200,'Very_Mild_Demented':2240}

    albDataset.transform = A.Compose([t for t in albDataset.transform if not isinstance(t, (A.Normalize, ToTensorV2))])
    for idx in range(limit):
        for _ in range(7):
            image,label=albDataset[idx]
            if label==0:
                cv2.imwrite(f'./prepdata/train/{s[label]}/{str(uuid.uuid4())}.jpg',image)

        for _ in range(100):
            image,label=albDataset[idx]
            if label==1:
                cv2.imwrite(f'./prepdata/train/{s[label]}/{str(uuid.uuid4())}.jpg',image)

        for _ in range(2):
            image,label=albDataset[idx]
            if label==2:
                cv2.imwrite(f'./prepdata/train/{s[label]}/{str(uuid.uuid4())}.jpg',image)
                
        for _ in range(3):
            image,label=albDataset[idx]
            if label==3:
                cv2.imwrite(f'./prepdata/train/{s[label]}/{str(uuid.uuid4())}.jpg',image)

AlbSave(alb_dataset,dataset_sizes['train'])

KeyboardInterrupt: 

In [6]:
#不对验证集进行augmentation处理
import shutil
shutil.move('./output/val','./prepdata/')

Error: Destination path './prepdata/val' already exists