In [2]:
import torch
import torchvision
from torchvision import transforms as T
import numpy as np

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
train_dataset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True)

Files already downloaded and verified


In [27]:
# Which classes do we have
classes = list(train_dataset.classes)
print(classes)

# Count current classes
class_counter = np.zeros(len(classes))
for data, label in train_dataset:
    class_counter[label] +=1

print(class_counter)



percentage = [1, 1, 1, 0.8, 0.5, 0.2, 0.2, 0.2, 0.1, 0.1]

print(class_counter * percentage)

['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']
[5000. 5000. 5000. 5000. 5000. 5000. 5000. 5000. 5000. 5000.]
[5000. 5000. 5000. 4000. 2500. 1000. 1000. 1000.  500.  500.]


In [21]:
# Create a list of the number of images for each class
num_images = [500, 500, 500, 500, 500, 1000, 1000, 1000, 1000, 2000]

# Create a list of indices for the total number of images
indices = list(range(len(train_dataset)))

""" We want to go through all the data. If the data matches the current class, put it in a 
tmp list. And then just save a subsection of it to our dataset"""
class_indices = []
for i in range(len(classes)):  # i ist jeweils eine Klasse
    data_from_this_class = []
    for j in range(len(train_dataset)):   # j ist jeweils ein data label paar aus dem Datensatz
        if train_dataset[j][1] == i:  # Wenn die aktuelle Klasse mit der Datei übereinstimmt
            data_from_this_class.append(indices[j]) # wir wissen dass element j zu dieser Klasse dazu gehört
    
    class_indices.append(data_from_this_class[:num_images[i]])  # Nimm aber nur so viele wie oben angegeben


# Flatten the list
class_indices = [item for sublist in class_indices for item in sublist]

# Create a new dataset with the new indices
imbalanced_dataset = torch.utils.data.Subset(train_dataset, class_indices)


In [23]:

train_transform = T.Compose([T.Resize((224,224)),  #resises the image so it can be perfect for our model.
                                T.RandomHorizontalFlip(), # FLips the image w.r.t horizontal axis
                                T.RandomRotation(10),     #Rotates the image to a specified angel
                                T.RandomAffine(0, shear=10, scale=(0.8,1.2)), #Performs actions like zooms, change shear angles.
                                T.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2), # Set the color params
                                T.ToTensor(), # comvert the image to tensor so that it can work with torch
                                T.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)) #Normalize all the images
                                ])

transform = T.Compose([T.ToTensor(),
                        T.Resize((224,224)),
                        T.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

trainset2 = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=train_transform)


testset2 = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)

num_classes = 10




Files already downloaded and verified
Files already downloaded and verified


In [24]:
print(len(trainset2))

50000


In [22]:
# Which classes do we have
classes = list(train_dataset.classes)
print(classes)

# Count current classes
class_counter = np.zeros(len(classes))
for data, label in imbalanced_dataset:
    class_counter[label] +=1

print(class_counter)

['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']
[ 500.  500.  500.  500.  500. 1000. 1000. 1000. 1000. 2000.]


In [47]:
torch.randint(1,9, (1,))

tensor([7])

In [41]:
trainset[50][1]

9

In [5]:
#accessing CIFAR10 dataset
dataset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True)

#changing the dataset
for i in range(len(dataset)):
    if torch.rand(1)<0.1:  # with a probability of 10%
        print(dataset[i][1])
        changed_element = (dataset[i][1] + torch.randint(1,9, (1,))) % 10
        print(changed_element)
        list_dataset = list(dataset[i])
        list_dataset[1]= changed_element
        dataset[i] = tuple(list_dataset)
        print(dataset[i][1])


Files already downloaded and verified
3
tensor([1])


TypeError: 'CIFAR10' object does not support item assignment

In [None]:
# Add incorrect labels with 10% probability
import random
for data in dataset:
    if random.random() < 0.1:
        # Randomly select a label not equal to the correct one
        incorrect_label = random.choice([x for x in range(10) if x != data[1]])
        data = (data[0], incorrect_label)
        


### Get HAM Image Data

In [3]:
import pandas as pd
import os
from glob import glob

data_dir = "Ham10000"
all_image_path = glob(os.path.join(data_dir, '*', '*.jpg'))
imageid_path_dict = {os.path.splitext(os.path.basename(x))[0]: x for x in all_image_path}
lesion_type_dict = {
    'nv': 'Melanocytic nevi',
    'mel': 'dermatofibroma',
    'bkl': 'Benign keratosis-like lesions ',
    'bcc': 'Basal cell carcinoma',
    'akiec': 'Actinic keratoses',
    'vasc': 'Vascular lesions',
    'df': 'Dermatofibroma'
}


In [26]:
df_original = pd.read_csv(os.path.join(data_dir, 'Ham_data.csv'))
df_original['path'] = df_original['image_id'].map(imageid_path_dict.get)
#df_original['cell_type'] = df_original['dx'].map(lesion_type_dict.get)
df_original['cell_type_idx'] = pd.Categorical(df_original['benign_malignant']).codes
df_original.head()

Unnamed: 0,id,image_id,lesion_id,patient_id,dx,dx_type,age,sex,localization,melanocytic,benign_malignant,image_type,dataset,image_path,set,fold,path,cell_type_idx
0,5aaf12491165976913627e89,ISIC_0024306,HAM_0000550,,nevus,serial imaging showing no change,45.0,male,,True,benign,dermoscopic,HAM10000,original/HAM10000/600x450/ISIC_0024306.png,train,0.0,Ham10000\HAM10000_images_part_1\ISIC_0024306.jpg,0
1,5aaf12491165976913627e95,ISIC_0024307,HAM_0003577,,nevus,serial imaging showing no change,50.0,male,lower extremity,True,benign,dermoscopic,HAM10000,original/HAM10000/600x450/ISIC_0024307.png,train,0.0,Ham10000\HAM10000_images_part_1\ISIC_0024307.jpg,0
2,5aaf12491165976913627ea0,ISIC_0024308,HAM_0001477,,nevus,serial imaging showing no change,55.0,female,,True,benign,dermoscopic,HAM10000,original/HAM10000/600x450/ISIC_0024308.png,train,0.0,Ham10000\HAM10000_images_part_1\ISIC_0024308.jpg,0
3,5aaf12491165976913627eab,ISIC_0024309,HAM_0000484,,nevus,serial imaging showing no change,40.0,male,,True,benign,dermoscopic,HAM10000,original/HAM10000/600x450/ISIC_0024309.png,train,0.0,Ham10000\HAM10000_images_part_1\ISIC_0024309.jpg,0
4,5aaf12491165976913627eb6,ISIC_0024310,HAM_0003350,,melanoma,histopathology,60.0,male,anterior torso,True,malignant,dermoscopic,HAM10000,original/HAM10000/600x450/ISIC_0024310.png,train,0.0,Ham10000\HAM10000_images_part_1\ISIC_0024310.jpg,1


In [24]:
df_train = df_original[(df_original["set"]=="train") & (df_original["fold"]==0.0)]
df_test = df_original[(df_original["set"]=="test")]
df_val = df_original[(df_original["set"]=="val") & (df_original["fold"]==0.0)]

print("traindata", len(df_train))
print("testdata", len(df_test))
print("valdata", len(df_val))

traindata 11664
testdata 200
valdata 2917


In [15]:
df_train = df_train.reset_index()
df_val = df_val.reset_index()
df_test = df_test.reset_index()

In [16]:


# Define a pytorch dataloader for this dataset
class HAM10000(Dataset):
    def __init__(self, df, transform=None):
        self.df = df
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, index):
        # Load data and get label
        X = Image.open(self.df['path'][index])
        y = torch.tensor(int(self.df['cell_type_idx'][index]))

        if self.transform:
            X = self.transform(X)

        return X, y



In [17]:
# pytorch libraries
import torch
from torch import optim,nn
from torch.autograd import Variable
from torch.utils.data import DataLoader,Dataset
from torchvision import models,transforms


norm_mean = (0.49139968, 0.48215827, 0.44653124)
norm_std = (0.24703233, 0.24348505, 0.26158768)
input_size = 224
# define the transformation of the train images.
train_transform = transforms.Compose([transforms.Resize((input_size,input_size)),transforms.RandomHorizontalFlip(),
                                      transforms.RandomVerticalFlip(),transforms.RandomRotation(20),
                                      transforms.ColorJitter(brightness=0.1, contrast=0.1, hue=0.1),
                                        transforms.ToTensor(), transforms.Normalize(norm_mean, norm_std)])
# define the transformation of the val images.
val_transform = transforms.Compose([transforms.Resize((input_size,input_size)), transforms.ToTensor(),
                                    transforms.Normalize(norm_mean, norm_std)])

In [18]:
training_set = HAM10000(df_train, transform=train_transform)
train_loader = DataLoader(training_set, batch_size=32, shuffle=True, num_workers=4)
# Same for the validation set:
validation_set = HAM10000(df_val, transform=train_transform)
val_loader = DataLoader(validation_set, batch_size=32, shuffle=False, num_workers=4)

#### From Class

In [27]:
class HAM10000(Dataset):
    """Pytroch dataloader for the HAM Dataset"""
    def __init__(self, df, transform=None):
        self.df = df
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, index):
        # Load data and get label
        print("###############")
        print(index)
        print(self.df['path'][index])
        X = Image.open(self.df['path'][index])
        print(X)
        y = torch.tensor(int(self.df['cell_type_idx'][index]))
        print(y)

        if self.transform:
            X = self.transform(X)

        return X, y

In [None]:
train_transform = T.Compose([T.Resize((32,32)),  #resises the image so it can be perfect for our model.
                                T.RandomHorizontalFlip(), # FLips the image w.r.t horizontal axis
                                T.RandomRotation(10),     #Rotates the image to a specified angel
                                T.RandomAffine(0, shear=10, scale=(0.8,1.2)), #Performs actions like zooms, change shear angles.
                                T.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2), # Set the color params
                                T.ToTensor(), # comvert the image to tensor so that it can work with torch
                                T.Normalize((0.49139968, 0.48215827, 0.44653124), (0.24703233, 0.24348505, 0.26158768)) #Normalize all the images
                                ])

transform = T.Compose([T.ToTensor(),
                        T.Resize((224,224)),
                        T.Normalize((0.49139968, 0.48215827, 0.44653124), (0.24703233, 0.24348505, 0.26158768))])



In [30]:
# Retrieve all images
data_dir = "Ham10000"
all_image_path = glob(os.path.join(data_dir, '*', '*.jpg'))
imageid_path_dict = {os.path.splitext(os.path.basename(x))[0]: x for x in all_image_path}

# Get data .csv and add new info to dataframe
df_original = pd.read_csv(os.path.join(data_dir, 'Ham_data.csv'))
df_original['path'] = df_original['image_id'].map(imageid_path_dict.get)
df_original['cell_type_idx'] = pd.Categorical(df_original['benign_malignant']).codes

display(df_original.head(5))


Unnamed: 0,id,image_id,lesion_id,patient_id,dx,dx_type,age,sex,localization,melanocytic,benign_malignant,image_type,dataset,image_path,set,fold,path,cell_type_idx
0,5aaf12491165976913627e89,ISIC_0024306,HAM_0000550,,nevus,serial imaging showing no change,45.0,male,,True,benign,dermoscopic,HAM10000,original/HAM10000/600x450/ISIC_0024306.png,train,0.0,Ham10000\HAM10000_images_part_1\ISIC_0024306.jpg,0
1,5aaf12491165976913627e95,ISIC_0024307,HAM_0003577,,nevus,serial imaging showing no change,50.0,male,lower extremity,True,benign,dermoscopic,HAM10000,original/HAM10000/600x450/ISIC_0024307.png,train,0.0,Ham10000\HAM10000_images_part_1\ISIC_0024307.jpg,0
2,5aaf12491165976913627ea0,ISIC_0024308,HAM_0001477,,nevus,serial imaging showing no change,55.0,female,,True,benign,dermoscopic,HAM10000,original/HAM10000/600x450/ISIC_0024308.png,train,0.0,Ham10000\HAM10000_images_part_1\ISIC_0024308.jpg,0
3,5aaf12491165976913627eab,ISIC_0024309,HAM_0000484,,nevus,serial imaging showing no change,40.0,male,,True,benign,dermoscopic,HAM10000,original/HAM10000/600x450/ISIC_0024309.png,train,0.0,Ham10000\HAM10000_images_part_1\ISIC_0024309.jpg,0
4,5aaf12491165976913627eb6,ISIC_0024310,HAM_0003350,,melanoma,histopathology,60.0,male,anterior torso,True,malignant,dermoscopic,HAM10000,original/HAM10000/600x450/ISIC_0024310.png,train,0.0,Ham10000\HAM10000_images_part_1\ISIC_0024310.jpg,1


In [31]:
# Get train val and testset
df_train = df_original[(df_original["set"]=="train") & (df_original["fold"]==0.0)]
df_test = df_original[(df_original["set"]=="test")]
df_val = df_original[(df_original["set"]=="val") & (df_original["fold"]==0.0)]

display(df_train.head(5))

df_train = df_train.reset_index()
df_val = df_val.reset_index()
df_test = df_test.reset_index()

display(df_train.head(5))

trainset = HAM10000(df_train, transform=train_transform)
validationset = HAM10000(df_val, transform=train_transform)
testset = HAM10000(df_test, transform=transform)


Unnamed: 0,id,image_id,lesion_id,patient_id,dx,dx_type,age,sex,localization,melanocytic,benign_malignant,image_type,dataset,image_path,set,fold,path,cell_type_idx
0,5aaf12491165976913627e89,ISIC_0024306,HAM_0000550,,nevus,serial imaging showing no change,45.0,male,,True,benign,dermoscopic,HAM10000,original/HAM10000/600x450/ISIC_0024306.png,train,0.0,Ham10000\HAM10000_images_part_1\ISIC_0024306.jpg,0
1,5aaf12491165976913627e95,ISIC_0024307,HAM_0003577,,nevus,serial imaging showing no change,50.0,male,lower extremity,True,benign,dermoscopic,HAM10000,original/HAM10000/600x450/ISIC_0024307.png,train,0.0,Ham10000\HAM10000_images_part_1\ISIC_0024307.jpg,0
2,5aaf12491165976913627ea0,ISIC_0024308,HAM_0001477,,nevus,serial imaging showing no change,55.0,female,,True,benign,dermoscopic,HAM10000,original/HAM10000/600x450/ISIC_0024308.png,train,0.0,Ham10000\HAM10000_images_part_1\ISIC_0024308.jpg,0
3,5aaf12491165976913627eab,ISIC_0024309,HAM_0000484,,nevus,serial imaging showing no change,40.0,male,,True,benign,dermoscopic,HAM10000,original/HAM10000/600x450/ISIC_0024309.png,train,0.0,Ham10000\HAM10000_images_part_1\ISIC_0024309.jpg,0
4,5aaf12491165976913627eb6,ISIC_0024310,HAM_0003350,,melanoma,histopathology,60.0,male,anterior torso,True,malignant,dermoscopic,HAM10000,original/HAM10000/600x450/ISIC_0024310.png,train,0.0,Ham10000\HAM10000_images_part_1\ISIC_0024310.jpg,1


Unnamed: 0,index,id,image_id,lesion_id,patient_id,dx,dx_type,age,sex,localization,melanocytic,benign_malignant,image_type,dataset,image_path,set,fold,path,cell_type_idx
0,0,5aaf12491165976913627e89,ISIC_0024306,HAM_0000550,,nevus,serial imaging showing no change,45.0,male,,True,benign,dermoscopic,HAM10000,original/HAM10000/600x450/ISIC_0024306.png,train,0.0,Ham10000\HAM10000_images_part_1\ISIC_0024306.jpg,0
1,1,5aaf12491165976913627e95,ISIC_0024307,HAM_0003577,,nevus,serial imaging showing no change,50.0,male,lower extremity,True,benign,dermoscopic,HAM10000,original/HAM10000/600x450/ISIC_0024307.png,train,0.0,Ham10000\HAM10000_images_part_1\ISIC_0024307.jpg,0
2,2,5aaf12491165976913627ea0,ISIC_0024308,HAM_0001477,,nevus,serial imaging showing no change,55.0,female,,True,benign,dermoscopic,HAM10000,original/HAM10000/600x450/ISIC_0024308.png,train,0.0,Ham10000\HAM10000_images_part_1\ISIC_0024308.jpg,0
3,3,5aaf12491165976913627eab,ISIC_0024309,HAM_0000484,,nevus,serial imaging showing no change,40.0,male,,True,benign,dermoscopic,HAM10000,original/HAM10000/600x450/ISIC_0024309.png,train,0.0,Ham10000\HAM10000_images_part_1\ISIC_0024309.jpg,0
4,4,5aaf12491165976913627eb6,ISIC_0024310,HAM_0003350,,melanoma,histopathology,60.0,male,anterior torso,True,malignant,dermoscopic,HAM10000,original/HAM10000/600x450/ISIC_0024310.png,train,0.0,Ham10000\HAM10000_images_part_1\ISIC_0024310.jpg,1


NameError: name 'transform' is not defined