In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import datasets, transforms
from torch.optim.lr_scheduler import StepLR

In [3]:
import pandas as pd

# Image folders. Both end with a slash because image file names are
# appended to them by plain string concatenation.
train_path = "./jpeg/train/"
test_path = "./jpeg/test/"

# Metadata tables: one row per image, keyed by the `image_name` column.
train_sheet, test_sheet = (
    pd.read_csv(csv_file) for csv_file in ("./train.csv", "./test.csv")
)

# Preview the first ten training rows.
train_sheet.head(10)

Unnamed: 0,image_name,patient_id,sex,age_approx,anatom_site_general_challenge,diagnosis,benign_malignant,target
0,ISIC_2637011,IP_7279968,male,45.0,head/neck,unknown,benign,0
1,ISIC_0015719,IP_3075186,female,45.0,upper extremity,unknown,benign,0
2,ISIC_0052212,IP_2842074,female,50.0,lower extremity,nevus,benign,0
3,ISIC_0068279,IP_6890425,female,45.0,head/neck,unknown,benign,0
4,ISIC_0074268,IP_8723313,female,55.0,upper extremity,unknown,benign,0
5,ISIC_0074311,IP_2950485,female,40.0,lower extremity,unknown,benign,0
6,ISIC_0074542,IP_4698288,male,25.0,lower extremity,unknown,benign,0
7,ISIC_0075663,IP_6017204,female,35.0,torso,unknown,benign,0
8,ISIC_0075914,IP_7622888,male,30.0,torso,unknown,benign,0
9,ISIC_0076262,IP_5075533,female,50.0,lower extremity,unknown,benign,0


In [4]:
import sys, os
from imageio import imread

# Image identifiers, read column-wise in one pass instead of per-row
# `sheet[col][i]` label lookups (which are slow and only correct while the
# frames keep their default 0..n-1 index).
train_names = [str(name) for name in train_sheet['image_name']]
test_names = [str(name) for name in test_sheet['image_name']]

# partition: split name -> list of image identifiers, in sheet order.
partition = {'train': train_names, 'test': test_names}

# labels: image identifier -> `target` value (only the train sheet is labelled here).
labels = dict(zip(train_names, train_sheet['target']))

# name_to_path: image identifier -> path of its .jpg file on disk.
name_to_path = {name: train_path + name + '.jpg' for name in train_names}
name_to_path.update({name: test_path + name + '.jpg' for name in test_names})

len(partition['train']), len(partition['test'])

(33126, 10982)

In [5]:
import numpy as np
from PIL import Image

class SkinImageDataset(Dataset):
    """Dataset that opens images by name and pairs them with a label or the name.

    Image files are located through the module-level ``name_to_path`` mapping
    (image name -> file path) and opened with ``PIL.Image.open``.

    Args:
        list_names: Sequence of image names; defines the length and the order
            of the dataset.
        labels: Mapping from image name to label. Only read when ``save`` is
            false.
        save: If true, each item is ``(image, image_name)`` instead of
            ``(image, label)``.
        transform: Optional callable applied to the opened PIL image. When it
            is ``None`` the PIL image itself is returned.
    """

    def __init__(self, list_names, labels, save=False, transform=None):
        """Store the sample names, the label mapping and the options."""
        self.labels = labels
        self.list_names = list_names
        self.transform = transform
        self.save = save

    def __len__(self):
        """Return the total number of samples."""
        return len(self.list_names)

    def __getitem__(self, index):
        """Load one sample.

        Returns:
            ``(image, label)`` when ``save`` is false, otherwise
            ``(image, image_name)``.

        Raises:
            KeyError: If the image name is missing from ``name_to_path`` (or
                from ``labels`` when ``save`` is false).
        """
        image_name = self.list_names[index]
        sample_X = Image.open(name_to_path[image_name])

        # Use the transform given to this instance. The previous code tested
        # a global named `transform`, which is unrelated to the constructor
        # argument and may not exist at all.
        if self.transform is not None:
            sample_X = self.transform(sample_X)

        if self.save:
            return sample_X, image_name
        return sample_X, self.labels[image_name]

In [6]:
# Preprocessing: resize to 128x128, then convert to a tensor.
# The steps are passed as a list because Compose applies them in iteration
# order; the set literal used before has no defined order.
transform = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
])

count = len(partition['train'])
# 80/20 boundary for a split over all labelled images; not used by the
# subset split below.
split_from = int(0.8 * count)

# Work on a 10,000-image subset of the labelled data: the first 8,000 images
# for training and the next 2,000 for evaluation. To use every image, slice
# with [:split_from] and [split_from:] instead.
train_dataset_tensor = SkinImageDataset(partition['train'][:8000], labels, save=False, transform=transform)
test_dataset_tensor = SkinImageDataset(partition['train'][8000:10000], labels, save=False, transform=transform)

In [14]:
class AlexNet(nn.Module):
    """Small AlexNet-style CNN: five conv layers followed by one linear layer.

    The convolutional stack ends with 256 channels. Its spatial size depends
    on the input (1x1 for 32x32 images, 4x4 for 128x128 images), so the
    feature map is average-pooled down to 1x1 before the classifier. Without
    that step a 128x128 input flattens to 4096 values and no longer matches
    the 256-input linear layer.

    Args:
        num_classes: Number of output logits (default 2).
    """

    def __init__(self, num_classes=2):
        super(AlexNet, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=5),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(64, 192, kernel_size=5, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(192, 384, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )
        # Parameter-free pooling to 1x1: an identity when the feature map is
        # already 1x1, and it adds no entries to the state_dict.
        self.pool = nn.AdaptiveAvgPool2d((1, 1))
        self.classifier = nn.Linear(256, num_classes)

    def forward(self, x):
        """Map a batch of 3-channel images to ``(batch, num_classes)`` logits."""
        x = self.features(x)
        x = self.pool(x)
        x = x.view(x.size(0), -1)  # (batch, 256, 1, 1) -> (batch, 256)
        return self.classifier(x)


def train(args, model, device, train_loader, optimizer, epoch):
    """Run one optimisation pass over ``train_loader``.

    Args:
        args: Mapping providing ``'log_interval'`` (log every N-th batch) and
            ``'dry_run'`` (stop right after the first logged batch).
        model: Network to optimise; switched to training mode here.
        device: Device that every batch is moved to.
        train_loader: Iterable of ``(data, target)`` batches that also
            supports ``len()`` and exposes ``.dataset``.
        optimizer: Optimizer that updates ``model``'s parameters.
        epoch: Epoch number, used only in the progress message.
    """
    model.train()
    criterion = nn.CrossEntropyLoss()

    for step, (inputs, targets) in enumerate(train_loader):
        inputs = inputs.to(device)
        targets = targets.to(device)

        # Standard step: clear gradients, forward, backward, update.
        optimizer.zero_grad()
        loss = criterion(model(inputs), targets)
        loss.backward()
        optimizer.step()

        # Everything below is progress reporting for every N-th batch.
        if step % args['log_interval'] != 0:
            continue

        samples_seen = step * len(inputs)
        percent_done = 100. * step / len(train_loader)
        print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
            epoch, samples_seen, len(train_loader.dataset),
            percent_done, loss.item()))

        # A dry run stops as soon as one logged batch has been processed.
        if args['dry_run']:
            break


def test(model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            print(data.shape)
            data, target = data.to(device), target.to(device)
            
            output = model(data)
            test_loss +=  nn.CrossEntropyLoss()(output, target).item()  # sum up batch loss
            pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)

    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))


In [8]:
# Run configuration, read by the training and evaluation cells.
args = {
    "batch_size": 128,
    "epochs": 1,
    "lr": 0.01,
    "gamma": 0.7,
    "no_cuda": False,
    "seed": 1,
    "log_interval": 10,
    "dry_run": False,
}

# Seed PyTorch's RNG for this run.
torch.manual_seed(args["seed"])

# Use the GPU only when it is both allowed by the config and available.
use_cuda = torch.cuda.is_available() if not args["no_cuda"] else False
device = torch.device("cuda" if use_cuda else "cpu")

# Keyword arguments for the DataLoaders. Note that the worker, pinned-memory
# and shuffle options are only added on the CUDA path.
kwargs = dict(batch_size=args["batch_size"])
if use_cuda:
    kwargs["num_workers"] = 1
    kwargs["pin_memory"] = True
    kwargs["shuffle"] = True

In [9]:
# Set shuffling explicitly per loader. The shared `kwargs` only enable it on
# the CUDA path, which made the training order depend on the device and also
# shuffled the evaluation data. Training batches are now always shuffled and
# evaluation batches never are.
train_loader = DataLoader(train_dataset_tensor, **{**kwargs, 'shuffle': True})
test_loader = DataLoader(test_dataset_tensor, **{**kwargs, 'shuffle': False})

# Number of batches per pass over each loader.
print(len(train_loader))
print(len(test_loader))


63
16


In [15]:
# Build the network on the selected device and set up its optimizer.
model = AlexNet()
model = model.to(device)
optimizer = optim.Adadelta(model.parameters(), lr=args['lr'])

# Multiply the learning rate by `gamma` after every epoch.
scheduler = StepLR(optimizer, gamma=args['gamma'], step_size=1)

# One iteration per epoch: train, evaluate, export results, decay the LR.
for epoch in range(1, args['epochs'] + 1):
    train(args, model, device, train_loader, optimizer, epoch)
    test(model, device, test_loader)
    # NOTE(review): `save_result` and `save_dataset` are not defined in the
    # cells visible here - confirm they are defined before this cell runs.
    save_result(model, device, save_dataset)
    scheduler.step()

RuntimeError: cuDNN error: CUDNN_STATUS_NOT_INITIALIZED