In [3]:
from res.plot_lib import plot_data, plot_model, set_default
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import helper
import os
os.environ['KMP_DUPLICATE_LIB_OK']='True'

In [4]:
### Code adapted from Yann LeCun and Alfredo Canziani 2019 Spring NYU Deep Learning Course
set_default()
def get_n_params(model):
    np=0
    for p in list(model.parameters()):
        np += p.nelement()
    return np
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [5]:
data = pd.read_csv("train.csv")
test_data = pd.read_csv("test.csv")
data.set_index("patient_id", inplace=True)
data["diagnosis"].value_counts()

unknown                               27124
nevus                                  5193
melanoma                                584
seborrheic keratosis                    135
lentigo NOS                              44
lichenoid keratosis                      37
solar lentigo                             7
atypical melanocytic proliferation        1
cafe-au-lait macule                       1
Name: diagnosis, dtype: int64

In [57]:
transform = transforms.Compose([transforms.Resize(255),
                                transforms.CenterCrop(224),
                                transforms.ToTensor()])
dataset = datasets.ImageFolder("/Users/jinmeng1/Desktop/College/Grad School/First Year Masters/Fall Semester/Intro to Data Science/Final/images", transform=transform)
dataloader = torch.utils.data.DataLoader(dataset, batch_size=40, shuffle=True)


# images, labels = next(iter(dataloader))


In [20]:
dataloader.dataset.__getitem__(1)[0].view(-1, 3*224*224)

tensor([[0.8353, 0.8314, 0.8157,  ..., 0.6471, 0.6431, 0.6235]])

### Fully Connected NN and ConvNet Class

In [58]:
input_size = 3*224*224
output_size = 2

class FC2Layer(nn.Module):
    def __init__(self, input_size, n_hidden, output_size):
        super(FC2Layer, self).__init__()
        self.input_size = input_size
        self.network = nn.Sequential(
            nn.Linear(input_size, n_hidden), 
            nn.ReLU(), 
            nn.Linear(n_hidden, n_hidden), 
            nn.ReLU(), 
            nn.Linear(n_hidden, output_size), 
            nn.LogSoftmax(dim=1)
        )

    def forward(self, x):
        x = x.view(-1, self.input_size)
        return self.network(x)

class CNN(nn.Module):
    def __init__(self, input_size, n_feature, output_size):
        super(CNN, self).__init__()
        self.n_feature = n_feature
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=n_features, kernel_size=5)
        self.conv2 = nn.Conv2d(n_features, n_features, kernel_size=5)
        self.fc1 = nn.Linear(input_size, 50)
        self.fc2 = nn.Linear(50, output_size)
        
    def forward(self, x, verbose=False):
        x = self.conv1(x)
        x = F.relu(x)
        x = F.max_pool2d(x, kernel_size=2)
        x = self.conv2(x)
        x = F.relu(x)
        x = F.max_pool2d(x, kernel_size=2)
        x = x.view(-1, self.n_feature)
        x = self.fc1(x)
        x = F.relu(x)
        x = self.fc2(x)
        x = F.log_softmax(x, dim=1)
        return x

### Training and Testing Functions

In [22]:
# perm = torch.arange(0,150528).long()
# dataloader.dataset.__getitem__(0)[0].view(3, 224*224)
# dataloader.dataset.__getitem__(0)[0].view(-1, 3*224*224)[:, perm].view(-1,3,224,224)

In [56]:
for batch_idx, (data, target) in enumerate(dataloader):
    print(batch_idx)

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31


In [60]:
accuracy_list = []

def train(epoch, model, perm = torch.arange(0,150528).long()):
    model.train()
    for batch_idx, (data, target) in enumerate(dataloader):
        # send to device
        data, target = data.to(device), target.to(device)
        
        # permute pixels
        data = data.view(-1, 3*224*224)[:, perm].view(-1,3,224,224)
#         data = dataloader.dataset.__getitem__(0)[0].view(-1, 3*224*224)[:, perm].view(-1,3,224,224)
#         data = data.view(-1, 28*28)
#         data = data[:, perm]
#         data = data.view(-1, 1, 28, 28)

        optimizer.zero_grad()
        output = model(data)
        loss = F.nll_loss(output, target)
        loss.backward()
        optimizer.step()
        if batch_idx % 10 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(dataloader.dataset),
                100. * batch_idx / len(dataloader), loss.item()))
            
def test(model, perm = torch.arange(0,150528).long()):
    model.eval()
    test_loss = 0
    correct = 0
    for data, target in test_loader:
        # send to device
        data, target = data.to(device), target.to(device)
        
        # permute pixels
        data = data.view(-1, 3*224*224)[:, perm].view(-1,3,224,224)
#        data = dataloader.dataset.__getitem__(0)[0].view(-1, 3*224*224)[:, perm].view(-1,3,224,224)
#         data = data.view(-1, 28*28)
#         data = data[:, perm]
#         data = data.view(-1, 1, 28, 28)
        output = model(data)
        test_loss += F.nll_loss(output, target, reduction='sum').item() # sum up batch loss                                                               
        pred = output.data.max(1, keepdim=True)[1] # get the index of the max log-probability                                                                 
        correct += pred.eq(target.data.view_as(pred)).cpu().sum().item()

    test_loss /= len(test_loader.dataset)
    accuracy = 100. * correct / len(test_loader.dataset)
    accuracy_list.append(accuracy)
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        accuracy))

In [61]:
# Fully connected network

n_hidden = 8 # number of hidden units

model_fnn = FC2Layer(input_size, n_hidden, output_size)
model_fnn.to(device)
optimizer = optim.SGD(model_fnn.parameters(), lr=0.01, momentum=0.5)
print('Number of parameters: {}'.format(get_n_params(model_fnn)))

for epoch in range(0, 3):
    train(epoch, model_fnn)
#     test(model_fnn)

Number of parameters: 1204322


In [55]:
# ConvNet
# Training settings 
n_features = 5 # number of feature maps

model_cnn = CNN(input_size, n_features, output_size)
model_cnn.to(device)
optimizer = optim.SGD(model_cnn.parameters(), lr=0.01, momentum=0.5)
print('Number of parameters: {}'.format(get_n_params(model_cnn)))

for epoch in range(0, 1):
    train(epoch, model_cnn)
#     test(model_cnn)

Number of parameters: 7527562


RuntimeError: mat1 and mat2 shapes cannot be multiplied (179776x5 and 150528x50)