In [1]:
import prep
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn
import skimage.io
import math
import random
import torchvision.transforms


class default(torch.nn.Module):
    """
    Small CNN: two convolution/pool stages followed by two linear layers
    that end in 2 output values per image.
    """
    # NOTE: a class with this same name is declared again further down in
    # this cell; that later declaration rebinds the name `default`.

    def __init__(self):
        """
        Creates the layers only; forward() decides how they are chained.
        """
        super().__init__()

        # Convolution layer 1: 3 input channels -> 6 feature maps, 5x5 kernel
        self.conv1 = torch.nn.Conv2d(3, 6, 5)

        # Convolution layer 2: 6 feature maps -> 12 feature maps, 5x5 kernel
        self.conv2 = torch.nn.Conv2d(6, 12, 5)

        # 12 channels of 39 x 39 values is what forward() hands to fc_1
        # (this is the size produced by a 325 x 325 input image).
        flat_features = 39 * 39 * 12
        self.fc_1 = torch.nn.Linear(flat_features, 256)
        self.fc_2 = torch.nn.Linear(256, 2)

    def forward(self, x):
        """
        Carries a batch of images from the network input to its output:
        conv -> pool -> leaky ReLU (twice), flatten, then the two linear
        layers with a leaky ReLU in between.
        """
        F = torch.nn.functional

        stage_1 = F.leaky_relu(F.max_pool2d(self.conv1(x), kernel_size=2))
        stage_2 = F.leaky_relu(F.max_pool2d(self.conv2(stage_1), kernel_size=4))

        # One row per image, all remaining dimensions merged into one.
        flat = stage_2.view(stage_2.shape[0], -1)

        hidden = F.leaky_relu(self.fc_1(flat))
        return self.fc_2(hidden)

# Instance of the CNN declared above. `default_model` is assigned again later
# in this cell, after the class is declared a second time.
default_model = default()


def calculate_accuracy(y_pred, y):
    """
    Fraction of rows in ``y_pred`` whose largest entry (argmax over dim 1)
    sits at the index given in ``y``. Returned as a 0-dim float tensor.
    """
    predicted_class = y_pred.argmax(dim=1)
    hits = (predicted_class == y).float()
    return hits.mean()


def train_iteration(model, iterator, optimizer, criterion, device):
    """
    Runs one training pass over ``iterator`` and updates ``model`` after
    every batch.

    Each item produced by ``iterator`` is indexed with the keys 'image' and
    'plastic'. The image batch is fed to the model; the 'plastic' batch is
    reduced with ``argmax(dim=1)`` to obtain the class index that is handed
    to ``criterion`` and to ``calculate_accuracy``.

    Parameters:
        model: network to train (put into training mode here).
        iterator: iterable of batches; it does not need to support len().
        optimizer: optimizer stepping the parameters of ``model``.
        criterion: loss called as ``criterion(y_pred, class_index)``.
        device: device the batch tensors are moved to.

    Returns:
        (mean loss per batch, mean accuracy per batch,
         model output of the last batch (detached from the autograd graph),
         raw 'plastic' tensor of the last batch)

    Raises:
        ValueError: if ``iterator`` yields no batch at all.
    """
    # Make sure mode-dependent layers (dropout, batch norm, ...) behave as
    # they should during training, whatever state the caller left them in.
    model.train()

    epoch_loss = 0.0
    epoch_acc = 0.0
    batches_seen = 0
    y_pred = None
    isPlasticRaw = None

    #TODO: add logic to skip iteration if image is None
    for sample in iterator:
        image = sample['image'].to(device)
        isPlasticRaw = sample['plastic'].to(device)

        optimizer.zero_grad()
        y_pred = model(image)
        isPlastic = isPlasticRaw.argmax(dim=1)
        loss = criterion(y_pred, isPlastic)
        acc = calculate_accuracy(y_pred, isPlastic)
        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()
        epoch_acc += acc.item()
        batches_seen += 1

    # Fail with a clear message instead of an incidental ZeroDivisionError /
    # UnboundLocalError when there was nothing to train on.
    if batches_seen == 0:
        raise ValueError('train_iteration received an iterator with no batches')

    # Counting batches (rather than calling len()) also works for iterables
    # without a length. The prediction is detached so that handing it back
    # does not keep the last batch's autograd graph alive.
    return (epoch_loss / batches_seen,
            epoch_acc / batches_seen,
            y_pred.detach(),
            isPlasticRaw)



class default(torch.nn.Module):
    """
    Small CNN: two convolution / max-pool / leaky-ReLU stages followed by two
    fully connected layers producing ``num_classes`` values per image.
    """

    def __init__(self, in_channels=3, num_classes=2, input_size=325):
        """
        Initializes CNN. Here we just define layer shapes that we call in the forward func

        Parameters:
            in_channels: number of channels of the input images (default 3).
            num_classes: number of output values per image (default 2).
            input_size: spatial size of the input images, either one int for
                square images or a (height, width) pair. The default of 325
                yields the 39 * 39 * 12 features this network was first
                written for.

        Raises:
            ValueError: if ``input_size`` is too small to survive both
                convolution / pooling stages.
        """
        super().__init__()

        if isinstance(input_size, int):
            height, width = input_size, input_size
        else:
            height, width = input_size

        conv2_channels = 12

        #Convolution layer 1
        self.conv1 = torch.nn.Conv2d(in_channels = in_channels,
                               out_channels = 6,
                               kernel_size = 5)

        #Convolution layer 2. See above
        self.conv2 = torch.nn.Conv2d(in_channels = 6,
                               out_channels = conv2_channels,
                               kernel_size = 5)

        # Size of the flattened feature map that reaches fc_1.
        flat_height = self._pooled_size(height)
        flat_width = self._pooled_size(width)
        if flat_height < 1 or flat_width < 1:
            raise ValueError(
                f'input_size {input_size} is too small for this network')

        self.fc_1 = torch.nn.Linear(flat_height * flat_width * conv2_channels, 256)
        self.fc_2 = torch.nn.Linear(256, num_classes)

    @staticmethod
    def _pooled_size(side):
        """Length of one spatial side after both conv/pool stages of forward()."""
        # An unpadded 5x5 convolution removes 4 pixels; max_pool2d with its
        # default stride (= kernel size) floors the division.
        side = (side - 4) // 2   # conv1, then 2x2 pooling
        side = (side - 4) // 4   # conv2, then 4x4 pooling
        return side

    def forward(self, x):
        """
        Function that performs all the neural network forward calculation i.e.
        takes image data from the input of the neural network to the output
        """

        x = self.conv1(x)
        x = torch.nn.functional.max_pool2d(x, kernel_size = 2)
        x = torch.nn.functional.leaky_relu(x)
        x = self.conv2(x)
        x = torch.nn.functional.max_pool2d(x, kernel_size = 4)
        x = torch.nn.functional.leaky_relu(x)
        # Keep the batch dimension, merge everything else for the linear layers.
        x = x.view(x.shape[0], -1)
        x = self.fc_1(x)
        x = torch.nn.functional.leaky_relu(x)
        x = self.fc_2(x)

        return x

    

# Module-level network instance and an Adam optimizer (lr=.002) over its parameters.
default_model = default()
default_optimizer = torch.optim.Adam(default_model.parameters(), lr=.002)

def train(epochs, batch_size, dataset, criterion,
          optimizer=None,
          model=default_model,
          device=torch.device('cpu')):
    """
    Trains ``model`` on ``dataset`` for ``epochs`` passes and prints progress.

    Parameters:
        epochs: number of passes over the dataset.
        batch_size: batch size handed to the DataLoader.
        dataset: dataset wrapped in a shuffling ``torch.utils.data.DataLoader``.
        criterion: loss module; moved to ``device``.
        optimizer: optimizer to use. When None (the default) a fresh
            ``Adam(model.parameters(), lr=.002)`` is created, so the
            optimizer always belongs to the model being trained.
        model: network to train; moved to ``device``.
        device: device used for the model, the loss and the batches.

    After every epoch the mean accuracy and loss are printed; every 5th epoch
    (starting with epoch 0) the model output and target of the last batch are
    printed as well.
    """
    model.to(device)
    criterion.to(device)

    # Previously the argument was overwritten unconditionally, so an optimizer
    # supplied by the caller was silently ignored.
    if optimizer is None:
        optimizer = torch.optim.Adam(model.parameters(), lr=.002)

    train_iterator = torch.utils.data.DataLoader(dataset,
                                 shuffle = True,
                                 batch_size = batch_size)

    # range(epochs), not range(epochs + 1): train exactly as many epochs as requested.
    for epoch in range(epochs):
        train_loss, train_acc, y_pred, target = (
            train_iteration(model, train_iterator, optimizer, criterion, device))
        print(f'EPOCH: {epoch}, acc: {train_acc}, loss: {train_loss}')
        # `==` compares values; `is` only worked through CPython's small-int cache.
        if epoch % 5 == 0:
            print(y_pred)
            print(target)

In [2]:
image_dir = 'data/10x'
labels_file = 'data/me.csv'

# Side length of the square centre crop. The `default` CNN sizes its first
# fully connected layer for 325 x 325 inputs, so keep the two in sync.
CROP_SIZE = 325

DATA = prep.prep_data(pd.read_csv(labels_file), image_dir)

# Augmentation pipeline: random rotation over the full circle, centre crop to
# CROP_SIZE, then conversion back to a tensor.
transforms = torchvision.transforms.Compose([
                            torchvision.transforms.ToPILImage(),
                            torchvision.transforms.RandomRotation((-180,180)),
                            torchvision.transforms.CenterCrop(CROP_SIZE),
                            torchvision.transforms.ToTensor()
                                      ])

# Reuse image_dir instead of repeating the path literal.
train_data = prep.tenX_dataset(DATA, image_dir, transform=transforms)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  identification[i] = False
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  identification[i] = True


In [6]:
#Training configuration: dataset, loss function, device and hyper-parameters.

train_data = prep.tenX_dataset(DATA, image_dir, transform=transforms)
criterion = torch.nn.CrossEntropyLoss()
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
BATCH_SIZE = 10
epochs = 50

In [7]:
# `model` is never defined or imported in this notebook, so call the `train`
# function from the first cell directly and hand it the device chosen above.
train(epochs, BATCH_SIZE, train_data, criterion, device=device)

EPOCH: 0, acc: 0.8125447996201054, loss: 0.4519150067240961
tensor([[ 0.6090, -0.8797],
        [ 0.5215, -0.7523],
        [ 0.7102, -1.0227],
        [ 0.5189, -0.7556],
        [ 0.6566, -0.9649],
        [ 0.8082, -1.1734],
        [ 0.5143, -0.7490],
        [ 0.4659, -0.6778],
        [ 0.5205, -0.7503]], grad_fn=<AddmmBackward>)
tensor([[1, 0],
        [1, 0],
        [1, 0],
        [1, 0],
        [0, 1],
        [1, 0],
        [1, 0],
        [1, 0],
        [1, 0]])
EPOCH: 1, acc: 0.8376344000139544, loss: 0.43134462496926707
EPOCH: 2, acc: 0.8064516109804953, loss: 0.4453781021218146
EPOCH: 3, acc: 0.8182795701488372, loss: 0.4440372329085104
EPOCH: 4, acc: 0.8415770569155293, loss: 0.5060019998721057
EPOCH: 5, acc: 0.8354838721213802, loss: 0.4498979713647596
tensor([[ 1.1543, -0.9445],
        [ 0.9911, -0.8002],
        [ 0.9945, -0.8325],
        [ 0.8983, -0.7509],
        [ 0.9387, -0.7889],
        [ 1.0886, -0.8640],
        [ 0.3604, -0.3700],
        [ 0.8727, -0

KeyboardInterrupt: 

In [None]:
def test_dataset_class():
    """
    Checks `prep.tenX_dataset` against the test assets: the dataset length,
    the fields of every sample, and a few known values of the first and
    fifth sample. Raises AssertionError on the first failed check.
    """
    filename = '../tests/test_assets/test_labels_unclean.csv'
    image_dir = '../tests/test_assets/test_images'

    #Eventually just read in already cleaned labels sheet
    # Only the module `prep` is imported in this notebook, so its functions
    # have to be reached through the module.
    labels = prep.prep_data(pd.read_csv(filename), image_dir)

    transform = None
    test_dataset = prep.tenX_dataset(labels, image_dir, transform)

    #len() check
    length = len(test_dataset)
    expect = 5
    # Compare ints by value; `is` tests object identity.
    assert length == expect, f'10x dataset length method failed. Got {length}, should be {expect}'

    #get_item() check
    samples = []
    for i in range(length):
        # Fetch each sample once instead of re-indexing the dataset per check.
        sample = test_dataset[i]
        assert sample['image'] is not None, 'Got NoneType instead of image'
        isP = sample['plastic']
        assert math.isclose(0,isP) or math.isclose(1,isP), f'plastic not 0 or 1, instead is {isP}'
        shape_length = len(sample['shape'])
        assert shape_length == 4, f'length of shape array not 4, instead is {shape_length}'
        samples.append(sample)

    shape = samples[0]['shape']
    assert math.isclose(shape[2], 1),  f'wrong shape first image, is {shape}'
    color = samples[0]['color']
    assert math.isclose(color[0], 1), f'wrong color first image, is {color}'
    # Index 4 is the fifth sample; the message used to say "3rd".
    assert math.isclose(samples[4]['plastic'], 1), 'wrong plastic id 5th image'
    
# Run the dataset checks right away; a failed check surfaces as an AssertionError.
test_dataset_class()