In [1]:
import copy
import math

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import torchvision
from sklearn.model_selection import KFold

import construct
import prep

In [2]:
class myModel(torch.nn.Module):
    """
    Small CNN with two conv -> batch-norm -> max-pool -> leaky-ReLU stages
    followed by two fully connected layers that emit 2 values per image.
    """

    def __init__(self):
        """
        Create the layers. They are only declared here; ``forward`` decides
        the order in which they are applied.
        """
        super().__init__()
        nn = torch.nn

        # Both convolutions use 5x5 kernels and bias=False; each one feeds
        # directly into a BatchNorm2d layer with affine=True.
        self.conv1 = nn.Conv2d(3, 6, 5, bias=False)
        self.conv2 = nn.Conv2d(6, 12, 5, bias=False)

        # fc_1 expects the flattened 12 x 39 x 39 feature map that the two
        # stages produce for e.g. a 325 x 325 input
        # (325 -> 321 -> 160 -> 156 -> 39).
        self.fc_1 = nn.Linear(39 * 39 * 12, 256)
        self.fc_2 = nn.Linear(256, 2)
        self.drop = nn.Dropout(p=.2)

        norm_kwargs = dict(eps=1e-05, momentum=0.1, affine=True,
                           track_running_stats=True)
        self.batch1 = nn.BatchNorm2d(6, **norm_kwargs)
        self.batch2 = nn.BatchNorm2d(12, **norm_kwargs)

    def forward(self, x):
        """
        Run a batch of images through the network and return the raw
        2-value output of the last linear layer (no softmax is applied).
        """
        F = torch.nn.functional

        # (convolution, batch norm, max-pool window) for each conv stage.
        stages = (
            (self.conv1, self.batch1, 2),
            (self.conv2, self.batch2, 4),
        )
        for conv, norm, window in stages:
            x = F.leaky_relu(F.max_pool2d(norm(conv(x)), kernel_size=window))

        # Flatten everything except the batch dimension.
        x = x.view(x.shape[0], -1)
        x = self.drop(F.leaky_relu(self.fc_1(x)))
        return self.fc_2(x)


model = myModel()

In [3]:
# --- Run configuration -------------------------------------------------
k = 5               # number of cross-validation folds
epochs = 250        # training epochs per fold
BATCH_SIZE = 32
SEED = 0

# Seed the global torch and numpy RNGs so that unseeded random sampling
# further down (augmentation, shuffling) starts from a fixed state.
# NOTE(review): `model` was already initialised in an earlier cell, so its
# initial weights are not covered by this seed.
torch.manual_seed(SEED)
np.random.seed(SEED)

criterion = torch.nn.CrossEntropyLoss()
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Augmentation: random rotation over the full circle, then a 325 x 325
# centre crop, then conversion back to a tensor.
transforms = torchvision.transforms.Compose([
    torchvision.transforms.ToPILImage(),
    torchvision.transforms.RandomRotation((-180, 180)),
    torchvision.transforms.CenterCrop(325),
    torchvision.transforms.ToTensor(),
])

image_dir = 'will/data/raman_images'
labels_file = 'will/data/labels.csv'
DATA = prep.prep_data(pd.read_csv(labels_file), image_dir)

# NOTE(review): this optimizer is not passed to construct.k_fold below, so it
# has no effect on the cross-validation run. It is kept only in case a later
# cell relies on the name.
optimizer = torch.optim.Adam(model.parameters(), lr=.0015)

# --- K-fold cross validation ------------------------------------------
x = construct.k_fold(k, epochs, BATCH_SIZE, transforms, criterion, model, DATA, device, image_dir)
models, losses, train_accs, naive_accs, test_accs = x

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  identification[i] = False
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  identification[i] = True


EPOCH: 0, acc: 0.5649038461538461, loss: 0.6642785301575294
EPOCH: 1, acc: 0.6338141010357783, loss: 0.6535230462367718
EPOCH: 2, acc: 0.5761217933434707, loss: 0.6620315267489507
EPOCH: 3, acc: 0.5769230769230769, loss: 0.6591429389440097
EPOCH: 4, acc: 0.6145833318050091, loss: 0.6534900115086482
EPOCH: 5, acc: 0.6057692307692307, loss: 0.6529219929988568
EPOCH: 6, acc: 0.6225961538461539, loss: 0.65158945780534
EPOCH: 7, acc: 0.5785256417898031, loss: 0.662000990830935
EPOCH: 8, acc: 0.6001602548819321, loss: 0.6681597737165598
EPOCH: 9, acc: 0.5953525625742399, loss: 0.6686418652534485
EPOCH: 10, acc: 0.6201923076923077, loss: 0.6576826159770672
EPOCH: 11, acc: 0.5873397451180679, loss: 0.6620714618609502
EPOCH: 12, acc: 0.6225961538461539, loss: 0.6515148052802453
EPOCH: 13, acc: 0.6201923076923077, loss: 0.6592235611035273
EPOCH: 14, acc: 0.6201923076923077, loss: 0.6565457995121295
EPOCH: 15, acc: 0.6185897451180679, loss: 0.6613140243750352
EPOCH: 16, acc: 0.6193910241127014, l

KeyboardInterrupt: 

In [None]:
# Count the rows whose first 'isPlastic' component is (approximately) 1.
# Iterating over the column's values avoids the label-based lookup
# DATA['isPlastic'][i], which only works when DATA has a default 0..n-1 index.
count = sum(1 for label in DATA['isPlastic'] if math.isclose(label[0], 1))

In [None]:
# Display the number of rows whose first 'isPlastic' component is ~1,
# as tallied in the previous cell.
count

In [None]:
# Display the number of remaining rows, i.e. those not included in `count`.
len(DATA)-count

In [None]:
# NOTE(review): 349 is a hard-coded literal, presumably one of the class
# counts copied from the cells above — confirm which one, and consider
# deriving it from `count` so this ratio cannot go stale when the data changes.
349/len(DATA)

In [None]:
def k_fold(
        n_splits,
        epochs,
        batch_size,
        transforms,
        criterion,
        model,
        dataframe,
        device,
        image_root,
        learning_rate=.0015,
        random_state=None):
    """
    Perform K-fold cross validation.

    For every fold a fresh deep copy of ``model`` is trained on the training
    split and evaluated on the held-out split. ``model`` itself is never
    trained.

    Parameters
    ----------
    n_splits: int
        Number of splits to make of the data.
    epochs: int
        Number of times to pass the data through the CNN.
    batch_size: int
        Number of samples to pass through at a time.
    transforms: pytorch object
        Transformations to make on the images.
    criterion: pytorch object
        Defines the loss function for training the model.
    model: custom pytorch object
        Architecture of the CNN in a pytorch object. Used as the template
        that is copied for each fold.
    dataframe: DataFrame
        DataFrame containing the data set.
    device: pytorch device object
        Device on which calculations are being performed.
    image_root: str
        Directory where image files are located.
    learning_rate: float, optional
        Learning rate of the Adam optimizer created for each fold.
        Defaults to the previously hard-coded value of 0.0015.
    random_state: int or None, optional
        Forwarded to ``KFold`` to make the shuffled split reproducible.
        ``None`` (the default) keeps the previous unseeded behaviour.

    Returns
    -------
    models: list of pytorch objects
        Each element is a CNN model object.
    losses: list of lists of floats
        List of loss value lists.
    train_accs: list of lists of floats
        List of accuracy value lists.
    naive_accs: list of floats
        List of accuracies if only non-plastic (0) is predicted.
    test_accs: list of floats
        List of accuracies of the models on the test proportion.
    """
    kf = KFold(n_splits=n_splits, shuffle=True, random_state=random_state)
    models = []
    losses = []
    train_accs = []
    test_accs = []
    naive_accs = []

    for train_idx, test_idx in kf.split(dataframe):
        # Every fold starts from an untouched copy of the template model.
        curr_model = copy.deepcopy(model)
        # The optimizer must be built from the parameters of the copy that is
        # actually trained. Building it from ``model.parameters()`` (as
        # before) leaves the copy's weights without any optimizer updates.
        optimizer = torch.optim.Adam(
            curr_model.parameters(), lr=learning_rate)

        train_df = dataframe.iloc[train_idx].reset_index()
        test_df = dataframe.iloc[test_idx].reset_index()
        train_data = prep.tenX_dataset(
            train_df, image_root, transform=transforms)
        test_data = prep.tenX_dataset(
            test_df, image_root, transform=transforms)

        cnn, train_loss, train_acc = train(
                epochs, batch_size, train_data,
                criterion, optimizer, curr_model, device)
        models.append(cnn)
        train_accs.append(train_acc)
        losses.append(train_loss)

        # Only labels, predictions and the accuracy are needed here.
        _, labels, predictions, _, test_acc = get_predictions(
            batch_size, cnn, test_data)
        test_accs.append(test_acc)
        # Fraction of test labels whose second column equals 0.
        naive_accs.append((labels[:, 1] == 0).float().sum() / len(predictions))

    return models, losses, train_accs, naive_accs, test_accs
