In [119]:
import numpy as np
import matplotlib.pyplot as plt
import csv
import os
import torch
import torch.optim as optim
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

from sklearn.model_selection import train_test_split
from PIL import Image
import time

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
np.random.seed(0)

In [120]:
"""
PREPROCESSING HELPER FUNCTION

formats image files and converts into numpy arrays of the same shape, then saves to specified location

Returns 0 upon success, otherwise returns 1 (unsuccessful may be due to undesirable properties such as
large aspect ratio)
"""

def process_file(file_path, processed_file_path, target_size = 32, aspect_ratio_thres = 1.3):
    with Image.open(file_path) as image:
        # crop out the longer dimension so it's square
        width, height = image.size
        new_size = min(width, height)
        if new_size * aspect_ratio_thres < max(width, height):
            # print(f'skipped due to aspect ratio too high')
            return 1
        left = (width - new_size) / 2
        top = (height - new_size) / 2
        right = (width + new_size) / 2
        bottom = (height + new_size) / 2
        image = image.crop((left, top, right, bottom))
        
        # resize
        image = image.resize((target_size, target_size))

        # convert the image to RGB
        image = image.convert('RGB')
        
        # convert to numpy array
        img_array = np.array(image)

        # print(f'processed array shape: {img_array.shape}')
        if img_array.shape != (target_size, target_size, 3):
            print(f'error! wrong shape {img_array.shape}')
        
        # save to file
        np.save(processed_file_path, img_array)
        return 0

In [121]:
"""
MAIN PIPELINE 1

preprocesses data by reading from directory and saves processed data into another directory with same structure.
"""

def preprocess(raw_data_dir, processed_data_dir):
    # Create the processed_data directory if it doesn't exist
    if not os.path.exists(processed_data_dir):
        os.makedirs(processed_data_dir)

    # List all subfolders in raw_data
    for subdir in os.listdir(raw_data_dir):
        print(f'reading directory {subdir}')
        raw_subdir_path = os.path.join(raw_data_dir, subdir)
        
        # Check if it's a directory
        if not os.path.isdir(raw_subdir_path):
            continue
        processed_subdir_path = os.path.join(processed_data_dir, subdir)
        
        # Create the subfolder in processed_data if it doesn't exist
        if not os.path.exists(processed_subdir_path):
            os.makedirs(processed_subdir_path)
        
        count = 0
        processed_count = 0
        total = len(os.listdir(raw_subdir_path))
        # Process each file in the subdirectory
        for filename in os.listdir(raw_subdir_path):
            file_path = os.path.join(raw_subdir_path, filename)
            processed_file_path = os.path.join(processed_subdir_path, filename)
            
            # Process the file
            return_code = process_file(file_path, processed_file_path)
            count += 1
            if return_code == 0:
                processed_count += 1
            if count % 100 == 0 or count == total:
                print(f'traversed {count} / {total}, processed {processed_count}')
                
    print('finished preprocessing')

In [122]:
"""
IMPORTS PROCESSED IMAGES INTO X AND Y
"""

def import_images(processed_data_dir, folders):
    total_items = 0
    total_categories = 0
    image_dim = None

    # List all subfolders
    for subdir in os.listdir(processed_data_dir):
        if folders is not None and subdir not in folders:
            continue

        subdir_path = os.path.join(processed_data_dir, subdir)
        # Check if it's a directory
        if not os.path.isdir(subdir_path):
            continue

        total_categories += 1
        if total_items == 0:
            for filename in os.listdir(subdir_path):
                file_path = os.path.join(subdir_path, filename)
                data = np.load(file_path)
                print(f'data shape: {data.shape}')
                image_dim = data.shape
                break
        total_items += len(os.listdir(subdir_path))

    print(f'found {total_items} items!')
        
    X = np.zeros((total_items, *image_dim))
    print(f'x dim: {X.shape}')
    Y = np.zeros((total_items, total_categories))
    
    category_counter = 0
    item_counter = 0

    # List all subfolders
    for subdir in os.listdir(processed_data_dir):
        if folders is not None and subdir not in folders:
            continue
        
        subdir_path = os.path.join(processed_data_dir, subdir)
        print(f'reading directory {subdir} with {len(os.listdir(subdir_path))} items')
        
        # Check if it's a directory
        if not os.path.isdir(subdir_path):
            continue
        
        # Process each file in the subdirectory
        for filename in os.listdir(subdir_path):
            file_path = os.path.join(subdir_path, filename)
            data = np.load(file_path)
            
            X[item_counter] = data
            Y[item_counter][category_counter] = 1
            item_counter += 1

            if item_counter % 100 == 0:
                print(f'processed {item_counter}/{len(os.listdir(subdir_path))}')

        category_counter += 1

    print(f'label statistics: {np.sum(Y, axis=0)}')
    print('finished feature import')
    return X, Y

In [123]:
class CNN(nn.Module):
    def __init__(self, in_dim, num_classes):
        super(CNN, self).__init__()

        self.in_dim = in_dim
        self.num_classes = num_classes        

        self.fc_layer_size = 1000

        self.layer1_filters = 32

        self.layer1_kernel_size = (4,4)
        self.layer1_stride = 1
        self.layer1_padding = 0

        self.layer2_filters = 64

        self.layer2_kernel_size = (2,2)
        self.layer2_stride = 1
        self.layer2_padding = 0

        self.layer1_dim_h = (self.in_dim[1] - self.layer1_kernel_size[0]) / self.layer1_stride + 1
        self.layer1_dim_w = (self.in_dim[2] - self.layer1_kernel_size[1]) / self.layer1_stride + 1        

        self.layer2_dim_h = ((self.layer1_dim_h // 2) - self.layer2_kernel_size[0]) / self.layer2_stride + 1
        self.layer2_dim_w = ((self.layer1_dim_w // 2)  - self.layer2_kernel_size[1]) / self.layer2_stride + 1        

        print(f'layer2_dim_h {self.layer2_dim_h}\nlayer2_dim_w {self.layer2_dim_w}')

        self.conv1 = nn.Conv2d(3, self.layer1_filters, self.layer1_kernel_size, stride=self.layer1_stride, padding=self.layer1_padding)
        self.dropout = nn.Dropout(0.05)
        self.batchnorm1 = nn.BatchNorm2d(self.layer1_filters)
        self.pooling = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(self.layer1_filters, self.layer2_filters, self.layer2_kernel_size, stride=self.layer2_stride, padding=self.layer2_padding)
        self.batchnorm2 = nn.BatchNorm2d(self.layer2_filters)
        self.pooling2 = nn.MaxPool2d(2, 2)

        self.fc_inputs = int(self.layer2_filters * (self.layer2_dim_h // 2) * (self.layer2_dim_w // 2))

        self.lin1 = nn.Linear(self.fc_inputs, self.fc_layer_size)

        self.lin2 = nn.Linear(self.fc_layer_size, self.num_classes)


    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = self.batchnorm1(x)
        x = self.pooling(x)
        x = F.relu(self.conv2(x))
        x = self.batchnorm2(x)
        x = self.pooling2(x)
        # flatten convolutional layer into vector
        x = x.view(x.size(0), -1)
        x = F.relu(self.lin1(x))
        x = self.dropout(x)
        x = self.lin2(x)
        return x
    

def get_category(output):
    return torch.argmax(torch.abs(output))
    #return torch.argmax(output)
    

In [133]:
class VGG16(nn.Module):
    def __init__(self, num_classes):
        super(VGG16, self).__init__()
        fc_layer_size = 4096

        self.layer1 = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU())
        self.layer2 = nn.Sequential(
            nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(), 
            nn.MaxPool2d(kernel_size = 2, stride = 2))
        self.layer3 = nn.Sequential(
            nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU())
        self.layer4 = nn.Sequential(
            nn.Conv2d(128, 128, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size = 2, stride = 2))
        self.layer5 = nn.Sequential(
            nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU())
        self.layer6 = nn.Sequential(
            nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU())
        self.layer7 = nn.Sequential(
            nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size = 2, stride = 2))
        self.layer8 = nn.Sequential(
            nn.Conv2d(256, 512, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU())
        self.layer9 = nn.Sequential(
            nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU())
        self.layer10 = nn.Sequential(
            nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size = 2, stride = 2))
        self.layer11 = nn.Sequential(
            nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU())
        self.layer12 = nn.Sequential(
            nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU())
        self.layer13 = nn.Sequential(
            nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size = 2, stride = 2))
        self.fc = nn.Sequential(
            nn.Dropout(0.5),
            nn.Linear(512, fc_layer_size),
            nn.ReLU())
        self.fc1 = nn.Sequential(
            nn.Dropout(0.5),
            nn.Linear(fc_layer_size, fc_layer_size),
            nn.ReLU())
        self.fc2= nn.Sequential(
            nn.Linear(fc_layer_size, num_classes))
        
    def forward(self, x):
        out = self.layer1(x)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = self.layer5(out)
        out = self.layer6(out)
        out = self.layer7(out)
        out = self.layer8(out)
        out = self.layer9(out)
        out = self.layer10(out)
        out = self.layer11(out)
        out = self.layer12(out)
        out = self.layer13(out)
        out = out.reshape(out.size(0), -1)
        out = self.fc(out)
        out = self.fc1(out)
        out = self.fc2(out)
        return out

In [125]:
class SimpleDataset(Dataset):
    def __init__(self, features, labels):
        self.features = features
        self.labels = labels

    def __len__(self):
        return len(self.features)

    def __getitem__(self, idx):
        return self.features[idx], self.labels[idx]

In [129]:
"""
RUNNER
"""

# directory containing original images
raw_data_dir = '../../Data/pokemon/PokemonData'

# directory to save processed numpy arrays
processed_data_dir = '../../Data/pokemon/PokemonDataProcessed'

# specifies which categories to read from. Leave as None to read from all
folders = ['Pikachu','Ditto']
folders = None

#preprocess(raw_data_dir, processed_data_dir)

X, Y = import_images(processed_data_dir, folders)
X = np.moveaxis(X, -1, 1)
print(f'X shape: {X.shape}\nY Shape: {Y.shape}')

x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=0)
in_dim = X[0].shape
num_classes = Y[0].shape

y_train = np.argmax(y_train, axis=1)
y_test = np.argmax(y_test, axis=1)

x_train = torch.from_numpy(x_train)
#x_train = torch.unsqueeze(x_train, 2)
x_train = x_train.type(torch.float32).to(device)
y_train = torch.from_numpy(y_train)
#y_train = torch.unsqueeze(y_train, 1)
y_train = y_train.type(torch.LongTensor).to(device)

x_test = torch.from_numpy(x_test)
#x_test = torch.unsqueeze(x_test, 2)
x_test = x_test.type(torch.float32).to(device)
y_test = torch.from_numpy(y_test)
#y_test = torch.unsqueeze(y_test, 1)
y_test = y_test.type(torch.LongTensor).to(device)



print(f'X train shape: {x_train.shape}\nY Shape: {y_train.shape}')
print(f'indim: {in_dim} outdim: {num_classes}')


data shape: (32, 32, 3)
found 11945 items!
x dim: (11945, 32, 32, 3)
reading directory Abra with 71 items
reading directory Aerodactyl with 69 items
processed 100/69
reading directory Alakazam with 106 items
processed 200/106
reading directory Arbok with 91 items
processed 300/91
reading directory Arcanine with 66 items
processed 400/66
reading directory Articuno with 79 items
reading directory Beedrill with 47 items
processed 500/47
reading directory Bellsprout with 66 items
reading directory Blastoise with 57 items
processed 600/57
reading directory Bulbasaur with 50 items
processed 700/50
reading directory Butterfree with 75 items
reading directory Caterpie with 65 items
processed 800/65
reading directory Chansey with 84 items
processed 900/84
reading directory Charizard with 88 items
processed 1000/88
reading directory Charmander with 111 items
processed 1100/111
reading directory Charmeleon with 44 items
reading directory Clefable with 80 items
processed 1200/80
reading directory 

In [134]:
learning_rate = 0.001
criterion = nn.NLLLoss()
momentum = 0.9
cnn = CNN(in_dim, num_classes[0]).to(device)
optimizer = optim.Adam(cnn.parameters(), lr=learning_rate, weight_decay=1e-3)
train_batch_size = 100
test_batch_size = 10

# Create Dataset objects, then create torch dataloader
train_dataset = SimpleDataset(x_train, y_train)
test_dataset = SimpleDataset(x_test, y_test)

train_loader = DataLoader(train_dataset, batch_size=train_batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=test_batch_size, shuffle=False)

def train(net, loader, optimizer, epoch, device):
    print(f'training')
    net.train()
    correct = 0
    for batch_idx, (data, target) in enumerate(loader):

        data, target = data.to(device), target.to(device)

        # clear up gradients for backprop
        optimizer.zero_grad()
        output = F.log_softmax(net(data), dim=1)
        #print(f'output: {output} target: {target}')
        # use NLL loss
        loss = criterion(output, target)

        # compute gradients and make updates
        loss.backward()
        optimizer.step()

        pred = output.data.max(1, keepdim=True)[1]
        correct += (pred.eq(target.data.view_as(pred)).sum().item())

    print(f'\tTraining epoch {epoch} Accuracy: {100 * correct / len(loader.dataset)}%')


def test(net, loader, device):
    print(f'testing')
    net.eval()

    test_loss = 0
    correct = 0
    total = 0

    with torch.no_grad():
        for data, target in loader:
            data, target = data.to(device), target.to(device)

            output = net(data)
            test_loss += F.nll_loss(output, target, size_average=False).item()
            pred = output.data.max(1, keepdim=True)[1]
            correct += (pred.eq(target.data.view_as(pred)).sum().item())

            total = total + 1
    accuracy = 100 * correct / len(loader.dataset)
    print(f'\ttesting accuracy: {accuracy}%')
    return accuracy


n_epoch = 20

for epoch in range(n_epoch):
    print(f'epoch {epoch}')
    train(cnn, train_loader, optimizer, epoch, device)
    test(cnn, test_loader, device)

epoch 0
training
	Training epoch 0 Accuracy: 1.1615738802846378%
testing




	testing accuracy: 1.4650481372959396%
epoch 1
training


KeyboardInterrupt: 

In [None]:
learning_rate = 0.0005
criterion = nn.NLLLoss()
momentum = 0.9
cnn = VGG16(num_classes[0]).to(device)
optimizer = optim.Adam(cnn.parameters(), lr=learning_rate, weight_decay=1e-5)
train_batch_size = 100
test_batch_size = 10

# Create Dataset objects, then create torch dataloader
train_dataset = SimpleDataset(x_train, y_train)
test_dataset = SimpleDataset(x_test, y_test)

train_loader = DataLoader(train_dataset, batch_size=train_batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=test_batch_size, shuffle=False)

def train(net, loader, optimizer, epoch, device):
    print(f'training')
    net.train()
    correct = 0
    for batch_idx, (data, target) in enumerate(loader):

        data, target = data.to(device), target.to(device)

        # clear up gradients for backprop
        optimizer.zero_grad()
        output = F.log_softmax(net(data), dim=1)
        #print(f'output: {output} target: {target}')
        # use NLL loss
        loss = criterion(output, target)

        # compute gradients and make updates
        loss.backward()
        optimizer.step()

        pred = output.data.max(1, keepdim=True)[1]
        correct += (pred.eq(target.data.view_as(pred)).sum().item())

    print(f'\tTraining epoch {epoch} Accuracy: {100 * correct / len(loader.dataset)}%')


def test(net, loader, device):
    print(f'testing')
    net.eval()

    test_loss = 0
    correct = 0
    total = 0

    with torch.no_grad():
        for data, target in loader:
            data, target = data.to(device), target.to(device)

            output = net(data)
            test_loss += F.nll_loss(output, target, size_average=False).item()
            pred = output.data.max(1, keepdim=True)[1]
            correct += (pred.eq(target.data.view_as(pred)).sum().item())

            total = total + 1
    accuracy = 100 * correct / len(loader.dataset)
    print(f'\ttesting accuracy: {accuracy}%')
    return accuracy


n_epoch = 100

for epoch in range(n_epoch):
    print(f'epoch {epoch}')
    train(cnn, train_loader, optimizer, epoch, device)
    test(cnn, test_loader, device)