In [1]:
import os
import torch
import torchvision
import numpy as np
import pandas as pd
import pickle as pkl
import torch.nn as nn
import tifffile as tif
import torch.optim as optim
import torch.nn.functional as F
from torch.autograd import Variable
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, Dataset

os.chdir("../../")

In [35]:
class ImageLoader(Dataset):
    
    def __init__(self, data, batch_size, crop_size):
        
        self.x = [t[0] for t in data]
        self.y = [t[1] for t in data]
        self.batch_size = batch_size
        self.crop_size = crop_size
        
    def __len__(self):
        
        return len(self.x)
    
    def __getitem__(self,idx):
        
        images = torch.from_numpy(tif.imread(self.x[idx])[:,self.crop_size:-self.crop_size,self.crop_size:-self.crop_size].astype('float32'))
        true_labels = torch.LongTensor([self.y[idx]])
        
        return images ,true_labels

In [36]:
class Data_Generator:
    
    def __init__(self, directory):
        
        self.path = directory
        
        with open("Data/hierarchy_data.pkl", "rb") as f:
            self.hd = pkl.load(f)
        with open("Data/class_encoding.pkl", "rb") as f:
            self.class_encoded = pkl.load(f)
        with open("Data/order_encoding.pkl", "rb") as f:
            self.order_encoded = pkl.load(f)
        with open("Data/family_encoding.pkl", "rb") as f:
            self.family_encoded = pkl.load(f)
        with open("Data/genus_encoding.pkl", "rb") as f:
            self.genus_encoded = pkl.load(f)
        with open("Data/specie_encoding.pkl", "rb") as f:
            self.specie_list = pkl.load(f)
        
        self.train_data = []
        self.test_data = []
        
        for cls in self.hd.keys():
            for odr in self.hd[cls].keys():
                for fmly in self.hd[cls][odr].keys():
                    for gns in self.hd[cls][odr][fmly].keys():
                        for spc in self.hd[cls][odr][fmly][gns]:
                            dir_path = self.path+"train/"+str(self.class_encoded[cls])+"/"+str(self.order_encoded[odr])+"/"+str(self.family_encoded[fmly])+"/"+str(self.genus_encoded[gns])+"/"+str(spc)
                            for img in os.listdir(dir_path):
                                #y = np.zeros(10)
                                #y[self.class_encoded[cls]] = 1
                                self.train_data.append((dir_path+"/"+img, self.class_encoded[cls]))
                            dir_path = self.path+"test/"+str(self.class_encoded[cls])+"/"+str(self.order_encoded[odr])+"/"+str(self.family_encoded[fmly])+"/"+str(self.genus_encoded[gns])+"/"+str(spc)
                            for img in os.listdir(dir_path):
                                #y = np.zeros(10)
                                #y[self.class_encoded[cls]] = 1
                                self.test_data.append((dir_path+"/"+img, self.class_encoded[cls]))
        
        np.random.shuffle(self.train_data)
        np.random.shuffle(self.test_data)
        
    def data_generators(self, batch_size, crop_size):
        
        train_dataset = ImageLoader(self.train_data, batch_size, crop_size)
        test_dataset = ImageLoader(self.test_data, batch_size, crop_size)
        train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
        test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=True)
        
        return train_loader, test_loader

In [37]:
ob = Data_Generator(directory="Data/Hierarchial Data/")
train_loader, test_loader = ob.data_generators(batch_size=32, crop_size=16)

In [51]:
class NN(nn.Module):
    
    def __init__(self):
        
        super(NN, self).__init__()
        self.conv1 = nn.Conv2d(33, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 1)
    
    def forward(self, x):
        
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(-1, 16 * 5 * 5)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

In [52]:
net = NN()

In [53]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

In [54]:
for epoch in range(2):  # loop over the dataset multiple times

    running_loss = 0.0
    for i, data in enumerate(train_loader, 0):
        # get the inputs
        inputs, labels = data
        
        # wrap them in Variable
        inputs, labels = Variable(inputs), Variable(labels)
        print(inputs.size(), labels.size())
        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        print(outputs.size())
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.data[0]
        if i % 200 == 199:    # print every 2000 mini-batches
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 2000))
            running_loss = 0.0

print('Finished Training')

torch.Size([32, 33, 32, 32]) torch.Size([32, 1])
torch.Size([32, 1])


RuntimeError: multi-target not supported at /pytorch/torch/lib/THNN/generic/ClassNLLCriterion.c:22