In [1]:
%matplotlib inline
import numpy as np
import os
import glob
from tqdm.notebook import tqdm
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torch.utils.data import Dataset
from torch.utils.data import random_split
from torch.utils.data import Sampler
from torch.utils.data import WeightedRandomSampler
import torchvision.transforms as transforms
import cv2
import matplotlib.pyplot as plt
from datetime import datetime

In [2]:
# Select the compute device once, then report which one was picked.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print("The code will run on GPU." if device.type == 'cuda' else "The code will run on CPU.")

The code will run on GPU. This is important so things run faster.


In [3]:
class dataset(Dataset):
    """Folder-per-class dataset of multi-channel ``.npy`` images.

    Files are looked up as ``<data_path>/<train|test>/<class name>/<file>.npy``.
    Class names are sorted alphabetically and mapped to integer labels
    ``0..n_classes-1``; the file list is sorted as well so that an index always
    refers to the same file regardless of the order the filesystem reports.

    Args:
        train: if truthy use the ``train`` sub-folder, otherwise ``test``.
        size: target size handed to ``cv2.resize`` (which reads it as
            ``(width, height)``).
        data_path: root folder containing the ``train`` and ``test`` folders.
        n_channels: number of leading channels kept from every image.
        mask_channel: channel whose intensity decides what counts as background.
        mask_threshold: pixels whose ``mask_channel`` value is below this
            threshold are set to zero in every channel.
    """

    def __init__(self, train, size, data_path='../data',
                 n_channels=7, mask_channel=4, mask_threshold=35):
        self.size = size
        self.n_channels = n_channels
        self.mask_channel = mask_channel
        self.mask_threshold = mask_threshold

        split_dir = os.path.join(data_path, 'train' if train else 'test')
        image_classes = sorted(
            os.path.split(d)[1] for d in glob.glob(split_dir + '/*') if os.path.isdir(d)
        )
        self.name_to_label = {c: id for id, c in enumerate(image_classes)}
        # glob gives no ordering guarantee; sort for reproducible indexing.
        self.image_paths = sorted(glob.glob(split_dir + '/*/*.npy'))

    def __len__(self):
        """Number of ``.npy`` files found for this split."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Load, mask, resize and convert one image; returns ``(X, y)``.

        ``y`` is the integer label of the file's parent folder. ``X`` is the
        result of ``transforms.functional.to_tensor`` on the processed array.
        Assumes the stored array is (H, W, C) with at least ``n_channels``
        channels, as implied by the slicing below.
        """
        image_path = self.image_paths[idx]
        class_name = os.path.split(os.path.split(image_path)[0])[1]
        y = self.name_to_label[class_name]

        image = np.load(image_path)[:, :, :self.n_channels]

        # Simple intensity mask: zero every channel of the pixels whose
        # mask-channel value is below the threshold.
        background = image[:, :, self.mask_channel] < self.mask_threshold
        image[background] = 0

        image = cv2.resize(image, self.size, interpolation=cv2.INTER_LINEAR)

        X = transforms.functional.to_tensor(image)
        return X, y

    def get_image_paths(self):
        """Return the list of file paths, in the same order as the dataset indices."""
        return self.image_paths

In [4]:
size = (64, 128)  # forwarded by the dataset to cv2.resize
train_set = dataset(train=True, size=size)
test_set = dataset(train=False, size=size)

batch_size = 512

# Class-balanced sampling: every class receives the same total probability
# mass, split evenly between its files. The counts are taken from the very
# file list the sampler indexes into, so the weights always line up with the
# samples (no hard-coded class names or directory listings).
train_paths = train_set.get_image_paths()
train_labels = [os.path.basename(os.path.dirname(path)) for path in train_paths]

class_counts = {}
for label in train_labels:
    class_counts[label] = class_counts.get(label, 0) + 1

# With the five grain classes this is the original 0.2 share per class.
class_share = 1.0 / max(len(class_counts), 1)
weights = torch.tensor(
    [class_share / class_counts[label] for label in train_labels],
    dtype=torch.float,
)
sampler = WeightedRandomSampler(weights=weights, num_samples=len(train_set), replacement=True)

# The sampler already randomises the training order; the test set is read sequentially.
train_loader = DataLoader(train_set, batch_size=batch_size, sampler=sampler, num_workers=0)
test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=False, num_workers=0)

In [5]:
class ResNetBlock(nn.Module):
    """Residual block: two 3x3 same-padding convolutions plus an identity skip.

    The channel count (``n_features``) and spatial size are preserved.
    """

    def __init__(self, n_features):
        super().__init__()
        conv_kwargs = dict(in_channels=n_features, out_channels=n_features,
                           kernel_size=3, stride=1, padding=1)
        self.w1 = nn.Conv2d(**conv_kwargs)
        self.w2 = nn.Conv2d(**conv_kwargs)
        self.activation = nn.ReLU()

    def forward(self, x):
        """Return ``relu(w2(relu(w1(x))) + x)``."""
        skip = x.clone()
        residual = self.w2(self.activation(self.w1(x)))
        return self.activation(residual + skip)
    
class SE_ResNetBlock(nn.Module):
    """Residual block whose convolutional branch is rescaled per channel by a
    squeeze-and-excitation gate before the skip connection is added.

    Args:
        n_features: number of input (and output) channels.
        r: reduction ratio; the gate's bottleneck has ``n_features // r`` channels.
    """

    def __init__(self, n_features, r):
        super().__init__()
        bottleneck = n_features // r

        self.conv1 = nn.Conv2d(n_features, n_features, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(n_features, n_features, kernel_size=3, stride=1, padding=1)
        self.activation = nn.ReLU()
        self.globalpool = nn.AdaptiveAvgPool2d(output_size=(1, 1))
        # 1x1 convolutions stand in for nn.Linear layers here, so the pooled
        # tensor can be used as-is without reshaping.
        self.fc = nn.Conv2d(n_features, bottleneck, kernel_size=1, stride=1, padding=0)
        self.fc2 = nn.Conv2d(bottleneck, n_features, kernel_size=1, stride=1, padding=0)
        self.gate = nn.Sigmoid()

    def forward(self, x):
        """Return ``relu(features * gate(features) + x)``."""
        skip = x.clone()
        features = self.conv2(self.activation(self.conv1(x)))

        # Squeeze (global average pool) then excite (bottleneck + sigmoid).
        squeezed = self.globalpool(features)
        scale = self.gate(self.fc2(self.activation(self.fc(squeezed))))

        return self.activation((features * scale) + skip)

In [6]:
#Define network
class SE_ResNet(nn.Module):
    """Convolutional classifier built from a small stem, three groups of
    ``SE_ResNetBlock``s (at 2x, 4x and 8x ``n_features`` channels) separated by
    2x2 max-pooling, and a three-layer fully connected head with 5 outputs.

    The head's input width (16*8 positions times 8*n_features channels)
    matches a 128x64 input after the three pooling steps.

    Args:
        n_in: number of input channels.
        n_features: channel count produced by the first convolution.
        num_blocks: number of SE residual blocks per resolution stage.
        r: reduction ratio passed to every ``SE_ResNetBlock``.
    """

    def __init__(self, n_in, n_features, num_blocks=2, r=8):
        super().__init__()
        stage_widths = [2 * n_features, 4 * n_features, 8 * n_features]

        # Stem: the first convolution has to output the desired number of features.
        layers = [
            nn.Conv2d(n_in, n_features, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.Conv2d(n_features, n_features, 3, 1, 1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),  # 128x64 -> 64x32
            nn.Conv2d(n_features, stage_widths[0], 3, 1, 1),
            nn.ReLU(),
        ]
        layers.extend(SE_ResNetBlock(stage_widths[0], r) for _ in range(num_blocks))

        # Two further stages (64x32 -> 32x16, then 32x16 -> 16x8): halve the
        # resolution, double the channels, then apply the SE residual blocks.
        for narrow, wide in zip(stage_widths[:-1], stage_widths[1:]):
            layers.append(nn.Sequential(
                nn.MaxPool2d(2, 2),
                nn.Conv2d(narrow, wide, kernel_size=3, stride=1, padding=1),
                nn.ReLU(),
            ))
            layers.extend(SE_ResNetBlock(wide, r) for _ in range(num_blocks))

        self.blocks = nn.Sequential(*layers)

        self.fc = nn.Sequential(
            nn.Linear(16 * 8 * 8 * n_features, 2048),
            nn.ReLU(),
            nn.Linear(2048, 512),
            nn.ReLU(),
            nn.Linear(512, 5),
        )

    def forward(self, x):
        """Return the raw class scores (no softmax) for a mini-batch ``x``."""
        features = self.blocks(x)
        # Flatten everything except the leading mini-batch dimension.
        flat = features.view(features.size(0), -1)
        return self.fc(flat)

In [7]:
def focal(outputs,targets,alpha=1,gamma=2):
    """Focal loss averaged over the mini-batch.

    Args:
        outputs: raw class scores, as accepted by ``F.cross_entropy``.
        targets: integer class indices, one per row of ``outputs``.
        alpha: constant factor applied to every sample's loss.
        gamma: focusing exponent; larger values shrink the contribution of
            samples whose true class already has high probability.

    Returns:
        Scalar tensor ``mean(alpha * (1 - p_t)**gamma * CE)``, where ``p_t`` is
        the predicted probability of the true class.
    """
    # reduction='none' keeps one cross-entropy value per sample.
    per_sample_ce = F.cross_entropy(outputs, targets, reduction='none')
    # CE = -log(p_t), hence p_t = exp(-CE).
    true_class_prob = torch.exp(-per_sample_ce)
    weighted = alpha * (1 - true_class_prob) ** gamma * per_sample_ce
    return weighted.mean()

In [8]:
#Define the training as a function.
def train(model, optimizer, num_epochs=10):
    """Train ``model`` with the focal loss and report accuracy after each epoch.

    Uses the notebook-level ``train_loader``, ``test_loader``, ``device`` and
    ``focal``. Accuracies are computed over the samples actually drawn from
    each loader, so they stay correct if the sampler size or ``drop_last``
    setting of a loader changes.

    Args:
        model: network to optimise; expected to already live on ``device``.
        optimizer: optimizer bound to ``model``'s parameters.
        num_epochs: number of passes over ``train_loader``.

    Returns:
        ``(test_acc_all, train_acc_all)``: two lists holding one accuracy
        (as a fraction in [0, 1]) per epoch. Note the order: test first.
    """
    train_acc_all = []
    test_acc_all = []

    for epoch in range(num_epochs):
        # ---- training pass ----
        model.train()
        train_correct = 0
        train_seen = 0
        for data, target in train_loader:
            data, target = data.to(device), target.to(device)
            #Zero the gradients computed for each weight
            optimizer.zero_grad()
            #Forward pass your image through the network
            output = model(data)
            #Compute the loss
            loss = focal(output, target)
            #Backward pass through the network
            loss.backward()
            #Update the weights
            optimizer.step()

            #Compute how many were correctly classified
            predicted = output.argmax(1)
            train_correct += (target == predicted).sum().item()
            train_seen += target.size(0)

            #Remove mini-batch from memory
            del data, target, loss

        # ---- evaluation pass (no gradients needed) ----
        model.eval()
        test_correct = 0
        test_seen = 0
        with torch.no_grad():
            for data, target in test_loader:
                output = model(data.to(device))
                # Targets stay on the CPU, so bring the predictions back to compare.
                predicted = output.argmax(1).cpu()
                test_correct += (target == predicted).sum().item()
                test_seen += target.size(0)

        # max(..., 1) avoids a ZeroDivisionError when a loader yields nothing.
        train_acc = train_correct / max(train_seen, 1)
        test_acc = test_correct / max(test_seen, 1)
        train_acc_all.append(train_acc)
        test_acc_all.append(test_acc)
        print("Accuracy train: {train:.1f}%\t test: {test:.1f}%".format(test=100*test_acc, train=100*train_acc))
    return test_acc_all, train_acc_all

In [9]:
# Build the 7-input-channel SE-ResNet on the selected device and train it for
# one epoch with plain SGD. train() returns the test accuracies first.
model = SE_ResNet(n_in=7, n_features=8).float().to(device)
optimizer = optim.SGD(model.parameters(), lr=1e-3)
test_acc_all, train_acc_all = train(model, optimizer, num_epochs=1)

HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))


Accuracy train: 19.7%	 test: 1.5%


In [None]:
# #Save model
# today = datetime.today()
# torch.save(model.state_dict(), '../Models/SEResNet-{date}'.format(date=today.strftime("%I%p-%d-%h")))
# np.save('../Models/test_res_{}'.format(today.strftime("%I%p-%d-%h")),test_acc_all)
# np.save('../Models/train_res_{}'.format(today.strftime("%I%p-%d-%h")),train_acc_all)