In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt
import os
import pandas as pd
from glob import glob
from IPython.display import clear_output

# Set training parameters

In [None]:
# --- Hardware ---------------------------------------------------------------
# Flip to False when no GPU is available.
use_gpu = True
# Worker-process count passed to the training DataLoader.
num_workers = 12

# --- Optimisation -----------------------------------------------------------
num_epochs = 20        # full passes over the training set
batch_size = 30        # images per optimisation step
learning_rate = 1e-4   # step size given to the Adam optimizer

# Create training dataset and data loader

In [None]:
# Per-channel mean / standard deviation, presumably on a 0-255 pixel scale
# (TODO confirm provenance). They are divided by 255 below because Normalize
# runs after ToTensor.
channel_means = [129.3, 124.1, 112.4]
channel_stds = [68.2, 65.4, 70.4]

# Training-time preprocessing: resize, random 512x512 crop, random flips along
# both axes, then tensor conversion and per-channel normalisation.
train_transform = transforms.Compose([
    transforms.Resize(512),
    transforms.RandomCrop(512),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomVerticalFlip(p=0.5),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[m / 255. for m in channel_means],
        std=[s / 255. for s in channel_stds],
    ),
])

# ImageFolder derives one class per sub-directory of the root folder.
train_dataset = datasets.ImageFolder(root="data/train/", transform=train_transform)
trainLoader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=num_workers,
    pin_memory=False,
)

# Train a ResNet50 Model

In [None]:
# Fine-tune a pretrained ResNet50 on the training set.
#
# The final fully-connected layer is replaced so that the network emits one
# logit per class found by ImageFolder. Sizing it from the dataset (instead of
# a hard-coded 100) keeps the model head and `train_dataset.classes` in sync.
num_classes = len(train_dataset.classes)
# NOTE(review): newer torchvision releases prefer the `weights=` argument over
# `pretrained=True`; kept as-is to stay compatible with the installed version.
model = models.resnet50(pretrained=True)
model.fc = nn.Linear(model.fc.in_features, num_classes)

# Always define `device` so the rest of the notebook can rely on it.
device = torch.device("cuda" if use_gpu else "cpu")
model.to(device)

optimizer = optim.Adam(model.parameters(), lr=learning_rate)
criterion = nn.CrossEntropyLoss()

n_iter = 0  # total number of optimisation steps taken so far
for epoch in range(num_epochs):
    model.train()
    running_loss = []  # per-batch losses of the current epoch
    for inputs, labels in trainLoader:
        inputs = inputs.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        # Do NOT squeeze the output: the model already returns
        # (batch, num_classes), and squeezing would drop the batch dimension
        # whenever the last batch contains a single image, breaking the loss.
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss.append(loss.item())
        n_iter += 1

    # Report progress once per epoch so a long run is not silent.
    epoch_loss = float(np.mean(running_loss)) if running_loss else float("nan")
    print("epoch {}/{} - mean training loss: {:.4f}".format(epoch + 1, num_epochs, epoch_loss))

# Evaluate on test data.

Note: this is fairly inefficient because it loads only one image at a time. It could be improved by wrapping the test set in a `Dataset` and batching it with a `DataLoader`.

In [None]:
# Ground-truth table for the test set and the folder holding the test images.
test_folder = 'data/test/'
test_ground_truth = pd.read_csv('data/test_data.csv')

# Full path of every test image, in the same order as the CSV rows.
# The filenames in the CSV appear to carry stray whitespace, hence the strip().
gt_files = np.array([
    os.path.join(test_folder, name.strip())
    for name in test_ground_truth['filename']
])

# Deterministic counterpart of the training transform: centre crop, no flips.
test_transform = transforms.Compose([
    transforms.Resize(512),
    transforms.CenterCrop(512),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[m / 255. for m in channel_means],
        std=[s / 255. for s in channel_stds],
    ),
])

In [None]:
# Run the trained model over every test image, one image per forward pass,
# collecting the raw logits, the predicted class name and the ground-truth
# cultivar for each file.
model.eval()

pred_classes = []   # predicted class name per test image
gt_classes = []     # ground-truth cultivar per test image
all_outputs = []    # raw logits per test image (numpy arrays)
with torch.no_grad():
    # `gt_files` was built row-by-row from `test_ground_truth`, so position
    # `row_idx` in `gt_files` is row `row_idx` of the table. Looking the row
    # up by position avoids matching full paths against bare filenames, which
    # never matched and raised an IndexError.
    for row_idx, im in enumerate(gt_files):
        # torchvision's default image loader (the one ImageFolder uses for the
        # training data): opens the file, converts to RGB so the 3-channel
        # Normalize always applies, and closes the file handle.
        img = datasets.folder.default_loader(im)
        inputs = test_transform(img).unsqueeze(0)  # add a batch dimension
        if use_gpu:
            inputs = inputs.to(device)

        # Drop the batch dimension again and move to host memory
        # (.cpu() is a no-op for tensors that already live on the CPU).
        outputs = model(inputs).squeeze(0).cpu()
        all_outputs.append(outputs.numpy())

        pred_class = train_dataset.classes[outputs.argmax().item()]
        pred_classes.append(pred_class)

        gt_class = test_ground_truth.iloc[row_idx]['cultivar'].strip()
        gt_classes.append(gt_class)

# Compute accuracy

In [None]:
# Top-1 accuracy: fraction of test images whose predicted class name equals
# the ground-truth cultivar.
assert len(pred_classes) == len(gt_classes), "prediction / ground-truth length mismatch"
is_correct = [p == g for p, g in zip(pred_classes, gt_classes)]
# Divide by the number of evaluated images (the original referenced an
# undefined name here); report NaN rather than dividing by zero when nothing
# was evaluated.
accuracy = sum(is_correct) / len(is_correct) if is_correct else float("nan")

print(accuracy)