In [1]:
import numpy as np
import matplotlib.pyplot as plt
# %matplotlib inline
from PIL import Image
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.optim import lr_scheduler
import torchvision
from torchvision import datasets, transforms, utils, models
import torchvision.utils as vutils

#from IPython.display import Image
import time
from pylab import rcParams

from tensorflow import summary

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print (device)
print (torch.cuda.device_count())
torch.manual_seed(42)

cuda:0
1


<torch._C.Generator at 0x7f28dd6b4110>

### Baseline model for transfer learning

In [2]:
def load_train_dataset(data_path, batch_size, num_workers):
    dataset = datasets.ImageFolder(
        root=data_path,
        transform=transforms.Compose([#transforms.Resize(100),
                                      #transforms.CenterCrop(64),
                                      transforms.Resize((128, 128)),
                                      transforms.RandomAffine(20),
                                      transforms.RandomHorizontalFlip(),
                                      transforms.RandomVerticalFlip(),
                                      transforms.RandomRotation(180),
                                      transforms.ColorJitter(brightness=0.3, contrast=0.4),
                                      transforms.ToTensor(),
                                      transforms.Normalize(mean=[0.485, 0.456, 0.406], 
                                                           std=[0.229, 0.224, 0.225])
                                      ])
    )

    loader = torch.utils.data.DataLoader(
        dataset,
        batch_size=batch_size,
        num_workers=num_workers,
        shuffle=True
    )
    return loader

def load_val_dataset(data_path, batch_size, num_workers):
    dataset = datasets.ImageFolder(
        root=data_path,
        transform=transforms.Compose([#transforms.Resize(100),
                                      #transforms.CenterCrop(64),
                                      transforms.Resize((128, 128)),
                                      transforms.ToTensor(),
                                      transforms.Normalize(mean=[0.485, 0.456, 0.406], 
                                                           std=[0.229, 0.224, 0.225])
                                      ])
    )
  
    loader = torch.utils.data.DataLoader( 
        dataset,
        batch_size=batch_size,
        num_workers=num_workers,
        shuffle=True
    )
    return loader

In [3]:
from os.path import isfile, join, abspath, exists, isdir, expanduser
class SeedlingDataset(torch.utils.data.Dataset):
    def __init__(self, labels, root_dir, subset=False, transform=None):
        self.labels = labels
        self.root_dir = root_dir
        self.transform = transform
    
    def __len__(self):
        return len(self.labels)
    
    def __getitem__(self, idx):
        img_name = self.labels.iloc[idx, 0]
        fullname = join(self.root_dir, img_name)
        image = Image.open(fullname).convert('RGB')
        labels = self.labels.iloc[idx, 2]
        if self.transform:
            image = self.transform(image)
        return image, int(labels)

In [4]:
import os
import pandas as pd
image_size = 128
batch_size = 4
classes = os.listdir('train/')
classes = sorted(classes, key=lambda item: (int(item.partition(' ')[0])
                               if item[0].isdigit() else float('inf'), item))
num_to_class = dict(zip(range(len(classes)), classes))
num_to_class

{0: 'Black-grass',
 1: 'Charlock',
 2: 'Cleavers',
 3: 'Common Chickweed',
 4: 'Common wheat',
 5: 'Fat Hen',
 6: 'Loose Silky-bent',
 7: 'Maize',
 8: 'Scentless Mayweed',
 9: 'Shepherds Purse',
 10: 'Small-flowered Cranesbill',
 11: 'Sugar beet'}

In [5]:
train = []
for index, label in enumerate(classes):
    path = 'train/' + label + '/'
    for file in os.listdir(path):
        train.append(['{}/{}'.format(label, file), label, index])
    
df = pd.DataFrame(train, columns=['file', 'category', 'category_id',]) 

train_data = df.sample(frac=0.7)
valid_data = df[~df['file'].isin(train_data['file'])]

In [6]:
train_trans = transforms.Compose([#transforms.Resize(100),
                                      #transforms.CenterCrop(64),
                                      transforms.Resize((128, 128)),
                                      transforms.RandomAffine(20),
                                      transforms.RandomHorizontalFlip(),
                                      transforms.RandomVerticalFlip(),
                                      transforms.RandomRotation(180),
                                      transforms.ColorJitter(brightness=0.3, contrast=0.4),
                                      transforms.ToTensor(),
                                      transforms.Normalize(mean=[0.485, 0.456, 0.406], 
                                                           std=[0.229, 0.224, 0.225])
                                      ])

valid_trans = transforms.Compose([#transforms.Resize(100),
                                      #transforms.CenterCrop(64),
                                      transforms.Resize((128, 128)),
                                      transforms.ToTensor(),
                                      transforms.Normalize(mean=[0.485, 0.456, 0.406], 
                                                           std=[0.229, 0.224, 0.225])
                                      ])

In [7]:
train_set = SeedlingDataset(train_data, 'train/', transform = train_trans)
valid_set = SeedlingDataset(valid_data, 'train/', transform = valid_trans)

train_loader = torch.utils.data.DataLoader(train_set, batch_size=batch_size, shuffle=True, num_workers=4)
valid_loader = torch.utils.data.DataLoader(valid_set, batch_size=batch_size, shuffle=True, num_workers=4)

dataset_sizes = {
    'train': len(train_loader.dataset), 
    'valid': len(valid_loader.dataset)
}

In [8]:
def get_accuracy(outputs, labels):
    _, argmax = torch.max(outputs, 1)
    return (labels == argmax.squeeze()).float().mean()

def save_checkpoint(state, is_best, filename=''):
    if is_best:
        filename = filename+'model_best.pth.tar'        
        torch.save(state, filename)

def train(epoch, globaliter, train_loader):
    net.train()
    running_loss = 0.
    running_accuracy = 0.
    # globaliter = 0
    
    for batch_idx, data in enumerate(train_loader):
        globaliter += 1
        images, labels = data[0].type(torch.FloatTensor), data[1]
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = net(images)
        accuracy = get_accuracy(outputs, labels).item()
        running_accuracy += accuracy
        loss = criterion(outputs, labels)
        with train_summary_writer.as_default():
            summary.scalar('loss', loss.item(), step=globaliter)
            summary.scalar('accuracy', accuracy, step=globaliter)
        running_loss += loss.item()
        loss.backward()
        optimizer.step()
        
        
    running_accuracy /= len(train_loader)
    running_loss /= len(train_loader)
    print('Train set: Loss: {:.4f}, Accuracy: {:.2f}%)'.format(
        running_loss, 100 * running_accuracy))
    running_loss = 0.
    running_accuracy = 0.
    net.train()

    return running_loss, running_accuracy, globaliter

def valid(globaliter, valid_loader):
    net.eval()
    valid_loss = 0.
    valid_accuracy = 0.
    correct = 0
    with torch.no_grad():
        for data in valid_loader:
            images, labels = data[0].type(torch.FloatTensor), data[1]
            images, labels = images.to(device), labels.to(device)
            outputs = net(images)
            valid_loss += criterion(outputs, labels).item()
            valid_accuracy += get_accuracy(outputs, labels).item()
    valid_loss /= len(valid_loader)
    valid_accuracy /= len(valid_loader)
    print('Test set: Loss: {:.4f}, Accuracy: {:.2f}%)'.format(
        valid_loss, 100 * valid_accuracy))
    print('___________\n')
    with test_summary_writer.as_default():
          summary.scalar('loss', valid_loss, step=globaliter)
          summary.scalar('accuracy', valid_accuracy, step=globaliter)
    return valid_loss, valid_accuracy

In [9]:
import datetime
current_time = str(datetime.datetime.now().timestamp())
train_log_dir = 'logs/tensorboard/train/' + "dense_transfer_train_1"
test_log_dir = 'logs/tensorboard/test/' + "dense_transfer_test_1"
train_summary_writer = summary.create_file_writer(train_log_dir)
test_summary_writer = summary.create_file_writer(test_log_dir)

#%reload_ext tensorboard
#%tensorboard --logdir logs
# %tensorboard --logdir logs/tensorboard

In [11]:
#print(model)
net = torchvision.models.densenet169()
num_ftrs = net.classifier.in_features
net.classifier = nn.Linear(num_ftrs, 12)
net = net.cuda()
criterion = nn.CrossEntropyLoss().to(device) 
optimizer = optim.Adam(net.parameters(), lr=0.007)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=37, gamma=0.3)

epochs = 100
best_accuracy = 0
_time = time.time()
globaliter = 0

for epoch in range(1, epochs+1):
    print("Epoch:", epoch)
    running_loss, running_accuracy, globaliter = train(epoch, globaliter, train_loader)
    print("Time:", time.time() - _time, '\n')
    if (epoch % 1 == 0):
        #print('___________')
        #print("Validation:")
        loss, accuracy = valid(globaliter, valid_loader)
        if accuracy > best_accuracy:
            best_accuracy = accuracy
            is_best = True
        else:
            is_best = False
        save_checkpoint({
            'epoch': epoch,
            'state_dict': net.state_dict(),
            'best_prec1': accuracy,
            'optimizer': optimizer.state_dict(),
        }, is_best, filename = "densenet169_base_transfer_")
    scheduler.step()

Epoch: 1
Train set: Loss: 2.5236, Accuracy: 15.02%)
Time: 133.68719959259033 

Test set: Loss: 2.1224, Accuracy: 25.28%)
___________

Epoch: 2
Train set: Loss: 2.0531, Accuracy: 27.37%)
Time: 280.8477590084076 

Test set: Loss: 1.9326, Accuracy: 30.95%)
___________

Epoch: 3
Train set: Loss: 1.8406, Accuracy: 34.89%)
Time: 429.1937322616577 

Test set: Loss: 1.6511, Accuracy: 42.58%)
___________

Epoch: 4
Train set: Loss: 1.7666, Accuracy: 36.78%)
Time: 577.6149618625641 

Test set: Loss: 1.5394, Accuracy: 45.24%)
___________

Epoch: 5
Train set: Loss: 1.6530, Accuracy: 41.14%)
Time: 726.2346887588501 

Test set: Loss: 1.4921, Accuracy: 46.29%)
___________

Epoch: 6
Train set: Loss: 1.4992, Accuracy: 46.33%)
Time: 873.5995965003967 

Test set: Loss: 1.2228, Accuracy: 51.75%)
___________

Epoch: 7
Train set: Loss: 1.3626, Accuracy: 52.67%)
Time: 1022.3228600025177 

Test set: Loss: 1.0838, Accuracy: 63.24%)
___________

Epoch: 8
Train set: Loss: 1.2700, Accuracy: 55.29%)
Time: 1171.2356

### Transfer learning on our task

In [16]:
train_root = '/content/drive/My Drive/DL/project/data/tomato/sep_data/train'
val_root = '/content/drive/My Drive/DL/project/data/tomato/sep_data/val'

train_root_weed = 'weeds/Train'#sep_weed/train'
val_root_weed = 'weeds/Test'#sep_weed/val'

batch_size = 128
num_workers = 4

%time train_data_loader = load_train_dataset(train_root_weed, batch_size, num_workers)
%time val_data_loader = load_val_dataset(val_root_weed, batch_size, num_workers)

CPU times: user 24 ms, sys: 0 ns, total: 24 ms
Wall time: 22.4 ms
CPU times: user 0 ns, sys: 8 ms, total: 8 ms
Wall time: 8.19 ms


In [19]:
train_log_dir = 'logs/tensorboard/train/' + "dense_transfer_train_stages_1"
test_log_dir = 'logs/tensorboard/test/' + "dense_transfer_test_stages_1"
train_summary_writer = summary.create_file_writer(train_log_dir)
test_summary_writer = summary.create_file_writer(test_log_dir)

In [None]:
net = torchvision.models.densenet169()
num_ftrs = net.classifier.in_features
net.classifier = nn.Linear(num_ftrs, 12)
checkpoint = torch.load('densenet169_base_transfer_model_best.pth.tar')
net.load_state_dict(checkpoint['state_dict'])
#for param in net.parameters():
#    param.requires_grad = False
num_ftrs = net.classifier.in_features
net.classifier = nn.Linear(num_ftrs, 9)
net.cuda()

weight = torch.tensor([1098.0, 3298.0, 942.0, 2082.0, 601.0, 650.0, 169.0, 160.0, 570.0])
weight = weight / torch.max(weight)

criterion = nn.CrossEntropyLoss(weight=weight).to(device)
optimizer = optim.Adam(net.parameters(), lr=0.025)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=27, gamma=0.3)

epochs = 95
best_accuracy = 0
_time = time.time()
globaliter = 0

for epoch in range(1, epochs+1):
    print("Epoch:", epoch)
    running_loss, running_accuracy, globaliter = train(epoch, globaliter, train_data_loader)
    print("Time:", time.time() - _time, '\n')
    if (epoch % 1 == 0):
        #print('___________')
        #print("Validation:")
        loss, accuracy = valid(globaliter, val_data_loader)
        if accuracy > best_accuracy:
            best_accuracy = accuracy
            is_best = True
        else:
            is_best = False
        save_checkpoint({
            'epoch': epoch,
            'state_dict': net.state_dict(),
            'best_prec1': accuracy,
            'optimizer': optimizer.state_dict(),
        }, is_best, filename = "densenet169_transfer_true_data_1_")
    scheduler.step()


Epoch: 1
Train set: Loss: 0.9599, Accuracy: 46.59%)
Time: 24.645110845565796 

Test set: Loss: 0.7353, Accuracy: 60.83%)
___________

Epoch: 2
Train set: Loss: 0.7544, Accuracy: 54.48%)
Time: 52.91909217834473 

Test set: Loss: 0.8612, Accuracy: 58.84%)
___________

Epoch: 3
Train set: Loss: 0.7159, Accuracy: 56.37%)
Time: 81.32513165473938 

Test set: Loss: 0.6442, Accuracy: 62.61%)
___________

Epoch: 4
Train set: Loss: 0.6974, Accuracy: 56.62%)
Time: 110.29714107513428 

Test set: Loss: 0.6727, Accuracy: 62.00%)
___________

Epoch: 5
Train set: Loss: 0.6790, Accuracy: 57.29%)
Time: 138.48596334457397 

Test set: Loss: 0.5977, Accuracy: 65.93%)
___________

Epoch: 6
Train set: Loss: 0.6641, Accuracy: 57.99%)
Time: 167.16017508506775 

Test set: Loss: 0.6779, Accuracy: 62.94%)
___________

Epoch: 7
Train set: Loss: 0.6549, Accuracy: 58.11%)
Time: 195.2121262550354 

Test set: Loss: 0.5474, Accuracy: 65.55%)
___________

Epoch: 8
Train set: Loss: 0.6519, Accuracy: 58.83%)
Time: 223.297