In [1]:
from google.colab import drive
drive.mount('/gdrive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /gdrive


In [2]:
datadir = "/gdrive/My Drive/"

In [3]:
import numpy as np
import pandas as pd
import torch
import os
import cv2
import urllib
import random
import torchvision
import matplotlib.pyplot as plt

random.seed(42) 

#Downloading data

In [None]:
species = ["cordia africana", "croton", "eucalyptus", "gravilia", "rauvolfia cafffra"]

species_df = {}

for i in species:
  species_df[i] = pd.read_csv('{}{}.csv'.format(datadir, i)).values

In [None]:
species_df["rauvolfia cafffra"][0:8]

array([['https://treetracker-production-images.s3.eu-central-1.amazonaws.com/2020.05.22.18.30.17_2962105b-1b6c-4f74-8a2e-af0b8c7267ec_IMG_20191126_180925_798699084.jpg'],
       ['https://treetracker-production-images.s3.eu-central-1.amazonaws.com/2020.05.22.18.30.19_55cba901-10e7-46f1-9539-ac213cd23955_IMG_20191126_180942_-1950841047.jpg'],
       ['https://treetracker-production-images.s3.eu-central-1.amazonaws.com/2020.05.22.18.30.20_58601598-269e-41f2-853c-905d687e1fe4_IMG_20191126_180957_-1128881177.jpg'],
       ['https://treetracker-production-images.s3.eu-central-1.amazonaws.com/2020.05.22.18.30.21_ff5cadee-fcd0-4aae-bdb9-51ed2f26e7bd_IMG_20191126_181049_-263104524.jpg'],
       ['https://treetracker-production-images.s3.eu-central-1.amazonaws.com/2020.05.22.18.30.31_07f717ba-6363-4b19-a507-aa4eac9f40ca_IMG_20191126_181306_-1254242593.jpg'],
       ['https://treetracker-production-images.s3.eu-central-1.amazonaws.com/2020.05.22.18.30.33_60d31e46-ee86-46b6-a067-4f498bc2b744_IMG_

In [None]:
def create_dir(name, datadir = datadir):
  if not os.path.exists('{}{}'.format(datadir, name)):
      os.makedirs("{}{}".format(datadir, name))
      print("Directory " , name ,  " Created ")
  else:    
      print("Directory " , name ,  " already exists")

create_dir("greenstand_train")

create_dir("greenstand_test")

Directory  greenstand_train  already exists
Directory  greenstand_test  Created 


In [None]:
for key in species_df.keys():
  create_dir(key, datadir+"greenstand_train/")
  create_dir(key, datadir+"greenstand_test/")
  data = list(species_df[key].flatten())
  random.shuffle(data)

  dir = "{}greenstand_test/{}".format(datadir, key)
  for c, url in enumerate(data[:20]):
    resource = urllib.request.urlopen(url)
    output = open("{}/{}.jpg".format(dir, c),"wb")
    output.write(resource.read())
    output.close()

  dir = "{}greenstand_train/{}".format(datadir, key)
  for c, url in enumerate(data[20:]):
    resource = urllib.request.urlopen(url)
    output = open("{}/{}.jpg".format(dir, c),"wb")
    output.write(resource.read())
    output.close()


Directory  cordia africana  already exists
Directory  cordia africana  already exists
Directory  croton  Created 
Directory  croton  Created 
Directory  eucalyptus  Created 
Directory  eucalyptus  Created 
Directory  gravilia  Created 
Directory  gravilia  Created 
Directory  rauvolfia cafffra  Created 
Directory  rauvolfia cafffra  Created 


#Training

In [4]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [5]:
#from https://discuss.pytorch.org/t/balanced-sampling-between-classes-with-torchvision-dataloader/2703/3
def make_weights_for_balanced_classes(images, nclasses):
  #returns appriopiate sampling weights for unbalanced data                        
    count = [0] * nclasses                                                      
    for item in images:                                                         
        count[item[1]] += 1                                                     
    weight_per_class = [0.] * nclasses                                      
    N = float(sum(count))                                                   
    for i in range(nclasses):                                                   
        weight_per_class[i] = N/float(count[i])                                 
    weight = [0] * len(images)                                              
    for idx, val in enumerate(images):                                          
        weight[idx] = weight_per_class[val[1]]                                  
    return weight   

In [13]:
jitter = torchvision.transforms.ColorJitter(hue=0.5, saturation=0.5, contrast=0.5)
rotate = torchvision.transforms.RandomRotation(20)

#crop to right input size of pre-trained network
crop = torchvision.transforms.CenterCrop(480)
resize = torchvision.transforms.Resize((224, 224))
#standard normalizing required by resnet
normalize = torchvision.transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                  std=[0.229, 0.224, 0.225])
transforms = torchvision.transforms.Compose([crop, resize, rotate, jitter, torchvision.transforms.ToTensor(), normalize])

train_dataset = torchvision.datasets.ImageFolder(
        root=datadir+"greenstand_train/",
        transform=transforms)

#combat imbalanced classes by weightsampling
weights = make_weights_for_balanced_classes(train_dataset.imgs, len(train_dataset.classes))                                                                
weights = torch.DoubleTensor(weights)                                       
sampler = torch.utils.data.sampler.WeightedRandomSampler(weights, len(weights))                     
                                                                                                            

train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=32,
        num_workers=20,
        sampler = sampler)


In [7]:
#initialize pre-trained model
model = torchvision.models.resnet18(pretrained=True)
#only use the pre-trained model as feature extracter
for param in model.parameters():
  param.requires_grad = False
#change last layer to correct amount of classes
num_ftrs = model.fc.in_features
model.fc = torch.nn.Linear(num_ftrs, 5)
model = model.to(device)

Downloading: "https://download.pytorch.org/models/resnet18-5c106cde.pth" to /root/.cache/torch/checkpoints/resnet18-5c106cde.pth


HBox(children=(FloatProgress(value=0.0, max=46827520.0), HTML(value='')))




In [9]:
model = torch.load(datadir+"resnet18_model.pt")
model = model.to(device)

In [8]:
#select only the parameters to be updated to pass to the optimizer
params_to_update = model.parameters()
print("Params to learn:")
params_to_update = []
for name,param in model.named_parameters():
    if param.requires_grad == True:
        params_to_update.append(param)
        print("\t",name)

optimizer = torch.optim.Adam(params_to_update, lr=0.001)

Params to learn:
	 fc.weight
	 fc.bias


In [9]:
# Setup the loss
criterion = torch.nn.CrossEntropyLoss()

In [18]:
num_epochs = 10

model.train()
for epoch in range(num_epochs):
  print('Epoch {}/{}'.format(epoch, num_epochs - 1))
  print('-' * 10)
  running_loss= 0
  running_corrects = 0

  model.train()  # Set model to training mode
  for c, data in enumerate(train_loader):
      inputs = data[0].to(device)
      labels = data[1].to(device)

      # zero the parameter gradients
      optimizer.zero_grad()

      # forward
      # track history if only in train
     
      outputs = model(inputs)
      loss = criterion(outputs, labels)

      _, preds = torch.max(outputs, 1)
      loss.backward()
      optimizer.step()


      running_loss += loss.item() * inputs.size(0)
      running_corrects += torch.sum(preds == labels.data)

      epoch_loss = running_loss / len(train_loader.dataset)
      epoch_acc = running_corrects.double() / len(train_loader.dataset)

      if c%10 == 0:
        print(f"Progress: {c/len(train_loader)*100:.2f}%")
        print('Loss: {:.4f} Acc: {:.4f}'.format(epoch_loss, epoch_acc))



Epoch 0/9
----------
Progress: 0.00%
Loss: 0.0018 Acc: 0.0059
Progress: 6.06%
Loss: 0.0290 Acc: 0.0560
Progress: 12.12%
Loss: 0.0596 Acc: 0.1046
Progress: 18.18%
Loss: 0.0859 Acc: 0.1552
Progress: 24.24%
Loss: 0.1150 Acc: 0.2051
Progress: 30.30%
Loss: 0.1422 Acc: 0.2564
Progress: 36.36%
Loss: 0.1686 Acc: 0.3066
Progress: 42.42%
Loss: 0.1974 Acc: 0.3560
Progress: 48.48%
Loss: 0.2253 Acc: 0.4065
Progress: 54.55%
Loss: 0.2479 Acc: 0.4583
Progress: 60.61%
Loss: 0.2736 Acc: 0.5095
Progress: 66.67%
Loss: 0.2987 Acc: 0.5605
Progress: 72.73%
Loss: 0.3184 Acc: 0.6142
Progress: 78.79%
Loss: 0.3411 Acc: 0.6672
Progress: 84.85%
Loss: 0.3681 Acc: 0.7171
Progress: 90.91%
Loss: 0.3952 Acc: 0.7677
Progress: 96.97%
Loss: 0.4234 Acc: 0.8175
Epoch 1/9
----------
Progress: 0.00%
Loss: 0.0024 Acc: 0.0049
Progress: 6.06%
Loss: 0.0321 Acc: 0.0556
Progress: 12.12%
Loss: 0.0565 Acc: 0.1078
Progress: 18.18%
Loss: 0.0837 Acc: 0.1594
Progress: 24.24%
Loss: 0.1109 Acc: 0.2099
Progress: 30.30%
Loss: 0.1383 Acc: 0.2

In [19]:
torch.save(model, datadir+"resnet18_model.pt")


In [20]:
test_dataset = torchvision.datasets.ImageFolder(
        root=datadir+"greenstand_test/",
        transform=transforms)

test_loader = torch.utils.data.DataLoader(
        test_dataset,
        batch_size=16,
        num_workers=20,
        shuffle=True)


In [21]:
from sklearn.metrics import classification_report
model.eval()


for loader, title in zip([train_loader, test_loader], ["Train", "Test"]):
  target = []
  predicted = []
  for inputs, labels in loader:
      inputs = inputs.to(device)
      #labels = labels.to(device)

      predicted += model(inputs).argmax(1).tolist()
      target += labels.tolist()
  print("Performance on the {} set".format(title))
  print(classification_report(target, predicted, target_names = test_dataset.class_to_idx.keys()))


Performance on the Train set
                   precision    recall  f1-score   support

  cordia africana       0.94      0.98      0.96      1099
           croton       0.86      0.67      0.75      1041
       eucalyptus       0.73      0.87      0.79      1020
         gravilia       0.89      0.91      0.90      1062
rauvolfia cafffra       0.92      0.90      0.91      1048

         accuracy                           0.87      5270
        macro avg       0.87      0.87      0.86      5270
     weighted avg       0.87      0.87      0.87      5270

Performance on the Test set
                   precision    recall  f1-score   support

  cordia africana       0.84      0.80      0.82        20
           croton       0.67      0.60      0.63        20
       eucalyptus       0.69      0.90      0.78        20
         gravilia       0.86      0.90      0.88        20
rauvolfia cafffra       0.94      0.75      0.83        20

         accuracy                           0.79     