In [1]:
# Mount Google Drive into the Colab runtime at /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import os
import torch
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from torchvision import transforms
import torchvision.models as models
import torch.nn as nn
from sklearn.model_selection import train_test_split
from PIL import Image
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import random
import json

# Load data array and create dataframe

In [None]:
!unzip 'train.zip'
!unzip 'label.zip'

In [4]:
# Read the 'x' and 'y' arrays from the .npz archive. np.load returns a lazy
# NpzFile that keeps the archive open; the context manager closes it as soon
# as both arrays have been materialised in memory.
with np.load('train_data.npz') as npzfile:
    x_arr = npzfile['x']
    y_arr = npzfile['y']

print(x_arr.shape)
print(y_arr.shape)

(3777,)
(3777,)


In [5]:
# One row per sample. Column order matters: the dataset class reads the
# filename from column 0 and the label from column 1 via iloc.
dic = dict(filename=x_arr, label=y_arr)
df = pd.DataFrame(data=dic)

In [None]:
# Plain-text preview of the first five rows (head() defaults to n=5).
print(df.head())

# Hyperparameters

In [6]:
# Training configuration read by the dataset, model head, optimizer,
# scheduler and training loop in the cells below.
hyper = dict(
    batch_size=32,
    img_size=285,             # images are resized to img_size x img_size
    class_num=8,              # output width of the replaced final FC layer
    learning_rate=1e-4,
    step_size=[36, 40, 44],   # passed to MultiStepLR as `milestones`
    epoch=50,
)

# Define Dataset

In [7]:
class fish_dataset(Dataset):
    """Image-classification dataset with optional copy-paste augmentation.

    Each item is an ``(image_tensor, label)`` pair. In training mode, 80% of
    the requests ignore the indexed row and instead synthesise a sample:

    * for a fish class, the annotated fish boxes of a random image of that
      class are pasted at random positions onto a random 'NoF' image;
    * for 'NoF', the annotated fish boxes of a random fish image are blacked
      out.

    If the randomly chosen image has no annotation file, the indexed row is
    returned instead.

    Args:
        dataframe: frame whose column 0 holds the image filename and column 1
            the label.
        training: enables copy-paste and the random augmentation transforms.
        rootPath: string prefixed to each filename by plain concatenation
            (no path separator is inserted).
        img_size: side length, in pixels, of the square output image.
    """

    # copy_paste returns the index into this list as the label.
    # NOTE(review): presumably this matches the label encoding stored in the
    # dataframe -- verify against how train_data.npz was produced.
    CLASSES = ['ALB', 'BET', 'DOL', 'LAG', 'NoF', 'OTHER', 'SHARK', 'YFT']
    # Class used as fish-free background for pasting.
    BACKGROUND_CLASS = 'NoF'
    # Directories holding one sub-folder per class (images / JSON annotations).
    IMAGE_DIR = '/content/train'
    LABEL_DIR = '/content/label'
    # Probability of replacing a training sample with a synthesised one.
    COPY_PASTE_PROB = 0.8

    def __init__(self, dataframe, training=False, rootPath='', img_size=hyper["img_size"]):
        self.rootPath = rootPath
        self.dataframe = dataframe
        self.img_size = img_size
        self.training = training

    def __getitem__(self, index):
        """Return ``(tensor, label)`` for ``index`` (possibly synthesised when training)."""
        if self.training and random.random() < self.COPY_PASTE_PROB:
            img, label = self.copy_paste(index)
        else:
            img, label = self.load_from_dataset(index)
        img = self.trans_img(img)
        return img, label

    def __len__(self):
        return len(self.dataframe.index)

    def trans_img(self, img):
        """Resize, (optionally) augment, and normalise a PIL image into a tensor."""
        # Mean/std passed to Normalize; identical for training and evaluation.
        normalize = transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        if self.training:
            transform = transforms.Compose([
                transforms.RandomHorizontalFlip(p=0.5),
                transforms.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4),
                transforms.Resize((self.img_size, self.img_size)),
                transforms.RandomAffine(degrees=10, translate=(0.1, 0.1), scale=(0.9, 1.1), shear=10, interpolation=transforms.InterpolationMode.BILINEAR),
                transforms.ToTensor(),
                normalize,
            ])
        else:
            transform = transforms.Compose([
                transforms.Resize((self.img_size, self.img_size)),
                transforms.ToTensor(),
                normalize,
            ])
        return transform(img)

    def load_from_dataset(self, index):
        """Load the image and label stored in row ``index`` of the dataframe."""
        filename = self.dataframe.iloc[index, 0]
        label = self.dataframe.iloc[index, 1]
        # NOTE(review): plain concatenation -- the stored filenames presumably
        # start with a path separator; confirm before switching to os.path.join.
        img = Image.open(self.rootPath + filename)
        img = img.convert('RGB')
        return img, label

    @staticmethod
    def _read_boxes(label_path):
        """Return the ``(xmin, ymin, xmax, ymax)`` int boxes from an annotation file.

        Only the first two points of every entry in ``"shapes"`` are used, as
        opposite corners of a rectangle. Entries with fewer than two points or
        with zero area are skipped because there is nothing to crop or mask.
        """
        with open(label_path) as json_file:
            labels = json.load(json_file)
        boxes = []
        for shape in labels["shapes"]:
            points = shape["points"]
            if len(points) < 2:
                continue
            xmin = int(min(points[0][0], points[1][0]))
            xmax = int(max(points[0][0], points[1][0]))
            ymin = int(min(points[0][1], points[1][1]))
            ymax = int(max(points[0][1], points[1][1]))
            if xmax > xmin and ymax > ymin:
                boxes.append((xmin, ymin, xmax, ymax))
        return boxes

    def _pick_annotated_image(self, class_name):
        """Pick a random image of ``class_name`` and load it with its boxes.

        Returns ``(rgb_image, boxes)``, or ``None`` when the chosen image has
        no annotation file. The annotation is checked before the image is
        opened so that no image handle is left dangling on the fallback path.
        """
        image_dir = os.path.join(self.IMAGE_DIR, class_name)
        filename = random.choice(sorted(os.listdir(image_dir)))
        # splitext instead of filename[:-4]: also correct for extensions that
        # are not exactly three characters long (e.g. '.jpeg').
        label_name = os.path.splitext(filename)[0] + '.json'
        label_path = os.path.join(self.LABEL_DIR, class_name, label_name)
        if not os.path.isfile(label_path):
            return None
        # Convert to RGB like load_from_dataset does, so Normalize always
        # receives three channels.
        img = Image.open(os.path.join(image_dir, filename)).convert('RGB')
        return img, self._read_boxes(label_path)

    def copy_paste(self, index):
        """Synthesise a training sample for a uniformly chosen class.

        Returns ``(PIL image, class index)``. Falls back to
        ``load_from_dataset(index)`` when the randomly chosen source image has
        no annotation file.
        """
        target = random.choice(self.CLASSES)
        if target == self.BACKGROUND_CLASS:
            # mask the fish part in an image of other category
            fish_classes = [c for c in self.CLASSES if c != self.BACKGROUND_CLASS]
            picked = self._pick_annotated_image(random.choice(fish_classes))
            if picked is None:
                return self.load_from_dataset(index)
            img, boxes = picked
            for xmin, ymin, xmax, ymax in boxes:
                mask = Image.new("RGB", (xmax - xmin, ymax - ymin), (0, 0, 0))
                img.paste(mask, (xmin, ymin))
        else:
            # paste fish patches of one category on a NoF image
            picked = self._pick_annotated_image(target)
            if picked is None:
                return self.load_from_dataset(index)
            fish_img, boxes = picked

            nof_dir = os.path.join(self.IMAGE_DIR, self.BACKGROUND_CLASS)
            background_filename = random.choice(sorted(os.listdir(nof_dir)))
            img = Image.open(os.path.join(nof_dir, background_filename)).convert('RGB')
            width, height = img.size
            for box in boxes:
                fish_patch = fish_img.crop(box)
                # randint is inclusive on both ends, so the largest offset that
                # keeps the patch inside the canvas is (canvas - patch). Clamp
                # at 0 so a patch larger than the background is anchored at the
                # origin instead of raising ValueError on an empty range.
                x = random.randint(0, max(0, width - fish_patch.width))
                y = random.randint(0, max(0, height - fish_patch.height))
                img.paste(fish_patch, (x, y))

        return img, self.CLASSES.index(target)


# Create dataset

In [8]:
seed = 43

# Seed every RNG the pipeline draws from -- `random` (copy-paste sampling),
# NumPy, and torch (DataLoader shuffling, torchvision augmentations, and the
# initialisation of the new classifier head) -- so a Restart & Run All
# reproduces the same run.
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)

# Stratified 80/20 split that preserves the per-class label proportions.
train_df, val_df = train_test_split(
    df,
    test_size=0.2,
    random_state=seed,
    stratify=df['label']
)

root = '/content/train'
# NOTE(review): the training set is built from the full `df`, not `train_df`,
# so the split above is currently unused and the rows in `val_df` are trained
# on as well -- confirm this is intentional (e.g. a final fit on all data).
train_data = fish_dataset(df, rootPath=root, training=True)
train_dataloader = DataLoader(train_data, batch_size=hyper["batch_size"], shuffle=True)

In [9]:
# Sanity check: number of samples the training dataset exposes.
print(len(train_data))

3777


# Model

In [10]:
# Use the first CUDA GPU when one is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print(device)

cuda:0


In [None]:
# Backbone: torchvision RegNetY-8GF constructed with pretrained=True.
# Alternative backbone that was tried/kept for reference:
#model = models.resnet101(pretrained=True)
model = models.regnet_y_8gf(pretrained=True)
num_ftrs = model.fc.in_features # number of input features of the final fully connected layer
# Replace the final FC layer with a fresh head sized for this task's classes.
model.fc = nn.Linear(num_ftrs, hyper["class_num"])
model = model.to(device)

# Optimizer

In [12]:
# Classification loss computed on the raw model outputs and integer labels.
criterion = nn.CrossEntropyLoss().to(device)
# SGD with momentum and weight decay over all model parameters.
optimizer = torch.optim.SGD(model.parameters(), lr=hyper["learning_rate"], momentum=0.9, weight_decay=0.0005)
# Multiply the learning rate by gamma=0.8 at each milestone in hyper["step_size"];
# the scheduler is stepped once per epoch by the training loop.
lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=hyper["step_size"], gamma=0.8, verbose=True)

Adjusting learning rate of group 0 to 1.0000e-04.


# Training

In [13]:
def plot_loss(train_loss_history, epoch):
  """Plot the training-loss curve, save it as 'loss.png', and display it.

  Args:
    train_loss_history: sequence of per-epoch loss values; must contain
      exactly ``epoch + 1`` entries (one per epoch 0..epoch).
    epoch: zero-based index of the most recently finished epoch.
  """
  epoch_history = list(range(epoch + 1))
  # Draw on an explicit, fresh figure instead of pyplot's implicit current
  # one, so repeated calls never stack curves on top of each other.
  fig, ax = plt.subplots()
  line1, = ax.plot(epoch_history, train_loss_history, label='Training')
  ax.legend(handles=[line1])
  ax.set_xlabel('epochs')
  ax.set_ylabel('loss')
  fig.savefig('loss.png')
  plt.show()
  # This function is called once per epoch; close the figure so figures do
  # not accumulate in memory on backends where show() leaves them open.
  plt.close(fig)

In [None]:
log_path = 'log.txt'
train_loss_history = []

# One pass over the training loader per epoch; after every epoch the metrics
# are logged, a checkpoint is written, the loss curve is re-plotted, and the
# learning-rate scheduler is advanced.
for epoch in range(hyper["epoch"]):

  """=====train one epoch====="""
  model.train()
  totalLoss, count, correct_count = 0, 0, 0
  for x, label in train_dataloader:
    x = x.to(device)
    label = label.to(device).type(torch.long)

    # Forward / backward / parameter update.
    optimizer.zero_grad()
    output = model(x)
    loss = criterion(output, label)
    loss.backward()
    optimizer.step()

    # Running statistics. The loss is weighted by batch size so that the
    # epoch value is a per-sample mean even when the last batch is smaller.
    _, predicted = torch.max(output.data, 1)
    count = count + len(x)
    correct_count = correct_count + (predicted == label).sum().item()
    totalLoss = totalLoss + loss.item()*len(label)

  train_loss = totalLoss / count
  accuracy = correct_count / count
  train_loss_history.append(train_loss)

  # Same summary line goes to the append-mode log file and to the cell output.
  stats = (epoch+1, train_loss, 100*accuracy)
  with open(log_path, 'a') as f:
    f.write("Epoch {}: Training Loss: {:.4f}, accuracy: {:.4f}%\n".format(*stats))
  print("Epoch {}: Training Loss: {:.4f}, accuracy: {:.4f}%".format(*stats))

  # Checkpoint after every epoch; the filename carries epoch number and loss.
  torch.save(model.state_dict(), "model_ep{}_loss{:.4f}.pkl".format(epoch+1, train_loss))

  print("-------")

  plot_loss(train_loss_history, epoch)
  lr_scheduler.step()