In [1]:
import pandas as pd
import ast, os
import numpy as np
import torch
import torchvision
import cv2
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torch.utils.data import DataLoader, Dataset

In [2]:
training_path = "../data/train.csv"
df = pd.read_csv(training_path)
df.head()

Unnamed: 0,image_id,width,height,bbox,source
0,b6ab77fd7,1024,1024,"[834.0, 222.0, 56.0, 36.0]",usask_1
1,b6ab77fd7,1024,1024,"[226.0, 548.0, 130.0, 58.0]",usask_1
2,b6ab77fd7,1024,1024,"[377.0, 504.0, 74.0, 160.0]",usask_1
3,b6ab77fd7,1024,1024,"[834.0, 95.0, 109.0, 107.0]",usask_1
4,b6ab77fd7,1024,1024,"[26.0, 144.0, 124.0, 117.0]",usask_1


In [3]:

df['bbox'] = df['bbox'].apply(lambda x: ast.literal_eval(x))
print(type(df['bbox'][0]))


<class 'list'>


In [4]:
bbox_list = np.array(list(df['bbox']))

for i,dimension in enumerate(['x','y','w','h']):
    df[dimension] = bbox_list[:,i]
df.head()

Unnamed: 0,image_id,width,height,bbox,source,x,y,w,h
0,b6ab77fd7,1024,1024,"[834.0, 222.0, 56.0, 36.0]",usask_1,834.0,222.0,56.0,36.0
1,b6ab77fd7,1024,1024,"[226.0, 548.0, 130.0, 58.0]",usask_1,226.0,548.0,130.0,58.0
2,b6ab77fd7,1024,1024,"[377.0, 504.0, 74.0, 160.0]",usask_1,377.0,504.0,74.0,160.0
3,b6ab77fd7,1024,1024,"[834.0, 95.0, 109.0, 107.0]",usask_1,834.0,95.0,109.0,107.0
4,b6ab77fd7,1024,1024,"[26.0, 144.0, 124.0, 117.0]",usask_1,26.0,144.0,124.0,117.0


In [5]:
image_ids = df['image_id'].unique()
train_ids =  image_ids[0:int(0.8*len(image_ids))]
val_ids =  image_ids[int(0.8*len(image_ids)):]


In [6]:
print("# training images: ",len(train_ids))
print("# validation images: ",len(val_ids))
train_df = df[df['image_id'].isin(train_ids)]
valid_df = df[df['image_id'].isin(val_ids)]

# training images:  2698
# validation images:  675


In [7]:
class WheatDataset(Dataset):
    def __init__(self, dataframe, image_dir, transforms = None):
        super().__init__()
        self.df = dataframe
        self.image_dir = image_dir
        self.images = dataframe['image_id'].unique()
        self.transforms = transforms
        
    def __getitem__(self,idx):
        #print("idx: ",idx)
        image_id = self.images[idx]
        #print("image id: ",image_id)
        record = self.df[self.df['image_id'] == image_id]
        
        
        image_name = image_id + '.jpg'
        
        img = cv2.imread(self.image_dir+image_name, cv2.IMREAD_COLOR)
        
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB).astype(np.float32)
        img = img/255.0
        
        boxes = record[['x','y','w','h']].values
        boxes[:,2] = boxes[:,0]+boxes[:,2]
        boxes[:,3] = boxes[:,1]+boxes[:,3]
        boxes = torch.tensor(boxes, dtype=torch.int64)
        img = torch.tensor(img, dtype=torch.int64)
        labels = torch.ones((record.shape[0],), dtype=torch.int64)
        #print("labels: ",type(labels))
        target = {}
        target['boxes'] = boxes
        target['labels'] = labels
        target['image_id'] = torch.tensor([idx])
        #print("img type: ",type(img))
        return img, target, image_id

    def __len__(self):
        return len(self.images)

     

In [8]:
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained = True)

In [9]:
num_classes = 2
in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

In [10]:
train_img_path = "../data/train/"
train_dataset = WheatDataset(train_df,train_img_path)
valid_dataset = WheatDataset(valid_df,train_img_path)
print("len train dataset",len(train_dataset))
print("len valid dataset",len(valid_dataset))

def collate_fn(batch):
    return tuple(zip(*batch))

train_data_loader = DataLoader(
    train_dataset, 
    batch_size = 16, 
    shuffle = False, 
    num_workers = 0,
    collate_fn=collate_fn
)

valid_data_loader = DataLoader(
    valid_dataset, 
    batch_size = 16, 
    shuffle = False, 
    num_workers = 4,
    collate_fn=collate_fn
)



len train dataset 2698
len valid dataset 675


In [11]:
device = torch.device('cuda:0') if torch.cuda.is_available() else torch.device('cpu')
print(device)

cuda:0


In [12]:
images, targets, ids = next(iter(train_data_loader))
images = list(image.to(device) for image in images)
targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

In [13]:
model.to(device)
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005)
num_epochs = 2


In [14]:
#save best model

def save_checkpoint(state, is_best, checkpoint_path, best_model_path):
    """
    state: checkpoint to be saved
    is_best: is this the best checkpoint; min validation loss
    checkpoint_path: path to save checkpoint
    best_model_path: path to save best model
    """
    
    #save the current state to checkpoint path 
    torch.save(state, checkpoint_path)
    
    if is_best:
        shutil.copyfile(checkpoint_path, best_model_path)
        
    def load_checkpoint(checkpoint_path, model, optimizer):
        """
        checkpoint_path: path to save checkpoint
        model: model that we want to load checkpoint parameters into       
        optimizer: optimizer we defined in previous training
        """
        
        #loading checkpoint
        checkpoint = torch.load(checkpoint_path)
        # initialize state_dict from checkpoint to model
        model.load_state_dict(checkpoint['state_dict'])
        # initialize optimizer from checkpoint to optimizer
        optimizer.load_statte_dict(checkpoint['optimizer'])
        # initialize valid_loss_min from checkpoint to valid_loss_min
        valid_loss_min = checkpoint['valid_loss_min']
        return model, optimizer, checkpoint['epoch'], valid_loss_min.item()
    
        

In [15]:
print(type(train_data_loader))

<class 'torch.utils.data.dataloader.DataLoader'>


In [16]:
num_epochs = 2
train_loss_min = 0.9
total_train_loss = []


checkpoint_path = '../models/chkpoint_'
best_model_path = '../models/bestmodel.pt'

for epoch in range(num_epochs):
    print("Epoch: ",epoch)
    model.train()
    
    for images, targets, image_ids in train_data_loader:
        images = list(image.to(device) for image in images)
        targets = [{k: v.to(device) for k,v in t.items()} for t in targets]
        loss_dict = model(images, targets)

        losses = sum(loss for loss in loss_dict.values())
        train_loss.append(losses.item())        
        optimizer.zero_grad()
        losses.backward()
        optimizer.step()
        
    #train_loss/len(train_data_loader.dataset)
    epoch_train_loss = np.mean(train_loss)
    total_train_loss.append(epoch_train_loss)
    print(f'Epoch train loss is {epoch_train_loss}')
    
#     if lr_scheduler is not None:
#         lr_scheduler.step()
    
    # create checkpoint variable and add important data
    checkpoint = {
            'epoch': epoch + 1,
            'train_loss_min': epoch_train_loss,
            'state_dict': model.state_dict(),
            'optimizer': optimizer.state_dict(),
        }
    
    # save checkpoint
    save_ckp(checkpoint, False, checkpoint_path, best_model_path)
    ## TODO: save the model if validation loss has decreased
    if epoch_train_loss <= train_loss_min:
            print('Train loss decreased ({:.6f} --> {:.6f}).  Saving model ...'.format(train_loss_min,epoch_train_loss))
            # save checkpoint as best model
            save_ckp(checkpoint, True, checkpoint_path, best_model_path)
            train_loss_min = epoch_train_loss
    
    time_elapsed = time.time() - start_time
    print('{:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))


Epoch:  0


RuntimeError: The size of tensor a (1024) must match the size of tensor b (3) at non-singleton dimension 0