In [1]:
import os
import re
import json
from pathlib import Path
import cv2
import collections
import shutil
import ssl 
import copy
import math
from tqdm import tqdm
import numpy as np
import pandas as pd 
import matplotlib.pyplot as plt
from datetime import datetime 
import albumentations as A
import cv2 as cv 
from sklearn.model_selection import StratifiedKFold, train_test_split
from PIL import Image
from albumentations.pytorch.transforms import ToTensorV2
import torch
from torch import tensor
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.optim import lr_scheduler
from torch.utils.data import Dataset,DataLoader,random_split,Subset
from torchvision import datasets,models,transforms
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection import FasterRCNN
from torchvision.models.detection.rpn import AnchorGenerator
ssl._create_default_https_context = ssl._create_unverified_context 
import PIL           

**Background**: Australia's stunningly beautiful Great Barrier Reef is under threat because of the overpopulation of one particular starfish – the coral-eating crown-of-thorns starfish (COTS). Scientists, tourism operators and reef managers have established a large-scale intervention program to bring COTS outbreaks down to ecologically sustainable levels.

**Objective**: Accurately identify starfish in real time by building an object detection model trained on underwater videos of coral reefs.


**Type**: Object detection.

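**Scale**: 4919 training frames that contain at least one annotated starfish (frames with an empty annotation list are dropped before training).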

**Evaluation**: F2 Score

In [3]:
# Ask PyTorch to fall back to the CPU for operators the MPS backend lacks.
# NOTE(review): `torch` is already imported by the first cell; PyTorch appears to read
# this variable while it is being imported — confirm, and if so move this line above
# `import torch` so it takes effect on a fresh kernel.
os.environ.update({"PYTORCH_ENABLE_MPS_FALLBACK": "1"})

In [4]:
# Limit OpenMP-backed libraries to a single thread.
# NOTE(review): numpy/torch/cv2 are imported before this runs; OpenMP runtimes usually
# read this variable when they initialise — confirm it still has an effect here.
os.environ.update({"OMP_NUM_THREADS": "1"})

In [5]:
# Root directory of the competition data (expects `train.csv` and `train_images/` inside).
# Set the STARFISH_DATA_DIR environment variable to run on another machine; the default
# keeps the original author's local location so existing runs are unaffected.
path = Path(os.environ.get(
    'STARFISH_DATA_DIR',
    '/Users/wangshuo/Documents/data/tensorflow-great-barrier-reef/',
))

In [6]:
# images
# Flatten the three per-video frame folders into one `images/` folder. Each frame is
# first renamed in place to "<video number>-<original name>" (presumably so the file
# stem matches `image_id` in train.csv — confirm), then copied to `images/`.
folders = [path/'train_images'/'video_0', path/'train_images'/'video_1', path/'train_images'/'video_2']
dest_folder = path/'images'

# Without an existing directory, shutil.copy would treat `images` as a *file* name and
# overwrite that single file with every frame.
dest_folder.mkdir(parents=True, exist_ok=True)

for folder in folders:
    prefix = folder.name[-1] + '-'
    for file in os.listdir(folder):
        # Already renamed by an earlier run: skipping keeps the cell idempotent instead
        # of producing names such as "0-0-16.jpg" on re-execution.
        # (assumes original frame names never start with "<digit>-" — TODO confirm)
        if file.startswith(prefix):
            continue
        os.rename(folder/file, folder/(prefix + file))

for folder in folders:
    for file in os.listdir(folder):
        shutil.copy(folder/file, dest_folder)

In [6]:
# transform
# Training pipeline: random flip, then conversion to a torch tensor. Boxes are given in
# pascal_voc format ([xmin, ymin, xmax, ymax] in pixels) and are flipped with the image.
train_transform = A.Compose([
    # Pass the probability by keyword. In albumentations releases whose first positional
    # parameter is `always_apply`, `A.Flip(0.5)` sets always_apply=0.5 (truthy), so the
    # flip ran on every sample instead of half of them.
    A.Flip(p=0.5),
    ToTensorV2()
], bbox_params = {'format': 'pascal_voc', 'label_fields': ['labels']})

# Evaluation pipeline: tensor conversion only, no augmentation.
test_transform = A.Compose([
    ToTensorV2(),
], bbox_params = {'format': 'pascal_voc', 'label_fields': ['labels']})

In [7]:
# data
# df
train = pd.read_csv(path/'train.csv')

# Drop frames whose annotation list is empty.
train = train[train.annotations != '[]'].reset_index(drop=True)

# Each `annotations` cell is a string holding a list of dicts with the keys
# 'x', 'y', 'width' and 'height'; swapping the quote character makes it valid JSON.
parsed_annotations = [json.loads(raw.replace("'", '"')) for raw in train.annotations]

# One list per frame and per coordinate, in the same order as the rows of `train`.
x = [[box['x'] for box in frame_boxes] for frame_boxes in parsed_annotations]
y = [[box['y'] for box in frame_boxes] for frame_boxes in parsed_annotations]
w = [[box['width'] for box in frame_boxes] for frame_boxes in parsed_annotations]
h = [[box['height'] for box in frame_boxes] for frame_boxes in parsed_annotations]

train['x'], train['y'] = x, y
train['w'], train['h'] = w, h

def xmax(df):
    """Return `x + w` for every box of one row.

    `df` is a single row-like object whose `x` and `w` attributes are sequences
    paired element-wise; the result is a list with one right edge per pair.
    """
    right_edges = []
    for left, width in zip(df.x, df.w):
        right_edges.append(left + width)
    return right_edges
def ymax(df):
    """Return `y + h` for every box of one row.

    `df` is a single row-like object whose `y` and `h` attributes are sequences
    paired element-wise; the result is a list with one bottom edge per pair.
    """
    bottom_edges = []
    for top, height in zip(df.y, df.h):
        bottom_edges.append(top + height)
    return bottom_edges
def k(df):
    """Count the out-of-range box edges of one row.

    Adds the number of `xmax` values above 1280 to the number of `ymax` values
    above 720 (presumably the frame width and height in pixels — confirm).
    A box that exceeds both limits is counted twice.
    """
    beyond_right = np.array(df.xmax) > 1280
    beyond_bottom = np.array(df.ymax) > 720
    return sum(beyond_right) + sum(beyond_bottom)

# Per-row right/bottom edges of every box.
train['xmax'] = train.apply(xmax,axis=1)
train['ymax'] = train.apply(ymax,axis=1)
# NOTE: despite its name, `valid_boxes` holds the count of edges that fall outside the
# 1280 / 720 limits checked by `k`, i.e. the number of *invalid* edges in the row.
train['valid_boxes'] = train.apply(k,axis=1)

# Keep only the frames in which every box stays inside those limits.
train = train[~(train.valid_boxes>0)].reset_index(drop=True)

# A fixed seed makes the split identical after every kernel restart, so results from
# different runs are comparable.
# NOTE(review): this is a random frame-level split; if rows are consecutive video frames,
# near-duplicate frames can land on both sides — consider splitting by video/sequence.
train, test = train_test_split(train, test_size=0.25, random_state=42)

# dataset
class StarfishDataset(Dataset):
    """Detection dataset yielding ``(image, target, image_id)`` for one frame per item.

    Args:
        df: DataFrame with one row per frame. Needs an ``image_id`` column and
            list-valued ``x``, ``y``, ``w``, ``h`` columns (one entry per box).
        img_dir: Directory that contains ``<image_id>.jpg`` for every row.
        transform: Callable invoked as ``transform(image=..., bboxes=..., labels=...)``
            that returns a mapping with at least ``'image'`` and ``'bboxes'``.
    """

    def __init__(self, df, img_dir, transform):
        super().__init__()

        self.df = df
        self.img_ids = df.image_id
        self.img_dir = img_dir
        self.transform = transform

    @staticmethod
    def _to_pascal_voc(row):
        """Turn one row's x/y/w/h lists into ``[xmin, ymin, xmax, ymax]`` boxes."""
        return [[x, y, x + w, y + h]
                for x, y, w, h in zip(row['x'], row['y'], row['w'], row['h'])]

    def __getitem__(self, index):
        """Load frame ``index`` and return ``(image, target, image_id)``.

        ``image`` is whatever the transform returns under ``'image'``; before the
        transform it is a float32 RGB array scaled to [0, 1]. ``target`` is a dict with
        ``boxes`` (float32, N x 4, taken from the transform output), ``labels`` (int64
        ones), ``img_id`` (the positional index), ``area`` and ``iscrowd`` (zeros).

        Raises:
            FileNotFoundError: if the image file is missing or cannot be decoded.
        """
        # Positional lookup: no need to scan the whole frame for a matching image_id.
        row = self.df.iloc[index]
        img_id = row['image_id']

        img_path = f'{self.img_dir}/{img_id}.jpg'
        image = cv2.imread(img_path, cv2.IMREAD_COLOR)
        if image is None:
            # cv2.imread signals failure by returning None rather than raising.
            raise FileNotFoundError(f'Could not read image: {img_path}')
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB).astype(np.float32)
        image /= 255.0

        boxes = self._to_pascal_voc(row)
        num_boxes = len(boxes)

        # Area is computed on the untransformed boxes.
        area = torch.tensor([(box[2]-box[0])*(box[3]-box[1]) for box in boxes])
        labels = torch.ones((num_boxes,),dtype=torch.int64)
        iscrowd = torch.zeros(num_boxes)

        res = self.transform(image=image, bboxes=boxes, labels=labels)

        target = {}
        # reshape keeps the (N, 4) layout even when the transform returns no boxes.
        target['boxes'] = torch.as_tensor(res['bboxes'], dtype=torch.float32).reshape(-1, 4)
        target['labels'] = labels
        target['img_id'] = torch.tensor([index])
        target['area'] = area
        target['iscrowd'] = iscrowd
        return res['image'], target, img_id

    def __len__(self):
        """Number of frames (rows of ``df``)."""
        return self.img_ids.shape[0]

def collate_fn(batch):
    """Regroup a list of (image, target, img_id) samples into three parallel tuples.

    A module-level function (rather than a lambda) can be pickled, so the loaders keep
    working if `num_workers` is raised later.
    """
    return tuple(zip(*batch))

train_data = StarfishDataset(df=train,img_dir=path/'images',transform=train_transform)
test_data = StarfishDataset(df=test,img_dir=path/'images',transform=test_transform)

train_loader = DataLoader(train_data,batch_size=8,shuffle=True,collate_fn=collate_fn)
# Evaluation gains nothing from shuffling; a fixed order keeps runs comparable.
test_loader = DataLoader(test_data,batch_size=8,shuffle=False,collate_fn=collate_fn)

In [None]:
# sample
# Draw the boxes of one training sample on its image as a visual sanity check.
images, targets, img_ids = next(iter(train_loader))
# Use the third sample when the batch has one, otherwise the last available.
sample_idx = min(2, len(images) - 1)
boxes = targets[sample_idx]['boxes'].cpu().numpy().astype(np.int32)
# Contiguous copy: gives cv2 a drawable buffer and leaves the batch tensor untouched.
img = np.ascontiguousarray(images[sample_idx].permute(1,2,0).cpu().numpy())

fig, ax = plt.subplots(1, 1, figsize=(16, 8))
# .tolist() yields plain Python ints, which every OpenCV build accepts as points.
for x_min, y_min, x_max, y_max in boxes.tolist():
    cv2.rectangle(img,
                  (x_min, y_min),
                  (x_max, y_max),
                  # the image is float in [0, 1], so magenta is (1, 0, 1), not (255, 0, 255)
                  (1.0, 0.0, 1.0), 2)

ax.set_axis_off()
ax.imshow(img);

In [16]:
# model
# Faster R-CNN (ResNet-50 FPN backbone) initialised from torchvision's default weights.
# NOTE(review): rpn_pre_nms_top_n_train=16 keeps very few RPN proposals per level during
# training (torchvision's default is far larger) — confirm this is intentional.
model = models.detection.fasterrcnn_resnet50_fpn(
    weights=models.detection.faster_rcnn.FasterRCNN_ResNet50_FPN_Weights.DEFAULT,
    rpn_pre_nms_top_n_train=16)


in_features = model.roi_heads.box_predictor.cls_score.in_features

# Replace the classification head: 2 classes = background + starfish (label 1).
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, 2)

# train
# Use Apple's MPS backend when present; fall back to the CPU instead of crashing.
device = torch.device('mps' if torch.backends.mps.is_available() else 'cpu')
model.to(device)

# Optimise only the parameters that are not frozen.
params = [p for p in model.parameters() if p.requires_grad]

optimizer = torch.optim.SGD(params, lr=0.01, momentum=0.9, weight_decay=0.0005)

In [None]:
# Put the model in training mode; the training cells below call it with targets and
# read a dict of losses from the result.
model.train(mode=True)

In [13]:
# Smoke test: one forward pass on the first training sample.
# The sample is fetched once — every `train_data[0]` call re-draws the random flip, so
# indexing twice can pair an image with boxes from a different flip — and moved to the
# device the model lives on, since mixing CPU inputs with an accelerator model fails.
model_device = next(model.parameters()).device
sample_image, sample_target, sample_id = train_data[0]
a = model([sample_image.to(model_device)],
          [{k: v.to(model_device) for k, v in sample_target.items()}])

In [None]:
# Smoke test on the model's own device.
# Index the dataset once: each `train_data[0]` call applies a fresh random flip, so taking
# the image from one call and the target from another can produce mismatched boxes.
model_device = next(model.parameters()).device
sample_image, sample_target, sample_id = train_data[0]
a = model([sample_image.to(model_device)],
          [{k: v.to(model_device) for k, v in sample_target.items()}])

In [None]:
# One pass over the training set.
# Take the device from the model itself so the batch always lands where the weights are,
# whatever device the model was moved to.
device = next(model.parameters()).device

for imgs, targets, img_ids in tqdm(train_loader):
    imgs = [img.to(device) for img in imgs]
    targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

    # With targets supplied, the model returns a dict of losses; the training loss is
    # their plain sum.
    loss_dic = model(imgs,targets)
    loss = sum(loss_dic.values())

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

In [None]:
# Multi-epoch training: repeats the single-pass training step for 10 epochs and reports
# the mean loss of each epoch. (The original cell was a bare `for` header with no body,
# which is a SyntaxError.)
for epoch in range(10):
    model.train()
    device = next(model.parameters()).device
    running_loss = 0.0

    for imgs, targets, img_ids in tqdm(train_loader, desc=f'epoch {epoch + 1}/10'):
        imgs = [img.to(device) for img in imgs]
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        loss_dic = model(imgs, targets)
        loss = sum(loss_dic.values())

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    # max(..., 1) guards against an empty loader.
    print(f'epoch {epoch + 1}: mean loss {running_loss / max(len(train_loader), 1):.4f}')

In [None]:
# git
!git init

!git remote -v

!git remote set-url origin "https://github.com/0731ws/portfolio.git/tensorflow-great-barrier-reef"

!git remote add origin https://github.com/0731ws/portfolio.git  

!git add "starfish.ipynb"

!git commit -m "Initial commit"

!git commit -m "Upload notebook to folder"

!git push -u origin master

!git config --global user.name "WangShuo"

!git config --global user.email "ws13127789446@163.com"

!git push -u origin main

!git pull origin main

!git config --global http.version HTTP/1.1

!git pull --rebase origin main

git config --global --unset http.proxy
git config --global --unset https.proxy

In [None]:
# Terminal reference — shelves uncommitted changes. Kept as a comment because a bare
# shell command is a SyntaxError in a code cell and this one alters the working tree.
# git stash

In [None]:
git ls-files -c 

In [None]:
# Terminal reference — force-deletes a local branch. The original line named no branch,
# so a placeholder is shown; kept as a comment because it is destructive.
# git branch -D <branch_name>

In [None]:
# Terminal reference — stops tracking a file while leaving it on disk. The original line
# named no path, so a placeholder is shown.
# git rm --cached <path>

In [None]:
git ls-files --stage

In [None]:
# Terminal reference — deletes the `dev` branch on the remote. Kept as a comment so that
# "Run All" can never remove a remote branch.
# git push origin --delete dev

In [None]:
# Terminal reference — unstages everything, keeping the working-tree changes.
# git reset HEAD

In [None]:
# Terminal reference — undoes the last commit but keeps its changes staged. Kept as a
# comment because it rewrites local history.
# git reset --soft HEAD^

In [None]:
# Terminal reference — force-pushes `main`, overwriting the remote history. Kept as a
# comment so it is never executed by accident.
# git push origin main -f

In [None]:
# Terminal reference — template for removing a tracked folder from the remote while
# keeping it on disk. Replace the <placeholders> before running in a terminal.
# git rm -r --cached <folder_name>
# git commit -m "Removed folder <folder_name> from remote repository"
# git push origin <branch_name>