<a href="https://colab.research.google.com/github/yukinaga/object_detection/blob/main/section_2/01_faster_rcnn.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### annotationデータ変換

In [1]:
import glob
import os
import json
import pandas as pd
import shutil
import json
import numpy as np
from tqdm import tqdm
from PIL import Image

In [2]:
# Load the annotation JSON of every data split into json_dicts.
base_dir = 'datasets/COCOdata/COCOdata'
data_kinds = ['train', 'test']
json_dicts = {}
for data_kind in data_kinds:
    labels_path = f'{base_dir}/{data_kind}/labels_baobab.json'
    with open(labels_path, 'r') as f:
        json_dicts[data_kind] = json.load(f)

In [3]:
# Build one DataFrame per split that links each image's 'id' and 'file_name'
# with the width/height read from the image file itself.
df_dict = {}
for data_kind, json_dict in json_dicts.items():
    image_rows = []
    for image in tqdm(json_dict['images']):
        # Only the size is needed; the context manager releases the file
        # handle right away instead of leaving one open per image.
        with Image.open(f'{base_dir}/{data_kind}/data/{image["file_name"]}') as img:
            width, height = img.size
        image_rows.append({
            'image_id': image['id'],
            'file_name': image['file_name'],
            'width': width,
            'height': height,
        })
    df_dict[data_kind] = pd.DataFrame(image_rows)

100%|███████████████████████████████████████████████████████████████████████████| 74963/74963 [04:51<00:00, 257.48it/s]
100%|█████████████████████████████████████████████████████████████████████████████| 3182/3182 [00:13<00:00, 239.40it/s]


In [6]:
# Attach each annotation's 'bbox' and 'category_id' to its image row,
# joining on 'image_id' (one output row per annotation).
annotation_keys = ('image_id', 'bbox', 'category_id')
for data_kind, json_dict in json_dicts.items():
    df_annotations_id = pd.DataFrame(
        [{key: annotation[key] for key in annotation_keys}
         for annotation in json_dict['annotations']]
    )
    df_dict[data_kind] = df_dict[data_kind].merge(df_annotations_id, on='image_id')

In [8]:
# Attach the category 'name' to every annotation row, joining the
# categories' 'id' against the rows' 'category_id'.
for data_kind, json_dict in json_dicts.items():
    df_categories_id = pd.DataFrame(
        [{'category_id': category['id'], 'name': category['name']}
         for category in json_dict['categories']]
    )
    df_dict[data_kind] = df_dict[data_kind].merge(df_categories_id, on='category_id')

In [10]:
# Table of the distinct (category_id, name) pairs found in the test split,
# sorted by category_id. The index is renumbered to start at 1 and is used
# as the label ID further down.
df_unique_category = (
    df_dict['test'][['category_id', 'name']]
    .drop_duplicates()
    .sort_values('category_id')
    .reset_index(drop=True)
)
df_unique_category.index = np.arange(1, len(df_unique_category) + 1)
df_unique_category

Unnamed: 0,category_id,name
1,1,person
2,3,car
3,10,traffic light
4,47,cup
5,62,chair


In [None]:
# Name-to-name remapping table.
# NOTE(review): this dict is not referenced anywhere else in the visible
# notebook, and its keys ("building", "flower", "dog") do not appear in the
# category table above — confirm whether it is still needed.
fix_label = {
    "building": "person",
    "car": "car",
    "person": "trafficlight",
    "flower": "cup",
    "dog": "chair"
}

In [12]:
# Map the original category IDs to the contiguous label IDs
# (the 1-based index of df_unique_category).
category_id2label_id = dict(
    zip(df_unique_category['category_id'], df_unique_category.index)
)
category_id2label_id[10]

3

In [13]:
# Map each label ID to its class name.
# The original cell iterated over `df_label`, which is never defined in this
# notebook; the table is rebuilt from df_unique_category instead. Spaces are
# removed from the names ('traffic light' -> 'trafficlight') so that every
# name is a single token, as the space-separated detection result files
# written later require.
label_id2name = {
    label_id: row['name'].replace(' ', '')
    for label_id, row in df_unique_category.iterrows()
}
label_id2name[3]

'trafficlight'

In [14]:
# Column names (in order) of the merged annotation table.
columns = list(df_dict['test'].columns)

In [None]:
# Split the 'train' annotation table into train / val subsets using the
# file lists in train.txt and val.txt (one image path per line).
train_txt = f'{base_dir}/train/train.txt'
val_txt = f'{base_dir}/train/val.txt'
with open(train_txt, 'r') as f:
    # Keep only the file name: drop the directory part and the newline.
    train_list = [line.split('/')[-1].replace('\n', '') for line in f]
with open(val_txt, 'r') as f:
    val_list = [line.split('/')[-1].replace('\n', '') for line in f]

# One inner merge replaces the former per-file filter + concat loop, which
# rescanned the whole table and recopied the result for every file name.
# An inner merge keeps the order of the left keys, so rows still follow the
# order of train_list.
df_train = (
    pd.DataFrame({'file_name': train_list})
    .merge(df_dict['train'], on='file_name', how='inner')[columns]
)
df_train

In [None]:
# Validation subset: rows of the 'train' table whose file name is listed in
# val_list. One inner merge replaces the former per-file filter + concat
# loop; it keeps the order of the left keys, so rows follow val_list.
df_val = (
    pd.DataFrame({'file_name': val_list})
    .merge(df_dict['train'], on='file_name', how='inner')[columns]
)
df_val

In [None]:
# Replace the full 'train' table with its train subset and register the
# validation subset under 'val'.
df_dict.update({'train': df_train, 'val': df_val})

### カスタムデータセットを作成

In [None]:
import torch
from torch.utils.data import DataLoader

import torchvision
import torchvision.transforms as transforms
from torchvision.utils import draw_bounding_boxes
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor

from PIL import Image
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# カスタムデータセットに与えるデータを作成するクラス
class MSCOCODataset(torch.utils.data.Dataset):
    """Detection dataset backed by a per-annotation DataFrame.

    Each item is one image together with all of its annotations.

    Args:
        df: DataFrame with one row per annotation and at least the columns
            'file_name', 'bbox' ([x, y, width, height]) and 'category_id'.
        image_dir: Directory containing the image files named in 'file_name'.
        transform: Callable applied to the RGB PIL image. Defaults to
            ``transforms.ToTensor()``.
        label_map: Optional mapping from category ID to label ID. When
            omitted, the module-level ``category_id2label_id`` is used.
    """

    def __init__(self, df, image_dir, transform=None, label_map=None):
        super().__init__()
        self.image_ids = df['file_name'].unique()
        self.df = df
        self.image_dir = image_dir
        if transform is None:
            transform = transforms.Compose([
                transforms.ToTensor()
            ])
        self.transform = transform
        self.label_map = label_map
        # Row positions of every image's annotations, computed once so that
        # __getitem__ does not rescan the whole DataFrame for each item.
        self._row_positions = df.groupby('file_name', sort=False).indices

    def __getitem__(self, index):
        """Return ``(image, target)`` for the image at ``index``.

        ``target`` is a dict with 'boxes' (int64 tensor of shape [N, 4] in
        [x1, y1, x2, y2] order) and 'labels' (int64 tensor of shape [N]).
        """
        image_id = self.image_ids[index]
        # Convert to RGB so grayscale / RGBA / palette files all yield a
        # 3-channel input; the context manager closes the file handle.
        with Image.open(f'{self.image_dir}/{image_id}') as img:
            image = self.transform(img.convert('RGB'))

        rows = self.df.iloc[self._row_positions[image_id]]
        return image, self._build_target(rows)

    def _build_target(self, rows):
        """Convert the annotation rows of one image into a target dict."""
        boxes_arr = np.array(rows['bbox'].tolist(), dtype=np.float64)
        # [x, y, w, h] -> [x1, y1, x2, y2]
        boxes_arr[:, 2] += boxes_arr[:, 0]
        boxes_arr[:, 3] += boxes_arr[:, 1]
        # Coordinates are truncated to integers, as before.
        boxes = torch.tensor(boxes_arr, dtype=torch.int64)
        # Truncation can collapse a box to zero width/height; widen such
        # boxes by one pixel so they stay valid.
        boxes[:, 2] += (boxes[:, 2] == boxes[:, 0]).to(boxes.dtype)
        boxes[:, 3] += (boxes[:, 3] == boxes[:, 1]).to(boxes.dtype)

        label_map = category_id2label_id if self.label_map is None else self.label_map
        labels = torch.tensor(
            [label_map[cat_id] for cat_id in rows['category_id'].values],
            dtype=torch.int64,
        )
        return {'boxes': boxes, 'labels': labels}

    def __len__(self):
        """Return the number of distinct images."""
        return self.image_ids.shape[0]

### データセットの読み込み

In [None]:
# Train and val images share the train/data directory; test images have their own.
train_image_dir = f'{base_dir}/train/data'
test_image_dir = f'{base_dir}/test/data'
dataset_train = MSCOCODataset(df_dict['train'], image_dir=train_image_dir)
dataset_val = MSCOCODataset(df_dict['val'], image_dir=train_image_dir)
dataset_test = MSCOCODataset(df_dict['test'], image_dir=test_image_dir)

### DataLoaderの設定

In [None]:
# One image per batch for every split; only the training loader shuffles.
loader_kwargs = {'batch_size': 1}
data_loader_train = DataLoader(dataset_train, shuffle=True, **loader_kwargs)
data_loader_val = DataLoader(dataset_val, shuffle=False, **loader_kwargs)
data_loader_test = DataLoader(dataset_test, shuffle=False, **loader_kwargs)

### ターゲットの表示 

In [None]:
def show_boxes(image, boxes, names):
    """Draw the labelled bounding boxes onto ``image`` and display the result.

    Args:
        image: Image passed to ``draw_bounding_boxes`` (channel-first).
        boxes: Bounding boxes to draw.
        names: One label string per box.
    """
    annotated = draw_bounding_boxes(image, boxes, labels=names)
    # Move the channel axis to the end, as imshow expects (H, W, C).
    channels_last = np.transpose(annotated, (1, 2, 0))

    plt.figure(figsize=(16, 16))
    plt.imshow(channels_last)
    # Hide the tick marks and tick labels on both axes.
    plt.tick_params(labelbottom=False, labelleft=False, bottom=False, left=False)
    plt.show()

In [None]:
dataiter = iter(data_loader_train)  # iterator over the training batches

In [None]:
# Number of batches in the test loader.
len(data_loader_test)

In [None]:
# Preview one batch together with its ground-truth boxes.
# To preview test images instead: dataiter = iter(data_loader_test)
# The built-in next() is used because DataLoader iterators no longer
# provide a .next() method in recent PyTorch releases.
image, target = next(dataiter)  # fetch one batch
print(target)

image = image[0]
image = (image*255).to(torch.uint8)  # draw_bounding_boxes expects values in 0-255

boxes = target["boxes"][0]

labels = target["labels"][0]
names = [label_id2name[label.item()] for label in labels]

show_boxes(image, boxes, names)

### モデルの構築

In [None]:
# Pretrained Faster R-CNN whose box predictor is replaced by one sized for
# this dataset's classes.
num_classes = len(df_unique_category) + 1  # +1 because the background is classified too
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
box_predictor = model.roi_heads.box_predictor
in_features = box_predictor.cls_score.in_features
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
model.cuda()  # move to the GPU

### 学習

In [None]:
# Where models and loss logs are saved.
data_path = 'save_model'
_dir_path = f'{data_path}'
# The training loop writes checkpoints to {data_path}/save_model/, so create
# that nested directory as well (this also creates data_path itself).
# exist_ok makes the cell safe to re-run.
os.makedirs(f'{_dir_path}/save_model', exist_ok=True)
epoch_loss_path = f'{data_path}/losses_every_epoch.txt'
iter_loss_path = f'{data_path}/losses_every_iteration.txt'

In [None]:
# Optimizer: SGD over the trainable parameters only.
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.001, momentum=0.9, weight_decay=0.0005)

# Checkpoints go to {data_path}/save_model/; make sure the directory exists,
# otherwise the first torch.save fails.
model_dir = f'{data_path}/save_model'
os.makedirs(model_dir, exist_ok=True)

epoch_loss_list = []
epoch_val_loss_list = []
iter_loss_list = []
val_loss_list = []
# The model stays in training mode for the validation pass as well, because
# the torchvision detection models only return the loss dict in that mode.
model.train()
epochs = 10
iteration = 0  # global iteration counter across all epochs
iter_losses = []  # losses since the last iteration checkpoint
for epoch in range(epochs):
    # train
    epoch_losses = []
    for i, (image, target) in enumerate(data_loader_train):
        image = [img.cuda() for img in image]  # move to the GPU

        boxes = target["boxes"][0].cuda()
        labels = target["labels"][0].cuda()
        target = [{"boxes": boxes, "labels": labels}]  # targets are a list of dicts

        loss_dic = model(image, target)
        loss = sum(loss_dic.values())  # total of all loss terms

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        iteration += 1
        loss_value = loss.item()
        epoch_losses.append(loss_value)
        iter_losses.append(loss_value)

        if (i+1) % 100 == 0:  # report progress every 100 iterations
            print(f'Train epoch: {epoch+1} iteration: {i+1}, loss: {loss_value}')

        # Save the model every 1000 iterations. The global counter is used so
        # that checkpoints from later epochs do not overwrite earlier ones.
        if iteration % 1000 == 0:
            model_path = f'{model_dir}/model_iteration{iteration:06}.pth'
            torch.save(model.state_dict(), model_path)
            iter_loss_list.append(np.mean(iter_losses))
            iter_losses = []
            with open(iter_loss_path, 'w') as f:
                # Distinct loop names: the outer i / loss must not be shadowed.
                for chunk_idx, chunk_loss in enumerate(iter_loss_list):
                    f.write(f'{(chunk_idx+1)*1000} train_loss: {chunk_loss}\n')

    # Save the model at the end of every epoch (file names use the
    # zero-based epoch number).
    model_path = f'{model_dir}/model_epoch{epoch:02}.pth'
    torch.save(model.state_dict(), model_path)
    epoch_loss_list.append(np.mean(epoch_losses))

    # validation
    val_losses = []
    # No gradients are needed here; no_grad avoids building autograd graphs.
    with torch.no_grad():
        for i, (val_image, val_target) in enumerate(data_loader_val):
            val_image = [img.cuda() for img in val_image]  # move to the GPU

            val_boxes = val_target["boxes"][0].cuda()
            val_labels = val_target["labels"][0].cuda()
            val_target = [{"boxes": val_boxes, "labels": val_labels}]  # targets are a list of dicts

            val_loss_dic = model(val_image, val_target)
            val_loss = sum(val_loss_dic.values())  # total of all loss terms

            val_losses.append(val_loss.item())

            if (i+1) % 100 == 0:  # report progress every 100 iterations (1-based, like the training log)
                print("Val epoch:", epoch+1, "iteration:", i+1, "loss:", val_loss.item())

    val_loss_list.append(np.mean(val_losses))

    # Rewrite the per-epoch loss log with all epochs finished so far.
    with open(epoch_loss_path, 'w') as f:
        for epoch_idx, (train_loss, epoch_val_loss) in enumerate(zip(epoch_loss_list, val_loss_list)):
            f.write(f'{epoch_idx+1} train_loss: {train_loss} val_loss: {epoch_val_loss}\n')
   

### 検出

In [None]:
# Rebuild the network with the same head as in training and load a saved checkpoint.
load_model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
num_classes = len(df_unique_category) + 1  # +1 because the background is classified too
in_features = load_model.roi_heads.box_predictor.cls_score.in_features
load_model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
# Training writes 'model_epoch{epoch:02}.pth' with a zero-based, zero-padded
# epoch number, so the file name must be formatted the same way
# ('model_epoch01.pth', not 'model_epoch1.pth').
load_epoch = 1
load_model_pth = f'{data_path}/save_model/model_epoch{load_epoch:02}.pth'
load_model.load_state_dict(torch.load(load_model_pth))
load_model.cuda()

In [None]:
dataiter = iter(data_loader_test)  # iterator over the test batches

In [None]:
# Run detection on one test image at a time and show the result.
# The built-in next() is used because DataLoader iterators no longer
# provide a .next() method in recent PyTorch releases.
image, target = next(dataiter)  # fetch one batch

image = [img.cuda() for img in image]  # move to the GPU

load_model.eval()
with torch.no_grad():  # inference only: skip autograd bookkeeping
    predictions = load_model(image)
pred = predictions[0]

scores = pred['scores'].cpu().tolist()
boxes = pred['boxes'].cpu().tolist()
labels = pred['labels'].cpu().tolist()
names = [label_id2name[label] for label in labels]

# Detections whose confidence score is at least 0.5.
iou50_scores = []
iou50_boxes = []
iou50_labels = []
iou50_names = []

for score, box, label, name in zip(scores, boxes, labels, names):
    if score >= 0.5:
        iou50_scores.append(score)
        iou50_boxes.append(box)
        iou50_labels.append(label)
        iou50_names.append(name)
        print(f'{name} {label} {score:.02f} {box[0]:.01f} {box[1]:.01f} {box[2]:.01f} {box[3]:.01f}')
# reshape keeps the [N, 4] layout even when nothing passed the threshold.
iou50_boxes = torch.tensor(iou50_boxes).reshape(-1, 4)

image = (image[0]*255).to(torch.uint8).cpu()
show_boxes(image, iou50_boxes, iou50_names)


In [None]:
# Output directory for the detection results.
dets_dir_path = f'{data_path}/dets_result'
# makedirs also creates a missing parent directory, and exist_ok makes the
# cell safe to re-run.
os.makedirs(dets_dir_path, exist_ok=True)
img_names = df_dict['test']['file_name'].unique()

In [None]:
# Write the detections of every test image to '<image name>.txt'.
load_model.eval()  # switch to inference mode once, before the loop
for i, (image, target) in enumerate(data_loader_test):

    image = [img.cuda() for img in image]  # move to the GPU
    img_name = img_names[i]
    # To skip an image (ImageID) that could not be read properly:
    #if img_name == 'a8601676c86366e3':
    #    continue
    print(f'{i}: {img_name}')
    with torch.no_grad():  # inference only: skip autograd bookkeeping
        predictions = load_model(image)
    pred = predictions[0]

    scores = pred['scores'].cpu().tolist()
    boxes = pred['boxes'].cpu().tolist()
    labels = pred['labels'].cpu().tolist()
    names = [label_id2name[label] for label in labels]

    # Detections whose confidence score is at least 0.5.
    iou50_scores = []
    iou50_boxes = []
    iou50_labels = []
    iou50_names = []

    # splitext removes only the extension, so file names that contain
    # additional dots are not truncated.
    img_name = os.path.splitext(img_name)[0]
    dets_file_path = f'{dets_dir_path}/{img_name}.txt'
    with open(dets_file_path, 'w') as f:
        # zip avoids reusing the outer loop index i for the detections.
        for score, box, label, name in zip(scores, boxes, labels, names):
            if score >= 0.5:
                iou50_scores.append(score)
                iou50_boxes.append(box)
                iou50_labels.append(label)
                iou50_names.append(name)
                pred_data = f'{name} {score:.03f} {int(box[0])} {int(box[1])} {int(box[2])} {int(box[3])}'
                f.write(f'{pred_data}\n')
                print(pred_data)