# Combine COCO

In [357]:
%load_ext autoreload
%autoreload 2
import sys
sys.path.append('.')
sys.path.append('..')
import os
import sys
from math import ceil, floor
import numpy as np
import cv2
import pandas as pd
import json
import glob
import shutil
from sklearn.model_selection import train_test_split
from pathlib import Path
import logging
from common.log import init_logging
from common.utils import get_data_dir
DATA_DIR = get_data_dir()

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


### Annotations

In [359]:
# Source locations under DATA_DIR.
# NOTE(review): LABEL_DIR is not referenced by the visible cells - confirm it is still needed.
LABEL_DIR = os.path.join(DATA_DIR, 'f45_label_MaskRCNN')
# Directory against which the annotations' image file names are resolved.
IMG_DIR = os.path.join(DATA_DIR, 'f45_output')

# Folder holding the COCO-format json files that get merged.
transform_folder = '0_not_train_labels'
label_folder = os.path.join(DATA_DIR, transform_folder)
json_paths = os.path.join(label_folder, '*.json')

# glob does not guarantee any ordering of its matches. Sort them so that the
# template file (coco_labels[0]) and the id offsets applied while merging are
# the same on every run and on every machine.
coco_labels = sorted(glob.glob(json_paths))

In [360]:
# Load one annotation file to use as the template of the merged output: its
# 'images' and 'annotations' sections are replaced below, every other section
# (e.g. 'categories') is written out unchanged.
if not coco_labels:
    raise FileNotFoundError(f'No annotation json files matched {json_paths}')
with open(coco_labels[0], newline='') as f:
    sample_json = json.load(f)

# Only annotations with these category ids are kept.
# NOTE(review): presumably the ids of the categories exported as labels later
# in this notebook - confirm against the 'categories' section of the json files.
KEEP_CATEGORY_IDS = {4, 5, 6}

# Read every json file and merge them into a single image/annotation list.
image_shift = 0          # offset added to the image ids of the current file
annotation_shift = 0     # offset added to the annotation ids of the current file
new_image_list = []      # merged 'images' entries
new_annotation_list = [] # merged 'annotations' entries
image_list = []          # source path (under IMG_DIR) of every kept image
image_id = []            # shifted id of every kept image, parallel to image_list
for label_path in coco_labels:
    with open(label_path, newline='') as f:
        data_json = json.load(f)
    raw_annotations = data_json['annotations']
    raw_images = data_json['images']

    # Keep the wanted categories, then only the images they refer to.
    annotations = [ann for ann in raw_annotations
                   if ann['category_id'] in KEEP_CATEGORY_IDS]
    valid_img_ids = {ann['image_id'] for ann in annotations}
    images = [img for img in raw_images if img['id'] in valid_img_ids]

    # Offset the ids so entries coming from different files do not clash.
    for ann in annotations:
        ann['image_id'] += image_shift
        ann['id'] += annotation_shift
    for img in images:
        img['id'] += image_shift
        # Resolve the source path before the file name is reduced to its basename.
        img_path = os.path.join(IMG_DIR, img['file_name'])
        img['file_name'] = os.path.basename(img['file_name'])
        image_list.append(img_path)
        image_id.append(img['id'])

    # The offsets advance by the number of entries in the *unfiltered* file.
    # This keeps ids unique only while each file numbers its entries
    # contiguously; the check after the loop catches the other cases.
    image_shift += len(raw_images)
    annotation_shift += len(raw_annotations)
    new_image_list = new_image_list + images
    new_annotation_list = new_annotation_list + annotations

# Fail loudly instead of writing a dataset whose ids collide.
if len(set(image_id)) != len(image_id):
    raise ValueError('Merged image ids are not unique; the source files do not '
                     'use contiguous image ids, so the len()-based offset collides.')
merged_annotation_ids = [ann['id'] for ann in new_annotation_list]
if len(set(merged_annotation_ids)) != len(merged_annotation_ids):
    raise ValueError('Merged annotation ids are not unique; the source files do not '
                     'use contiguous annotation ids, so the len()-based offset collides.')

sample_json['images'] = new_image_list
sample_json['annotations'] = new_annotation_list

annotation_dir = os.path.join(DATA_DIR, 'f45coco/annotations')
os.makedirs(annotation_dir, exist_ok=True)

# The same merged content is written for both splits; the actual train/val
# separation is done on the image files, not in these json files.
for split in ('train', 'val'):
    annotation_path = os.path.join(annotation_dir, f'instances_{split}.json')
    with open(annotation_path, 'w') as f:
        json.dump(sample_json, f)
    print(f'Save {split} annotation to {annotation_path}')
    


Save train annotation to /mnt/hdd1/Data/f45movement/f45coco/annotations/instances_train.json
Save val annotation to /mnt/hdd1/Data/f45movement/f45coco/annotations/instances_val.json


### Images

In [361]:
# Fixed seed so the train/val split is identical on every run.
RANDOM_SEED = 42
# Root of the generated dataset; listed image paths are written relative to it.
COCO_ROOT = os.path.join(DATA_DIR, 'f45coco')


def _copy_images(src_paths, dest_dir, root):
    """Copy images into ``dest_dir`` and return their paths relative to ``root``.

    Args:
        src_paths: paths of the image files to copy.
        dest_dir: destination directory (created when missing). Files keep
            their base name, so sources sharing a base name overwrite each other.
        root: directory the returned paths are made relative to.

    Returns:
        One './'-prefixed relative path per copied file, in input order.
    """
    os.makedirs(dest_dir, exist_ok=True)
    relative_paths = []
    for src_path in src_paths:
        dest_path = os.path.join(dest_dir, os.path.basename(src_path))
        shutil.copyfile(src_path, dest_path)
        # Derived from `root` instead of a hard-coded absolute prefix, so the
        # listing is also correct when DATA_DIR points somewhere else.
        relative_paths.append(os.path.join('.', os.path.relpath(dest_path, root)))
    return relative_paths


image_list_tr, image_list_val = train_test_split(
    image_list, test_size=0.15, random_state=RANDOM_SEED)

image_dir_tr = os.path.join(DATA_DIR, 'f45coco/images/train')
image_dir_val = os.path.join(DATA_DIR, 'f45coco/images/val')
new_image_list_tr = _copy_images(image_list_tr, image_dir_tr, COCO_ROOT)
new_image_list_val = _copy_images(image_list_val, image_dir_val, COCO_ROOT)

# One relative image path per line, no header and no index column.
txt_path_tr = os.path.join(DATA_DIR, 'f45coco/train.txt')
txt_path_val = os.path.join(DATA_DIR, 'f45coco/val.txt')
pd.Series(new_image_list_tr).to_csv(txt_path_tr, header=False, index=False)
pd.Series(new_image_list_val).to_csv(txt_path_val, header=False, index=False)
print(f'txt_path_tr {txt_path_tr}')
print(f'txt_path_val {txt_path_val}')


txt_path_tr /mnt/hdd1/Data/f45movement/f45coco/train.txt
txt_path_val /mnt/hdd1/Data/f45movement/f45coco/val.txt


### Labels

In [362]:

# Category name -> class index written at the start of each label line.
# Annotations whose category name is not listed here are skipped.
label_dict = {
    'battery': 0,
    'vpen': 1,
    'black': 2,
}

debug_log_list = []
# Read the merged annotation file back from the dataset folder under DATA_DIR
# (no machine-specific absolute path).
merged_annotation_path = os.path.join(
    DATA_DIR, 'f45coco/annotations/instances_train.json')
with open(merged_annotation_path, newline='') as f:
    data = json.load(f)

# Base names of the images in each split; the sets give O(1) membership tests.
image_name_tr = [os.path.basename(x) for x in image_list_tr]
image_name_val = [os.path.basename(x) for x in image_list_val]
image_name_tr_set = set(image_name_tr)
image_name_val_set = set(image_name_val)

new_folder_path_tr = os.path.join(DATA_DIR, 'f45coco/labels/train')
os.makedirs(new_folder_path_tr, exist_ok=True)
new_folder_path_val = os.path.join(DATA_DIR, 'f45coco/labels/val')
os.makedirs(new_folder_path_val, exist_ok=True)

# Lookup tables keyed by image id / category id.
images_df = pd.DataFrame(data['images']).set_index('id')
file_path_dict = images_df['file_name'].to_dict()
height_dict = images_df['height'].to_dict()
width_dict = images_df['width'].to_dict()
category_type_dict = pd.DataFrame(data['categories']).set_index('id')['name'].to_dict()

annotation_df = pd.DataFrame(data['annotations'])
# Direct dict lookup on purpose: an unknown category id raises KeyError.
annotation_df['category_type'] = annotation_df['category_id'].map(
    lambda x: category_type_dict[x])

# sort=False keeps the images in order of first appearance. The loop variable
# is named img_id so the `image_id` list built while merging the annotations
# is not overwritten.
for img_id, img_annotation_df in annotation_df.groupby('image_id', sort=False):
    img_height = height_dict[img_id]
    img_width = width_dict[img_id]

    label_list = []
    for r in img_annotation_df.itertuples():
        if r.category_type not in label_dict:
            continue
        # Only the first polygon of the segmentation is exported. The flat
        # [x0, y0, x1, y1, ...] list is copied into an array, so the loaded
        # annotation itself is left untouched.
        coords = np.array(r.segmentation[0], dtype=float)
        coords[0::2] /= img_width    # x values -> fraction of image width
        coords[1::2] /= img_height   # y values -> fraction of image height
        label_list.append([label_dict[r.category_type], *coords])
    if not label_list:
        continue

    img_file_name = os.path.basename(file_path_dict[img_id])
    # Swap only the real extension, so non-.jpg images also get a .txt name
    # instead of a label file carrying the image's own name.
    txt_file_name = os.path.splitext(img_file_name)[0] + '.txt'
    if img_file_name in image_name_tr_set:
        new_folder_path = new_folder_path_tr
    elif img_file_name in image_name_val_set:
        new_folder_path = new_folder_path_val
    else:
        # Explicit raise (not `assert`) so the check survives `python -O`.
        raise AssertionError(f'assert err: {img_file_name}')

    # One line per object: "<class index> x0 y0 x1 y1 ...".
    new_txt_path = os.path.join(new_folder_path, txt_file_name)
    with open(new_txt_path, 'w') as f:
        for seg in label_list:
            f.write(' '.join(str(p) for p in seg) + '\n')
    debug_log_list.append((new_folder_path, img_file_name, txt_file_name))

# NOTE: the 'json' column holds the name of the written .txt label file.
debug_log_df = pd.DataFrame(debug_log_list, columns=['folder', 'image', 'json'])
print(f'label transformed qty:{len(debug_log_df)}')


label transformed qty:183


Unnamed: 0,folder,image,json
0,/mnt/hdd1/Data/f45movement/f45coco/labels/train,F45_5L10_2022-04-14-03-13-02.0.jpg,F45_5L10_2022-04-14-03-13-02.0.txt
1,/mnt/hdd1/Data/f45movement/f45coco/labels/train,F45_5L10_2022-04-14-03-25-35.2.jpg,F45_5L10_2022-04-14-03-25-35.2.txt
2,/mnt/hdd1/Data/f45movement/f45coco/labels/val,F45_5L10_2022-04-14-03-50-28.1.jpg,F45_5L10_2022-04-14-03-50-28.1.txt
3,/mnt/hdd1/Data/f45movement/f45coco/labels/val,F45_5L10_2022-04-14-05-52-34.0.jpg,F45_5L10_2022-04-14-05-52-34.0.txt
4,/mnt/hdd1/Data/f45movement/f45coco/labels/train,F45_5L10_2022-04-14-07-16-11.7.jpg,F45_5L10_2022-04-14-07-16-11.7.txt
...,...,...,...
178,/mnt/hdd1/Data/f45movement/f45coco/labels/train,F45_5L8_2022-04-14-03-06-35.1.jpg,F45_5L8_2022-04-14-03-06-35.1.txt
179,/mnt/hdd1/Data/f45movement/f45coco/labels/train,F45_5L8_2022-04-14-08-34-11.1.jpg,F45_5L8_2022-04-14-08-34-11.1.txt
180,/mnt/hdd1/Data/f45movement/f45coco/labels/train,F45_5L8_2022-04-14-08-38-28.6.jpg,F45_5L8_2022-04-14-08-38-28.6.txt
181,/mnt/hdd1/Data/f45movement/f45coco/labels/val,F45_5L8_2022-04-14-08-43-15.6.jpg,F45_5L8_2022-04-14-08-43-15.6.txt


In [365]:
# Number of label files written per destination folder (train vs. val).
folder_counts = debug_log_df['folder'].value_counts()
folder_counts

/mnt/hdd1/Data/f45movement/f45coco/labels/train    155
/mnt/hdd1/Data/f45movement/f45coco/labels/val       28
Name: folder, dtype: int64