In [3]:
import numpy as np
import matplotlib.pyplot as plt
import json
import pandas as pd
import os
from os import listdir
from os.path import isfile, join
import shutil

In [4]:
# Dataset locations, relative to the notebook's working directory.
rootDir = '../../../Datasets/TerraIncognita'
dataDir = f'{rootDir}/eccv_18_all_images_sm'
annDir = f'{rootDir}/eccv_18_annotation_files'
# os.listdir returns entries in arbitrary order; sorting makes positional
# access (annFiles[3]) and the processing order reproducible across machines.
annFiles = sorted(join(annDir, f) for f in listdir(annDir) if isfile(join(annDir, f)))
# Folder that receives the per-domain txt lists.
txtDir = '../data/txt_lists/TerraIncognita'
# Values of the 'location' field that are exported as domains.
domains = [38, 43, 46, 100]
# Category ids; the position of an id in this list is the class index
# written next to each image path in the txt lists.
labels = [1, 3, 5, 6, 7, 8, 9, 10, 11, 16, 21, 30, 33, 34, 51, 99]

In [5]:
# Peek at the category id -> name mapping stored in one annotation file.
# The context manager closes the file handle instead of leaking it.
with open(annFiles[3], 'r') as ann_fp:
    dataset = json.load(ann_fp)
list(dataset['categories'])

[{'id': 6, 'name': 'bobcat'},
 {'id': 1, 'name': 'opossum'},
 {'id': 30, 'name': 'empty'},
 {'id': 9, 'name': 'coyote'},
 {'id': 3, 'name': 'raccoon'},
 {'id': 11, 'name': 'bird'},
 {'id': 8, 'name': 'dog'},
 {'id': 16, 'name': 'cat'},
 {'id': 5, 'name': 'squirrel'},
 {'id': 10, 'name': 'rabbit'},
 {'id': 7, 'name': 'skunk'},
 {'id': 99, 'name': 'rodent'},
 {'id': 21, 'name': 'badger'},
 {'id': 34, 'name': 'deer'},
 {'id': 33, 'name': 'car'},
 {'id': 51, 'name': 'fox'}]

In [6]:
def recover_labels(images, dataset):
    """Return the category id of every image that has an annotation.

    Args:
        images: iterable of image file names; the file extension is stripped
            to obtain the image id.
        dataset: dict with an 'annotations' list whose entries carry
            'image_id' and 'category_id'.

    Returns:
        List of category ids, in the order of `images`. Images without any
        annotation are skipped, so the result can be shorter than `images`.
    """
    # Index the annotations once instead of rescanning the whole list for
    # every image. setdefault keeps the first annotation listed for an id.
    first_ann = {}
    for ann in dataset['annotations']:
        first_ann.setdefault(ann['image_id'], ann)

    labels = []
    for img in images:
        # splitext handles any extension length ('.jpg', '.jpeg', none).
        img_id = os.path.splitext(img)[0]
        res = first_ann.get(img_id)
        if res is not None:
            labels.append(res['category_id'])
    return labels

In [7]:
def load_splits(annFile):
    """Load one annotation file into a DataFrame of annotated images.

    Args:
        annFile: path to a JSON file with an 'images' list (entries carry
            'file_name' and 'location') and an 'annotations' list (entries
            carry 'image_id' and 'category_id').

    Returns:
        DataFrame with columns 'file_name', 'location' and 'label', one row
        per image that has at least one annotation. Images without an
        annotation are left out.
    """
    with open(annFile, 'r') as ann_fp:
        dataset = json.load(ann_fp)

    # The lookup is done here rather than through recover_labels: that helper
    # omits unannotated images from its result, so zipping its output with the
    # file names and locations would shift every following label by one row.
    # The first annotation listed for an image id wins.
    first_ann = {}
    for ann in dataset['annotations']:
        first_ann.setdefault(ann['image_id'], ann)

    rows = []
    for sample in dataset['images']:
        file_name = sample['file_name']
        # The image id is the file name without its extension.
        ann = first_ann.get(os.path.splitext(file_name)[0])
        if ann is None:
            continue
        rows.append((file_name, sample['location'], ann['category_id']))

    return pd.DataFrame(rows, columns=['file_name', 'location', 'label'])

In [12]:
def save_splits(df, domains, labels):
    """Copy images into per-domain/per-label folders and extend the txt lists.

    For every domain, images are copied from ``dataDir`` to
    ``{rootDir}/{domain}/{label}/`` and one line
    ``{domain}/{label}/{file_name} {class_index}`` per image is appended to
    ``{txtDir}/{domain}/{domain}.txt``. Relies on the notebook-level
    ``rootDir``, ``dataDir`` and ``txtDir`` settings.

    Args:
        df: DataFrame with 'file_name', 'location' and 'label' columns.
        domains: location values to export.
        labels: category ids; the position of a label in this list is the
            class index written to the txt list.

    Raises:
        ValueError: if a row's label is not contained in `labels`.
    """
    # makedirs(exist_ok=True) also creates missing parent folders and is a
    # no-op when the folder is already there.
    os.makedirs(txtDir, exist_ok=True)
    for domain in domains:
        os.makedirs(f'{rootDir}/{domain}', exist_ok=True)
        # Create label folders unconditionally so a partially created tree
        # (domain folder present, label folder missing) cannot break the copy.
        for label in labels:
            os.makedirs(f'{rootDir}/{domain}/{label}', exist_ok=True)

        os.makedirs(join(txtDir, str(domain)), exist_ok=True)
        file_path = join(txtDir, f'{domain}/{domain}.txt')

        # Entries written by an earlier call or an earlier run of the
        # notebook; skipping them keeps re-runs from duplicating lines.
        if os.path.exists(file_path):
            with open(file_path, 'r') as listing:
                already_listed = {line.rstrip('\n') for line in listing}
        else:
            already_listed = set()

        dom_df = df.loc[df['location'] == domain]

        txt_list = []
        for file_name, label in zip(dom_df['file_name'], dom_df['label']):
            origin = f'{dataDir}/{file_name}'
            target = f'{rootDir}/{domain}/{label}/{file_name}'
            entry = f'{domain}/{label}/{file_name} {labels.index(label)}'
            if entry not in already_listed:
                already_listed.add(entry)
                txt_list.append(entry)
            shutil.copy(origin, target)

        if txt_list:
            # Append: several annotation files contribute to the same domain.
            with open(file_path, 'a') as listing:
                listing.write('\n'.join(txt_list))
                listing.write('\n')

In [13]:
# Export every annotation file: keep only the rows recorded at one of the
# selected domains, then copy the images and extend the txt lists.
for annFile in annFiles:
    df = load_splits(annFile).loc[lambda frame: frame['location'].isin(domains)]
    save_splits(df, domains, labels)