In [2]:
import json
import os

class KaggleFile:
    """Wrapper around a single ``<file_type>.json`` annotation file.

    The loaded JSON document is read as an object with an ``"images"`` list.
    Each entry's ``"id"`` and ``"class"`` values are used to build two lookup
    tables:

    * class -> list of image ids  (``make_class_id_map``)
    * image id -> class           (``make_id_class_map``)

    Both tables can be written as JSON files into ``save_path``.
    """

    def __init__(self,
                 file_type,
                 json_file_path,
                 save_path):
        """Record the paths and load ``<json_file_path>/<file_type>.json``.

        Args:
            file_type: Name of the file without extension (e.g. ``'train'``);
                it is lower-cased before being used in file names.
            json_file_path: Directory that contains the JSON file.
            save_path: Directory the generated map files are written to. It is
                created on the first save if it does not exist.

        Raises:
            OSError: If the JSON file cannot be opened.
            json.JSONDecodeError: If the file is not valid JSON.
        """
        self._file_type = file_type.lower()
        self._json_file_path = json_file_path
        self.save_path = save_path
        self._file_name = os.path.join(json_file_path, "%s.json" % self._file_type)
        self.load_file()

    def load_file(self):
        """Read the JSON file into memory and discard any previously built maps."""
        # JSON text is UTF-8 by specification; do not depend on the locale default.
        with open(self._file_name, encoding="utf-8") as f:
            self._json_file = json.load(f)
        # Maps derived from an earlier load no longer describe the current data.
        self._class_id_map = None
        self._id_class_map = None
        print("[ %s ] is loaded." % self._file_name)

    def _images(self):
        """Return the ``"images"`` list, or ``[]`` when the key is missing or null."""
        return self._json_file.get("images") or []

    def _save_json(self, data, suffix):
        """Write ``data`` to ``<save_path>/<file_type>_<suffix>.json``."""
        # An empty save_path means "current directory"; os.makedirs("") would raise.
        if self.save_path:
            os.makedirs(self.save_path, exist_ok=True)
        filename = os.path.join(self.save_path, "%s_%s.json" % (self._file_type, suffix))
        # NOTE: JSON object keys are always strings, so non-string ids/classes
        # used as dict keys are written out in their string form.
        with open(filename, "w") as f:
            json.dump(data, f)
        print("[ %s ] is saved" % filename)

    def get_info(self):
        """Print the top-level keys of the document and the number of images."""
        print("Attributes of [ %s ]:" % self._file_type)
        print('>>\t', self._json_file.keys())
        print("Total images of [ %s ]:" % self._file_type)
        print('>>\t', len(self._images()))

    def make_class_id_map(self):
        """Build ``self._class_id_map``: class -> list of image ids (file order)."""
        class_id_map = {}
        for image in self._images():
            class_id_map.setdefault(image.get("class"), []).append(image.get("id"))
        self._class_id_map = class_id_map

    def save_class_id_map(self):
        """Write the class -> ids map to ``<file_type>_class_id.json``.

        The map is built first if ``make_class_id_map`` has not been called.
        """
        if getattr(self, "_class_id_map", None) is None:
            self.make_class_id_map()
        self._save_json(self._class_id_map, "class_id")

    def make_id_class_map(self):
        """Build ``self._id_class_map``: image id -> class."""
        self._id_class_map = {
            image.get("id"): image.get("class") for image in self._images()
        }

    def save_id_class_map(self):
        """Write the id -> class map to ``<file_type>_id_class.json``.

        The map is built first if ``make_id_class_map`` has not been called.
        """
        if getattr(self, "_id_class_map", None) is None:
            self.make_id_class_map()
        self._save_json(self._id_class_map, "id_class")

    def save_both_file(self):
        """Rebuild both maps from the loaded document and write them to disk."""
        self.make_class_id_map()
        self.save_class_id_map()
        self.make_id_class_map()
        self.save_id_class_map()
        

In [3]:
# Load the 'val' annotation file; construction reads the JSON immediately.
val_file = KaggleFile(
    file_type="val",
    json_file_path="/tf/imaterialist-product-2019/init_files",
    save_path="/tf/imaterialist-product-2019/preprocess",
)
# Disabled: uncomment to build both lookup maps and write them to save_path.
# val_file.save_both_file()

[ /tf/imaterialist-product-2019/init_files/val.json ] is loaded.


In [4]:
# Load the 'train' annotation file; construction reads the JSON immediately.
train_file = KaggleFile(
    file_type="train",
    json_file_path="/tf/imaterialist-product-2019/init_files",
    save_path="/tf/imaterialist-product-2019/preprocess",
)
# Disabled: uncomment to build both lookup maps and write them to save_path.
# train_file.save_both_file()

[ /tf/imaterialist-product-2019/init_files/train.json ] is loaded.


In [5]:
# Print the top-level JSON keys and the number of "images" entries of the 'val' file.
val_file.get_info()

Attributes of [ val ]:
>>	 dict_keys(['images'])
Total images of [ val ]:
>>	 10095


In [6]:
# Print the top-level JSON keys and the number of "images" entries of the 'train' file.
train_file.get_info()

Attributes of [ train ]:
>>	 dict_keys(['images'])
Total images of [ train ]:
>>	 1011532


In [7]:
# Load the 'test' annotation file, then print its top-level keys and image count.
test_file = KaggleFile(
    file_type="test",
    json_file_path="/tf/imaterialist-product-2019/init_files",
    save_path="/tf/imaterialist-product-2019/preprocess",
)
test_file.get_info()

[ /tf/imaterialist-product-2019/init_files/test.json ] is loaded.
Attributes of [ test ]:
>>	 dict_keys(['images'])
Total images of [ test ]:
>>	 90834
