Mapping ImageNet to 16 named classes

In [None]:
import os
import zipfile
from shutil import rmtree, copy2
from pathlib import Path
from tqdm import tqdm


# Source directory that is scanned for one sub-directory per WNID.
IMAGENET_DIR = "/kaggle/input/imagenetmini-1000/imagenet-mini/train" 
# Destination directory; it receives one sub-directory per merged category and
# is deleted and recreated whenever filter_and_merge_imagenet_data runs on it.
OUTPUT_DIR = "/kaggle/working/output_data"
# Path of the zip archive built from the contents of OUTPUT_DIR.
OUTPUT_ZIP = "/kaggle/working/output.zip"

# Maps each of the 16 output category names to the list of WNIDs (names of the
# source sub-directories) whose images are merged into that category.
# WNIDs that have no matching sub-directory under the source directory are
# skipped silently by filter_and_merge_imagenet_data, so an entry listed here
# does not guarantee that any image is copied for it.
CATEGORY_MAPPINGS = {
    "knife": ['n03041632'],
    "keyboard": ['n03085013', 'n04505470'],
    "elephant": ['n02504013', 'n02504458'],
    "bicycle": ['n02835271', 'n03792782'],
    "airplane": ['n02690373', 'n03955296', 'n13861050', 'n13941806'],
    "clock": ['n02708093', 'n03196217', 'n04548280'],
    "oven": ['n03259401', 'n04111414', 'n04111531'],
    "chair": ['n02791124', 'n03376595', 'n04099969', 'n00605023', 'n04429376'],
    "bear": ['n02132136', 'n02133161', 'n02134084', 'n02134418'],
    "boat": ['n02951358', 'n03344393', 'n03662601', 'n04273569', 'n04612373', 'n04612504'],
    "cat": ["n02122878", "n02123045", "n02123159", "n02126465", "n02123394", "n02123597", "n02124075", "n02125311"],
    "bottle": ['n02823428', 'n03937543', 'n03983396', 'n04557648', 'n04560804', 'n04579145', 'n04591713'],
    "truck": ['n03345487', 'n03417042', 'n03770679', 'n03796401', 'n00319176', 'n01016201', 'n03930630', 'n03930777', 
              'n05061003', 'n06547832', 'n10432053', 'n03977966', 'n04461696', 'n04467665'],
    "car": ['n02814533', 'n03100240', 'n03100346', 'n13419325', 'n04285008'],
    "bird": ['n01321123', 'n01514859', 'n01792640', 'n07646067', 'n01530575', 'n01531178', 'n01532829', 'n01534433', 
             'n01537544', 'n01558993', 'n01562265', 'n01560419', 'n01582220', 'n10281276', 'n01592084', 'n01601694', 
             'n01614925', 'n01616318', 'n01622779', 'n01795545', 'n01796340', 'n01797886', 'n01798484', 'n01817953', 
             'n01818515', 'n01819313', 'n01820546', 'n01824575', 'n01828970', 'n01829413', 'n01833805', 'n01843065', 
             'n01843383', 'n01855032', 'n01855672', 'n07646821', 'n01860187', 'n02002556', 'n02002724', 'n02006656', 
             'n02007558', 'n02009229', 'n02009912', 'n02011460', 'n02013706', 'n02017213', 'n02018207', 'n02018795', 
             'n02025239', 'n02027492', 'n02028035', 'n02033041', 'n02037110', 'n02051845', 'n02056570'],
    "dog": ['n02085782', 'n02085936', 'n02086079', 'n02086240', 'n02086646', 'n02086910', 'n02087046', 'n02087394', 
            'n02088094', 'n02088238', 'n02088364', 'n02088466', 'n02088632', 'n02089078', 'n02089867', 'n02089973', 
            'n02090379', 'n02090622', 'n02090721', 'n02091032', 'n02091134', 'n02091244', 'n02091467', 'n02091635', 
            'n02091831', 'n02092002', 'n02092339', 'n02093256', 'n02093428', 'n02093647', 'n02093754', 'n02093859', 
            'n02093991', 'n02094114', 'n02094258', 'n02094433', 'n02095314', 'n02095570', 'n02095889', 'n02096051', 
            'n02096294', 'n02096437', 'n02096585', 'n02097047', 'n02097130', 'n02097209', 'n02097298', 'n02097474', 
            'n02097658', 'n02098105', 'n02098286', 'n02099267', 'n02099429', 'n02099601', 'n02099712', 'n02099849', 
            'n02100236', 'n02100583', 'n02100735', 'n02100877', 'n02101006', 'n02101388', 'n02101556', 'n02102040', 
            'n02102177', 'n02102318', 'n02102480', 'n02102973', 'n02104029', 'n02104365', 'n02105056', 'n02105162', 
            'n02105251', 'n02105505', 'n02105641', 'n02105855', 'n02106030', 'n02106166', 'n02106382', 'n02106550', 
            'n02106662', 'n02107142', 'n02107312', 'n02107574', 'n02107683', 'n02107908', 'n02108000', 'n02108422', 
            'n02108551', 'n02108915', 'n02109047', 'n02109525', 'n02109961', 'n02110063', 'n02110185', 'n02110627', 
            'n02110806', 'n02110958', 'n02111129', 'n02111277', 'n08825211', 'n02111500', 'n02112018', 'n02112350', 
            'n02112706', 'n02113023', 'n02113624', 'n02113712', 'n02113799', 'n02113978']
}

def filter_and_merge_imagenet_data(src_dir, dest_dir, category_mappings):
    """
    Copy and merge a subset of ImageNet data corresponding to specific categories.

    For every category, the files found in each listed WNID sub-directory of
    ``src_dir`` are copied into ``dest_dir/<category>/`` and renamed to
    ``<CATEGORY><n>.JPEG``, where ``n`` counts from 1 within the category.
    Files are visited in sorted name order so the numbering is identical on
    every run over the same source tree.

    Args:
        src_dir: Directory that contains one sub-directory per WNID.
        dest_dir: Output directory. If it already exists it is deleted and
            recreated, so any previous content is lost.
        category_mappings: Mapping of category name to an iterable of WNIDs
            whose images are merged into that category.

    Notes:
        * WNIDs without a matching sub-directory in ``src_dir`` are skipped
          silently; the category directory is still created, possibly empty.
        * Only regular files directly inside a WNID directory are copied;
          nested directories are ignored.
        * Every copy gets the ``.JPEG`` extension regardless of the source
          file's own extension.
    """
    # Start from a clean slate so leftovers from a previous run cannot mix in.
    if os.path.exists(dest_dir):
        rmtree(dest_dir)
    os.makedirs(dest_dir, exist_ok=True)

    for category, wnids in category_mappings.items():
        category_dir = os.path.join(dest_dir, category)
        os.makedirs(category_dir, exist_ok=True)

        # Counter spans all WNIDs of the category so merged names never clash.
        image_counter = 1

        for wnid in tqdm(wnids, desc=f"Processing {category}", unit="WNID"):
            wnid_dir = os.path.join(src_dir, wnid)
            # isdir (not exists): a stray regular file named like a WNID
            # would otherwise make os.listdir raise.
            if not os.path.isdir(wnid_dir):
                continue

            # os.listdir returns entries in arbitrary order; sort them so the
            # source-file -> output-name assignment is reproducible.
            for file_name in sorted(os.listdir(wnid_dir)):
                file_path = os.path.join(wnid_dir, file_name)
                if not os.path.isfile(file_path):
                    continue

                new_file_name = f"{category.upper()}{image_counter}.JPEG"
                copy2(file_path, os.path.join(category_dir, new_file_name))
                image_counter += 1

def create_zip_from_directory(source_dir, output_zip_path):
    """
    Create a zip file from the contents of a directory.

    All regular files below ``source_dir`` are stored (deflate-compressed)
    under their path relative to ``source_dir``. Empty directories are not
    recorded. Files are added in sorted order so the archive layout is the
    same on every run over the same tree.

    Args:
        source_dir: Root directory whose files are archived.
        output_zip_path: Path of the zip file to write; an existing file at
            this path is overwritten.

    Notes:
        The file list is gathered in a single walk *before* the archive is
        opened, and the archive path itself is excluded, so placing
        ``output_zip_path`` inside ``source_dir`` does not make the archive
        try to include itself.
    """
    archive_real_path = os.path.realpath(output_zip_path)

    # Single traversal: collect the files once instead of walking the tree
    # twice (once to count for the progress bar, once to write).
    file_paths = []
    for root, dirs, files in os.walk(source_dir):
        dirs.sort()  # in-place sort steers os.walk into a deterministic order
        for file in sorted(files):
            file_path = os.path.join(root, file)
            if os.path.realpath(file_path) == archive_real_path:
                continue  # never add the archive to itself
            file_paths.append(file_path)

    with zipfile.ZipFile(output_zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
        with tqdm(total=len(file_paths), desc="Zipping files", unit="file") as pbar:
            for file_path in file_paths:
                arcname = os.path.relpath(file_path, start=source_dir)
                zipf.write(file_path, arcname)
                pbar.update(1)


In [None]:
# Step 1: rebuild OUTPUT_DIR from scratch (any existing OUTPUT_DIR is deleted)
# with one sub-directory per category in CATEGORY_MAPPINGS.
filter_and_merge_imagenet_data(IMAGENET_DIR, OUTPUT_DIR, CATEGORY_MAPPINGS)
# Step 2: pack the merged directory into OUTPUT_ZIP; must run after step 1
# because it archives whatever OUTPUT_DIR contains at this point.
create_zip_from_directory(OUTPUT_DIR, OUTPUT_ZIP)