In [1]:
# Build FGVC-Aircraft class-index mappings: variant -> family and variant -> manufacturer

In [2]:
# We first read in all identifiers
import pandas as pd
from pathlib import Path

In [3]:
# Root of the meta-dataset checkout. The empty string makes `dataset_path`
# relative to the current working directory.
META_DATASET_PATH = ""
# Split names as they appear in the annotation file names (images_<split>.txt).
splits = ["train", "val", "test"]
dataset_path = Path(META_DATASET_PATH) / "datasets/fgvc-aircraft-2013b/data/"

# All image ids across the splits as a 1-D array of strings. dtype=str stops
# pandas from parsing the ids as numbers. Selecting the single column with
# .iloc[:, 0] always yields a 1-D array, whereas .squeeze() would collapse a
# one-row file to a 0-d array that cannot be iterated.
image_ids = (
    pd.concat([pd.read_csv(dataset_path / f"images_{split}.txt", header=None, dtype=str) for split in splits])
    .iloc[:, 0]
    .to_numpy()
)
# Label vocabularies, one name per line. They are read as strings (like the ids
# above) so that names can be matched against the string labels parsed from the
# images_<level>_<split>.txt files.
variants = pd.read_csv(dataset_path / "variants.txt", header=None, dtype=str)
families = pd.read_csv(dataset_path / "families.txt", header=None, dtype=str)
manufacturers = pd.read_csv(dataset_path / "manufacturers.txt", header=None, dtype=str)

In [4]:
import itertools


def read_in_text_files(cls_type, data_dir=None, split_names=None):
    """Build an image-id -> label lookup for one annotation level.

    Reads ``images_{cls_type}_{split}.txt`` for every split and merges the
    results into a single dict. Each non-blank line is split on its first
    space only, so labels that themselves contain spaces stay intact.

    Args:
        cls_type: Annotation level used in the file name (the notebook calls
            this with "variant", "family" and "manufacturer").
        data_dir: Directory holding the annotation files. Defaults to the
            notebook-level ``dataset_path``.
        split_names: Iterable of split names. Defaults to the notebook-level
            ``splits``.

    Returns:
        dict mapping image id (str) to label (str). If an id occurs more than
        once, the last occurrence wins.

    Raises:
        ValueError: if a non-blank line has no space separating id and label.
    """
    if data_dir is None:
        data_dir = dataset_path
    if split_names is None:
        split_names = splits

    return_map = {}
    for split in split_names:
        annotation_file = Path(data_dir) / f"images_{cls_type}_{split}.txt"
        with open(annotation_file, "r", encoding="utf-8") as f:
            for line_no, raw_line in enumerate(f, start=1):
                line = raw_line.rstrip("\n")
                # Blank lines (e.g. a trailing empty line) carry no record.
                if not line.strip():
                    continue
                image_id, separator, label = line.partition(" ")
                if not separator:
                    raise ValueError(
                        f"{annotation_file}:{line_no}: expected '<image_id> <label>', got {line!r}"
                    )
                return_map[image_id] = label
    return return_map

In [5]:
# One image-id -> label lookup per annotation level, built in the order
# variant, family, manufacturer.
image_id_variant, image_id_family, image_id_manufacturer = (
    read_in_text_files(level) for level in ("variant", "family", "manufacturer")
)

In [6]:
from collections import defaultdict

# Join the three per-level lookups into one record per image:
# image id -> (variant, family, manufacturer).
variant_map = {
    img: (image_id_variant[img], image_id_family[img], image_id_manufacturer[img])
    for img in image_ids
}

In [7]:
# Variant name -> family / manufacturer name, taken from the per-image records
# (if a variant appears in several records, the last one seen wins).
variant_map_to_family = {variant_name: family_name for variant_name, family_name, _ in variant_map.values()}
variant_map_to_manufacturer = {
    variant_name: manufacturer_name for variant_name, _, manufacturer_name in variant_map.values()
}

# Family / manufacturer name -> integer class id, i.e. the name's position in
# families.txt / manufacturers.txt.
family_to_class = {name: idx for idx, name in enumerate(families.to_numpy().ravel().tolist())}
manufacturer_to_class = {name: idx for idx, name in enumerate(manufacturers.to_numpy().ravel().tolist())}

In [8]:
# Variant names grouped by meta-dataset split, copied from meta-dataset's
# aircraft_splits.json. The order inside each list matters: a variant's integer
# class id is its position once the lists are concatenated below.
# Note the key here is "valid", not "val" as in the annotation file names.
md_split_to_variant = {
    "train": [
        "A340-300",
        "A318",
        "Falcon 2000",
        "F-16A/B",
        "F/A-18",
        "C-130",
        "MD-80",
        "BAE 146-200",
        "777-200",
        "747-400",
        "Cessna 172",
        "An-12",
        "A330-300",
        "A321",
        "Fokker 100",
        "Fokker 50",
        "DHC-1",
        "Fokker 70",
        "A340-200",
        "DC-6",
        "747-200",
        "Il-76",
        "747-300",
        "Model B200",
        "Saab 340",
        "Cessna 560",
        "Dornier 328",
        "E-195",
        "ERJ 135",
        "747-100",
        "737-600",
        "C-47",
        "DR-400",
        "ATR-72",
        "A330-200",
        "727-200",
        "737-700",
        "PA-28",
        "ERJ 145",
        "737-300",
        "767-300",
        "737-500",
        "737-200",
        "DHC-6",
        "Falcon 900",
        "DC-3",
        "Eurofighter Typhoon",
        "Challenger 600",
        "Hawk T1",
        "A380",
        "777-300",
        "E-190",
        "DHC-8-100",
        "Cessna 525",
        "Metroliner",
        "EMB-120",
        "Tu-134",
        "Embraer Legacy 600",
        "Gulfstream IV",
        "Tu-154",
        "MD-87",
        "A300B4",
        "A340-600",
        "A340-500",
        "MD-11",
        "707-320",
        "Cessna 208",
        "Global Express",
        "A319",
        "DH-82",
    ],
    "valid": [
        "737-900",
        "757-300",
        "767-200",
        "A310",
        "A320",
        "BAE 146-300",
        "CRJ-900",
        "DC-10",
        "DC-8",
        "DC-9-30",
        "DHC-8-300",
        "Gulfstream V",
        "SR-20",
        "Tornado",
        "Yak-42",
    ],
    "test": [
        "737-400",
        "737-800",
        "757-200",
        "767-400",
        "ATR-42",
        "BAE-125",
        "Beechcraft 1900",
        "Boeing 717",
        "CRJ-200",
        "CRJ-700",
        "E-170",
        "L-1011",
        "MD-90",
        "Saab 2000",
        "Spitfire",
    ],
}
# Flat list indexed by variant class id: train variants first, then valid, then test.
md_class_to_variant = [
    variant_name
    for split_name in ("train", "valid", "test")
    for variant_name in md_split_to_variant[split_name]
]

In [9]:
# Variant class id -> family class id.
# The variant class id is the position in md_class_to_variant, so that list is
# enumerated directly. Bounding the loop by the row count of variants.txt would
# raise IndexError or silently drop classes if the two lengths ever differed.
variant_class_to_family_class = {
    variant_class: family_to_class[variant_map_to_family[variant_name]]
    for variant_class, variant_name in enumerate(md_class_to_variant)
}

In [10]:
# Variant class id -> manufacturer class id.
# The variant class id is the position in md_class_to_variant, so that list is
# enumerated directly. Bounding the loop by the row count of variants.txt would
# raise IndexError or silently drop classes if the two lengths ever differed.
variant_class_to_manufacturer_class = {
    variant_class: manufacturer_to_class[variant_map_to_manufacturer[variant_name]]
    for variant_class, variant_name in enumerate(md_class_to_variant)
}

In [14]:
import json

# Write both lookups next to the notebook. JSON object keys are always strings,
# so the integer class ids used as keys come back as "0", "1", ... when loaded.
for out_name, class_map in (
    ("variant_class_to_family_class.json", variant_class_to_family_class),
    ("variant_class_to_manufacturer_class.json", variant_class_to_manufacturer_class),
):
    with open(out_name, "w") as out_file:
        json.dump(class_map, out_file)