In [1]:
import os
import numpy as np
import pandas as pd
import pickle
from sklearn.preprocessing import LabelEncoder 

In [2]:
# Directory used for every input and output file in this notebook: the
# 'wiki/' folder under the kernel's current working directory.
data_dir = os.path.join(os.getcwd(), 'wiki/')

In [3]:
def load_graph(fname, data_dir=data_dir):
    """Read a tab-separated edge list into a two-column array of strings.

    Parameters
    ----------
    fname : str
        Base name of the file; ``'<fname>.txt'`` is opened inside ``data_dir``.
    data_dir : str
        Directory containing the file. The default is the notebook-level
        ``data_dir`` as it was when this cell was executed.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(n_edges, 2)`` holding the first two tab-separated
        fields of every non-blank line. A file without any edges yields an
        empty array of shape ``(0, 2)`` so column indexing still works.

    Raises
    ------
    IndexError
        If a non-blank line has fewer than two tab-separated fields.
    """
    file_path = os.path.join(data_dir, '%s.txt' % (fname))

    edges = []
    with open(file_path) as file:
        for line in file:
            stripped = line.strip()
            if not stripped:
                # Blank or whitespace-only line: there is no edge to record.
                continue
            fields = stripped.split('\t')
            edges.append((fields[0], fields[1]))

    if not edges:
        # np.array([]) would be 1-D; keep the two-column shape callers index into.
        return np.empty((0, 2), dtype=str)
    return np.array(edges)

In [8]:
def convert_graph(fname):
    """Integer-encode an edge file and save the edge list and id lookups.

    The two columns returned by ``load_graph(fname)`` are named ``'cat'`` and
    ``'edge'`` and label-encoded independently. ``'edge'`` ids are shifted by
    the number of distinct ``'cat'`` values, so the two id ranges never
    overlap. A string that occurs in both columns therefore receives two
    different ids.

    Files written into ``data_dir``:

    * ``'<fname>.edgelist'`` - space-separated ``cat_int edge_int`` pairs,
      without header or index.
    * ``'<fname>_dict_cat.pkl'`` - pickled ``{cat_int: cat}`` dict.
    * ``'<fname>_dict_edge.pkl'`` - pickled ``{edge_int: edge}`` dict.

    Parameters
    ----------
    fname : str
        Base name of the input file, also used as prefix of the output files.

    Returns
    -------
    tuple
        ``(df_graph, int_to_cat, int_to_edge)``: the DataFrame with columns
        ``cat``, ``edge``, ``cat_int``, ``edge_int`` and the two id-to-name
        dicts.
    """
    graph = load_graph(fname)
    df_graph = pd.DataFrame(graph, columns=['cat', 'edge'])

    cat_encoder = LabelEncoder()
    df_graph['cat_int'] = cat_encoder.fit_transform(graph[:, 0])
    # Every fitted class occurs in the data, so this equals the number of
    # distinct cat_int values and is the first id free for the edge column.
    n_cats = len(cat_encoder.classes_)

    edge_encoder = LabelEncoder()
    df_graph['edge_int'] = edge_encoder.fit_transform(graph[:, 1]) + n_cats

    df_graph[['cat_int', 'edge_int']].to_csv(os.path.join(data_dir, '%s.edgelist' % (fname)), header=False, index=False, sep=' ')

    int_to_cat = df_graph[['cat','cat_int']].drop_duplicates().set_index('cat_int').to_dict()['cat']
    int_to_edge = df_graph[['edge','edge_int']].drop_duplicates().set_index('edge_int').to_dict()['edge']

    # Context managers close (and flush) the pickle files before returning.
    with open(os.path.join(data_dir, '%s_dict_cat.pkl' % (fname)), "wb") as cat_file:
        pickle.dump(int_to_cat, cat_file)
    with open(os.path.join(data_dir, '%s_dict_edge.pkl' % (fname)), "wb") as edge_file:
        pickle.dump(int_to_edge, edge_file)

    return df_graph, int_to_cat, int_to_edge

In [9]:
# Convert every raw edge file. The three result names are rebound on each
# iteration, so after the loop they hold only the results for the last entry
# of fnames; the earlier results exist only as the files convert_graph wrote.
fnames = ['cat_edges1', 'cat_edges_2', 'link_edges']
for fname in fnames:
    df_graph, int_to_cat, int_to_edge = convert_graph(fname)

In [10]:
# Largest integer id in int_to_edge, i.e. in the mapping bound by the most
# recent convert_graph call that ran in this kernel.
np.max(list(int_to_edge.keys()))

31739

In [38]:
# Load the label table from data_dir; the cell below reads its 'wiki_graph'
# and 'CIFAR' columns.
df_cifar_labels = pd.read_csv(os.path.join(data_dir, 'graph_to_cifar_labels.csv'))

In [41]:
# Build a {wiki_graph value: CIFAR value} dict from the label table and
# pickle it next to the other files in data_dir.
graph_to_cifar_labels = df_cifar_labels.set_index('wiki_graph').to_dict()['CIFAR']
# The context manager closes (and flushes) the file once the dump is done.
with open(os.path.join(data_dir, 'graph_to_cifar_labels.pkl'), "wb") as pkl_file:
    pickle.dump(graph_to_cifar_labels, pkl_file)

In [43]:
# Read the pickled mapping back from data_dir and display it as a check.
# The context manager closes the file handle as soon as loading finishes.
with open(os.path.join(data_dir, 'graph_to_cifar_labels.pkl'), "rb") as pkl_file:
    graph_to_cifar_labels = pickle.load(pkl_file)
graph_to_cifar_labels

{'Aluminum_can': 'can',
 'Arecaceae': 'palm_tree',
 'Batoidea': 'ray',
 'Chili pepper': 'sweet_pepper',
 'Computer keyboard': 'keyboard',
 'Light_fixture': 'lamp',
 'Opossum': 'possum',
 'Orange (fruit)': 'orange',
 'Pinniped': 'seal',
 'Plate (dishware)': 'plate',
 'Table (furniture)': 'table',
 'apples': 'apple',
 'aquarium': 'aquarium_fish',
 'baby': 'baby',
 'bear': 'bear',
 'beaver': 'beaver',
 'bed': 'bed',
 'bee': 'bee',
 'beetle': 'beetle',
 'bicycle': 'bicycle',
 'bottles': 'bottle',
 'bowls': 'bowl',
 'boy': 'boy',
 'bridge': 'bridge',
 'bus': 'bus',
 'butterfly': 'butterfly',
 'camel': 'camel',
 'castle': 'castle',
 'caterpillar': 'caterpillar',
 'cattle': 'cattle',
 'chair': 'chair',
 'chimpanzee': 'chimpanzee',
 'clock': 'clock',
 'cloud': 'cloud',
 'cockroach': 'cockroach',
 'couch': 'couch',
 'crab': 'crab',
 'crocodile': 'crocodile',
 'cups': 'cup',
 'dinosaur': 'dinosaur',
 'dolphin': 'dolphin',
 'elephant': 'elephant',
 'flatfish': 'flatfish',
 'forest': 'forest',
 'f