In [14]:
import os
import numpy as np
import pandas as pd
import pickle
from sklearn.preprocessing import LabelEncoder 

In [15]:
# Working folder for this notebook: the 'wiki/' directory under the current
# working directory (so the value depends on where the kernel was started).
# load_graph reads its '<fname>.txt' input from here and convert_graph writes
# its '.edgelist' and '.pk' outputs here.
data_dir = os.path.join(os.getcwd(), 'wiki/')

In [16]:
def load_graph(fname, data_dir=data_dir):
    """Read a tab-separated edge file into a two-column array.

    The file ``<data_dir>/<fname>.txt`` is read line by line. Each line is
    stripped of surrounding whitespace and split on tabs; the first two
    fields become one row of the result and any further fields are ignored.
    Blank (or whitespace-only) lines are skipped.

    NOTE(review): no encoding is passed to ``open()``, so the interpreter's
    default is used -- confirm this matches the edge files.

    Args:
        fname: File name without the ``.txt`` extension.
        data_dir: Directory containing the file. The default is the value the
            module-level ``data_dir`` had when this function was defined.

    Returns:
        A NumPy string array of shape ``(n_rows, 2)``. The shape is
        ``(0, 2)`` when the file holds no rows, so callers can always slice
        the two columns.

    Raises:
        IndexError: If a non-blank line has fewer than two tab-separated
            fields.
    """
    file_path = os.path.join(data_dir, '%s.txt' % (fname))

    rows = []
    with open(file_path) as file:
        for line_no, line in enumerate(file, start=1):
            fields = line.strip().split('\t')
            if fields == ['']:
                # Nothing left after stripping: tolerate stray empty lines
                # instead of failing on the missing second field.
                continue
            if len(fields) < 2:
                raise IndexError(
                    '%s: line %d has fewer than two tab-separated fields'
                    % (file_path, line_no)
                )
            rows.append(fields[:2])

    # Build the array once; reshape keeps it 2-D even when `rows` is empty.
    return np.array(rows, dtype=str).reshape(-1, 2)

In [17]:
def convert_graph(fname, data_dir=data_dir):
    """Integer-encode an edge file and write the encoded graph and lookups.

    Loads ``<fname>.txt`` through ``load_graph``, label-encodes each of the
    two columns, and writes three files into ``data_dir``:

    * ``<fname>.edgelist`` -- the two integer columns, space-separated, with
      no header and no index;
    * ``<fname>_dict_cat.pk`` -- pickled dict mapping ``cat_int`` to ``cat``;
    * ``<fname>_dict_edge.pk`` -- pickled dict mapping ``edge_int`` to
      ``edge``.

    Args:
        fname: File name without extension; also the stem of every output.
        data_dir: Directory to read the input from and write the outputs to.
            The default is the value the module-level ``data_dir`` had when
            this function was defined.

    Returns:
        A tuple ``(df_graph, int_to_cat, int_to_edge)`` where ``df_graph`` is
        a DataFrame with columns ``cat``, ``edge``, ``cat_int`` and
        ``edge_int``, and the two dicts are the same mappings that were
        pickled.
    """
    graph = load_graph(fname, data_dir=data_dir)
    df_graph = pd.DataFrame(graph, columns=['cat', 'edge'])

    # NOTE(review): each column is fitted on its own, so the two id spaces
    # are independent -- a string present in both columns may get different
    # ids, and equal ids in the two columns need not denote the same string.
    # Confirm that consumers of the .edgelist file expect this.
    df_graph['cat_int'] = LabelEncoder().fit_transform(graph[:, 0])
    df_graph['edge_int'] = LabelEncoder().fit_transform(graph[:, 1])

    edgelist_path = os.path.join(data_dir, '%s.edgelist' % (fname))
    df_graph[['cat_int', 'edge_int']].to_csv(
        edgelist_path, header=False, index=False, sep=' '
    )

    # One row per distinct (label, id) pair, turned into an id -> label dict.
    int_to_cat = (
        df_graph[['cat', 'cat_int']]
        .drop_duplicates()
        .set_index('cat_int')
        .to_dict()['cat']
    )
    int_to_edge = (
        df_graph[['edge', 'edge_int']]
        .drop_duplicates()
        .set_index('edge_int')
        .to_dict()['edge']
    )

    # Context managers make sure both pickle files are flushed and closed.
    cat_path = os.path.join(data_dir, '%s_dict_cat.pk' % (fname))
    with open(cat_path, "wb") as cat_file:
        pickle.dump(int_to_cat, cat_file)

    edge_path = os.path.join(data_dir, '%s_dict_edge.pk' % (fname))
    with open(edge_path, "wb") as edge_file:
        pickle.dump(int_to_edge, edge_file)

    return df_graph, int_to_cat, int_to_edge

In [19]:
# Convert every edge file listed below; convert_graph writes its output
# files as a side effect of each call.
# NOTE(review): 'cat_edges1' has no underscore before the digit while
# 'cat_edges_2' does -- confirm both match the actual file names.
fnames = ['cat_edges1', 'cat_edges_2', 'link_edges']
for fname in fnames:
    # The three names are rebound on every iteration, so after the loop they
    # hold the results for the last entry of `fnames` only.
    df_graph, int_to_cat, int_to_edge = convert_graph(fname)