In [1]:
import numpy as np
import faiss
import json

In [2]:
# Notebook parameters -- adjust these before running the cells below.

# Inputs
embedding_file = 'wikidataos.complEx.graph-embeddings.txt'  # text file; its first line is skipped when loading
dim = 100        # number of embedding columns read after the leading qnode column
max_rows = None  # forwarded to np.loadtxt; None reads every row

# Outputs
qnode_to_index_file = 'qnodes_to_id.json'  # JSON dump of the qnode -> row-index mapping
output_index = 'wikidata_index.idx'        # destination of the serialized FAISS index

### Build the qnode to index dictionary

In [None]:
# Map each qnode (first whitespace-separated column of the embedding file) to
# its 0-based row position, counted after the skipped header line. The vectors
# are later loaded from the same file with the same skiprows/max_rows, so the
# positions follow the same row order.
#
# ndmin=1 keeps the loaded column one-dimensional even when only a single row
# is read (e.g. max_rows=1); without it np.loadtxt returns a 0-d array, which
# cannot be iterated. .tolist() turns the numpy strings into plain Python str
# keys before the dictionary is built.
qnode_to_index = {
    qnode: i
    for i, qnode in enumerate(
        np.loadtxt(embedding_file, skiprows=1, max_rows=max_rows,
                   usecols=[0], dtype='str', ndmin=1).tolist()
    )
}

In [None]:
# Save the qnode -> index mapping to qnode_to_index_file as JSON.
with open(qnode_to_index_file, 'w') as json_out:
    json.dump(qnode_to_index, json_out)

### Build the FAISS index

In [None]:
%time xb = np.loadtxt(embedding_file, skiprows=1, max_rows=max_rows, delimiter=' ', \
                      usecols=range(1,dim+1)).astype(np.float32)

In [None]:
# Create the (still untrained) index for `dim`-dimensional vectors.
# The index currently compares vectors with L2 (Euclidean) distance.
# NOTE(review): the earlier note here read "Use METRIC_L2 in case you don't
# want projections", which suggests another metric (presumably
# faiss.METRIC_INNER_PRODUCT) was the alternative -- confirm which is intended.
index = faiss.index_factory(
    dim,
    'IVF65536_HNSW32,Flat',  # factory string; see the FAISS index-factory docs for its components
    faiss.METRIC_L2,
)

In [None]:
# Switch the index's direct map to the array variant before any vectors are added.
# NOTE(review): presumably so stored vectors can later be looked up by their id
# -- confirm against the code that consumes the saved index.
index.set_direct_map_type(faiss.DirectMap.Array)

In [None]:
# Display the index's trained flag before train() is called.
index.is_trained

In [None]:
# Train the index on the loaded vectors; %time reports how long the call takes.
%time index.train(xb)

In [None]:
# Display the trained flag again, now that train() has run.
index.is_trained

In [None]:
# Add all loaded vectors to the index (timed).
# NOTE(review): presumably vectors receive ids in row order, matching the
# positions stored in qnode_to_index -- confirm against the FAISS add() docs.
%time index.add(xb)

In [None]:
# Write the populated index to the path given by output_index (timed).
%time faiss.write_index(index, output_index)