**Simple Schemes for Knowledge Graph Embedding**

This colab presents code for training knowledge graph embeddings on the FB15k-237 knowledge base using two KG embedding techniques (rotatE and transE) and evaluates the metrics on MRR and Hits@K. This colab can be run to save entitiy and relation embeddings at every 10 epochs.

Included in the colab is also code to preprocess the data and display the dimensional embedings to illustrate the geometrical properties of the embedding schemas (Visualization Tools and MID to Entity Name Mapping sections). However, this portion of the code was run on a GCP VM and does not run out of the box on colab.

**Custom DataLoader**

In [1]:
from collections import defaultdict
import numpy as np
import torch
from torch.utils.data import DataLoader, Dataset

# Allow for reproducible results
np.random.seed(234)
torch.manual_seed(234)

<torch._C.Generator at 0x1880fc9e550>

In [2]:
class DataGenerator(Dataset):

    def __init__(self, triples, num_entities, num_negative_samples, all_triples=None, data_type="train"):
        super(Dataset, self).__init__()
        self.triples = triples
        self.num_entities = num_entities
        self.num_negative_samples = num_negative_samples
        self.all_triples = all_triples
        self.data_type = data_type
        self.len = len(triples)
        self.true_head_relation, self.true_relation_tail = self._get_true_head_tail_lists()

    def __len__(self):
        return self.len

    def __getitem__(self, idx):
        positive_sample = self.triples[idx]
        head, relation, tail = positive_sample

        positive_sample = torch.LongTensor(positive_sample)

        if self.data_type == "train":
            true_heads = self.true_relation_tail[(relation, tail)]
            true_tails = self.true_head_relation[(head, relation)]

            # A number of negative sampling methods was tried - including
            # taking a set difference before random sampling, randomly sampling
            # each corrupted head, etc but the method implemented in the paper
            # ended up being the fastest
            corrupted_heads = []
            corrupted = np.random.randint(self.num_entities, size=self.num_negative_samples*2)
            while len(corrupted_heads) < self.num_negative_samples:
                mask = np.in1d(corrupted, true_heads, assume_unique=True, invert=True)
                corrupted = corrupted[mask]
                corrupted_heads.extend(corrupted)
            corrupted_heads = corrupted_heads[:self.num_negative_samples]
            corrupted_heads = torch.LongTensor(corrupted_heads)

            corrupted_tails = []
            corrupted = np.random.randint(self.num_entities, size=self.num_negative_samples*2)
            while len(corrupted_tails) < self.num_negative_samples:
                mask = np.in1d(corrupted, true_tails, assume_unique=True, invert=True)
                corrupted = corrupted[mask]
                corrupted_tails.extend(corrupted)
            corrupted_tails = corrupted_tails[:self.num_negative_samples]
            corrupted_tails = torch.LongTensor(corrupted_tails)

            filter_bias = torch.LongTensor([0] * len(positive_sample))
        else:
            # We cannot empirically say that one head is better than another for a valid
            # (head, relation, tail) triplet. Ex. (Bob, friend, Joe), (Jack, friend, Joe).
            # In this case, we replace the alternate triplet with the current true head
            # and add a filter bias of -1 to push it down in the rankings so ideally prevent it
            # from showing up in our HITS@K and MRR metrics
            corrupted_heads = [(0, test_head) if (test_head, relation, tail) not in self.all_triples
                    else (-1, head) for test_head in range(self.num_entities)]
            corrupted_heads[head] = (0, head)
            corrupted_heads = torch.LongTensor(corrupted_heads)

            corrupted_tails = [(0, test_tail) if (head, relation, test_tail) not in self.all_triples
                    else (-1, tail) for test_tail in range(self.num_entities)]
            corrupted_tails[tail] = (0, tail)
            corrupted_tails = torch.LongTensor(corrupted_tails)

            filter_bias = (corrupted_heads[:, 0], corrupted_tails[:, 0])
            corrupted_heads = corrupted_heads[:, 1]
            corrupted_tails = corrupted_tails[:, 1]

        return positive_sample, corrupted_heads, corrupted_tails, filter_bias

    # We need to be able to get a list of true heads and tails
    # quickly during negative sampling
    def _get_true_head_tail_lists(self):
        true_head_relation = defaultdict(set)
        true_relation_tail = defaultdict(set)
        for triplet in self.triples:
            head, relation, tail = triplet
            true_head_relation[(head, relation)].add(tail)
            true_relation_tail[(relation, tail)].add(head)
        return true_head_relation, true_relation_tail

In [3]:
def get_data_loader(triples, num_entities, num_negative_samples, batch_size,
        all_triples=None, data_type="train", num_workers=0):
    return DataLoader(
        DataGenerator(triples, num_entities, num_negative_samples, all_triples=all_triples, data_type=data_type),
        batch_size = batch_size,
        shuffle = True,
        num_workers = num_workers
    )

**RotatE**





In [4]:
def rotatE(head, relation, tail, embedding_range, sample_type, margin):
    # We used 2 x hidden_dim so that we can split them into real and imaginary
    # components here
    head_real, head_imag = torch.chunk(head, 2, dim=2)
    tail_real, tail_imag = torch.chunk(tail, 2, dim=2)

    # This evenly distributes the relation between [-pi, pi]
    norm_relation = relation / (embedding_range / math.pi)

    relation_real = torch.cos(norm_relation)
    relation_imag = torch.sin(norm_relation)

    real_dist = (head_real * relation_real - head_imag * relation_imag) - tail_real
    imag_dist = (head_real * relation_imag + head_imag * relation_real) - tail_imag

    # Each dimension represents its own rotation in imaginary space.
    # Take the Frobenius norm to compute the score and sum across all
    # dimensions
    total_dist = torch.stack([real_dist, imag_dist], dim=0)
    total_dist = torch.linalg.norm(total_dist, dim=0).sum(dim=2)

    # If something is close enough, we don't want to penalize it
    margin_adjusted_dist = margin - total_dist
    return margin_adjusted_dist

**TransE**





In [5]:
def transE(head, relation, tail, sample_type, margin):
  dist = head + relation - tail
  score = margin - torch.linalg.norm(dist, ord=1, dim=2)
  return score

**Generic Knowledge Graph Embedding Model**

In [6]:
import math
import torch.nn as nn
import torch.nn.functional as F

In [7]:
class KGEmbedding(nn.Module):
  def __init__(self, num_entities, num_relations, hidden_dim, margin, model):
    super(KGEmbedding, self).__init__()

    self.num_entities = num_entities
    self.num_relations = num_relations
    # Entity embed needs to have out dim 2 x hidden_dim for rotatE
    # b/c each dimension needs to have a real and imaginary component
    self.entity_dim = 2 * hidden_dim if model == 'rotatE' else hidden_dim
    self.relation_dim = hidden_dim
    self.margin = margin
    self.model = model
    self.epsilon = 2.0

    # In the paper (default):
    # margin = 12.0
    # hidden_dim = 500
    # epsilon = 2.0
    # embedding_range = (margin + epsilon) / hidden_dim = 0.028
    # This means that all entity embeddings and relation embeddings
    # are initialized with value between -0.028 and 0.028

    # Initialization of the embedding range to be close to 0 helps prevent
    # crazy initializations. We divide by hidden_dim to reduce variance in
    # per-parameter initialization as we increase total number of dimensions
    # co consider
    self.embedding_range = (self.margin + self.epsilon) / hidden_dim

    self.entity_embed = nn.Parameter(torch.zeros(self.num_entities, self.entity_dim,
                                    requires_grad=True))
    nn.init.uniform_(
        self.entity_embed,
        a=-self.embedding_range,
        b=self.embedding_range
    )

    # Relation entity can only affect the phase, not the modulus of the
    # entity embedding. The modulus is fixed to be |r_i| = 1.
    self.relation_embed = nn.Parameter(torch.zeros(self.num_relations, self.relation_dim,
                                      requires_grad=True))
    nn.init.uniform_(
        self.relation_embed,
        a=-self.embedding_range,
        b=self.embedding_range
    )

  def forward(self, sample, sample_type):
    # For each positive example, the paper has 128 negative examples using
    # the same head + relation or relation + tail but with a corrupted entity

    # Sample can be a positive example, a negative tail example, or a
    # negative head example. Each needs to be processed slightly
    # differently before being passed to the respective model function

    if sample_type == 'positive':
      # sample = Tensor([batch_size, 3]) where the
      # 3 represents head, relation, tail
      head = torch.index_select(
          self.entity_embed,
          dim=0,
          index=sample[:,0]
      ).unsqueeze(1)

      relation = torch.index_select(
          self.relation_embed,
          dim=0,
          index=sample[:,1]
      ).unsqueeze(1)

      tail = torch.index_select(
          self.entity_embed,
          dim=0,
          index=sample[:,2]
      ).unsqueeze(1)

      # We call .unsqueeze(1) on this data so that
      # the output will be of dimension [batch_size, 1, 2 * self.hidden_dim]
      # so that the num dimensions match the negative examples that will be of
      # dimension [batch_size, num_neg_samples, 2 * self.hiddenIdim]

    elif sample_type == 'negative-head':
      # positive_tuple is torch.Tensor([batch_size, 3])
      # negative_head_entities is torch.Tensor([batch_size, num_neg_samples]) and
      # needs to be used in conjunction with positive_tuple to derive the 128
      # negative examples for each positive example
      positive_tuple, negative_head_entities = sample
      batch_size, num_neg_samples = negative_head_entities.shape

      head = torch.index_select(
          self.entity_embed,
          dim=0,
          index=negative_head_entities.view(-1)
      ).reshape(batch_size, num_neg_samples, self.entity_dim)

      relation = torch.index_select(
          self.relation_embed,
          dim=0,
          index=positive_tuple[:,1]
      ).unsqueeze(1)

      tail = torch.index_select(
          self.entity_embed,
          dim=0,
          index=positive_tuple[:,2]
      ).unsqueeze(1)


    elif sample_type == 'negative-tail':
      # same as "negative-head" except this time
      # the tail needs to be adjusted
      positive_tuple, negative_tail_entities = sample
      batch_size, num_neg_samples = negative_tail_entities.shape

      head = torch.index_select(
          self.entity_embed,
          dim=0,
          index=positive_tuple[:,0]
      ).unsqueeze(1)

      relation = torch.index_select(
          self.relation_embed,
          dim=0,
          index=positive_tuple[:,1]
      ).unsqueeze(1)

      tail = torch.index_select(
          self.entity_embed,
          dim=0,
          index=negative_tail_entities.view(-1)
      ).reshape(batch_size, num_neg_samples, self.entity_dim)

    if self.model == 'rotatE':
      return rotatE(head, relation, tail, self.embedding_range, sample_type, self.margin)
    elif self.model == 'transE':
      return transE(head, relation, tail, sample_type, self.margin)

**Run Model**

In [8]:
import os
from tqdm import trange

DATA_DIR = "transDatacopy/"
MODEL_DIR = "modelscopy/"

In [9]:
# Read the entities and relations dictionary files
def load_dict(file_path):
    loaded_dict = dict()
    with open(file_path, 'r') as f:
        for line in f:
            uid, val = line.strip().split('\t')
            loaded_dict[val] = int(uid)
    return loaded_dict

# Read the KG triples
def load_triples(file_path, entity2id, relation2id):
    triples = list()
    with open(file_path, 'r') as f:
        for line in f:
            head, relation, tail = line.strip().split('\t')
            triples.append((entity2id[head], relation2id[relation], entity2id[tail]))
    return triples

In [10]:
def save_model(model, optimizer, scheduler, epoch):
    # Check if MODEL_DIR exists and create if it doesn't
    if not os.path.exists(MODEL_DIR):
        os.makedirs(MODEL_DIR)

    torch.save({
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'scheduler_state_dict': scheduler.state_dict()
        },
        os.path.join(MODEL_DIR, f'checkpoint_{epoch}')
    )

    entity_embedding = model.entity_embed.detach().cpu().numpy()
    np.save(os.path.join(MODEL_DIR, f'entity_embedding_{epoch}'), entity_embedding)

    relation_embedding = model.relation_embed.detach().cpu().numpy()
    np.save(os.path.join(MODEL_DIR, f'relation_embedding_{epoch}'), relation_embedding)

In [11]:
def eval_model(model, data, num_entities, _num_negative_samples, _batch_size, all_data, data_type):
    model.eval()

    dataloader = get_data_loader(data, num_entities, _num_negative_samples, _batch_size, all_triples=all_data, data_type=data_type)

    final_metrics = defaultdict(float)
    dataset_metrics = []

    with torch.no_grad():
        for batch, (positive_sample, corrupted_heads, corrupted_tails, filter_bias) in enumerate(dataloader):
            head_bias, tail_bias = filter_bias
            if torch.cuda.is_available():
                positive_sample = positive_sample.cuda()
                corrupted_heads = corrupted_heads.cuda()
                corrupted_tails = corrupted_tails.cuda()
                head_bias = head_bias.cuda()
                tail_bias = tail_bias.cuda()

            # When we run eval, this list of "corrupted" values does contain one
            # (or more*) true positive values. We have to include the true triplet to
            # be able to run eval metrics like MRR and Hits@K
            # *If there is more than one true positive value, the others are weighted
            # negatively such that they appear lower in the ranking and will be unlikely
            # to skew the metrics. This is seen in the head_bias and tail_bias below
            corrupted_head_dist = model((positive_sample, corrupted_heads), 'negative-head') + head_bias
            corrupted_tail_dist = model((positive_sample, corrupted_tails), 'negative-tail') + tail_bias

            # We sort by descending, b/c margin in RotatE sets large distances to be negative
            head_arg_order = torch.argsort(corrupted_head_dist, dim=1, descending=True)
            tail_arg_order = torch.argsort(corrupted_tail_dist, dim=1, descending=True)

            true_head = positive_sample[:, 0]
            true_tail = positive_sample[:, 2]

            for ind in range(len(true_head)):
                # Pytorch way to evaluate rank of item in list
                true_head_rank = (head_arg_order[ind, :] == true_head[ind]).nonzero()
                true_head_rank = true_head_rank.item() + 1

                true_tail_rank = (tail_arg_order[ind, :] == true_tail[ind]).nonzero()
                true_tail_rank = true_tail_rank.item() + 1

                dataset_metrics.append({
                    'MRR': 1.0 / true_head_rank,
                    'MR': float(true_head_rank),
                    'HITS@1': 1.0 if true_head_rank <= 1 else 0.0,
                    'HITS@3': 1.0 if true_head_rank <= 3 else 0.0,
                    'HITS@10': 1.0 if true_head_rank <= 10 else 0.0
                })
                dataset_metrics.append({
                    'MRR': 1.0 / true_tail_rank,
                    'MR': float(true_tail_rank),
                    'HITS@1': 1.0 if true_tail_rank <= 1 else 0.0,
                    'HITS@3': 1.0 if true_tail_rank <= 3 else 0.0,
                    'HITS@10': 1.0 if true_tail_rank <= 10 else 0.0
                })
        for metric in dataset_metrics:
            for key, val in metric.items():
                final_metrics[key] += val
        for key, val in final_metrics.items():
            final_metrics[key] = val / len(dataset_metrics)

        print(f"{data_type} metrics: {final_metrics}")

In [16]:
# For training our actual model we ran this on a GCP VM.
# The same code will run on Colab but will likely take much longer.

def main():
    # List of hyper parametrs that we manually tuned using
    # the suggestions of the rotatE paper as a baseline. Training
    # and modeling was one on a GCP VM rather than in Colab
    _num_negative_samples = 128
    _batch_size = 1024
    _test_batch_size = 16
    _lr = 0.0001
    _hidden_dim = 2
    _margin = 12
    _num_epochs = 40
    _weight_decay = 5e-5
    _cuda = torch.cuda.is_available()

    # FB2k-237 data is presented in terms of "mids" (Ex. /m/23sdf) that
    # represent an entity. To train embeddings for these entities, we need to
    # assign each one an index to easily model in a neural network. These
    # maps are used to relation entities and relations to indices
    entity2id = load_dict(os.path.join(DATA_DIR, 'entities.dict'))
    relation2id = load_dict(os.path.join(DATA_DIR, 'relations.dict'))
    num_entities = len(entity2id)
    num_relations = len(relation2id)

    train_data = load_triples(os.path.join(DATA_DIR, 'trainReal.csv'), entity2id, relation2id)
    eval_data = load_triples(os.path.join(DATA_DIR, 'valid.csv'), entity2id, relation2id)
    test_data = load_triples(os.path.join(DATA_DIR, 'test.csv'), entity2id, relation2id)

    # Here we compile a list of all triples for the sake of negative sampling later on.
    # We don't want to accidentally consider a true triple as a negative sample
    all_data = [train_data, eval_data, test_data]

    # Get the train dataloader
    dataloader = get_data_loader(train_data, num_entities, _num_negative_samples, _batch_size, data_type="train")

    # Here we initialize the model to use the rotatE embedding loss paradigm. When
    # we trained our transE model, we flipped the final parameter to "transE"
    model = KGEmbedding(num_entities, num_relations, _hidden_dim, _margin, "transE")
    if _cuda:
        model = model.cuda()

    # We use Adam as our optimizer, which provides an adaptive learning rate
    # per parameter, but based on empirical testing, using a learning rate
    # scheduler provided slightly better performance
    optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=_lr, weight_decay=_weight_decay)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=2)

    print("Start Training...")
    for epoch in trange(_num_epochs, desc="Train", unit="Epoch"):
        total_loss = 0
        model.train()
        optimizer.zero_grad()

        for batch, (positive_sample, corrupted_heads, corrupted_tails, _) in enumerate(dataloader):
            if _cuda:
                positive_sample = positive_sample.cuda()
                corrupted_heads = corrupted_heads.cuda()
                corrupted_tails = corrupted_tails.cuda()

            positive_sample_dist = model(positive_sample, 'positive')
            positive_score = F.logsigmoid(positive_sample_dist)
            positive_sample_loss = -positive_score.mean()

            # Here we take the negative of the distance values because
            # we want the distances between (head, relation) and (tail)
            # entities for corrupted samples to be far apart. In our model,
            # we use "margin - distance", so a large distance would give
            # a negative value that we then flip to positive. After plugging
            # into the logsigmoid, that would yield a value close to 0
            corrupted_head_dist = model((positive_sample, corrupted_heads), 'negative-head')
            corrupted_head_score = F.logsigmoid(-corrupted_head_dist)
            corrupted_head_loss = -corrupted_head_score.mean()

            corrupted_tail_dist = model((positive_sample, corrupted_tails), 'negative-tail')
            corrupted_tail_score = F.logsigmoid(-corrupted_tail_dist)
            corrupted_tail_loss = -corrupted_tail_score.mean()

            # In the paper, each corrupted_head and corrupted_tail is treated as a separate example.
            # Here, we combine them into one, so we need to weight the positive_sample loss accordingly
            # by adding it twice
            loss = (positive_sample_loss + corrupted_head_loss + positive_sample_loss + corrupted_tail_loss) / 4
            total_loss += loss.item()

            #print(f"\nbatch: {batch}, loss: {loss}, pos_loss: {positive_sample_loss}, neg_head_loss: {corrupted_head_loss}, neg_tail_loss: {corrupted_tail_loss}")

            loss.backward()
            optimizer.step()

        # Technically having the scheduler step on evaluation loss would be
        # preferred. However, results did not differ much. Because running
        # eval takes significantly longer (due to increased number of negative
        # examples), we opted to step on total train loss. When selecting the
        # appropriate model to use for the final test metrics,
        # we referenced the eval metrics (MRR, Hits@K)
        scheduler.step(total_loss)

        # Evaluate the model on the valid set and save model state for testing or re-loading
        if epoch != 0 and epoch % 10 == 0:
            eval_model(model, eval_data, num_entities, _num_negative_samples, _test_batch_size, all_data, "eval")
            save_model(model, optimizer, scheduler, epoch)
            print(f"\nepoch: {epoch}, avg loss: {total_loss / len(dataloader)}")


    # Evaludate the model on the test set
    eval_model(model, test_data, num_entities, _num_negative_samples, _test_batch_size, all_data, "test")

In [18]:
main()

Start Training...


Train:   0%|          | 0/40 [00:00<?, ?Epoch/s]


IndexError: index out of range in self

**Visualization Tools**

In [None]:
# Note: This was only tested on GCP VM, it will not
# run on Google Colab without additional tinkering
# In addition, auxiliary files are needed that preload
# FB15k-237 "mid" values to their corresponding true
# names and index values

import math
import os
import pickle
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import torch
from matplotlib import cm
from matplotlib.patches import Circle
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE

np.random.seed(9654924)

def load_pickle(file_name):
    with open(file_name, 'rb') as f:
        p = pickle.load(f)
    return p

def load_numpy(file_name):
    with open(file_name, 'rb') as f:
        n = np.load(f)
    return n

def plot_couple_dimensions(path, model, embedding_path, examples_to_use=3, dims=(2,2)):
    """ Given a number of triples, plot their positions on a graph
    """
    mid_to_index_mapping = load_pickle(os.path.join(path, "mid_to_index_mapping.pickle"))
    mid_to_name_mapping = load_pickle(os.path.join(path, "mid_to_name_mapping.pickle"))
    relation_to_index_mapping = load_pickle(os.path.join(path, "relation_to_index_mapping.pickle"))
    used_tuples_list = load_pickle(os.path.join(path, "used_tuples.pickle"))

    dims_to_plot = dims[0] * dims[1]
    entity_embedding = load_numpy(embedding_path)
    relation_embedding = load_numpy(embedding_path.replace("entity_embedding", "relation_embedding"))
    num_entities, num_dim = entity_embedding.shape

    # Randomly select
    idxs = np.random.choice(range(len(used_tuples_list)), examples_to_use, replace=False)
    selected_entities = np.array(used_tuples_list)[idxs]

    # All selected head tails have the same relation between them
    relevant_relation = int(relation_to_index_mapping[selected_entities[0][1]])

    heads = [head for head, _, _ in selected_entities]
    tails = [tail for _, _, tail in selected_entities]
    selected_entities = heads + tails

    if model == 'rotatE':
        # Split num_dim in half after accounting for imaginary dimension
        num_dim /= 2
        real, imag = np.split(entity_embedding, 2, axis=1)
        variances = (np.var(real, axis=0) + np.var(imag, axis=0)) / 2
        X, Y = real, imag

    elif model == 'transE':
        # Best to plot in at least 2D, so we select the dimensions with
        # highest variance and assign 2 dimensions per test dimension
        variances = np.var(entity_embedding, axis=0)
        double_dims = np.argsort(variances)[-(dims_to_plot*2):]

        X = entity_embedding
        Y = entity_embedding
        for ind in range(dims_to_plot):
            Y[:, double_dims[ind]] = entity_embedding[:, double_dims[-(ind+1)]]
        r = relation_embedding

    else:
        raise(f"Invalid model type for plotting: {model}")

    assert len(variances) == num_dim
    selected_dims = np.argsort(variances)[-(dims_to_plot):]

    row, col = 0, 0
    num_row, num_col = dims
    plt.figure(figsize=(32,20))
    fig, axis = plt.subplots(num_row, num_col)
    for ind, dim in enumerate(selected_dims):
        axis[row][col].axhline(0, color='black')
        axis[row][col].axvline(0, color='black')
        for mid in selected_entities:
            index = int(mid_to_index_mapping[mid])
            x = X[index][dim]
            y = Y[index][dim]
            axis[row][col].scatter(x, y, marker="o", s=50)
            axis[row][col].annotate(mid_to_name_mapping[mid], (x, y), fontsize=6)

        # Add concentric circles to illustrate rotational nature
        if model == 'rotatE':
            x_low, x_high = axis[row][col].get_xlim()
            y_low, y_high = axis[row][col].get_ylim()
            high = max(abs(x_high), abs(y_high), abs(x_low), abs(y_low))

            # Determine the interval at which to draw concentric circles
            interval = 1
            while high < 1:
                high *= 10
                interval /= 10

            multiplier = 1
            while multiplier * interval < high:
                axis[row][col].add_patch(Circle((0, 0), multiplier * interval, color='r', fill=False, linestyle='dotted'))
                multiplier += 1
            axis[row][col].set_title(f'rotatE Embedding Dim {dim}', fontsize=10)

        # Draw relations as vectors
        elif model == 'transE':
            dim_2 = double_dims[-(ind+1)]
            for head in heads:
                index = int(mid_to_index_mapping[head])
                start_x = X[index][dim]
                start_y = Y[index][dim]
                dist_x = r[relevant_relation][dim]
                dist_y = r[relevant_relation][dim_2]
                axis[row][col].arrow(start_x, start_y, dist_x, dist_y)
            axis[row][col].set_title(f'transE Embedding Dims {dim} and {dim_2}', fontsize=10)

        col += 1
        if col == num_col:
            col = 0
            row += 1

    plt.tight_layout()
    plt.savefig(f'{model}_embeddings')
relational_embedding = np.load("modelscopy/relation_embedding_30.npy",allow_pickle=True)

entity_embedding = np.load("modelscopy/entity_embedding_30.npy",allow_pickle=True)

print(relational_embedding.shape)
print(entity_embedding.shape)

#plot_couple_dimensions("", "transE", "relation_to_index_mapping.pickle")

**MID to Entity Name Mapping**

In [None]:
# Similar to above, this was run on a GCP VM, so will not run
# directly on Colab without making adjustments

import json
import os
import pickle
import re
import requests
import sys
import tqdm
import random

# Given a relation, find all corresponding MIDs and their indexes
relation = "872022"
path = "transDatacopy"
decode_file = 'fb2w.nt'

entities_file = os.path.join(path, 'entities.dict')
relations_file = os.path.join(path, 'relations.dict')

mid_to_index_mapping = dict()
mid_to_name_mapping = dict()
mid_to_url_mapping = dict()

def process_grep_out(out):
    tuples = out.split('\n')
    return [tuple(tup.split('\t')) for tup in tuples]

def add_mappings(mid):
    got_mid = True
    if mid not in mid_to_index_mapping:
        got_mid = False
        stream = os.popen(f'grep {mid} {entities_file}')
        out = stream.read()
        entity_and_ids = process_grep_out(out)
        for entity_and_id in entity_and_ids:
            if len(entity_and_id) == 2:
                eid, entity = entity_and_id
                if entity == mid:
                    mid_to_index_mapping[mid] = eid
                    got_mid = True

    if mid not in mid_to_url_mapping:
        got_mid = False
        converted_mid = mid[1:2] + '.' + mid[3:]
        stream = os.popen(f'grep {converted_mid} {decode_file}')
        out = stream.read()
        mid_to_urls = process_grep_out(out)
        for mid_to_url in mid_to_urls:
            if len(mid_to_url) == 3:
                fb, w3, wiki = mid_to_url
                found_mid = fb[1:-1].split('/')[4]
                if found_mid == converted_mid:
                    mid_to_url_mapping[mid] = wiki[1:-3]
                    got_mid = True
        # If didn't find mid, then remove it from the
        # mid_to_index_mapping map b/c it can't be used
        if not got_mid:
            mid_to_index_mapping.pop(mid)
    return got_mid

def generate_relation_map():
    """ Only needs to be generated once
    """
    with open(relations_file, 'r') as f:
        all_relations = f.read()

    relation_to_index_mapping = dict()
    list_relations = all_relations.split('\n')
    for relation in list_relations:
        out = relation.split('\t')
        if len(out) == 2:
            rid, curr_relation = out
            relation_to_index_mapping[curr_relation] = rid

    with open("relation_to_index_mapping.pickle", 'wb') as f:
        pickle.dump(relation_to_index_mapping, f)

test_file = os.path.join(path, 'test.txt')
stream = os.popen(f'grep {relation} {test_file}')
out = stream.read()
tuples = process_grep_out(out)

if os.path.exists('mid_to_index_mapping.pickle') and os.path.exists('mid_to_url_mapping.pickle'):
    with open('mid_to_index_mapping.pickle', 'rb') as f:
        mid_to_index_mapping = pickle.load(f)
    with open('mid_to_url_mapping.pickle', 'rb') as f:
        mid_to_url_mapping = pickle.load(f)
else:
    all_tuples = []
    for _, tup in enumerate(tqdm.tqdm(tuples, desc="Index and URL For Tuple")):
        if len(tup) == 3:
            head, relation, tail = tup
            if add_mappings(head) and add_mappings(tail):
                all_tuples.append(tup)

    with open('mid_to_index_mapping.pickle', 'wb') as f:
        pickle.dump(mid_to_index_mapping, f)
    with open('mid_to_url_mapping.pickle', 'wb') as f:
        pickle.dump(mid_to_url_mapping, f)
    with open('used_tuples.pickle', 'wb') as f:
        pickle.dump(all_tuples, f)

for _, (mid, url) in enumerate(tqdm.tqdm(mid_to_url_mapping.items(), desc="Name For Tuple")):
    request = requests.get(url)
    x = json.loads(request.text)['entities']
    keys = list(x.keys())
    name = x[keys[0]]['labels']['en']['value']
    mid_to_name_mapping[mid] = name

with open('mid_to_name_mapping.pickle', 'wb') as f:
    pickle.dump(mid_to_name_mapping, f)

generate_relation_map()