# Implementation of the article on Poincaré Embeddings

In [111]:
import autograd.numpy as np

def process_text(path):
    """Read a tweet dump and return the cleaned text of every line.

    Each line is reduced to the characters from index 49 up to (but not
    including) its last 22 characters -- presumably the id/timestamp header
    and the trailing short URL plus newline; TODO confirm against the data
    file. Surrounding spaces and ``|`` separators are then removed, together
    with a leading ``VIDEO:`` or ``AUDIO:`` tag.

    Args:
        path: Path of the text file to read, one record per line.

    Returns:
        A 1-D NumPy array holding one cleaned string per input line.
    """
    separators = " |"
    media_tags = ("VIDEO:", "AUDIO:")
    clean_dataset = []
    with open(path, "r") as f:
        for el in f:
            # Separators must go first: the tag only becomes a prefix once
            # the leading "|" has been removed.
            clean = el[49:-22].strip(separators)
            # str.strip("VIDEO:") would treat its argument as a set of
            # characters and eat letters of ordinary words, so the tags are
            # removed as literal prefixes instead.
            for tag in media_tags:
                if clean.startswith(tag):
                    clean = clean[len(tag):]
            clean_dataset.append(clean.strip(separators))
    return np.array(clean_dataset)

# Clean every line of Health-Tweets/bbchealth.txt and print the shape of the
# resulting array of strings.
data = process_text("Health-Tweets/bbchealth.txt")
print(data.shape)

(3929,)


In [128]:
def distance(u, v):
    """Return the Poincaré distance between two points of the open unit ball.

    The distance is
    ``arccosh(1 + 2 * ||u - v||^2 / ((1 - ||u||^2) * (1 - ||v||^2)))``.

    Args:
        u: 1-D array with Euclidean norm strictly below 1.
        v: 1-D array with Euclidean norm strictly below 1.

    Returns:
        The hyperbolic distance between ``u`` and ``v``.

    Raises:
        AssertionError: If either point lies outside the open unit ball.
    """
    assert np.linalg.norm(u) < 1 and np.linalg.norm(v) < 1
    norm2u = np.linalg.norm(u, ord=2)**2
    # Must be the squared norm of v (not u), otherwise the distance is wrong
    # and not symmetric in its arguments.
    norm2v = np.linalg.norm(v, ord=2)**2
    norm2distuv = np.linalg.norm((u - v), ord=2)**2
    t = 1 + 2 * (norm2distuv / ((1 - norm2u) * (1 - norm2v)))
    return np.arccosh(t)

def grad_dist(theta, x):
    """Return the Euclidean gradients of the Poincaré distance d(theta, x).

    With ``alpha = 1 - ||theta||^2``, ``beta = 1 - ||x||^2`` and
    ``gamma = 1 + 2 * ||theta - x||^2 / (alpha * beta)`` the partial
    derivative with respect to ``theta`` is::

        4 / (beta * sqrt(gamma^2 - 1))
            * ((||x||^2 - 2<theta, x> + 1) / alpha^2 * theta - x / alpha)

    The derivative with respect to ``x`` is the same expression with the
    roles of ``theta`` and ``x`` exchanged.

    Args:
        theta: 1-D array inside the open unit ball.
        x: 1-D array inside the open unit ball, different from ``theta``
            (for ``theta == x`` gamma is 1 and the expression divides by
            zero).

    Returns:
        A two-element list ``[grad_theta, grad_x]``.
    """
    sq_norm_theta = np.linalg.norm(theta, ord=2)**2
    sq_norm_x = np.linalg.norm(x, ord=2)**2
    inner = np.dot(theta, x)

    alpha = 1 - sq_norm_theta
    beta = 1 - sq_norm_x
    # gamma is symmetric in theta and x, so it is shared by both gradients.
    gamma = 1 + (2 / (alpha * beta)) * (np.linalg.norm((theta - x), ord=2))**2
    root = np.sqrt(gamma**2 - 1)

    grad_theta = 4 / (beta * root) * (
        (sq_norm_x - 2 * inner + 1) / alpha**2 * theta - x / alpha
    )
    grad_x = 4 / (alpha * root) * (
        (sq_norm_theta - 2 * inner + 1) / beta**2 * x - theta / beta
    )
    return [grad_theta, grad_x]

def rsgd(theta, stepsize=0.001, n_iter=100):
    """Run Riemannian stochastic gradient descent on ``theta``.

    At every iteration one row of the module-level ``x_train`` array is drawn
    uniformly at random, the Euclidean gradient of the distance with respect
    to ``theta`` is rescaled by ``(1 - ||theta||^2)^2 / 4`` and the step is
    passed through ``projection``.

    NOTE(review): ``x_train`` is not defined in the visible part of the file;
    it is assumed to be a 2-D array with one sample per row -- confirm.

    Args:
        theta: Starting point, a 1-D array inside the open unit ball.
        stepsize: Learning rate applied to the rescaled gradient.
        n_iter: Number of stochastic updates to perform.

    Returns:
        The value of ``theta`` after ``n_iter`` updates.
    """
    for _ in range(n_iter):
        i = np.random.randint(0, x_train.shape[0])
        const = ((1 - np.linalg.norm(theta)**2)**2 / 4)
        # grad_dist expects the sample itself (not its index) and returns
        # [d/dtheta, d/dx]; only the theta component drives the update.
        grad_theta = grad_dist(theta, x_train[i])[0]
        theta = projection(theta - stepsize * const * grad_theta)
    # Without this return the optimised point would be lost to the caller.
    return theta
        
def projection(theta, epsilon=1e-5):
    """Pull ``theta`` back inside the open unit ball when it has left it.

    Args:
        theta: 1-D array to constrain.
        epsilon: Margin kept between the projected point and the unit sphere.

    Returns:
        ``theta`` unchanged when its Euclidean norm is below 1, otherwise
        ``theta`` rescaled to norm ``1 - epsilon``.
    """
    norm_theta = np.linalg.norm(theta, ord=2)
    if norm_theta >= 1:
        # Shrink along the radius. Subtracting epsilon from every component
        # instead can push points with negative coordinates further out and
        # leave them outside the ball.
        return theta / norm_theta * (1 - epsilon)
    return theta
    