In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import pickle
import time
from ipypb import track

import GCP_GD3 as gcp
from general_functions import sqrt_err_relative, check_coo_tensor, gen_coo_tensor
from general_functions import create_filter, hr

### Warm-up (zeroth) launch of the function (for Numba compilation):

In [2]:
# Small random sparse tensor in COO form, used only for the warm-up run below.
init_shape = (100, 100, 10)
coo, vals = gen_coo_tensor(init_shape, density=0.002)

# check_coo_tensor signals a problem by returning the string "Bad".
assert check_coo_tensor(coo) != "Bad"

In [5]:
%%time

# Short run of gcp_gd on the small synthetic tensor (coo, vals, init_shape).
# Per the section heading, the point of this cell is the first ("zeroth")
# call that lets Numba do its work before the real experiment is timed.
# NOTE: max_iter, rank and seed are re-assigned further down for the
# FB15k-237 run, so the values here only affect this warm-up.
max_iter = 12
rank = 5
seed = 13

# a, b, c are later passed to sqrt_err_relative as the decomposition result
# (presumably the three factor matrices - confirm in GCP_GD3.gcp_gd);
# err is indexed with [-1] below, `it` is not used in this cell.
a, b, c, err, it = gcp.gcp_gd(
    coo, vals, init_shape,
    gcp.bernoulli_logit_loss,
    gcp.bernoulli_logit_loss_grad,
    rank=rank,
    lr=0.1,
    l2=0,
    max_iter=max_iter,
    tol=1e-8,
    seed=seed,
    show_iter=False,
    it_over=False,
)

# Relative error of the result against the stored entries, printed next to
# the last element of the error history returned by gcp_gd.
rerr = sqrt_err_relative(
    coo, vals, init_shape, a, b, c,
)
print(rerr, err[-1])

0.9999628598575782 [0.69319699]
CPU times: user 5.57 ms, sys: 0 ns, total: 5.57 ms
Wall time: 5.49 ms


### Load the data:

In [6]:
path_data = "Link_Prediction_Data/FB15K237/"


def _load_pickle(name):
    """Unpickle and return the object stored in the file ``path_data + name``.

    The file is opened in a ``with`` block so its handle is closed as soon as
    loading finishes; a bare ``pickle.load(open(...))`` leaves the handle open
    until the garbage collector happens to reclaim it.
    """
    # NOTE(review): pickle.load can execute arbitrary code embedded in the
    # file - only load dataset files that come from a trusted source.
    with open(path_data + name, 'rb') as fh:
        return pickle.load(fh)


# Vocabulary lists (their lengths define the tensor shape later on).
entity_list = _load_pickle('entity_list')
relation_list = _load_pickle('relation_list')

# Dataset splits.
train_triples = _load_pickle('train_triples')
valid_triples = _load_pickle('valid_triples')
test_triples = _load_pickle('test_triples')
train_valid_triples = _load_pickle('train_valid_triples')

# Lookup maps (not used elsewhere in the visible cells; kept for later use).
entity_map = _load_pickle('entity_map')
relation_map = _load_pickle('relation_map')

# Every known triple: train + valid + test. Passed to create_filter below.
all_triples = train_valid_triples + test_triples

In [7]:
# Build one filter per evaluation split from the full set of known triples.
# (Presumably used for "filtered" ranking - confirm in
# general_functions.create_filter.)
test_filter = create_filter(test_triples, all_triples)
valid_filter = create_filter(valid_triples, all_triples)

### Create Sparse COO Train Tensor:

In [8]:
# One stored entry per training triple.
nnz = len(train_triples)

# Coordinates come straight from the triples (presumably one
# (entity, relation, entity) index triple per row - confirm), and every
# stored entry has value 1.0.
coords = np.array(train_triples, dtype=np.int32)
values = np.ones(nnz, dtype=np.float64)

# Tensor modes: entity x relation x entity.
data_shape = (len(entity_list), len(relation_list), len(entity_list))
data_shape

(14541, 237, 14541)

## GCP-GD3:

In [177]:
# Hyper-parameters for the FB15k-237 run (these overwrite the warm-up values).

# Model
rank = 100       # earlier note: 2
seed = 2         # earlier note: seed 13 -> 0.197

# Optimiser
lr = 100         # earlier note: 100.5
l2 = 0           # earlier note: 0
max_iter = 1000  # earlier note: 1000

In [None]:
%%time

# Fit the decomposition on the FB15k-237 training tensor using the
# hyper-parameters from the previous cell. err_arr and `it` are used by the
# plotting cell below (err_arr[:it] is drawn against iterations 1..it).
a, b, c, err_arr, it = gcp.gcp_gd(
    coords, values, data_shape,
    gcp.bernoulli_logit_loss, # loss function (original note names gaussian_loss as an alternative)
    gcp.bernoulli_logit_loss_grad, # gradient function paired with the loss above
    rank=rank,
    lr=lr,
    l2=l2,
    max_iter=max_iter,
    tol=1e-8,
    seed=seed,
    show_iter=False,
    it_over=True,
)

In [None]:
# Relative error of the fitted factors on the training entries, followed by
# the number of NaNs in each of a, b, c (non-zero counts mean the run diverged).
rerr = sqrt_err_relative(coords, values, data_shape, a, b, c)
print(f"Relative error = {rerr}, {(np.isnan(a)).sum()}, {(np.isnan(b)).sum()}, {(np.isnan(c)).sum()}")

In [None]:
# Convergence curve: err_arr[:it] against iterations 1..it, log-scaled y-axis.
# NOTE(review): err_arr is the error history returned by gcp_gd; the warm-up
# output suggests it may hold loss values rather than relative error -
# confirm that the y-axis label is accurate.
fig, ax = plt.subplots()
ax.set_xlabel("Iteration")
ax.set_ylabel("Relative error")
ax.set_title(f"FB15k-237 / GCP-GD3(R={rank})")
ax.set_yscale("log")
ax.plot(np.arange(1, it + 1), err_arr[:it], '-*', c="#8b0a50")

In [None]:
%%time
# Evaluate the fitted factors on the first 1000 validation triples with their
# matching filter entries. The list [1, 3, 10] is presumably the set of
# cut-offs k for hit rate@k - confirm in general_functions.hr.
hr(valid_filter[:1000], valid_triples[:1000], a, b, c, [1, 3, 10])

%%time
# Evaluate the fitted factors on the first 1000 test triples with their
# matching filter entries. The list [1, 3, 10] is presumably the set of
# cut-offs k for hit rate@k - confirm in general_functions.hr.
hr(test_filter[:1000], test_triples[:1000], a, b, c, [1, 3, 10])