### Techniques to convert KGC in plain text

We will follow [From Discrimination to Generation: Knowledge Graph Completion with Generative Transformer](https://arxiv.org/pdf/2202.02113.pdf).

In [1]:
%load_ext autoreload
%autoreload 2
%load_ext jupyter_black

In [2]:
cd ..

c:\Users\Matheus\Documents\Git\knowledge-graph-completion


In [3]:
# Load data
from src.utils import load_fb15k237, load_wn18rr, get_hist

PATH_FB15k237 = "data/datasets_knowledge_embedding/FB15k-237"

train, valid, test, entity2wikidata = load_fb15k237(PATH_FB15k237)

c:\Users\Matheus\Documents\Git\knowledge-graph-completion\venv\lib\site-packages\numpy\.libs\libopenblas.FB5AE2TYXYH2IJRDKGDGQ3XBKLKTF43H.gfortran-win_amd64.dll
c:\Users\Matheus\Documents\Git\knowledge-graph-completion\venv\lib\site-packages\numpy\.libs\libopenblas64__v0.3.21-gcc_10_3_0.dll


In [4]:
# Filter data without entity description
import pandas as pd

pd.options.mode.copy_on_write = True

all_data_fb = pd.concat([train, valid, test], axis=0)

df_entity = pd.DataFrame(entity2wikidata.keys(), columns=["head"])

all_data_fb_filtered = all_data_fb[
    all_data_fb["head"].isin(df_entity["head"])
    & all_data_fb["tail"].isin(df_entity["head"])
]

In [5]:
def map_relation_to_text(relation):
    return "has " + relation.split("/")[-1].replace("_", " ") + " of"

In [6]:
all_data_fb_filtered["head_text"] = all_data_fb_filtered["head"].apply(
    lambda i: entity2wikidata[i]["label"]
)
all_data_fb_filtered["relation_text"] = all_data_fb_filtered["relation"].apply(
    lambda i: map_relation_to_text(i)
)
all_data_fb_filtered["tail_text"] = all_data_fb_filtered["tail"].apply(
    lambda i: entity2wikidata[i]["label"]
)

all_data_fb_filtered["text"] = (
    all_data_fb_filtered["head_text"]
    + " "
    + all_data_fb_filtered["relation_text"]
    + " "
    + all_data_fb_filtered["tail_text"]
    + "."
)

all_data_fb_filtered["id"] = all_data_fb_filtered.index

In [7]:
all_data_fb_filtered

Unnamed: 0,head,relation,tail,head_text,relation_text,tail_text,text,id
0,/m/027rn,/location/country/form_of_government,/m/06cx9,Dominican Republic,has form of government of,republic,Dominican Republic has form of government of r...,0
1,/m/017dcd,/tv/tv_program/regular_cast./tv/regular_tv_app...,/m/06v8s0,Mighty Morphin Power Rangers,has actor of,Wendee Lee,Mighty Morphin Power Rangers has actor of Wend...,1
2,/m/07s9rl0,/media_common/netflix_genre/titles,/m/0170z3,drama film,has titles of,American History X,drama film has titles of American History X.,2
3,/m/01sl1q,/award/award_winner/awards_won./award/award_ho...,/m/044mz_,Michelle Rodriguez,has award winner of,Naveen Andrews,Michelle Rodriguez has award winner of Naveen ...,3
4,/m/0cnk2q,/soccer/football_team/current_roster./sports/s...,/m/02nzb8,Australia national association football team,has position of,midfielder,Australia national association football team h...,4
...,...,...,...,...,...,...,...,...
20461,/m/0l5yl,/people/person/profession,/m/02hrh1q,Jack Benny,has profession of,actor,Jack Benny has profession of actor.,20461
20462,/m/01z5tr,/people/person/nationality,/m/09c7w0,Debra Messing,has nationality of,United States of America,Debra Messing has nationality of United States...,20462
20463,/m/0p5mw,/music/artist/contribution./music/recording_co...,/m/06w87,David Mansfield,has performance role of,steel guitar,David Mansfield has performance role of steel ...,20463
20464,/m/0727h,/military/military_conflict/combatants./milita...,/m/0bk25,Second Punic War,has combatants of,Macedonian kingdom,Second Punic War has combatants of Macedonian ...,20464


In [13]:
import tqdm
from tqdm.auto import tqdm

tqdm.pandas()


def train_demonstration_generator(row):
    # select 2 samples of rows with same relation.
    # generate triple of select rows
    # contat triples and return

    # other heuristic is split all relations e groups of 3 and use 2 for demonstration 1 to fill
    
    to_fill = row.head_text + row.relation_text + " "
    return " ".join(
        all_data_fb_filtered[
            (all_data_fb_filtered["relation"] == row.relation)
            & (all_data_fb_filtered["id"] != row.id)
        ]
        .sample(2, random_state=42)["text"]
        .to_list()
        + [to_fill]
    )



In [20]:
import swifter

all_data_fb_filtered["demonstration_input"] = all_data_fb_filtered.swifter.apply(
    lambda row: train_demonstration_generator(row), axis=1
)

Pandas Apply: 100%|██████████| 304205/304205 [1:05:18<00:00, 77.63it/s]


In [21]:
all_data_fb_filtered.to_csv(
    "data/datasets_knowledge_embedding/FB15k-237/processed_data.csv", index=False
)

In [22]:
pd.read_csv("data/datasets_knowledge_embedding/FB15k-237/processed_data.csv").head(10)

Unnamed: 0,head,relation,tail,head_text,relation_text,tail_text,text,id,demonstration_input
0,/m/027rn,/location/country/form_of_government,/m/06cx9,Dominican Republic,has form of government of,republic,Dominican Republic has form of government of r...,0,New Zealand has form of government of parliame...
1,/m/017dcd,/tv/tv_program/regular_cast./tv/regular_tv_app...,/m/06v8s0,Mighty Morphin Power Rangers,has actor of,Wendee Lee,Mighty Morphin Power Rangers has actor of Wend...,1,The Jetsons Meet the Flintstones has actor of ...
2,/m/07s9rl0,/media_common/netflix_genre/titles,/m/0170z3,drama film,has titles of,American History X,drama film has titles of American History X.,2,historical period drama has titles of The Othe...
3,/m/01sl1q,/award/award_winner/awards_won./award/award_ho...,/m/044mz_,Michelle Rodriguez,has award winner of,Naveen Andrews,Michelle Rodriguez has award winner of Naveen ...,3,Jenna Ushkowitz has award winner of Josh Sussm...
4,/m/0cnk2q,/soccer/football_team/current_roster./sports/s...,/m/02nzb8,Australia national association football team,has position of,midfielder,Australia national association football team h...,4,FC Kuban Krasnodar has position of goalkeeper....
5,/m/04nrcg,/soccer/football_team/current_roster./soccer/f...,/m/02sdk9v,Maldives national football team,has position of,forward,Maldives national football team has position o...,5,Sunderland A.F.C. has position of defender. A....
6,/m/07nznf,/film/actor/film./film/performance/film,/m/014lc_,Bryan Singer,has film of,Star Trek: Nemesis,Bryan Singer has film of Star Trek: Nemesis.,6,Hank Azaria has film of Quiz Show. Adam Baldwi...
7,/m/02qyp19,/award/award_category/nominees./award/award_no...,/m/02d413,BAFTA Award for Best Original Screenplay,has nominated for of,Philadelphia,BAFTA Award for Best Original Screenplay has n...,7,Golden Raspberry Award for Worst Screenplay ha...
8,/m/0q9kd,/award/award_nominee/award_nominations./award/...,/m/0184jc,Danny DeVito,has award nominee of,Guy Pearce,Danny DeVito has award nominee of Guy Pearce.,8,Peter Krause has award nominee of Justin Thero...
9,/m/03q5t,/music/performance_role/regular_performances./...,/m/07y_7,harpsichord,has role of,violin,harpsichord has role of violin.,9,lead guitar has role of Dobro. theremin has ro...
