### Model Training Setup

In [1]:
%load_ext autoreload
%autoreload 2
%load_ext jupyter_black

In [2]:
import os

os.chdir("../")

In [3]:
### SETTINGS ###

DATASETS = {
    "FB15k-237-DECODE-ONLY-LABEL": "data/data_processed/FB15k-237/decode_only_label/",
}
MODELS = {
    "bart-small": "lucadiliello/bart-small",
    "bart-base": "facebook/bart-base",
    "bart-large": "facebook/bart-large",
}

# Dataset
DATASET = "FB15k-237-DECODE-ONLY-LABEL"
MODEL = "bart-small"

MAX_LENGTH = 50
BATCH_SIZE = 1

# If True, use only DEV_BATCH of dataset
dev = True
DEV_BATCH = 100

### Load data

In [4]:
import pandas as pd
from src.utils import load_fb15k237

pd.set_option("display.max_columns", None)
pd.set_option("display.expand_frame_repr", False)
pd.set_option("max_colwidth", None)

# Path of processed datasets versioned

processed_data = pd.read_csv(DATASETS[DATASET] + "/processed_data.csv")

### Load the model

In [7]:
from transformers import (
    BartTokenizer,
    DataCollatorForSeq2Seq,
)

import torch

tokenizer = BartTokenizer.from_pretrained(MODELS[MODEL])

### Masking data

In [10]:
processed_data["data_input"] = (
    processed_data["demonstration_input"] + "%s." % tokenizer.mask_token
)
processed_data["data_label"] = processed_data["tail_text"]

if dev:
    if DEV_BATCH == -1:
        pass
    else:
        processed_data = processed_data.head(DEV_BATCH)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  processed_data["data_input"] = (
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  processed_data["data_label"] = (


In [11]:
processed_data

Unnamed: 0,head,relation,tail,head_text,relation_text,tail_text,text,id,demonstration_input,data_input,data_label
0,/m/027rn,/location/country/form_of_government,/m/06cx9,Dominican Republic,has form of government of,republic,Dominican Republic has form of government of republic.,0,New Zealand has form of government of parliamentary system. Republic of the Congo has form of government of presidential system. Dominican Republic has form of government of,New Zealand has form of government of parliamentary system. Republic of the Congo has form of government of presidential system. Dominican Republic has form of government of <mask>.,republic
1,/m/017dcd,/tv/tv_program/regular_cast./tv/regular_tv_appearance/actor,/m/06v8s0,Mighty Morphin Power Rangers,has actor of,Wendee Lee,Mighty Morphin Power Rangers has actor of Wendee Lee.,1,The Jetsons Meet the Flintstones has actor of Henry Corden. Six Feet Under has actor of Richard Jenkins. Mighty Morphin Power Rangers has actor of,The Jetsons Meet the Flintstones has actor of Henry Corden. Six Feet Under has actor of Richard Jenkins. Mighty Morphin Power Rangers has actor of <mask>.,Wendee Lee
2,/m/07s9rl0,/media_common/netflix_genre/titles,/m/0170z3,drama film,has titles of,American History X,drama film has titles of American History X.,2,historical period drama has titles of The Other Boleyn Girl. Bravo has titles of Top Chef. drama film has titles of,historical period drama has titles of The Other Boleyn Girl. Bravo has titles of Top Chef. drama film has titles of <mask>.,American History X
3,/m/01sl1q,/award/award_winner/awards_won./award/award_honor/award_winner,/m/044mz_,Michelle Rodriguez,has award winner of,Naveen Andrews,Michelle Rodriguez has award winner of Naveen Andrews.,3,Jenna Ushkowitz has award winner of Josh Sussman. Paul Dini has award winner of Adam Horowitz. Michelle Rodriguez has award winner of,Jenna Ushkowitz has award winner of Josh Sussman. Paul Dini has award winner of Adam Horowitz. Michelle Rodriguez has award winner of <mask>.,Naveen Andrews
4,/m/0cnk2q,/soccer/football_team/current_roster./sports/sports_team_roster/position,/m/02nzb8,Australia national association football team,has position of,midfielder,Australia national association football team has position of midfielder.,4,FC Kuban Krasnodar has position of goalkeeper. PFC Levski Sofia has position of midfielder. Australia national association football team has position of,FC Kuban Krasnodar has position of goalkeeper. PFC Levski Sofia has position of midfielder. Australia national association football team has position of <mask>.,midfielder
...,...,...,...,...,...,...,...,...,...,...,...
95,/m/06cqb,/music/genre/parent_genre,/m/0827d,reggae,has parent genre of,world music,reggae has parent genre of world music.,101,symphonic rock has parent genre of progressive rock. post-punk has parent genre of reggae. reggae has parent genre of,symphonic rock has parent genre of progressive rock. post-punk has parent genre of reggae. reggae has parent genre of <mask>.,world music
96,/m/014zcr,/base/popstra/celebrity/breakup./base/popstra/breakup/participant,/m/05m63c,George Clooney,has participant of,Krista Allen,George Clooney has participant of Krista Allen.,102,Nicole Kidman has participant of Tom Cruise. Johnny Depp has participant of Kate Moss. George Clooney has participant of,Nicole Kidman has participant of Tom Cruise. Johnny Depp has participant of Kate Moss. George Clooney has participant of <mask>.,Krista Allen
97,/m/0yyg4,/award/award_winning_work/awards_won./award/award_honor/award,/m/027c924,Mississippi Burning,has award of,National Board of Review Award for Best Director,Mississippi Burning has award of National Board of Review Award for Best Director.,103,Kuch Kuch Hota Hai has award of Filmfare Award for Best Supporting Actor. Sex and the City 2 has award of Golden Raspberry Award for Worst Screen Couple/Ensemble. Mississippi Burning has award of,Kuch Kuch Hota Hai has award of Filmfare Award for Best Supporting Actor. Sex and the City 2 has award of Golden Raspberry Award for Worst Screen Couple/Ensemble. Mississippi Burning has award of <mask>.,National Board of Review Award for Best Director
98,/m/0h3y,/location/country/capital,/m/0rtv,Algeria,has capital of,Algiers,Algeria has capital of Algiers.,104,Kingdom of Prussia has capital of Berlin. Austria-Hungary has capital of Vienna. Algeria has capital of,Kingdom of Prussia has capital of Berlin. Austria-Hungary has capital of Vienna. Algeria has capital of <mask>.,Algiers


In [16]:
from src.datasetkgc import DatasetKGC, generate_train_valid_dataset

In [17]:
%%time
train_ds, valid_ds = generate_train_valid_dataset(processed_data, tokenizer, 50)

  0%|          | 0/80 [00:00<?, ?it/s]

  0%|          | 0/20 [00:00<?, ?it/s]

CPU times: total: 109 ms
Wall time: 97 ms


In [None]:
torch.save(train_ds, dataset_paths[DATASET] + "/train_ds.pth")
torch.save(valid_ds, dataset_paths[DATASET] + "/valid_ds.pth")