In [1]:
from promptore_utils import *


# Run on the GPU when torch reports CUDA as available, otherwise on the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(device)
# Simulate argparse-style arguments in a notebook environment.
class Args:
    """Notebook stand-in for an argparse namespace.

    Every option is a keyword argument whose default is the value this
    notebook uses, so ``Args()`` reproduces the standard configuration and
    ``Args(n_rel=50)`` overrides a single option without editing the class.

    Args:
        seed: Random seed passed to the clustering helpers.
        n_rel: Number of relations/clusters used when ``auto_n_rel`` is False.
        max_len: Maximum length of tokens.
        auto_n_rel: Set to True to estimate the number of clusters.
        min_n_rel: Minimum number of relations to estimate (if auto_n_rel=True).
        max_n_rel: Maximum number of relations to estimate (if auto_n_rel=True).
        step_n_rel: Step size for relation estimation (if auto_n_rel=True).
        files: Files to load from Fewrel; ``None`` means an empty list.
        data: Dataset name.
    """

    def __init__(self, seed=0, n_rel=100, max_len=300, auto_n_rel=False,
                 min_n_rel=777, max_n_rel=1000, step_n_rel=5, files=None,
                 data="wikiphi3"):
        self.seed = seed  # Random seed
        self.n_rel = n_rel  # Number of relations/clusters
        self.max_len = max_len  # Maximum length of tokens
        self.auto_n_rel = auto_n_rel  # True -> estimate the number of clusters
        self.min_n_rel = min_n_rel  # Lower bound of the estimation range
        self.max_n_rel = max_n_rel  # Upper bound of the estimation range
        self.step_n_rel = step_n_rel  # Step size for relation estimation
        # Copy the list so instances never share (or mutate) a caller's list.
        self.files = list(files) if files is not None else []
        self.data = data


args = Args()

# Load the wikiphi3 dataset file.
df_dataset = parse_wikiphi3("DATA/wikiphi3_data_49410.pickle")

# Relation embeddings, built with the "{sent} {e1} [MASK] {e2}" template.
print("Compute relation embeddings")
relation_embeddings = compute_promptore_relation_embedding(
    df_dataset,
    template="{sent} {e1} [MASK] {e2}",
    max_len=args.max_len,
    device=device,
)

# Number of clusters: the configured value, unless estimation is switched on.
print("Compute clustering")
if not args.auto_n_rel:
    n_rel = args.n_rel
else:
    n_rel = estimate_n_rel(
        relation_embeddings,
        args.seed,
        (args.min_n_rel, args.max_n_rel),
        args.step_n_rel,
    )
    print(f'Estimated n_rel={n_rel}')

# K-means cluster assignment for every embedding.
print("Predict labels")
predicted_labels = compute_kmeans_clustering(relation_embeddings, n_rel, args.seed)

# Evaluation: B3, V-measure and ARI for the predicted labels.
(
    b3, b3_prec, b3_rec,
    v, v_hom, v_comp,
    ari,
) = evaluate_promptore(relation_embeddings, predicted_labels)
print(f'B3: prec={b3_prec} rec={b3_rec} f1={b3}')
print(f'V-measure: hom={v_hom} comp={v_comp} f1={v}')
print(f'ARI={ari}')



  from .autonotebook import tqdm as notebook_tqdm


cpu
Data len:  49410
Data len final:  49360
Compute relation embeddings


Some weights of the model checkpoint at P0L3/clirebert_clirevocab_uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertModel were not initialized from the model checkpoint at P0L3/clirebert_clirevocab_uncased and are newly initialized: ['bert.pooler.dense.bias', 'be

Compute clustering
Predict labels
B3: prec=0.04054565099945974 rec=0.9963533225283633 f1=0.07792040520580798
V-measure: hom=0.5784330801777592 comp=0.9988815659444814 f1=0.7326200163581971
ARI=5.2073274403939215e-05


In [2]:
# Guard the column assignment: there must be exactly one predicted cluster id
# per row of `relation_embeddings` (the labels were computed from this frame
# by compute_kmeans_clustering in the first cell).
assert len(predicted_labels) == len(relation_embeddings), (
    f"{len(predicted_labels)} predicted labels for "
    f"{len(relation_embeddings)} relation embeddings"
)
relation_embeddings["predicted_labels"] = predicted_labels

In [3]:
# Display the embeddings frame, including the "predicted_labels" column that
# the previous cell attached (pandas abbreviates the rendering of long frames).
relation_embeddings

Unnamed: 0,input_tokens,input_attention_mask,input_mask,output_r,head,tail,sentence,output_label,embedding,predicted_labels
0,"[tensor(1), tensor(4665), tensor(4395), tensor...","[tensor(1), tensor(1), tensor(1), tensor(1), t...",35,record held,He,1500 metres,"He is the current holder of the 1500 metres, m...",0,"[tensor(-0.1041), tensor(-0.7516), tensor(-0.0...",46
1,"[tensor(1), tensor(4351), tensor(8648), tensor...","[tensor(1), tensor(1), tensor(1), tensor(1), t...",64,engine configuration,Tech IV,I4,The Iron Duke Pontiac engine VIN code A (also ...,1,"[tensor(1.7321), tensor(-0.2729), tensor(-0.85...",19
2,"[tensor(1), tensor(5744), tensor(16288), tenso...","[tensor(1), tensor(1), tensor(1), tensor(1), t...",52,language regulatory body,Basque language,Euskaltzaindia,"Euskaltzaindia (literally, ""group of keepers o...",2,"[tensor(-0.6728), tensor(-0.6439), tensor(-0.3...",19
3,"[tensor(1), tensor(4351), tensor(17515), tenso...","[tensor(1), tensor(1), tensor(1), tensor(1), t...",36,conferred by,Letters of the Living,Báb,The Báb referred to the 18 Letters of the Livi...,3,"[tensor(0.3393), tensor(-0.4137), tensor(0.745...",32
4,"[tensor(1), tensor(4421), tensor(23), tensor(8...","[tensor(1), tensor(1), tensor(1), tensor(1), t...",41,crew member(s),Expedition 1,William Shepherd,"On 2 November 2000, Expedition 1 Commander Wil...",4,"[tensor(-0.3407), tensor(-1.8051), tensor(0.11...",62
...,...,...,...,...,...,...,...,...,...,...
2463,"[tensor(1), tensor(4611), tensor(7439), tensor...","[tensor(1), tensor(1), tensor(1), tensor(1), t...",23,were expressed as,fuel loads,dry-weight basis,All fuel loads (fuel weight per unit surface a...,2453,"[tensor(-0.4017), tensor(-0.4794), tensor(-1.0...",75
2464,"[tensor(1), tensor(11283), tensor(4377), tenso...","[tensor(1), tensor(1), tensor(1), tensor(1), t...",87,appear at,plectonemes,considerable distance,Contrary to typical textbook diagrams which sh...,2454,"[tensor(0.5765), tensor(-0.6824), tensor(-1.36...",60
2465,"[tensor(1), tensor(4351), tensor(4516), tensor...","[tensor(1), tensor(1), tensor(1), tensor(1), t...",19,had performance,model,calibration period,The model had a better temporal performance in...,2455,"[tensor(0.7606), tensor(-0.6099), tensor(-0.34...",50
2466,"[tensor(1), tensor(5251), tensor(10574), tenso...","[tensor(1), tensor(1), tensor(1), tensor(1), t...",32,peaked in,Brazil's share,2003,Although Brazil’s share decreased in certain y...,2456,"[tensor(0.1688), tensor(-0.5033), tensor(-0.82...",1


In [10]:
# Triples (head, relation, tail) assigned to cluster 10, ordered by relation text.
(
    relation_embeddings
    .loc[relation_embeddings["predicted_labels"] == 10]
    .sort_values(by="output_r")
    .loc[:, ["head", "output_r", "tail"]]
)

Unnamed: 0,head,output_r,tail
1894,Non-renewable purchased resources,are classified under,resource type
2305,land cover,belongs to group,first group
888,flow velocity,calculated by,turbulence model
926,heat value,calculated from,module heating capacity
2267,potential AEP,calculated using,wind turbine power curve
1215,lake effect case,can be compared with,synoptic case
85,bathtub curve,codomain,hazard function
405,non-breeding season,compared to,breeding season
1849,lion grade,composition includes,bottom ash ingot
1224,α,constrained by,experiment


In [11]:
# Look the example row up once and reuse it for all three prints.
example_row = df_dataset.iloc[1397]

print(example_row["sent"])
print(example_row["r"])
print(len(example_row["sent"]))  # length in characters, not tokens

# Character lengths of 20 randomly drawn sentences. The draw is seeded with
# args.seed so the printed values are the same on every run; only the "sent"
# column is sampled because nothing else from the rows is used.
sampled_sentences = df_dataset["sent"].sample(20, random_state=args.seed)
for sentence in sampled_sentences:
    print(len(sentence))
    

The comparison of the total daily simulation time for Price1, Price2, Price3 and Price0 using GA-EED shows that Price1 achieves a simulation time of 14.23 s, Price2 achieves a simulation time of 13.80 s, Price3 achieves a simulation time of 11.66 s while Price0 achieves a simulation time of 13.61 s. However, GA-EED achieves the lowest operational cost of US$8224.00 compared to US$8238.60 and US$8414.81 for MEED algorithm and PSO respectively.
    
achieves
451
158
78
182
284
204
124
157
142
245
161
299
344
496
129
181
132
201
130
155
68
