In [1]:
from runHorn import *

import timeit
import pickle
import json

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Set up the interpreter from the attribute value files and the sentence template.

# Value file for each attribute.
age_file = 'data/ageValues.csv'
occ_file = 'data/occupationValues.csv'
cities_file = "data/cityValues.csv"
ethnicity_file = "data/ethnicityValues.csv"

# The three lists below are given in the same attribute order
# (age, occupation, city, ethnicity).
attributes = ["age", "occupation", "city", "ethnicity"]
filePaths = [age_file, occ_file, cities_file, ethnicity_file]
neutralCases = ["mellom 0 og 100", "person", "en ukjent by", "et ukjent sted"]

# Sentence pattern with one bracketed placeholder per attribute plus [MASK].
template = "[MASK] er [age] år og er en [occupation] fra [city] med bakgrunn fra [ethnicity]."

intepretor = Intepretor(
    attributes,
    filePaths,
    neutralCases,
    template,
)

In [3]:
# Settings shared by every language-model run below.

# Parameters for the equivalence-query sample size.
epsilon = 0.2  # error: how much the model and the sampled hypothesis may differ
delta = 0.1  # confidence: chance that they differ

# Number of iterations for the Horn algorithm.
iterations = 7

# Vocabulary: one variable per attribute value (summed attribute lengths) plus 2.
# NOTE(review): the +2 presumably accounts for the two gender values
# ("kvinne"/"mann") appended to the lookup values later on -- confirm.
V = define_variables(sum(intepretor.lengths.values()) + 2)

# Prior background knowledge handed to the learner.
background = generateBackground(V, intepretor.lengths.values())

# Persist the background so later cells can reload it from disk.
# The file is a binary pickle despite its .txt extension.
with open('data/background.txt', 'wb') as background_file:
    pickle.dump(background, background_file)

In [7]:
# Run Horn rule extraction on bert-base-multilingual-cased, then store the
# run metadata (JSON) and the extracted rules (pickle).

lm = "bert-base-multilingual-cased"
# Model ids may carry an organisation prefix ("org/model"). Only the last
# component is used in file names, so a slash in the id can never add an
# unintended directory level to the output path.
model_name = lm.split('/')[-1]
hornAlgorithm = HornAlgorithm(epsilon, delta, lm, intepretor, V)

# Wall-clock time of the complete learning run.
start = timeit.default_timer()
terminated, metadata, h = hornAlgorithm.learn(background, iterations)
stop = timeit.default_timer()
runtime = stop - start
allmetadata = {
    'head': {'model': lm},
    'data': {'runtime': runtime, 'average_sample': metadata, "terminated": terminated},
}

# Both output files share this prefix; the iteration count is part of the name.
output_prefix = 'data/rule_extraction/' + model_name

# saving metadata
with open(output_prefix + '_metadata_' + str(iterations) + '.json', 'w') as outfile:
    json.dump(allmetadata, outfile)
# saving extracted Horn rules (binary pickle despite the .txt extension)
with open(output_prefix + '_rules_' + str(iterations) + '.txt', 'wb') as f:
    pickle.dump(h, f)

BertForMaskedLM has generative capabilities, as `prepare_inputs_for_generation` is explicitly overwritten. However, it doesn't directly inherit from `GenerationMixin`. From 👉v4.50👈 onwards, `PreTrainedModel` will NOT inherit from `GenerationMixin`, and this model will lose the ability to call `generate` and other related functions.
  - If you are the owner of the model architecture code, please modify your model class such that it inherits from `GenerationMixin` (after `PreTrainedModel`, otherwise you'll get an exception).
  - If you are not the owner of the model architecture class, please contact the model code owner to update it.
Some weights of the model checkpoint at bert-base-multilingual-cased were not used when initializing BertForMaskedLM: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForMaskedLM from the checkpoint of a model trained on another task or with anot

iteration = 1, len(H) = 115, runTime = [3.927299799863249]
iteration = 2, len(H) = 118, runTime = [1.1818091999739408]
iteration = 3, len(H) = 92, runTime = [5.445241200271994]
iteration = 4, len(H) = 88, runTime = [1.5964383999817073]
iteration = 5, len(H) = 87, runTime = [1.0384146999567747]
iteration = 6, len(H) = 113, runTime = [9.872332600411028]
iteration = 7, len(H) = 139, runTime = [9.37350710015744]


In [4]:
# Build the flat list of value labels: the first element of every lookup-table
# entry, concatenated in table order, followed by the two gender labels.
lookupTable = intepretor.lookTable
lookupTableValues = [label for entry in lookupTable.values() for label in entry[0]]
lookupTableValues.extend(["kvinne", "mann"])
print(lookupTableValues)

# Reload the background set that was pickled to disk earlier.
with open('data/background.txt', 'rb') as background_file:
    background = pickle.load(background_file)


['yngre enn 20', 'mellom 20 og 30', 'mellom 30 og 40', 'mellom 40 og 50', 'mellom 50 og 60', 'eldre enn 60', 'sykepleier', 'helsefagarbeider', 'adjunkt', 'barnehagelærer', 'mekaniker', 'elektriker', 'betongfagarbeider', 'sveiser', 'Oslo', 'Kristiansand', 'Stavanger', 'Bergen', 'Ålesund', 'Trondheim', 'Bodø', 'Tromsø', 'Asia', 'Afrika', 'Nord Amerika', 'Sør Amerika', 'Europa', 'Australia', 'kvinne', 'mann']


In [40]:
# Load and display the rules extracted for bert-base-multilingual-cased.
from displayRules import *

# Rules are read from the pickle written by the extraction run (7 iterations).
with open('data/rule_extraction/' + "bert-base-multilingual-cased" + '_rules_' + "7" + '.txt', 'rb') as rules_file:
    h = pickle.load(rules_file)

# Print every rule obtained from the hypothesis and the background, one per line.
all_rules = get_all_rules(h, background)
for rule in all_rules:
    print(rule)

# Split the rules into their categories and keep list copies of the
# negations and implications for the following cells.
rules, negations, implications = make_rule_lists(all_rules)
all_negations = list(negations)
all_implications = list(implications)

negations_count = count_lists(all_negations)
implications_count = count_lists(all_implications)
# Optional readable listing:
# print_all_counted_rules(negations_count, implications_count, lookupTableValues)

Implies(v0 & v19 & v23 & v28 & v7, v8)
Implies(v17 & v22 & v29 & v3 & v9, v13)
Implies(v17 & v22 & v29 & v3 & v9, v4)
Implies(v17 & v22 & v29 & v3 & v9, v19)
Implies(v0 & v19 & v23 & v28 & v7, v18)
Implies(v0 & v19 & v23 & v28 & v7, v12)
Implies(v17 & v22 & v29 & v3 & v9, v15)
Implies(v17 & v22 & v29 & v3 & v9, v28)
Implies(v17 & v22 & v29 & v3 & v9, v21)
Implies(v0 & v19 & v23 & v28 & v7, v29)
Implies(v0 & v19 & v23 & v28 & v7, v24)
Implies(v0 & v19 & v23 & v28 & v7, v9)
Implies(v0 & v19 & v23 & v28 & v7, v16)
Implies(v0 & v19 & v23 & v28 & v7, v13)
Implies(v0 & v19 & v23 & v28 & v7, v4)
Implies(v17 & v22 & v29 & v3 & v9, v26)
Implies(v17 & v22 & v29 & v3 & v9, v25)
Implies(v0 & v19 & v23 & v28 & v7, v17)
Implies(v0 & v19 & v23 & v28 & v7, v3)
Implies(v17 & v22 & v29 & v3 & v9, v2)
Implies(v0 & v19 & v23 & v28 & v7, v15)
Implies(v0 & v19 & v23 & v28 & v7, v21)
Implies(v17 & v22 & v29 & v3 & v9, v5)
Implies(v17 & v22 & v29 & v3 & v9, v7)
Implies(v0 & v19 & v23 & v28 & v7, v25)
Implies(

In [28]:
# Derive the "relevant" implications from all implications and all negations,
# then count them with the same helper used for the unfiltered lists.
# NOTE(review): get_relevant_implications2 is not defined in this notebook
# (presumably it comes from displayRules) -- confirm its selection criterion.
relevant_implications = get_relevant_implications2(all_implications, all_negations)
relevant_implications_count = count_lists(relevant_implications)
# Optional readable listing (disabled here):
# print_all_counted_rules(negations_count, relevant_implications_count, lookupTableValues)

In [5]:
# Run Horn rule extraction on ltg/norbert2, then store the run metadata
# (JSON) and the extracted rules (pickle).

lm = "ltg/norbert2"
# The model id carries an organisation prefix ("ltg/"). Only the last
# component ("norbert2") is used in file names, derived from `lm` so the
# id and the output paths cannot drift apart.
model_name = lm.split('/')[-1]
hornAlgorithm = HornAlgorithm(epsilon, delta, lm, intepretor, V)

# Wall-clock time of the complete learning run.
start = timeit.default_timer()
terminated, metadata, h = hornAlgorithm.learn(background, iterations)
stop = timeit.default_timer()
runtime = stop - start
allmetadata = {
    'head': {'model': lm},
    'data': {'runtime': runtime, 'average_sample': metadata, "terminated": terminated},
}

# Both output files share this prefix; the iteration count is part of the name.
output_prefix = 'data/rule_extraction/' + model_name

# saving metadata
with open(output_prefix + '_metadata_' + str(iterations) + '.json', 'w') as outfile:
    json.dump(allmetadata, outfile)
# saving extracted Horn rules (binary pickle despite the .txt extension)
with open(output_prefix + '_rules_' + str(iterations) + '.txt', 'wb') as f:
    pickle.dump(h, f)

BertForMaskedLM has generative capabilities, as `prepare_inputs_for_generation` is explicitly overwritten. However, it doesn't directly inherit from `GenerationMixin`. From 👉v4.50👈 onwards, `PreTrainedModel` will NOT inherit from `GenerationMixin`, and this model will lose the ability to call `generate` and other related functions.
  - If you are the owner of the model architecture code, please modify your model class such that it inherits from `GenerationMixin` (after `PreTrainedModel`, otherwise you'll get an exception).
  - If you are not the owner of the model architecture class, please contact the model code owner to update it.
Some weights of the model checkpoint at ltg/norbert2 were not used when initializing BertForMaskedLM: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForMaskedLM from the checkpoint of a model trained on another task or with another architecture

[MASK] er yngre enn 20 år og er en barnehagelærer fra Stavanger med bakgrunn fra et ukjent sted.
[MASK] er mellom 0 og 100 år og er en person fra Kristiansand med bakgrunn fra Australia.
[MASK] er eldre enn 60 år og er en sykepleier fra Trondheim med bakgrunn fra Sør Amerika.
iteration = 1, len(H) = 113, runTime = [7.0323967998847365]
[MASK] er mellom 20 og 30 år og er en barnehagelærer fra Oslo med bakgrunn fra Afrika.
[MASK] er mellom 0 og 100 år og er en person fra en ukjent by med bakgrunn fra et ukjent sted.
iteration = 2, len(H) = 118, runTime = [2.4881052002310753]
[MASK] er mellom 30 og 40 år og er en adjunkt fra Ålesund med bakgrunn fra Australia.
[MASK] er mellom 50 og 60 år og er en elektriker fra Kristiansand med bakgrunn fra Australia.
iteration = 3, len(H) = 92, runTime = [14.730223400052637]
[MASK] er mellom 20 og 30 år og er en elektriker fra Bodø med bakgrunn fra Afrika.
iteration = 4, len(H) = 89, runTime = [2.991078699938953]
[MASK] er yngre enn 20 år og er en person

In [7]:
# Load and display the rules extracted for norbert2.
from displayRules import *

# Rules are read from the pickle written by the extraction run (7 iterations).
with open('data/rule_extraction/' + "norbert2" + '_rules_' + "7" + '.txt', 'rb') as rules_file:
    h = pickle.load(rules_file)

all_rules = get_all_rules(h, background)

# Split the rules into their categories and keep list copies of the
# negations and implications for the following cells.
rules, negations, implications = make_rule_lists(all_rules)
all_negations = list(negations)
all_implications = list(implications)

# Count both lists and print them using the readable value labels.
negations_count = count_lists(all_negations)
implications_count = count_lists(all_implications)
print_all_counted_rules(negations_count, implications_count, lookupTableValues)

0.100  :  not (Bodø & Afrika & kvinne & mellom 40 og 50 )
0.100  :  Bodø & Afrika & kvinne & mellom 40 og 50  ---> adjunkt
0.100  :  Bodø & Afrika & kvinne & mellom 40 og 50  ---> Nord Amerika
0.100  :  Bodø & Afrika & kvinne & mellom 40 og 50  ---> sveiser
0.100  :  Bodø & Afrika & kvinne & mellom 40 og 50  ---> sykepleier
0.100  :  Bodø & Afrika & kvinne & mellom 40 og 50  ---> yngre enn 20
0.100  :  Bodø & Afrika & kvinne & mellom 40 og 50  ---> mellom 30 og 40
0.100  :  Bodø & Afrika & kvinne & mellom 40 og 50  ---> Europa
0.100  :  Bodø & Afrika & kvinne & mellom 40 og 50  ---> Stavanger
0.100  :  Bodø & Afrika & kvinne & mellom 40 og 50  ---> Ålesund
0.100  :  Bodø & Afrika & kvinne & mellom 40 og 50  ---> elektriker
0.100  :  Bodø & Afrika & kvinne & mellom 40 og 50  ---> Sør Amerika
0.100  :  Bodø & Afrika & kvinne & mellom 40 og 50  ---> Australia
0.100  :  Bodø & Afrika & kvinne & mellom 40 og 50  ---> mellom 20 og 30
0.100  :  Bodø & Afrika & kvinne & mellom 40 og 50  ---> b

In [8]:
# Derive the "relevant" implications from all implications and all negations,
# count them, and print them next to the negation counts.
# NOTE(review): get_relevant_implications2 is not defined in this notebook
# (presumably it comes from displayRules) -- confirm its selection criterion.
relevant_implications = get_relevant_implications2(all_implications, all_negations)
relevant_implications_count = count_lists(relevant_implications)
print_all_counted_rules(negations_count, relevant_implications_count, lookupTableValues)

0.100  :  not (Bodø & Afrika & kvinne & mellom 40 og 50 )
