In [1]:
from utils import load_models, print_clusters
from utils import IntersectionStrategy, StrictIntersectionStrategy, PartialIntersectionStrategy, FuzzyIntersectionStrategy

In [2]:
# Load both model objects once; every later cell reuses `predictor` and `nlp`.
models = load_models()
predictor, nlp = models

Some weights of BertModel were not initialized from the model checkpoint at SpanBERT/spanbert-large-cased and are newly initialized: ['bert.pooler.dense.weight', 'bert.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [3]:
# Build one instance of each intersection strategy, in strict -> partial -> fuzzy
# order, all sharing the same `predictor` and `nlp` objects.
strict, partial, fuzzy = (
    strategy_cls(predictor, nlp)
    for strategy_cls in (
        StrictIntersectionStrategy,
        PartialIntersectionStrategy,
        FuzzyIntersectionStrategy,
    )
)

In [16]:
text = "Austin Jermaine Wiley (born January 8, 1999) is an American basketball player. He currently plays for the Auburn Tigers in the Southeastern Conference. Wiley attended Spain Park High School in Hoover, Alabama, where he averaged 27.1 points, 12.7 rebounds and 2.9 blocked shots as a junior in 2015-16, before moving to Florida, where he went to Calusa Preparatory School in Miami, Florida, while playing basketball at The Conrad Academy in Orlando."

# Read the prediction fields by key instead of unpacking `.values()` positionally:
# positional unpacking breaks (or silently binds the wrong field) as soon as the
# predictor's output dict gains, loses, or reorders a key.
prediction = predictor.predict(text)
document = prediction['document']
clusters = prediction['clusters']  # the predictor's own clusters, printed in a later cell

# Run the same text through `nlp`; `doc` is what print_clusters receives below.
doc = nlp(text)

In [17]:
# Header first, then compute and show the clusters from the strict strategy.
print('STRICT Intersection Strategy clusters:\n')
strict_clusters = strict.clusters(text)
print_clusters(doc, strict_clusters)

STRICT Intersection Strategy clusters:

Florida - [Florida; Florida]


In [18]:
# Header first, then compute and show the clusters from the partial strategy.
print('PARTIAL Intersection Strategy clusters:\n')
partial_clusters = partial.clusters(text)
print_clusters(doc, partial_clusters)

PARTIAL Intersection Strategy clusters:

Wiley - [He; Wiley; he; he]
Florida - [Florida; Florida]


In [19]:
# Header first, then compute and show the clusters from the fuzzy strategy.
print('FUZZY Intersection Strategy clusters:\n')
fuzzy_clusters = fuzzy.clusters(text)
print_clusters(doc, fuzzy_clusters)

FUZZY Intersection Strategy clusters:

Austin Jermaine Wiley - [Austin Jermaine Wiley; He; Wiley; he; he]
Florida - [Florida; Florida]


In [20]:
# Baseline for comparison: the clusters taken directly from predictor.predict(text),
# before any intersection strategy is applied.
print_clusters(doc, clusters)

Austin Jermaine Wiley - [Austin Jermaine Wiley; He; Wiley; he; he]
Florida - [Florida; Florida]


In [21]:
# Second baseline: the clusters attached to the `nlp` document itself, read from
# the `coref_clusters` attribute under `doc._`.
# NOTE(review): presumably a spaCy extension attribute populated by a coreference
# component inside `nlp` — confirm in utils.load_models.
doc._.coref_clusters

[Wiley: [Austin Jermaine Wiley (born January 8, 1999), He, Wiley, he, he],
 Florida: [Florida, Florida]]

In [22]:
# Disabled: reads the tab-separated file 'gap.tsv' and collects its 'Text' column
# into the list `gap_texts`. Uncomment together with the scan cell that follows.
# import pandas as pd
# from tqdm.notebook import tqdm

# df = pd.read_csv('gap.tsv', delimiter='\t')
# gap_texts = df['Text'].to_list()

In [23]:
# Disabled exploratory scan over the first 100 entries of `gap_texts`: prints the
# index and text of every example where the strict result is non-empty, differs
# from the partial result, and the partial result in turn differs from the fuzzy one.
# Requires `gap_texts` and `tqdm` from the (also disabled) loading cell.
# for i in tqdm(range(100)):
#     text = gap_texts[i]
#     clusters_strict = strict.clusters(text)
#     clusters_partial = partial.clusters(text)
#     clusters_fuzzy = fuzzy.clusters(text)
#     if clusters_strict and clusters_strict != clusters_partial and clusters_partial != clusters_fuzzy:
#         print(f"{i}. {text}\n")