In [1]:
from transformers import AutoTokenizer
from zero_shot_re import RelTaggerModel, RelationExtractor

# The model and its tokenizer are loaded from the same pretrained identifier,
# so it is named once instead of being repeated in both calls.
CHECKPOINT = "fractalego/fewrel-zero-shot"

model = RelTaggerModel.from_pretrained(CHECKPOINT)
tokenizer = AutoTokenizer.from_pretrained(CHECKPOINT)

  from .autonotebook import tqdm as notebook_tqdm
Some weights of the model checkpoint at bert-large-uncased-whole-word-masking-finetuned-squad were not used when initializing BertModel: ['qa_outputs.weight', 'qa_outputs.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


#### Premier essai

In [2]:
# Candidate labels that the extractor scores for a (head, tail) pair.
relations = ["container/content", "other relation"]
extractor = RelationExtractor(model, tokenizer, relations)

# Both entities are written exactly as they appear in the sentence.
box, cubes = "the box", "the cubes"
ranked_rels = extractor.rank(
    text="the box must be filled with the cubes",
    head=box,
    tail=cubes,
)
print(ranked_rels)

[('container/content', 0.9546765349805355), ('other relation', 0.3384249806404114)]


In [3]:
# Score the two candidate labels on an imperative sentence; here the head is
# the content ("water") and the tail is the container ("the glass").
relations = ["container/content", "other relation"]
extractor = RelationExtractor(model, tokenizer, relations)

sentence = "put water in the glass"
object1, object2 = "water", "the glass"
ranked_rels = extractor.rank(text=sentence, head=object1, tail=object2)
print(ranked_rels)

[('container/content', 0.8033250272274017), ('other relation', 0.028950750827789307)]


#### Tester la relation de contenance entre deux items

In [4]:
# Module-level extractor shared by contenance() below; it scores exactly
# these two labels for every (head, tail) pair it is asked about.
relations = ['container/content', 'other relation']
extractor = RelationExtractor(model, tokenizer, relations)
def contenance(text,item1,item2):
    """Decide whether two items of ``text`` are in a container/content relation.

    The module-level ``extractor`` scores the labels 'container/content' and
    'other relation' for the pair, and the scores are turned into a single
    label with fixed thresholds:

    * 'other relation' above 0.5 always wins;
    * otherwise 'container/content' is returned when its score is above 0.9,
      or above 0.8 while 'other relation' stays below 0.05;
    * anything else is reported as 'other relation'.

    Args:
        text: sentence containing both items.
        item1: first item, passed to the extractor as ``head``.
        item2: second item, passed to the extractor as ``tail``.

    Returns:
        Either 'container/content' or 'other relation'.

    Raises:
        KeyError: if the extractor result lacks one of the two labels.
    """
    ranked_rels = extractor.rank(text=text, head=item1, tail=item2)
    # Look the scores up by label, not by position. The result is a list of
    # (label, score) pairs that appears to be ordered by decreasing score, so
    # 'container/content' is not guaranteed to be the first entry; reading
    # ranked_rels[0] as the containment score would swap the two values
    # whenever 'other relation' scores higher.
    scores = dict(ranked_rels)
    cont = scores['container/content']
    other = scores['other relation']

    if other > 0.5:
        return 'other relation'
    if cont > 0.9 or (cont > 0.8 and other < 0.05):
        return 'container/content'
    return 'other relation'

In [5]:
text = "You must put the cubes and the spheres in the boxes"
box, cube, sphere = "the boxes", "the cubes", "the spheres"

# Classify each pair of entities in turn, one result per line.
for first, second in ((box, cube), (cube, sphere), (box, sphere)):
    print(contenance(text, first, second))

container/content
other relation
container/content


In [6]:
text = "the cubes must be filled with boxes and spheres"
box, cube, sphere = "boxes", "the cubes", "spheres"

# Same three pairings as before, with the roles of cubes and boxes swapped
# in the sentence.
for first, second in ((box, cube), (cube, sphere), (box, sphere)):
    print(contenance(text, first, second))

container/content
container/content
other relation


#### Trouver l'ensemble de toutes les relations de contenance

In [7]:
from itertools import combinations
import spacy

# Load the `en_core_web_lg` spaCy pipeline once at module level; get_chunk()
# reuses it to parse each sentence and read its noun chunks.
nlp = spacy.load("en_core_web_lg")

def get_chunk(sent):
    """Pair up the noun chunks found in a sentence.

    Parses ``sent`` with the module-level ``nlp`` pipeline and returns a list
    of ``(sent, first_chunk_text, second_chunk_text)`` tuples, one for every
    2-combination of the noun chunks, in the order the chunks are yielded.
    A sentence with fewer than two noun chunks gives an empty list.
    """
    chunk_pairs = combinations(nlp(sent).noun_chunks, 2)
    return [(sent, first.text, second.text) for first, second in chunk_pairs]

def get_rel(sent):
    """Classify every pair of noun chunks in a sentence.

    Calls ``get_chunk(sent)`` to enumerate the pairs and ``contenance`` to
    label each one. Returns a dict mapping ``(first_text, second_text)`` to
    the label; if two pairs share the same texts, the later one overwrites
    the earlier.
    """
    return {
        (first, second): contenance(sentence, first, second)
        for sentence, first, second in get_chunk(sent)
    }

In [8]:
# Passive "filled with" sentence with one container and two coordinated contents.
get_rel('the boxes must be filled with cubes and spheres')

{('the boxes', 'cubes'): 'container/content',
 ('the boxes', 'spheres'): 'container/content',
 ('cubes', 'spheres'): 'other relation'}

In [9]:
# Nested prepositional phrases ("in the chest of the car"): three noun chunks,
# so three pairs are classified.
get_rel("the treasure is in the chest of the car")

{('the treasure', 'the chest'): 'container/content',
 ('the treasure', 'the car'): 'other relation',
 ('the chest', 'the car'): 'other relation'}

In [10]:
# Imperative sentence with only two noun chunks, hence a single pair.
get_rel("put water in the glass")

{('water', 'the glass'): 'container/content'}