In [1]:

import dataclasses

from pybrat.parser import BratParser, Entity, Event, Example, Relation


In [12]:

# Parse the BioNLP-ST 2011 sample corpora with a parser created with error="ignore"
# (presumably this skips annotations that fail to parse — confirm in the pybrat docs).
# NOTE(review): the path below is absolute and machine-specific; adjust it for your checkout.
bionlp_corpus_dir = "/home/julio/repos/DeepEventMine/brat/brat-v1.3_Crunchy_Frog/example-data/corpora/BioNLP-ST_2011"
brat = BratParser(error="ignore")
examples = brat.parse(bionlp_corpus_dir)


In [13]:
# Display how many examples the parser returned.
len(examples)

80

In [3]:

# The parser returns dataclasses: every parsed item is an Example, and each of its
# annotation collections holds instances of the matching annotation class.
assert len(examples) == 80
assert all(isinstance(doc, Example) for doc in examples)
for attr, expected in (("entities", Entity), ("relations", Relation), ("events", Event)):
    assert all(
        isinstance(item, expected) for doc in examples for item in getattr(doc, attr)
    )


In [4]:

# Pick one example by its id and show its text, followed by the count and the
# first item of each annotation collection (entities, relations, events).
id_ = "BioNLP-ST_2011_EPI/PMID-19377285"
example = next(filter(lambda candidate: candidate.id == id_, examples))
print(example.text)
for annotations in (example.entities, example.relations, example.events):
    print(len(annotations), next(iter(annotations)))


Epigenetic inheritance through self-recruitment of the polycomb repressive complex 2. 
Maintenance of gene expression through epigenetic mechanisms such as DNA- and histone-methylation is essential for preserving cellular identity and function. Multiplication of eukaryotic cells requires that the DNA content of the cell is duplicated through replication, which is coupled to incorporation of de novo synthesized core histones into nucleosomal structures. One of the challenging questions in biology is to explain how the organism ensures that regulatory epigenetic marks, once established, are transferred from one cell generation to the next. Based on studies in our laboratory, we have recently proposed a model for how the methylated lysine 27 of histone H3 (H3K27) can be stably transmitted through the cell division cycle. We found that the Polycomb Repressive Complex 2 (PRC2), which is responsible for di- and trimethylation of H3K27 (H3K27me2/me3), binds to its own site of methylation. Mor

In [5]:

# dataclasses.asdict turns each example, including its nested annotations, into plain dicts.
# NOTE: this rebinds `examples`, so the cell is not idempotent — running it a
# second time fails because asdict only accepts dataclass instances.
examples = [dataclasses.asdict(doc) for doc in examples]
assert all(isinstance(doc, dict) for doc in examples)
for key in ("entities", "relations", "events"):
    assert all(isinstance(item, dict) for doc in examples for item in doc[key])

print(examples[0])

{'text': 'Canine COL1A2 mutation resulting in C-terminal truncation of pro-alpha2(I) and severe osteogenesis imperfecta. \nRNA and type I collagen were analyzed from cultured skin fibroblasts of a Beagle puppy with fractures consistent with type III osteogenesis imperfecta (OI). In a nonisotopic RNAse cleavage assay (NIRCA), the proband\'s RNA had a unique cleavage pattern in the region of COL1A2 encoding the C-propeptide. DNA sequence analyses identified a mutation in which nucleotides 3991-3994 ("CTAG") were replaced with "TGTCATTGG." The first seven bases of the inserted sequence were identical to nucleotides 4002-4008 of the normal canine COL1A2 sequence. The resulting frameshift changed 30 amino acids and introduced a premature stop codon. Reverse-transcription polymerase chain reaction (RT-PCR) with primers flanking the mutation site amplified two complementary DNA (cDNA) fragments for the proband and a single product for the control. Restriction enzyme digestions also were consi

In [11]:
# Re-create the parser with default settings and parse a different directory;
# this rebinds both `brat` and `examples`.
# NOTE(review): the path below is absolute and machine-specific; adjust it for your checkout.
single_doc_dir = "/home/julio/repos/DeepEventMine/brat/brat-v1.3_Crunchy_Frog/data/1370299-brat"
brat = BratParser()
examples = brat.parse(single_doc_dir)


In [9]:
# Summarise every parsed example: its text, then the number of entities and
# events together with the first annotation of each kind as a sample.
for example in examples:
    # Print the text inside the loop so it always belongs to the example being
    # summarised, not to an `example` left over from an earlier cell.
    print(example.text)
    # The `None` default keeps the loop running when an example has no
    # annotations of a kind instead of aborting with StopIteration.
    print(len(example.entities), next(iter(example.entities), None))
    # print(len(example.relations), next(iter(example.relations)))
    print(len(example.events), next(iter(example.events), None))


Autocrine angiotensin system regulation of bovine aortic endothelial cell migration and plasminogen activator involves modulation of proto-oncogene pp60c-src expression.

Rapid endothelial cell migration and inhibition of thrombosis are critical for the resolution of denudation injuries to the vessel wall. Inhibition of the endothelial cell autocrine angiotensin system, with either the angiotensin-converting enzyme inhibitor lisinopril or the angiotensin II receptor antagonist sar1, ile8-angiotensin II, leads to increased endothelial cell migration and urokinase-like plasminogen activator (u-PA) activity (Bell, L., and J. A. Madri. 1990. Am. J. Pathol. 137:7-12). Inhibition of the autocrine angiotensin system with the converting-enzyme inhibitor or the receptor antagonist also leads to increased expression of the proto-oncogene c-src: pp60c-src mRNA increased 7-11-fold, c-src protein 3-fold, and c-src kinase activity 2-3-fold. Endothelial cell expression of c-src was constitutively ele

In [10]:
# Display how many examples `examples` currently holds.
# NOTE(review): execution counts in this notebook are out of order, so the value
# depends on which parse cell ran last — confirm with Restart & Run All.
len(examples)

1

In [14]:
from brat_parser import get_entities_relations_attributes_groups


In [15]:

# Read a single .ann file with brat_parser and unpack its four results.
# NOTE(review): the path below is absolute and machine-specific; adjust it for your checkout.
ann_file = "/home/julio/repos/DeepEventMine/brat/brat-v1.3_Crunchy_Frog/data/1370299-brat/PMID-1370299.ann"
entities, relations, attributes, groups = get_entities_relations_attributes_groups(ann_file)


In [20]:
# Display the entities returned by get_entities_relations_attributes_groups.
entities

{'T1': Entity(id='T1', type='Organism', span=((43, 49),), text='bovine'),
 'T2': Entity(id='T2', type='Cell', span=((50, 73),), text='aortic endothelial cell'),
 'T3': Entity(id='T3', type='Gene_or_gene_product', span=((88, 109),), text='plasminogen activator'),
 'T4': Entity(id='T4', type='Gene_or_gene_product', span=((148, 157),), text='pp60c-src'),
 'T5': Entity(id='T5', type='Cell', span=((177, 193),), text='endothelial cell'),
 'T6': Entity(id='T6', type='Multi-tissue_structure', span=((295, 306),), text='vessel wall'),
 'T7': Entity(id='T7', type='Cell', span=((326, 342),), text='endothelial cell'),
 'T8': Entity(id='T8', type='Simple_chemical', span=((353, 364),), text='angiotensin'),
 'T9': Entity(id='T9', type='Gene_or_gene_product', span=((389, 418),), text='angiotensin-converting enzyme'),
 'T10': Entity(id='T10', type='Simple_chemical', span=((429, 439),), text='lisinopril'),
 'T11': Entity(id='T11', type='Simple_chemical', span=((493, 507),), text='angiotensin II'),
 'T12'