In [1]:
import torch
import argparse
import numpy as np
import torch.nn as nn
from config import Config
from torch.nn import functional as F
from utils.tokenizer import Tokenizer, load_vocab
from model.pretrain_model import ToEmbedding
from torch.utils.data import Dataset, DataLoader

In [2]:
class VocabDataset(Dataset):
    def __init__(self, filename, tokenizer, max_length):
        self.tokenizer = tokenizer
        self.max_length = max_length
        with open(filename, 'r+') as f:
            texts = f.readlines()
        self.data = self.process_data(texts)
    
    def __len__(self):
        return len(self.data)

    def __getitem__(self,idx):
        return self.data[idx]

    def process_data(self, texts):
        data = []
        for text in texts:
            tokens = tokenizer.tokenize(text)
            if len(tokens) > self.max_length - 2:
                tokens = tokens[: (self.max_length - 2)]
            tokens_id, tokens_masks, segment_id = self.tokenizer.create_feature(tokens)
            data.append((tokens_id, tokens_masks, segment_id, text.strip()))
        return data

def collate_fn(batch_data):
    batch_size = len(batch_data)
    batch_data = list(zip(*batch_data))
    lengths = [len(x) for x in batch_data[0]]
    max_len = max(lengths)
    tokens_id = torch.LongTensor(batch_size, max_len).fill_(0)
    tokens_masks = torch.LongTensor(batch_size, max_len).fill_(0)
    segment_id = torch.LongTensor(batch_size, max_len).fill_(0)
    
    for i in range(batch_size):
            tokens_id[i, :len(batch_data[0][i])] = torch.LongTensor(batch_data[0][i])
            tokens_masks[i, :len(batch_data[1][i])] = torch.LongTensor(batch_data[1][i])
            segment_id[i, :len(batch_data[2][i])] = torch.LongTensor(batch_data[2][i])
    return (tokens_id, tokens_masks, segment_id, batch_data[3])

def dataloader(filename, batch_size, tokenizer, max_seq_length=200,
                    dropout=0.01, shuffle=False, lower=True, num_workers=8,
                    collate_fn=collate_fn):
    dataset = VocabDataset(filename, tokenizer, max_seq_length)
    data_loader = DataLoader(dataset=dataset, 
                            batch_size=batch_size, 
                            shuffle=shuffle, 
                            pin_memory=True, 
                            num_workers=num_workers, 
                            collate_fn=collate_fn)
    return data_loader

In [3]:
vocab = load_vocab('dataset/vocab.txt')
tokenizer = Tokenizer(vocab)

data_loader = dataloader('dataset/to_embedding/vocab.txt', 32, tokenizer)

  cpuset_checked))


In [7]:
cfg = Config()
cfg.load_config('config.json')

device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
checkpoint = torch.load('saved_models/biobert-base-cased-v1.1.pth')

model = ToEmbedding(embedding_dim=100, vocab_size=len(vocab), device=device, checkpoint=checkpoint,**cfg.config)
model.to(device)
dense_layer = torch.load('saved_models/to_embedding.pt')

model.dense_layer.load_state_dict(dense_layer['state_dict'][0])

Config loaded from file config.json


<All keys matched successfully>

In [9]:
pooling = 'mean'
with torch.no_grad():
    res = []
    for idx, (inputs, masks, token_type_ids, metadata) in enumerate(data_loader):
        inputs = inputs.to(device)
        masks = masks.to(device)
        token_type_ids = token_type_ids.to(device)
        
        encoder_output = model.bert(inputs, masks, token_type_ids)[0]
        encoder_output = model.dense_layer(encoder_output).detach()
        
        embeddings = []
        
        if pooling == 'first':
            embeddings = encoder_output[:, 0, :]
        elif pooling == 'sum' or pooling == 'mean':
            masks = masks.detach()
            masks = torch.nn.functional.pad(masks[:,2:],(1,1))
            masks = masks.unsqueeze(-1).expand(encoder_output.shape)
            sub_embeddings = (masks.to(torch.float) * encoder_output)
            embeddings = sub_embeddings.sum(dim=1)
            
            if pooling == 'mean':
                masks = masks[:,:,0].sum(dim=-1).unsqueeze(1)
                embeddings = embeddings / masks.to(torch.float)
            
        for embedding, each_metadata in zip(embeddings,metadata):
            res.append((each_metadata, embedding.tolist()))

In [13]:
len(res)

103223

In [15]:
for item in res:
    print(item[0])

,
and
of
the
.
in
)
(
cancer
to
a
with
expression
that
/
is
cells
cell
for
was
as
breast
by
we
tumor
:
were
gene
patients
lung
from
or
human
tissues
genes
metastasis
on
this
an
study
tissue
protein
are
%
which
prostate
be
normal
tumors
carcinoma
associated
gastric
results
growth
role
levels
1
these
cancers
disease
significantly
our
may
0
invasion
its
compared
liver
increased
lines
including
have
p53
proliferation
2
between
found
treatment
using
has
progression
ovarian
not
analysis
=
target
but
also
been
potential
p
colorectal
at
stage
pathway
samples
most
primary
+
signaling
factor
showed
survival
bladder
through
mice
both
clinical
pancreatic
colon
high
receptor
data
identified
development
mrna
than
thyroid
3
migration
apoptosis
hcc
such
novel
renal
other
therapeutic
expressed
suppressor
can
activity
well
higher
methods
used
it
their
activation
model
;
oncogenic
risk
mutations
common
metastatic
epithelial
overexpression
crc
cases
present
brain
skin
here
cervical
studies
level
effects
d

runx1
2007
elisa
describes
consisted
offers
his
untranslated
neutrophils
nasal
killer
pathophysiological
kdm2a
muscle-invasive
concept
attenuate
minimally
pan-cancer
incident
na
4t1
atf4
rest
allows
38-69
altering
a3b
cxcr2
cell-specific
tuberous
regimens
microbial
c-kit
overview
proteomics
il-10
platinum
scaffold
shp2
native
mir-19a
fap
round
dementia
depends
co-expressed
2d
v2
disrupted
until
h3k4
mds
variability
determinant
benefits
oa
feasible
make
p53-null
adaptive
2000
issue
wilms'
p16ink4a
vdac1
phosphatidylinositol
ctcl
glaucoma
sw620
final
verify
give
opportunities
reciprocal
slit2
concerning
nc
fed
reductase
sepsis
mcc
stk11
establishing
pronounced
question
select
mir-506
pthrp
j
chamber
ppi
movement
biosynthesis
adolescents
angiomyolipomas
dependence
ctla-4
changed
codon
communication
ia
activators
gains
downregulates
space
routine
pluripotency
d2
beclin1
all-cause
ap-1
antiproliferative
malt
schwannomas
pubmed
functioning
pecam1
neuroectodermal
morphologically
mmp-7
augment

tbp
rtt
death-1
wherein
refers
ai
surfactant
schizophrenia
drug-resistance
gof
ap4
trap
trpm2
nlrp3
pneumonitis
feeding
orthotopically
hydrogen
infiltrates
urethra
forebrain
ang
400
fumarate
completion
1999
fusion-positive
examines
morphogenetic
sustain
contraction
mir-98
ancestry
ing1
grb2
necrotizing
pdgfb
catalyzes
mir-30a-5p
mir-196a
aldh1a3
pharmacologically
epithelial-derived
chronically
antagonizes
carries
phosphorylates
wrn
disc
high-density
disease-associated
cg
scheduled
socs2
mir-613
hoxb13
beam
prion
ceacam1
exclude
compensatory
ipca
estrogens
cytometric
psmb5
nrf2-mediated
adenoviruses
tfe3
sirt7
tip
cervicitis
lrp
adhesions
lacks
proneural
nedd4
5-htt
keratitis
resorption
kim-1
tlr3
fat1
tk
yang
confirms
swelling
icr
zfx
non-tumour
plasmacytoma
u19
plaques
microenvironmental
ect2
y-box
utrs
analogs
sigmoid
ulk1
cyclooxygenase
octamer-binding
aggregates
zones
autoantibody
hras-transformed
krasc118s
pim-3
cerebrum
rnaseq
flt3
proportions
ic
northern
incorporated
axial
flank

removing
theranostic
african-american
flexible
palpable
instable
neuroblastic
chi-square
plotter
trib2
histotype
trb3
hinders
get
hpa
sexually
-5p
synaptophysin
sham
boundaries
trx1
colony-stimulating
ladc
eya4
ppis
satisfactory
mycobacterium
siah2
f344
chfr
peg
txn
4a
187
chc
204
prognoscan
hrh1
tyro3
irs2
lpa1
phgdh
rap80
tsp1
ultrasonographic
mir-149
endometriotic
epidemiol
proc
natl
acad
disappeared
scd
fbp1
principles
casein
microrna-383
t-lbl
exciting
shifting
gst
steady
adapted
mir-101-3p
visualize
ema
client
pools
deformity
sessile
coprevalent
publication
primates
lesional
nursing
canadian
carcinogenicity
connecting
apoptosis-associated
sqc
igf2-low
granulomatosis
infertility
mir-15a
colons
nephrology
tremendous
record
hsa-mir-16
cenpe
sema3f
reprogrammed
polyadenylation
rnase
condensation
mta2
round-cell
a7
admixed
sheep
approval
au-rich
mir-140-5p
statin
mpc1
pruritic
nifedipine
debilitating
sncg
ovca
workup
ms-k
151
insensitive
cpt1a
tilt
sputum
favoring
327
hits
192
adverse

s6k2
forty-two
demyelination
prkd1
phospho-p38
uncoupling
tomato
lmna
manufacturing
tmem174
moles
5-aza-2-deoxycytidine
tim-4
zd55-shmycn
prompt
arhgef7
hex
pmdc05
lps-pdc
tcl1a
pursue
rps20
regorafenib
cyclinb1
undescribed
apc-deficient
ari
sox18
vasoconstriction
hypoxia-driven
deep-sequencing
biomolecular
ire1
myoblasts
thorough
hey2
selp
usp8
nos1
mnk1
calu-1
ppp1r11
lymphoepithelial
estrogen-independent
ppp2r2a
psen1
moloney
lowers
spliceosome
customized
fifty-four
adenosarcoma
endovascular
krt14
trib1
lysosome
radiolabeled
deguelin
sequestosome-1
medicinal
polyendocrinopathy
werner
move
ipsc
atrogin-1
deubiquitination
equivalents
vip
fk506
roughly
epidermolysis
chka
antitumoral
sea
spc-induced
isthmus
flcn-deficient
sucrose
high-sensitivity
bilirubin
concentrate
cartilaginous
girl
come
drug-sensitive
cgp
tmb
neo-angiogenesis
subcomplex
transplantable
proline
65-73
187-197
226-234
74-82
ras-mapk
agenesis
down-modulation
lrh
cell-of-origin
chl1
-off-flavor
pro-migratory
u133
interpr

diffusely
self-assembled
hkg
amine
anthracotic
fats
bear
ccng2
lady
ofhirna-pdx1
peri-tumoral
neogenin-1
integrase
tramp-c1
store-operated
reactivates
s100b
rras
eosin
miseq
dio2
leucovorin
shox2
occurrences
factor-beta
cldn4
dznep
her2neu
chromatin-immunoprecipitation
relb-deficient
glycosaminoglycans
epha5
sgce
ar-42
myxofibrosarcomas
co-treated
vaccinia
bisphenol-a
disruptor
masticatory
radicals
kvdmr1
slc1a3
maneuver
ass
anoikis-resistant
whites
cl3
gpx5
dll3
ccl24
edu
mir-1254
kat2b
mildly
ophthalmoplegia
adamts8
kettering
il21
ambulatory
neurotrophins
bitransgenic
cd184
vbl
cd27
self-renew
ventrolateral
cardio-renal
glucagonoma
cof
smoc-2
sh2
slc2a9
auto-regulatory
keap1-kd
discussions
510
declines
c26
tgm2
transmigration
tilapia
cyp2c19
cda
cyp450
cdh3
olfactomedin
genbank
co-opted
wt-
car-positive
axdadb3-f
hspa6
ctnnal1
non-human
nin
slug-induced
teaching
tumour-derived
flattened
gist-t1
pc-3m-luc-c6
oxidized
732
saccule
delicate
swallowing
stream
ikap
llc1
il2ra
msr1
t1-2
inc

l3pl
cs-e
gas1
albino
atp5j
kntc2
akt1s1
co-morbidities
psmp
eif4a1
rpl35a
hepcidin
heparin
colour
tyroidal
1-9
cyb5a
t1-t2
deregulates
ekvx
tick
650
plx3397
taos2
growth-suppressive
bannayan-riley-ruvalcaba
remissions
pcv
n-6
vibrio
pdl2
echogenic
hoxb8
glycoprotein-75
bag-75
mepe
dmp-1
pkn1
down-expressed
amylin
oligomers
lmp1-induced
cataloging
bcl2l2
-126
sub-retinal
cd177
stmn2
prss23
er-stress
apoa1
rasa2
fop
cc10
bf
granulosum
immuno-histochemistry
societies
low-penetrance
adipocytokines
git2
trenbolone
acetate-stimulated
mir-372-3p
co-immunoprecipitated
angiogram
nicotine-induced
h-scores
endophthalmitis
capn4
gpx6
scgb1d2
androstane
mawbp
habib
wp1066
-d
cne
4b
schlap1
nurr1
emt-mediated
unr
2d-dige
nhbe
rpsa
parp-2
distally
dx2
scleral
mir-101-2
arvcf
selinexor
-y-box
hf-fed
pdss2
244
batf2
accession
rosiglitazone
3-id1-seap-id1-mcherry
plg
fuels
gpr109a
aphasia
cell-permeable
mem
motivation
let7d
hpv-ve
matics
time-polymerase
adamts
gpr55
4to7
autophagosomal
biocompatibility

alpha-methylacyl-coa
additively
medronate
dehp
non-pulmonary
compensating
clinico-radiologic
lbcl
nonthymic
b-induced
v-atpases
galectin-7
laminins
iwr-1
dusp3
wgs
dhh
intraabdominal
familiarity
p95
hbme-1
cir
celsr3
mucin-1
baf57
e4orf4
hydrops
hsa-mir-10b
hsa-mir-448
cpi-613
metacarpal
semg
asses
du
agencies
shmt
cernet
xuanwei
durations
cyp-d
wfdc1
esirna
pros
neurokinin-1
vacuolating
msi-2
v12g
shisa3
mcf7-fr
desmoid
opn-i
unmasked
en2
painless
wdr70
xp-c
kwak
ced
itga2b
mes-4
vmp1
igf1r-kd
653
hydrouretero-nephrosis
'radiation
wnv
tlr1
exemestane
tnfaip1
dedicator
fg
nisin
card6
doxo
adenofibroma
proton-coupled
paralleling
mtor-dependent
zfp42
imq
fbln1
hsa-mir-133a-1
hsa-mir-196b
ttty14
fdc-p1
extracting
microphthalmia-associated
top-2a
cell-division
caudal-type
cimp-negative
strictures
empsgc
ptip
car-deficient
designs
5-1
hindering
adipokine
40-78
staget1n0m0
patient--t2n0m0
19--of
dep
pre-ect
rating
ets-2
gsdmc
adcy2
psma1
promoter-binding
microrna-106a
est
eif4g1
urgency
tspy

trim35
umbilicus
adhesion-mediated
illuminating
dt-j
pygl
hypoxia-associated
pingyangmycin
zfpm2
cimp-h
prok2
reverse-overlapped
cxcr4-targeted
mt3
ampd2
rasa5
mrc5
p-tnm
tyrosine-regulated
monoblastic
trans-acting
macrolide
nstcs
erythroblastic
avulsion
nop
bim-1
rewiring
p53-compromised
binding-site
tpr-met
sbi-756
tgf-beta1
damps
8p22-p23
submucous
let-7a-5p
prps1
mammosphere-forming
m2r
hsa-mir-1-2
hsa-mir-486
c10orf91
flj41941
glyco-genes
galnt7
saturation
activatable
3243a
erg1
firing
h2afz
rothmund-thomson
ttc7a
ct-lncrnas
fractured
preirradiated
myr-akt
mgp96
calm2
sulf2-deficient
bone-resorbing
unsuitable
fbxo50
convey
iaslc
pak1-rufy3
co-upregulation
xpnpep2
msl2
ralbp1
trophoblasts
prdm10
parp6
382
12q24
neoformans
epidermal-hyperplasic
39-year-old
kpt-330
smad3-
aifm2
tdt
microrna-144
ldm
multitier
mir557
pkib
rcc-cic
dab2ip-mediated
subchondral
post-fertilization
engineer
ogg1-2a
spaceflight
physician
hmg-box
glyoxalase
arhgef2
cellminer
hsamir-
hyperpermeability
paca
erad

snv
mrcl
pdfgrb
fibrosarcoma-based
hmha1
-cxc
e171
hec
mrc2
temporalis
mir-885-5p
pbrm1-
bap1-
nation
il6-st
securin
ckm
zip
308
478
amplicon-based
paclitaxel-exposed
cptc
aml1-eto
prcscs
prtics
basale
vasospasm
volunteer
-ptu
mfn2
sertoliform
corded
uba
047
046
designations
surface-based
sorts
parakeratosis
nanoscale
microrna-488
r249s
arg-1
glioma-relevant
um-1
reforming
a-g-g-t
psr
dnah8
map1b
photoactivatable-ribonucleoside-enhanced
skin-structure
yp
cross-cancer
fbi-1
bpv
prodigiosin
a-181g
station
do-7
wtkras
eye-specific
mcas
jf1
leakiness
kpna4
mirna-29b
dimethylated
catholic
dnfb
vegf-producing
talazoparib
authority
urologist
music
241
mirna-206
stressor
medians
riboflavin
b-cell-receptor
signal-based
plod3
s12a2
non-alternating
b-acute
phototherapeutic
push
midlife
uplc-ms
reaf
velcade
15th
sds-page
nudt6
sepsis-related
convexity
tremor
excisional
cis-antisense
cud
mt3-mmp
nrf2-p62-keap1
cs-like
oestrogens
podoplanin-positive
b16-f1
aesthetics
brca-associated
hypogammaglobuli

lmw-e
63-year-old
nmiia
k120
macroh2a1-target
cipn
gon4l
splenocyte
microrna-100
hpv16e6
t2a
fnas
haemoglobinopathies
mll-all
protein's
n0m0
gp130-jak-stat3
sickness
practitioners
map2k6-fp
prescribing
left-side
nmscs
co-related
non-cg
enamel
pak2-p34
osteoinductive
bfl1
aa-treated
striatins
myst3
myst3-high
myst3-low
croatian
mst3
stk24
ysk1
stk25
sok1
ppar-d
ser83
k67
finder
concurrence
heartburn
recovers
eif2s3
cytoophidia
romberg
gatad1
enoxaparin
icca
time-based
mir-4269
p-ilc
p-lcis
dld-1-k-ras-wt
nm23-m1
deciduoma
dfg
y220c
i47v
l418m
ipmn-specific
adenocarcinoma-driving
r132h
carnosic
folfox
c2orf44
dysostosis
weaknesses
panax
cecostomy
dmr2
snurf
igls
inter-subject
evaluative
mir-661
brb-array
protparam
motifscan
signalp
tmhmm
netphos2
predictprotein
mir-4728-3p
940
interdependent
curcumin-induced
serca
stat5-mediated
vdj
dntt
hybridoma
ptg
ccr2a
sema3e
thirty-three
exon-intron
fsm
esam-ko
estrogen-regulated
j3t-2
krt23
non-inferior
mir-106a-363
iopns
g1-arrest
5years
psychoso

hsa-mir-199b-3p
hsa-let7b-5p
hsa-mir-10a-5p
angiodysplasia
mir-761
re-classified
postive
laparoscopy-assisted
non-gliomas
subdued
socs5
t7-lpc
white-tailed
snord116
culminate
irag
mrvi1
chemokine-like
retropubic
zbrk1
inconstant
obfuscated
15-17
phospho-jak1
pfkfb4-depleted
mom
georgia
lineal
descendents
rankl's
intensification
icd-9-cm
gstm4
4-nqo-induced
darc
short-form
tnfrsf19
40-50
mir-519a
faf1
lkb1-defective
myxomatous
advisable
rs2057482
peginterferon
peg-inf
rivabirin
dalekos
eur
gastroenterol
hepatol
933-939
sookoian
dermatol
1000-1000
subopulation
poglut1
erythroleukaemia
h3255
113b7
nmr-guided
mcs5c
p53-specific
golph3l
3-hydroxy-3-methylglutaryl-coenzyme
subglottic
irreversibly
bipedicled
supratrochlear
arachidonate
immune-modulating
rfviia
telomerase-independent
hydrolyzes
mll4-gps2
trim58
hkdc1
f55a3
supt16h
rba-1
set-16
hda-1
swsn-7
let-526
medium-grade
yap-runx3
cytosols
aliphatic
immunopathogenic
billions
dhh-rhebl1
interrupts
meningococci
polychlorinated
biphenyls
no

head-only
acceleration-time
pedestrian
icsdm
fe-mb
elastic-plastic
lymphoid-specific
wdr76
multi-ethnic
mir-589-3p
panther
pellucidum
mir-488
linearity
receptor-overexpressing
atheroma
foxa2-centered
att-20
rs187115
worst-case
slc30a1
rorc
fdr-adjusted
vi-rads
sir2
poison-resistant
30-hz
a3f
-95
gse50081
s30
b-tfa
j-tfa
pc3-gd3s
reep5
reep6
rcbtb2
high-magnitude
gsec
t10
h-cadherin
4-oh
sip1-negative
bimatoprost-treated
latanoprost-treated
ikca1
loss-of-heterozygosity
nucleobindin
haf
gnb1
extra-gynecologic
monthly
gene-cancer
syndecan-4
gcda
abcb9
masp2
ggt7
tap73-mediated
us28
erks
mc38-ova
vegfr2-luc
fasciotomies
polypeptide-like
sphere-formation
lepidoptera
hymenoptera
trip6
pdgs
i-crp
igfbp-4
sequestrating
broadens
rtq-pcr
b-zip
gp-73
nonprevalent
hev
patrol
noac
allergen-specific
pro-oxidant
cathelicidin-related
sauropodomorphs
hdls
il1-beta
unsurprisingly
eg1
cont
yap1-tead1
bcas
gsi
aiec
amyloidoses
iplex
glabrata
rs3660
44-0
tumor-directed
sarcoma-positive
bi836845
mcaf-tumor


withdrawal-mediated
trachomatous
trichiasis
pbef1
nmprtase
analytically
methanesulfonate
trim44-knockdown
upper-extremity
serratus-anterior
med14
bmpr
recreate
sourced
hsg
a253
foundation-7
pentacyclic
tmcao
anti-n-methyl-d-aspartate
anti-nmda
sirna-inhibited
smmhc
agt-ccr3
agt-ccl9
acquisitions
10mm
tiq-a
mpn-ap
tf-expressing
tf-negative
transdifferentiations
mt-nd6
mt-cyb
mt-atp6
mt-rnr1
adenopolyps
psn-1
nugc3
pd-l1-expressing
non-osteogenic
dihydrodiol
osmrs
fluorimetric
cotransduced
npmc
r140q
protein-73
rqu
snord48
bloodflow
linc00668
cornification
atm-chk2-cdc25c-p21waf1-cdc2
signified
nongenomic
terminologies
brigham
annexectomies
see-fim
precluded
iva-pla2-mediated
cd324
esophagus-derived
renowned
sports
stra8
irf8-mmp3
anxa2-cxcl12
aezs-136
denied
utero-tubal
latencies
stimulation-dependent
bone-metastasizing
colony-
phos-s6
dermoscopy
hbv-mir-2
sar405838
mito-resistant
system-associated
asbo
transglutaminase-2
antiprostate
musculorum
mir-148
id-1b
diprotin
banked
hbx-depende

anti-tuberculous
canvas
supressed
gene-therapy
cinobufagin
hspa1
lactic-co-glycolic-acid
carboxy-terminal-binding
gsd-ia
non-operative
snb
pkc-independent
retroversion
pig3-mediated
intracorporeal
-designated
degeneration-related
xq26-q27
post-induction
mir-3188
ppp1r15a
carbenoxolone
slc45a3-erg
ak001058
inhba-as1
mir-573
-451
ad-tgp-cre
lnl-braf
938
ifna
wd-repeat
unprecedentedly
bilayered
cis-associated
microrna-29s
dzqe
cd-exposed
shortlisted
far-reaching
hyperubiquitylated
understandable
hiseq2000
cancer-gynaecological
seropositivity
cd99l2
p-erm
y397
antiepileptics
mir-4510
sixfold
vegfab
glutathione-s-transferase
psma-psa
hpin
associators
dtd
hist1h1t
sapcd2
znf695
corrective
2b1
angiopoietins
non-glial
parasagittal
p63-mediated
adenoca
venography
her2h878y
htr-8
svneo
hoxa10-mediated
proteolipid
dpysl3v2
promoter-targeted
hydatidosisis
metaphyseal
didn
krasg
co-transcriptional
3-46
alpha-motif
1b-like
anks1b-like
rhoa-like
p53r273h
pseudovirus
v3b
parapodial
abortion
figured
se

dimerized
rac-trio
attenuations
squid-meg
ggggc-repeat
rmst
bftc905
tlr9-expressing
26b-targeted
ccrf-cem
non-tumorigenicity
pc-dna3
prcc1
prcc2
caps2
arhgap21
hoxb6
spint1-deficient
dcv
-stress
gbm-
c19orf33
c3orf52
c4orf19
pre-osteoclasts
vprbp-ddb1-cul4-roc1
caf-aspc-1
oncodriveclust
icages
drgap
958
readthrough
lurbinectedin
epithelial-related
acetylenic
thioquinolines
propargyl
4-chloro-2-butynyl
4-acyloxy-2-butynyl
sw707
p388
cisplatin-refractory
5-flurouracil
tumor-generated
copper-binding
lox-like
tho
meis2c
anl
snx29
rwv
2500
ser80
trichophyton
microsporum
epidermophyton
il-18-transgenic
foxa1-slug
hpv-opc
microrna-130a
mgo-peg-2me
nanocomposite
acrylamide
tnfrsf7
varlilumab
2005-06
islet-immune
gene-knockdown
t1n0m0
majorly
generalise
osmoregulation
euryhaline
caffeine-exposed
cdc28
tgase
tip60-p400
rec
fizzy
dystonic
pre-rna
biofeatures
oncogene-specific
androgen-resistant
nbcsp
hcr-gli
fourier
hpv18-infected
elf2
elf2a
elf2b
elf4
ligularia
fischeri
bergapten-induced
hmga2-e

iodine-based
netnmf
beta-defensin
felted
pressure-relief
half-shoe
total-contact
foot-care
non-infected
footwear
dressings
limbu
magar
brahmin
55-6
mcf-7-xenografted
bodyweights
drews-elger
iorns
dias
miller
dean
campion-flora
rodrigues
reis-filho
rae
el-ashry
lippman
camerae
pneumatopores
samd4
epilimnetic
anoxic
hypolimnia
alps
posoperative
cd123-positive
893g
gh-releasing
stem-
rt-outcome
p16-status
rlp1b
at-hook2
gad25
nonpsychiatric
30-31
annotate
structural-
plx5568
suprascapular
29mm
22mm
fret
chlorhexidine
diclofenac
betamethasone
gastrogavage
ss1
hypokalemic
goncha
seso
enesie
woreda
amhara
anlage
chir99021
hgcl2
phospho-focal
convexities
tumor-type
hypofibrinogemia
hypercytokinemia
low-moderate
tpn
isothiocyanate-induced
constrictive
sosp-9607
molecule-4
obtaincancerinfo
ell1
prominin
halofuginone
bullard
inter-cecal
y-shaped
bian
jn
pro-glp-1
misinterpretation
fas-l
fdopa
sav1-knockout
pre-microrna
complex-dependent
instigated
ras-responsive
grn's
inflamation
cytoskeleton-
t

637
il8ra
aqp9-sirna
fizzled
hdue
fenhexamid
er-expressing
laking
ots
cob223
wv
pr-mediated
sox2-sox9
irish
beaumont
7p3a
kpt-335
cd123-eng
tccs
leasions
fol
nidogen
saprophyte
dicer1-expressing
terahertz
prog
head-like
ipsc-derived
rodnan
hfref
uhrf1-dnmt1-g9a-snail1
ltr-primed
hdm2-dependent
assertion
pyrido-thieno-pyrimidine
lipl32
zea
diarrhea-predominant
capasso
1240-1243
healy
coll
radiol
281-288
laukkanen
1646-1650
opaque
k-nuf2
mirna-26a-5p
hypo-methylated
gut-bone
gli-reporter
patellar
yorkshire
thrombospondins
vhl-mice
p76del-mutated
postoperation
brca-ness
kinase-muscle
fusobacterium-high
fb-high
ivs1-27g
ngf-p75
situs
fodrin
32d
b3a2
32dp210
32dp210-t315i
mdf
ameliorative
cause-outcome
ny-ren-54
aldh1a3-induced
semiautomated
allograftfailure
cryaa
small-nucleolar
snora55
esps
rnf6-amplified
hmti
e-mpm
17-5p
b4g2
rehabilitaion
selenocysteine-containing
cusccs
mir-1304
germline-inactivating
abused
asah
top2ahigh
top2aneg
saccules
book
notch1-sox9
asr
durom
metasul
789
9a
cvp


mehg
iee
physical-chemical
aop
montefiore
nabu
h3k4ac
stably-expressing
glioma-induced
adenocarcinoma-induced
agrammatic
logopenic
enforcing
ferroportin-conducted
h295
epac1-overexpressing
tectal
vmp1-dependent
mate2-k
antidiabetes
microrna-511
sms2-deficient
alpha-fetoprotein-targeted
orotic
car-specific
villosities
mir-214-fgfr1
topoisomerases
mmp1b
mcar
niger
tbr2
dcfda
ibd1
ly-294002
p-akt1
transition-inducing
linc00094
il-6-deficient
jwh-015
necessities
rsna
agm-mediated
mrc5sv
hypoxia-upregulated
ptx3-derived
fgf-binding
pentapeptide
nsc12
caspase-8-like
8f1
cc1-mono
utilises
cgy
omega-6
procarcinogen
ddah2
rscc-61
sun-1
tnfa-
tnfr1-
d-leu-4
-ob3
hpde6c-7
stomach-derived
222-mediated
dats-induced
factors-mediated
gank
tucked
pre-and
polycation
instructive
midorganogenesis
amido-piperizine
splunc1-mir-141-target
mir-193b-upa
mir-381-
lrrc4-mek
prns-1-1
il-fabp
zyflamend
nbr2
fiberscopy
i148m
ggc
endometria
ferulic
tracerx
dmm
gm-scf
cell-substratum
low-density-lipoprotein
megalin


t32-phosphorylation
iv-derived
6-cepn
grn-
upci
scc084
hela-h4-pegfp
cox-2-overexpressing
pc-csc
seaweed
hxbm408's
pediococcus
acidilactici
nr042057
1893
thrombus-derived
obi
hbsag-negative
theevd
cg-rich
sp-binding
mt-8
mt-9
storiform
mfhs
calpain-5
6k
24k
gastrin-deficient
da6034
jurkat-axl
anti-macroautophagic
cma-mediated
re-examination
12-o-tetradecanoylphorbol
15d-pmj2
phospho-rtk
rp11-322e11
ac093609
ctc-297n7
leiomyosarcomatous
hsa-let-7a-1
nk-1r-mediated
mmp-seq
ametastasis
obligately
smoa1tg-driven
smoa1tg
tissuescan
cd147-cd44s-stat3
neo-endothelial
long-period
myofibroblast-derived
gx15-070
post-cranial
munc18-2
stxbp2
syntaxin-binding
xpc-dependent
dimethylsulphate-footprinting
g-tetrads
g4-motif
bibliographic
1990-2019
prg2
bt474-pten-ltt
factor-6
mage-3a
pp65-responding
bi-potential
cooh-mnps
transcript-14
metablism
h-efp
lef1-as1
rp11-296e3
limk2b
lim-kinase
no-carrier-added
pan-cytokeratins
hla-g3
gse5787
vouching
sbf
nicd-csl-maml
lnascs
obasc-derived
cp-31398-restore

il-17rc-dependent
din1a
stricter
pharmacovigilance
parapoxvirus
rpl30
rpl37a
egln1-mediated
hnrnap1
cadets
mir-34a-p53
craniometaphyseal
metaphyses
low-wt1
intracarotid
trimodality
s18
waldeyer's
protein-encoding
sox2ot-mir-194-5p
mir-122-sox3-tdgf-1
microrna-1207-3p
gef-dead-prex1
reorganized
iaep
mir-944-bnip3-mmp-caspase-3
hapten-induced
collagenolytic
para-areolar
clip2-positive
h2a-4t
tusc3-silenced
xw
shu
bie
statip1-depleted
del-dem
sumo-modified
imcompleted
bone-colonizing
1120
3-week-on-1-week-off
wenyujin
desiring
cpg-free
pca-screened
non-stem-like
rnf157
il-27r-
t-cell-dependent
ikgkb
ube21
fab-type
rs2075606
ttpd
mms13
scfv-mms13
-scfv
-mms13
-scfv-mms13
ko240
wt145
esi-regulated
r3
csf-contacting
csf-c
righting
19-34
os-genesis
lysine382
reg1
intron-luciferase
recql1-
wrn-sirnas
androgen-and
hcc-cscs
keap1-knockdown
menses
brahma
adenomagenesis
hgtb
intertrigo
erosio
interdigitalis
onychia
hlsccs
siapp-treated
dq594040
over-represents
nationality
kgf-
mmp-9-positive
o-glc

relocalisation
preinjection
serca2b
paf15
transactivators
tax-expressing
microrna-20b-5p
keratoacanthoma-like
fbni
1275
1055
7-color
ngs-panel
normal-looking
observes
x-engrafts
hlfs
brca1-dependent
white-matter
c3hba
tut1
mir205hg
excavated
b16f1
cd44-snail-mmp
driver-related
m-vcr-r
cenp-c
centromere-kinetochore
nucleoside-nucleotide
anti-vegfr
il-3-stimulated
f3-1
nos3-
spermatids
mir-383-ldha
stroma-based
xentuzumab
esc-3
undergrowth
non-modifiable
dialysis-dependent
xia
spns
zuogui
pill
expounding
connotation
rs4444903
one-hundred
inflatable
foley
hemorrhage-induced
equatorial
left-handers
in-dota-he3-zcaix
3-he3-zcaix
in-dtpa-g250
fab'
mac-1
tissue-expression
pc5
pc7
microfibrial-associated
1r
gats
tulia
gesture
estrogen-synthesizing
rgi
pectin
-dde
-dda
dll4-deficient
neuron-microglia
nat6531
gln787gln
tra-ir700
phosphoinositol-3
aip2
microrna-551b-3p
virus-triggered
residul
krab-associated
aptx
non-interventional
sd-oct-a
pylori-type
lunasin
fizz1
re-stimulated
50-400
morgue
co

ac8
orai1-activating
oasf
generalizable
dextro-position
secundum
arteriosus
myc-nick-induced
non-bl
cvs
intra-ventricular
inter-ventricular
rs17860508
il12-ss1
sb743921
iggsd
spad
tat-p4-
datc5
thalassemias
deoxynucleotide
ofimmune
ps1iv
hypoxia-like
sad-associated
angiocrine
scaffold-based
rsk4-shrna
mlh1-positive
9704
cancer-initiated
02--20
temporo-occipital
operculum
efts
fgfr2-mutated
-652g
tissues-of-origin
vdi
cullin3-klhl25
g5
dual-delivery
cis-diamminedichloroplatinum-
ccnd
aromatase-dependent
18srna
papolinkerp144
eua
reciprocally-regulated
csgalnact1
edem3
ufc1
mir-298
rs2227310
rs13010627
casp1and
4-benzothazines
obliterate
ict1-specific
jlr
be13
sap130
msts-c
k24
fgdy
fgd1
hamls
hcb
-cd34
hrhpv-rna
physician-
dc-lamp
mirna499
rs2981578
btv
65-70
non-oc
xiap-binding
bsglwe-induced
anticolorectal
phosphatidylcholine-specific
colony-derived
100cells
cavernosal
postjunctional
rc-77
nanoparticle-enabled
defa5
double-stain
mediolaterally
l-arginase
p-igfir
atp11b
nash-driven
dyn

-jiang
bst
atp2b1
anthracycline-dependent
loss-of-fat1-function
transgene-induced
uke-1
inter-malignant
zfx-mediated
pca-lda
prodifferentiation
rs4246444
vav3-sirna-transfected
three-membered
antigen-mediated
icgn
megn
ip1--the
comminuted
814
tyr1349
moloney-murine
tnfrsf8
tnfsf8
mir-101-dnmt3a
aqp5-sirna
gasotransmitters
ho-2
pkn2
telo2
mrpl9
mttl1
klhl20
pi4kb
th1-oriented
nanovectors
c-dim12
nurr1-regulated
p-ezrin
rem2a
cytohesin
nett
tp53-defective
hbsag-seropositive
hbsag-seronegative
milliliter
casp8-ir
polymer-nanoclay-based
validations-in
biopsies-we
ahscs
cleaned
suppressor-regulating
set8-
cpth2
neutrophil-derived
lung-protective
pneumonia-inducing
scribbled
d2-mediated
firefighting
paclitaxel-mediated
cyp3a11
tlr4-independent
iemg
nci-h716
pgrmc1-transfected
sex-mismatched
andrunx1
nci-h82
domain-function
tyr225cys
674a
y225c
tmd8
bap1-mutant
anoestrus
spayed
wax-embedded
cell-cancer-associated
residualized
bitragion
lip-chin
utc
w4p
lhb
ptp4a3-driven
ptp4a3-mediated
straig

epp62
epp65
epp5
epp71
epp74
hoxb13-deficient
hoxb13-positive
melignant
gstt1-deleting
sac-2
1987-1989
cspc
extraribosomal
non-smallcell
oncomirna
klf6-e2f1
reversal-adrenal
ahc
reg-i
pd1-pdl1-targeted
ym201636
15mb
4-thiazolidinone
les-2194
les-3377
les-3640
4-thiazolidinones
progression-opposing
6n
nephron-based
erbb4-mapk
microrna-130b-3p
fhl124
nephrotoxin-induced
cfe-mri
reactants
excretory-secretory
pup
gna13-3
-utr-reporter
cell-growth-related
0003221
kaplan-meir
hpa-positive
tc-rpmm
memberships
mycn-mir-26a-5p-lin28b
diurnally
lxr-dependent
4-dihydropyridine
vdie-2n
7-stimulated
3-p21
haphazardly
regulators--histone
erastin
crocodile
cdk-1
perinasal
6q15-deletions
barghout
zepeda
vincent
azad
steed
postovit
rrgs
keratorefractive
anti-oncostatin
preparatory
subjectvies
wsd
igf1-pi3k-dependent
enzyme--phosphoglycerate
sord
cdv3
elas-creer
brl
epo-compounds
bf-rtk
uantitative
intelectin
dcm-ds-treated
nbnma-induced
compulsory
cullin-ring-containing
19year
alk-tkis
mir-642b-3p
pro-

preauricular
act-associated
arosarena
dela
cadena
denny
bryant
thorpe
safadi
ofcs
target-snps
browsed
betatrophin
pb-arv567es
showcase
dropping
medium-recurrence
achn-bo5
pressure-time
integrals
in-shoe
primary-resected
sdm
viridis
side-based
field-based
interferon-alpha2
3loxp
gusb-null
ep4-knockdown
kruscal-wallis-h-test
mismatch-specific
adenine-
thymine-dna
mismatched
hdac1-mediated
non-clinical
89zr-dfo-amg102
-styrylphenoxy
butanoyl
-l-4-hydroxyprolyl
-thiazolidine
benzyloxycarbonyl-thioprolyl-thioprolinal
qatar
ta-rcc
trial-level
-79tt
poor-prognostic
gw1100
vadt
cbna
hmda-7
vwa2
masculinization
sw13-
dna-lna
turban
acylaminoacyl-peptidase
dystroglycan
dag1
k27m-mutant
gata2-
nfsa
pichia
pastoris
n-status
t-status
lc3-positive
invadopodia-related
il20rb
r213x
kayser-fleischer
shr-socs1-transduced
metallo-proteinases
cd-10
nkg2d-l
cbrs
sbrs
baroreceptor
ud-scc2
upci-scc72
002059
0000190
circ-sfmbt2
104075
100338
0136666
0000523
0006427
0089105
circagfg1
circepsti1
circtfrc
circ-d

assuring
chn2
ist
duct-to-duct
forget
rheb1
wccn
1177
mbic
deprives
co-twin
attention-control
non-cardiovascular
gyncsm
risk-adjusted
fbxo31-induced
electrophoresis-time
flight-mass
chordata-specific
ape1-knockout
oligos
uca1-overexpressing
piloted
fender
peripheries
aortogram
sensory-specific
a-c1
hras-like
egfrt790
klf17-mediated
klotho-treated
mo3
epigastralgia
t-antigens
bio-active
splice-variant
rdeb-associated
prothoracic
prp8
cyp307a2
spookier
spok
ecdysone-biosynthetic
hpv-immortalized
18f-fdg-pet
d299g
t399i
darwinian-type
wsc
pymt-
wnt1-driven
qg
rs34330
morc1
morc4
g-lisa-based
immunostaining-based
lec-mediated
tcm-treated
-cacgtg-3
advisory
adjudged
nonappearance
fedex
rs-hepcko
rvecs
nsr
acth-immunopositive
monocryl
4-0
years'
nmd-induced
ad2
asc-stimulated
carcinoma-asc
mek-targeting
ohsu
predesignated
mrx34
liposome-formulated
lmp1-stimulated
cribriform-morular
morular
ceratioid
leontiasis
ossea
jmjd2a-regulated
nonproductively
cyld-defective
npm1-luciferase
ei950
histoc

geosmin-producing
geosmin
off-flavors
pcdc4
icr4
pttg1-overexpressing
histocompatibility-mismatched
vcta
deiodination
decarboxylation
mb-pdt
bone-liver-thymus
hu-blt
hu-at
77-gene
regulable
alkomas
18-gauge
calix
pdpe
thermogelling
situ-forming
mushrooms
ergosterol
6-oap
ser611
ser613
arg609
interleukin-6-induced
super-resolution
incontinentia
pigmenti
rs8154
meqtl
genotype-tissue
lymphedemas
adnexes
45-63
refuge
virchow's
radio-resistant
mir-omic
microhemorrhage
macrohemorrhage
linear-by-linear
atm-null
pre-ri
lst1
leucocyte-specific
bbe8
stp1
lsu2
dc3000
recirculate
233-gene
based-comparative
tmucih
nrf2-ho-1
ip5
heuristic
hbi
tomosynthesis
filtered-back
fpb
needleless
dripping
lidocaine
mesangiopathy
acid-inducing
re-analyze
st18-depleted
-omics
akt-phosphorylation
kettering-integrated
cen7
effector-loop
procainamide
maraviroc
mir-218-tpd52
ogx-011
findings--endometrial
chi2
00005
tweets
sentiment
users'
tobreast
xanthoma
verrucopapillary
sclerosants
thermoplasty
tumor-to-nontumor
t

In [22]:
embs = [ i[1] for i in res]

In [23]:
pad = np.zeros(100).tolist()
unk = np.random.rand(100).tolist()
embs.append(pad)
embs.append(unk)


In [26]:
embs = np.array(embs)

In [27]:
np.save('embeddings.npy', embs)

In [1]:
import torch
import argparse
import numpy as np
import torch.nn as nn
from config import Config, Logger
from torch.nn import functional as F
from utils.tokenizer import Tokenizer, load_vocab

In [2]:
from data.dataset import ReBagDataLoader

In [3]:
logger = Logger()

In [4]:
vocab = load_vocab('dataset/vocab.txt')
tokenizer = Tokenizer(vocab)

In [5]:
data_loader = ReBagDataLoader('dataset/re/test.txt', 160, 0,logger, tokenizer)

2021-05-16 11:58:33,935 [DEBUG]: -----------read data-----------
2021-05-16 11:58:34,503 [DEBUG]: dataset/re/test.txt has data 78956
  cpuset_checked))


In [6]:
for i in data_loader:
    print(i)

['in', 'this', 'single', '-', 'center', 'retrospective', 'study', ',', '83', 'consecutive', 'patients', 'with', 'o', '##sse', '##ous', 'meta', '##sta', '##si', '##zed', 'pro', '##state', 'cancer', 'were', 'evaluated', ',', 'who', 'had', 'primarily', 'been', 'treated', 'by', 'sub', '##cap', '##sul', '##ar', 'bilateral', 'or', '##chi', '##ec', '##tom', '##y', '.']
['background', ':', 'ma', '##li', '##gnant', 'p', '##le', '##ural', 'me', '##so', '##the', '##lio', '##ma', '(', 'm', '##pm', ')', 'is', 'an', 'aggressive', 'neo', '##p', '##las', '##m', 'arising', 'from', 'me', '##so', '##the', '##lial', 'lining', 'of', 'p', '##le', '##ura', '.']['in', 'the', 'present', 'study', ',', 'the', 'anti', '-', 'tumor', 'action', 'of', 'me', '##sen', '##chy', '##mal', 'stem', 'cells', '(', 'm', '##s', '##cs', ')', 'has', 'been', 'examined', 'in', 'a', 'mouse', 'model', 'of', 'breast', 'cancer', 'with', 'emphasize', 'on', 'tumor', 'growth', ',', 'an', '##gio', '##genesis', 'and', 'c', '-', 'my', '##c',

tokens ['this', 'study', 'highlights', 'the', 'significant', 'difference', 'in', 'l', '##1', 'l', '##1', '-', 'or', '##f', '##2', 'expression', 'between', 'c', '##t', '##cs', 'and', 'normal', 'samples', '.'], tokens_id [101, 1142, 2025, 12976, 1103, 2418, 3719, 1107, 181, 1475, 181, 1475, 118, 1137, 2087, 1477, 2838, 1206, 172, 1204, 6063, 1105, 2999, 8025, 119, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], pos1 is [0, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

tokens ['e', '##uka', '##ryo', '##tic', 'el', '##ong', '##ation', 'factor', '2', 'kinase', '(', 'e', '##ef', '##2', '##k', ')', ',', 'an', 'emerging', 'molecular', 'target', 'for', 'cancer', 'therapy', ',', 'contributes', 'to', 'cancer', 'proliferation', ',', 'cell', 'survival', ',', 'tumor', '##ige', '##nes', '##is', ',', 'and', 'invasion', ',', 'disease', 'progression', 'and', 'drug', 'resistance', '.'], tokens_id [101, 174, 12658, 26503, 2941, 8468, 4553, 1891, 5318, 123, 24779, 113, 174, 11470, 1477, 1377, 114, 117, 1126, 8999, 9546, 4010, 1111, 4182, 7606, 117, 17705, 1106, 4182, 23766, 117, 2765, 8115, 117, 14601, 13417, 3965, 1548, 117, 1105, 4923, 117, 3653, 16147, 1105, 3850, 4789, 119, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], pos1 is [0, 101, 102, 103, 104, 105, 106, 107, 108, 

tokens ['p', '##le', '##ural', 'ma', '##li', '##gnant', 'me', '##so', '##the', '##lio', '##ma', '(', 'm', '##pm', ')', 'is', 'a', 'de', '##tri', '##mental', 'neo', '##p', '##las', '##m', 'affecting', 'p', '##le', '##ural', 'sheets', 'and', 'determining', 'a', 'high', 'rate', 'of', 'mortality', '.'], tokens_id [101, 185, 1513, 12602, 12477, 2646, 15454, 1143, 7301, 10681, 9436, 1918, 113, 182, 9952, 114, 1110, 170, 1260, 19091, 15595, 15242, 1643, 7580, 1306, 12759, 185, 1513, 12602, 8675, 1105, 13170, 170, 1344, 2603, 1104, 14471, 119, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], pos1 is [0, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162

tokens_id is 128, tokens_mask is 128, segment_id is 128, pos is 128, pos2 is 128, pcnn_mask is 128tokens_id is 128, tokens_mask is 128, segment_id is 128, pos is 128, pos2 is 128, pcnn_mask is 128tokens ['here', '##in', 'we', 'character', '##ize', 'vitamin', 'd', 'receptor', '-', 'mediated', 'regulation', 'of', 'k', '##lot', '##ho', 'm', '##rna', 'expression', ',', 'including', 'the', 'identification', 'of', 'vitamin', 'd', 're', '##sp', '##ons', '##ive', 'elements', '(', 'v', '##dre', '##s', ')', 'in', 'the', 'vicinity', 'of', 'both', 'the', 'mouse', 'and', 'human', 'k', '##lot', '##ho', 'genes', '.'], tokens_id [101, 1303, 1394, 1195, 1959, 3708, 23971, 173, 10814, 118, 22060, 8585, 1104, 180, 7841, 5114, 182, 11782, 2838, 117, 1259, 1103, 9117, 1104, 23971, 173, 1231, 20080, 4199, 2109, 3050, 113, 191, 11114, 1116, 114, 1107, 1103, 9748, 1104, 1241, 1103, 10322, 1105, 1769, 180, 7841, 5114, 9077, 119, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

tokens ['h', '##ydro', '##phobic', 'modification', 'of', 'gel', '##ati', '##n', 'with', 'al', '##ip', '##hat', '##ic', 'al', '##de', '##hy', '##des', 'enhanced', 'ad', '##hesion', 'strength', 'to', 'gas', '##tric', 'and', 'es', '##op', '##hage', '##al', 'sub', '##mu', '##cos', '##al', 'tissues', 'through', 'h', '##ydro', '##phobic', 'interaction', 'with', 'living', 'tissues', 'and', 'co', '##hesion', 'force', '.'], tokens_id [101, 177, 19694, 22050, 15156, 1104, 27426, 11745, 1179, 1114, 2393, 9717, 11220, 1596, 2393, 2007, 7889, 4704, 9927, 8050, 23746, 3220, 1106, 3245, 11048, 1105, 13936, 4184, 19911, 1348, 4841, 13601, 13538, 1348, 14749, 1194, 177, 19694, 22050, 8234, 1114, 1690, 14749, 1105, 1884, 23746, 2049, 119, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], pos1 is [0, 95, 96, 97, 98

tokens_id is 128, tokens_mask is 128, segment_id is 128, pos is 128, pos2 is 128, pcnn_mask is 128tokens ['objectives', ':', 'the', 'aim', 'of', 'the', 'study', 'was', 'to', 'assess', 'the', 'expression', 'of', 'fi', '##bro', '##blast', 'growth', 'factor', 'receptor', '1', '(', 'f', '##g', '##f', '##r', '##1', ')', 'and', 'c', '##yt', '##oker', '##ati', '##n', '20', '(', 'c', '##k', '##20', ')', 'in', 'cancer', 'bladder', '(', 'c', '##b', ')', 'and', 'to', 'evaluate', 'their', 'association', 'with', 'the', 'clinic', '##op', '##ath', '##ological', 'features', 'of', 'the', 'disease', '.'], tokens_id [101, 11350, 131, 1103, 6457, 1104, 1103, 2025, 1108, 1106, 15187, 1103, 2838, 1104, 20497, 12725, 27184, 3213, 5318, 10814, 122, 113, 175, 1403, 2087, 1197, 1475, 114, 1105, 172, 25669, 26218, 11745, 1179, 1406, 113, 172, 1377, 10973, 114, 1107, 4182, 28083, 113, 172, 1830, 114, 1105, 1106, 17459, 1147, 3852, 1114, 1103, 12257, 4184, 9779, 7542, 1956, 1104, 1103, 3653, 119, 102, 0, 0, 0, 0, 

['the', 'expression', 'of', 'trans', '##me', '##mb', '##rane', 'pro', '##te', '##ase', 'se', '##rine', '4', '(', 't', '##mp', '##rs', '##s', '##4', ')', 'm', '##rna', 'and', 'protein', 'in', 'car', '##cin', '##oma', 'tissues', 'and', 'corresponding', 'adjacent', 'tissues', 'and', 'non', '-', 'tumor', '##ous', 'es', '##op', '##hage', '##al', 'tissues', 'was', 'determined', 'using', 'p', '##c', '##r', '(', 'q', '##rt', '-', 'p', '##c', '##r', ')', '.']tokens ['given', 'that', 'l', '##ys', '##op', '##hos', '##pha', '##ti', '##dic', 'acid', '(', 'l', '##pa', ')', 'and', 'the', 'te', '##tro', '##do', '##to', '##xin', '-', 'resistant', 'sodium', 'channel', 'na', '##v', '##1', 'are', 'both', 'involved', 'in', 'bone', 'cancer', 'pain', ',', 'the', 'present', 'study', 'was', 'designed', 'to', 'investigate', 'whether', 'cross', '##tal', '##k', 'between', 'the', 'l', '##pa', 'receptor', 'l', '##pa', '##1', '(', 'also', 'known', 'as', 'ed', '##g', '##2', ')', 'and', 'na', '##v', '##1', 'in', 'the'

['the', 'p', '##os', '##iti', '##vity', 'rates', 'of', 'mac', '##c', '##1', 'protein', 'were', '68', '%', '(', '41', '/', '60', ')', 'in', 'es', '##op', '##hage', '##al', 'car', '##cin', '##oma', 'tissue', 'and', 'there', 'were', 'significant', 'differences', 'from', 'those', 'in', 'neighboring', 'tissue', '(', '25', '(', '15', '/', '60', ')', ',', 'p', '<', '0', ')', '.']tokens ['background', 'this', 'study', 'aimed', 'to', 'explore', 'the', 'correlation', 'between', 'f', '##g', '##f', '##r', '##1', 'and', 'clinical', 'features', ',', 'including', 'survival', 'analysis', 'and', 'the', 'promotion', 'of', 'an', '##gio', '##genesis', 'by', 'fi', '##bro', '##blast', 'growth', 'factor', 'receptor', '1', '(', 'f', '##g', '##f', '##r', '##1', ')', 'and', 'vascular', 'end', '##oth', '##eli', '##al', 'growth', 'factor', 'receptor', '2', '(', 've', '##g', '##f', '##r', '##2', ')', '.'], tokens_id [101, 3582, 1142, 2025, 5850, 1106, 8664, 1103, 18741, 1206, 175, 1403, 2087, 1197, 1475, 1105, 730

tokens ['here', ',', 'we', 'describe', 'the', 'construction', 'and', 'characterization', 'of', 'at', '##ten', '##uated', 'l', '##1', '-', 'expressing', 's', '##hi', '##gel', '##la', 'vaccine', 'candidate', ',', 'by', 'fusion', 'of', 'l', '##1', 'l', '##1', 'into', 'the', 'auto', '##tra', '##ns', '##port', '##er', 'of', 's', '##hi', '##gel', '##la', 'son', '##ne', '##i', ',', 'i', '##cs', '##a', ',', 'an', 'essential', 'v', '##ir', '##ule', '##nce', 'factor', 'responsible', 'for', 'act', '##in', '-', 'based', 'm', '##ot', '##ility', '.'], tokens_id [101, 1303, 117, 1195, 5594, 1103, 2058, 1105, 27419, 1104, 1120, 5208, 13567, 181, 1475, 118, 14819, 188, 3031, 8863, 1742, 20034, 3234, 117, 1118, 11970, 1104, 181, 1475, 181, 1475, 1154, 1103, 12365, 4487, 2316, 4342, 1200, 1104, 188, 3031, 8863, 1742, 1488, 1673, 1182, 117, 178, 6063, 1161, 117, 1126, 6818, 191, 3161, 8722, 3633, 5318, 2784, 1111, 2496, 1394, 118, 1359, 182, 3329, 13378, 119, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

tokens_id is 128, tokens_mask is 128, segment_id is 128, pos is 128, pos2 is 128, pcnn_mask is 128tokens ['patients', 'and', 'methods', ':', 'we', 'examined', 'the', 'levels', 'of', 'mi', '##r', '-', '365', 'in', 'breast', 'cancer', 'tissue', ',', 'compared', 'to', 'the', 'paired', 'adjacent', 'non', '-', 'tumor', 'breast', 'tissue', 'from', 'the', 'patients', '.'], tokens_id [101, 4420, 1105, 4069, 131, 1195, 8600, 1103, 3001, 1104, 1940, 1197, 118, 21033, 1107, 7209, 4182, 7918, 117, 3402, 1106, 1103, 13185, 4903, 1664, 118, 14601, 7209, 7918, 1121, 1103, 4420, 119, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], pos1 is [0, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 

tokens ['we', 'identified', 'c', '##d', '##10', ',', 'a', 'metal', '##lop', '##rote', '##ase', '(', 'ne', '##p', '##rily', '##sin', ',', 'neutral', 'end', '##ope', '##pt', '##idas', '##e', ')', 'and', 'a', 'gene', 'that', 'is', 'specifically', 'induced', 'in', 'c', '##rc', 'cells', 'by', 'l', '##1', 'in', 'an', 'l', '##1', '/', 'h', '##21', '##0', '##q', 'mutation', '-', 'specific', 'manner', '.'], tokens_id [101, 1195, 3626, 172, 1181, 10424, 117, 170, 2720, 13200, 21020, 6530, 113, 24928, 1643, 11486, 10606, 117, 8795, 1322, 15622, 6451, 23358, 1162, 114, 1105, 170, 5565, 1115, 1110, 4418, 10645, 1107, 172, 19878, 3652, 1118, 181, 1475, 1107, 1126, 181, 1475, 120, 177, 18202, 1568, 4426, 17895, 118, 2747, 4758, 119, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], pos1 is [0, 88, 89, 90, 91, 92, 93, 94, 95, 

tokens ['g', '##eno', '##mic', '##ally', 'am', '##plified', 'fi', '##bro', '##blast', 'growth', 'factor', 'receptor', '1', '(', 'f', '##g', '##f', '##r', '##1', ')', 'is', 'an', 'on', '##co', '##genic', 'driver', 'in', 'defined', 'lung', 'cancer', 'subgroup', '##s', 'and', 'predict', '##s', 'se', '##ns', '##ibility', 'against', 'f', '##g', '##f', '##r', '##1', 'inhibitor', '##s', 'in', 'this', 'patient', 'co', '##hor', '##t', '.'], tokens_id [101, 176, 26601, 7257, 2716, 1821, 18580, 20497, 12725, 27184, 3213, 5318, 10814, 122, 113, 175, 1403, 2087, 1197, 1475, 114, 1110, 1126, 1113, 2528, 19438, 3445, 1107, 3393, 13093, 4182, 23470, 1116, 1105, 17163, 1116, 14516, 2316, 7706, 1222, 175, 1403, 2087, 1197, 1475, 27558, 1116, 1107, 1142, 5351, 1884, 13252, 1204, 119, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],

tokens ['methods', ':', 'real', '-', 'time', 'flu', '##ores', '##cence', 'quantitative', 'polymer', '##ase', 'chain', 'reaction', '(', 'p', '##c', '##r', ')', 'ass', '##ay', 'was', 'used', 'to', 'determine', 'the', 'expression', 'of', 'mi', '##rna', '-', '106', '##a', 'gene', 'in', 'es', '##op', '##hage', '##al', 'cancer', 'tissue', 'and', 'corresponding', 'normal', 'm', '##uc', '##osa', 'of', '81', 'cases', '.'], tokens_id [101, 4069, 131, 1842, 118, 1159, 23896, 12238, 14797, 25220, 21176, 6530, 4129, 3943, 113, 185, 1665, 1197, 114, 3919, 4164, 1108, 1215, 1106, 4959, 1103, 2838, 1104, 1940, 11782, 118, 9920, 1161, 5565, 1107, 13936, 4184, 19911, 1348, 4182, 7918, 1105, 7671, 2999, 182, 21977, 9275, 1104, 5615, 2740, 119, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], pos1 is [0, 94, 95, 96, 97, 98,

tokens ['the', 'induction', 'of', 's', '##mo', '##c', '-', '2', 'expression', 'in', 'l', '##1', 'l', '##1', '-', 'expressing', 'c', '##rc', 'cells', 'was', 'necessary', 'for', 'the', 'increase', 'in', 'cell', 'm', '##ot', '##ility', ',', 'proliferation', 'under', 'stress', 'and', 'liver', 'meta', '##sta', '##sis', 'conferred', 'by', 'l', '##1', '.'], tokens_id [101, 1103, 18293, 1104, 188, 3702, 1665, 118, 123, 2838, 1107, 181, 1475, 181, 1475, 118, 14819, 172, 19878, 3652, 1108, 3238, 1111, 1103, 2773, 1107, 2765, 182, 3329, 13378, 117, 23766, 1223, 6600, 1105, 11911, 27154, 8419, 4863, 16104, 1118, 181, 1475, 119, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], pos1 is [0, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139,

['clinical', '##ly', ',', 'the', 'presence', 'of', 'c', '##d', '##36', '+', 'meta', '##sta', '##sis', '-', 'in', '##iti', '##ating', 'cells', 'co', '##rrel', '##ates', 'with', 'a', 'poor', 'pro', '##gno', '##sis', 'for', 'numerous', 'types', 'of', 'car', '##cin', '##oma', '##s', ',', 'and', 'in', '##hibition', 'of', 'c', '##d', '##36', 'also', 'imp', '##air', '##s', 'meta', '##sta', '##sis', ',', 'at', 'least', 'in', 'human', 'me', '##lan', '##oma', '-', 'and', 'breast', 'cancer', '-', 'derived', 't', '##umour', '##s', '.']tokens_id is 128, tokens_mask is 128, segment_id is 128, pos is 128, pos2 is 128, pcnn_mask is 128['in', 'the', 'on', '##cology', 'field', ',', 'genetic', 'counsel', '##ling', 'and', 'gene', 'testing', 'are', 'focused', 'on', 'the', 'two', 'most', 'common', 'syndrome', '##s', 'in', 'f', '##ami', '##lial', 'cancer', ':', 'hereditary', 'breast', 'and', 'o', '##var', '##ian', 'cancer', 'syndrome', '(', 'h', '##bo', '##c', ')', 'and', 'hereditary', 'non', '-', 'p', '##ol

['together', ',', 'our', 'study', 'reveals', 'a', 'novel', 'mechanism', 'of', 'p', '##i', '##3', '##k', '-', 'a', '##kt', 'in', '##hibition', '-', 'mediated', 'feedback', 'regulation', 'and', 'may', 'identify', 'fox', '##o', 'as', 'a', 'novel', 'bio', '##mark', '##er', 'to', 's', '##tra', '##ti', '##fy', 'patients', 'with', 'r', '##cc', 'for', 'p', '##i', '##3', '##k', 'or', 'a', '##kt', 'inhibitor', 'treatment', ',', 'or', 'a', 'novel', 'therapeutic', 'target', 'to', 's', '##yne', '##rg', '##ize', 'with', 'p', '##i', '##3', '##k', '-', 'a', '##kt', 'in', '##hibition', 'in', 'r', '##cc', 'treatment', '.']tokens_id is 128, tokens_mask is 128, segment_id is 128, pos is 128, pos2 is 128, pcnn_mask is 128tokens ['implementation', 'of', 'a', 'standardized', 'm', '##ri', 'protocol', 'for', 're', '##tino', '##blast', '##oma', 'in', 'clinical', 'practice', 'may', 'benefit', 'children', 'worldwide', ',', 'especially', 'those', 'with', 'hereditary', 're', '##tino', '##blast', '##oma', ',', 'sinc

tokens_id is 128, tokens_mask is 128, segment_id is 128, pos is 128, pos2 is 128, pcnn_mask is 128['in', 'this', 'study', ',', 'we', 'demonstrated', 'the', 'effect', 'of', 'l', '##s', '##d', '##1', 'on', 'o', '##var', '##ian', 'cancer', 'cell', 'migration', 'and', 'the', 'regulatory', 'role', 'of', 'l', '##s', '##d', '##1', 'in', 'the', 'expression', 'of', 'em', '##t', 'markers', '.']tokens_id is 128, tokens_mask is 128, segment_id is 128, pos is 128, pos2 is 128, pcnn_mask is 128tokens ['in', 'our', 'study', ',', 'we', 'recruited', '139', 'patients', 'with', 'meta', '##static', 'pro', '##state', 'cancer', '(', 'm', '##p', '##ca', ')', 'who', 'received', 'trans', '##ure', '##th', '##ral', 're', '##section', 'of', 'the', 'pro', '##state', '(', 't', '##ur', '##p', ')', 'consecutive', '##ly', 'to', 'examine', 'whether', 't', '##im', '##3', 'expression', 'level', 'is', 'associated', 'with', 'overall', 'survival', '(', 'o', '##s', ')', 'in', 'm', '##p', '##ca', 'patients', '.'], tokens_id [

tokens_id is 128, tokens_mask is 128, segment_id is 128, pos is 128, pos2 is 128, pcnn_mask is 128tokens ['in', '##hibition', 'of', 's', '##tat', '##5', '##a', 'by', 'na', '##a', '##10', '##p', 'contributes', 'to', 'decreased', 'breast', 'cancer', 'meta', '##sta', '##sis', '.'], tokens_id [101, 1107, 16485, 1104, 188, 19756, 1571, 1161, 1118, 9468, 1161, 10424, 1643, 17705, 1106, 10558, 7209, 4182, 27154, 8419, 4863, 119, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], pos1 is [0, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0

tokens ['patients', 'and', 'methods', ':', 'we', 'examined', 'the', 'levels', 'of', 'mi', '##r', '-', '365', 'in', 'breast', 'cancer', 'tissue', ',', 'compared', 'to', 'the', 'paired', 'adjacent', 'non', '-', 'tumor', 'breast', 'tissue', 'from', 'the', 'patients', '.'], tokens_id [101, 4420, 1105, 4069, 131, 1195, 8600, 1103, 3001, 1104, 1940, 1197, 118, 21033, 1107, 7209, 4182, 7918, 117, 3402, 1106, 1103, 13185, 4903, 1664, 118, 14601, 7209, 7918, 1121, 1103, 4420, 119, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], pos1 is [0, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 

tokens_id is 128, tokens_mask is 128, segment_id is 128, pos is 128, pos2 is 128, pcnn_mask is 128tokens ['using', 'a', 'mouse', 'model', 'of', 'lung', 'sq', '##ua', '##mous', 'cell', 'car', '##cin', '##oma', '(', 's', '##cc', ')', ',', 'we', 'performed', 'trans', '##cript', '##ome', 'se', '##quencing', '(', 'r', '##na', '-', 'se', '##q', ')', 'to', 'profile', 'br', '##on', '##chia', '##l', 'air', '##way', 'gene', 'expression', 'and', 'found', 'activation', 'of', 'the', 'p', '##i', '##3', '##k', 'and', 'my', '##c', 'signaling', 'networks', 'in', 'c', '##yt', '##ological', '##ly', 'normal', 'br', '##on', '##chia', '##l', 'air', '##way', 'e', '##pit', '##hel', '##ial', 'cells', 'of', 'mice', 'with', 'pre', '##ne', '##op', '##astic', 'lung', 's', '##cc', 'lesions', ',', 'which', 'was', 'reversed', 'by', 'treatment', 'with', 'the', 'p', '##i', '##3', '##k', 'inhibitor', 'x', '##l', '-', '147', 'and', 'p', '##io', '##gli', '##ta', '##zone', ',', 'respectively', '.'], tokens_id [101, 1606, 1

tokens_id is 128, tokens_mask is 128, segment_id is 128, pos is 128, pos2 is 128, pcnn_mask is 128['these', 'genes', 'were', 'p', '##rka', '##r', '##2', '##a', ',', 'p', '##rka', '##r', '##2', '##b', ',', 'c', '##y', '##cs', ',', 'b', '##c', '##l', '##2', ',', 'bi', '##rc', '##3', ',', 'd', '##ff', '##b', ',', 'ca', '##sp', '##6', ',', 'c', '##d', '##k', '##6', ',', 'cc', '##ne', '##1', ',', 's', '##te', '##ap', '##3', ',', 'm', '##c', '##m', '##7', ',', 'or', '##c', '##2', ',', 'or', '##c', '##5', ',', 'an', '##ap', '##c', '##1', ',', 'and', 'an', '##ap', '##c', '##7', ',', 'c', '##d', '##c', '##7', ',', 'c', '##d', '##c', '##27', ',', 'and', 's', '##k', '##p', '##1', '.']['previous', 'studies', 'showed', 'that', 'fi', '##bro', '##blast', 'growth', 'factor', 'receptor', '1', '(', 'f', '##g', '##f', '##r', '##1', ')', 'is', 'an', 'attractive', 'target', 'in', 'gas', '##tric', 'cancer', 'therapy', '.']tokens_id is 128, tokens_mask is 128, segment_id is 128, pos is 128, pos2 is 128, pcnn

tokens ['ma', '##mm', '##ographic', 'density', 'is', 'an', 'important', 'breast', 'cancer', 'risk', 'factor', ',', 'although', 'it', 'is', 'not', 'clear', 'whether', 'the', 'association', 'differs', 'across', 'breast', 'cancer', 'tumor', 'sub', '##type', '##s', '.'], tokens_id [101, 12477, 6262, 9597, 3476, 1110, 1126, 1696, 7209, 4182, 3187, 5318, 117, 1780, 1122, 1110, 1136, 2330, 2480, 1103, 3852, 13242, 1506, 7209, 4182, 14601, 4841, 15177, 1116, 119, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], pos1 is [0, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

tokens ['the', 'aim', 'of', 'this', 'study', 'was', 'to', 'assess', 'correlation', 'between', 'c', '##t', '##cs', 'and', 'expression', 'of', 'em', '##t', 'transcription', 'factors', 'twist', '##1', 'and', 's', '##lug', 'in', 'breast', 'tumor', 'tissue', '.'], tokens_id [101, 1103, 6457, 1104, 1142, 2025, 1108, 1106, 15187, 18741, 1206, 172, 1204, 6063, 1105, 2838, 1104, 9712, 1204, 15416, 5320, 11079, 1475, 1105, 188, 16693, 1107, 7209, 14601, 7918, 119, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], pos1 is [0, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0

tokens ['further', '##more', ',', 'we', 'show', 'that', 'p', '##rm', '##t', '##7', 'induce', '##s', 'the', 'expression', 'of', 'matrix', 'metal', '##lop', '##rote', '##inas', '##e', '9', '(', 'mm', '##p', '##9', ')', ',', 'a', 'well', '-', 'known', 'media', '##tor', 'of', 'breast', 'cancer', 'meta', '##sta', '##sis', '.'], tokens_id [101, 1748, 4982, 117, 1195, 1437, 1115, 185, 9019, 1204, 1559, 21497, 1116, 1103, 2838, 1104, 8952, 2720, 13200, 21020, 16924, 1162, 130, 113, 2608, 1643, 1580, 114, 117, 170, 1218, 118, 1227, 2394, 2772, 1104, 7209, 4182, 27154, 8419, 4863, 119, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], pos1 is [0, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121

['the', 'influence', 'of', 'diet', '##ary', 'fat', 'on', 'breast', 't', '##umour', 'growth', '(', '1', ')', 'and', ',', 'more', 'recently', ',', 'on', 'treatment', 'outcomes', ',', '(', '2', ',', '3', ')', 'suggests', 'an', 'important', 'role', 'for', 'diet', '##ary', 'advice', 'in', 'the', 'future', 'health', 'of', 'breast', 'cancer', 'patients', '.']['unlike', 'secret', '##ory', 'op', '##n', '(', 'so', '##p', '##n', ')', ',', 'which', 'trigger', '##s', 'the', 'e', '##pit', '##hel', '##ial', '-', 'me', '##sen', '##chy', '##mal', 'transition', '(', 'em', '##t', ')', 'to', 'initiate', 'cancer', 'meta', '##sta', '##sis', ',', 'in', '##tra', '##cellular', '/', 'nuclear', 'op', '##n', '(', 'i', '##op', '##n', ')', 'induce', '##s', 'the', 'me', '##sen', '##chy', '##mal', '-', 'e', '##pit', '##hel', '##ial', 'transition', '(', 'met', ')', 'to', 'facilitate', 'the', 'formation', 'of', 'meta', '##sta', '##ses', '.']

tokens_id is 128, tokens_mask is 128, segment_id is 128, pos is 128, pos2 is 

tokens_id is 128, tokens_mask is 128, segment_id is 128, pos is 128, pos2 is 128, pcnn_mask is 128tokens_id is 128, tokens_mask is 128, segment_id is 128, pos is 128, pos2 is 128, pcnn_mask is 128tokens ['expression', 'of', 'er', '(', 'es', '##r', '##1', ')', ',', 'p', '##r', '(', 'p', '##g', '##r', ')', ',', 'a', '##r', ',', 'her', '##2', '(', 'er', '##bb', '##2', ')', 'and', 'k', '##i', '-', '67', '(', 'm', '##ki', '##6', '##7', ')', ')', 'in', 'breast', 'cancer', 'tissue', 'tissue', 'from', '102', 'm', '##b', '##c', 'patients', 'were', 'determined', 'using', 'im', '##mu', '##no', '##his', '##to', '##chemical', 'analysis', '.'], tokens_id [101, 2838, 1104, 14044, 113, 13936, 1197, 1475, 114, 117, 185, 1197, 113, 185, 1403, 1197, 114, 117, 170, 1197, 117, 1123, 1477, 113, 14044, 13834, 1477, 114, 1105, 180, 1182, 118, 5486, 113, 182, 2293, 1545, 1559, 114, 114, 1107, 7209, 4182, 7918, 7918, 1121, 9081, 182, 1830, 1665, 4420, 1127, 3552, 1606, 13280, 13601, 2728, 27516, 2430, 16710, 36

tokens ['we', 'studied', '51', 'fresh', 'frozen', 'tissue', 'samples', 'from', 'patients', 'with', 'localized', 'pro', '##state', 'cancer', '(', 'p', '##ca', ')', 'treated', 'by', 'radical', 'pro', '##state', '##ct', '##omy', 'and', 'three', 'meta', '##static', 'pro', '##state', 'cancer', 'cell', 'lines', '(', 'l', '##nc', '##ap', ',', 'du', '##14', '##5', ',', 'p', '##c', '##3', ')', '.'], tokens_id [101, 1195, 2376, 4062, 4489, 7958, 7918, 8025, 1121, 4420, 1114, 25813, 5250, 19596, 4182, 113, 185, 2599, 114, 5165, 1118, 8276, 5250, 19596, 5822, 18574, 1105, 1210, 27154, 27372, 5250, 19596, 4182, 2765, 2442, 113, 181, 26405, 11478, 117, 3840, 17175, 1571, 117, 185, 1665, 1495, 114, 119, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], pos1 is [0, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 

tokens ['the', 'contributions', 'of', 'e', '##g', '##f', 'family', 'l', '##igan', '##ds', 'and', 'their', 'receptors', 'to', 'breast', 'cancer', 'are', 'complex', ',', 'and', 'the', 'specific', 'mechanisms', 'through', 'which', 'different', 'l', '##igan', '##ds', 'regulate', 'breast', 'tumor', 'initiation', 'and', 'growth', 'are', 'not', 'well', '-', 'defined', '.'], tokens_id [101, 1103, 5353, 1104, 174, 1403, 2087, 1266, 181, 10888, 3680, 1105, 1147, 14392, 1106, 7209, 4182, 1132, 2703, 117, 1105, 1103, 2747, 10748, 1194, 1134, 1472, 181, 10888, 3680, 16146, 7209, 14601, 21252, 1105, 3213, 1132, 1136, 1218, 118, 3393, 119, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], pos1 is [0, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132,

tokens ['the', 'mi', '##r', '-', '155', 'family', 'is', 'not', 'only', 'involved', 'in', 'a', 'diversity', 'of', 'cancer', '##s', ',', 'but', 'also', 'as', 'a', 'regulator', 'of', 'the', 'immune', 'system', '.'], tokens_id [101, 1103, 1940, 1197, 118, 14691, 1266, 1110, 1136, 1178, 2017, 1107, 170, 9531, 1104, 4182, 1116, 117, 1133, 1145, 1112, 170, 27335, 1104, 1103, 11650, 1449, 119, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], pos1 is [0, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

tokens ['in', 'human', 'liver', 'cancer', '##s', ',', 'onto', '##logy', 'analysis', 'of', 'gene', 'set', 'en', '##rich', '##ment', 'analysis', '(', 'g', '##sea', ')', '-', 'defined', 'w', '##nt', 'signature', 'genes', 'indicates', 'that', 'w', '##nt', 'signaling', 'is', 'significantly', 'induced', 'in', 'tumor', 'samples', 'compared', 'with', 'healthy', 'liver', '##s', '.'], tokens_id [101, 1107, 1769, 11911, 4182, 1116, 117, 2135, 6360, 3622, 1104, 5565, 1383, 4035, 10886, 1880, 3622, 113, 176, 19885, 114, 118, 3393, 192, 2227, 8250, 9077, 6653, 1115, 192, 2227, 16085, 1110, 5409, 10645, 1107, 14601, 8025, 3402, 1114, 8071, 11911, 1116, 119, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], pos1 is [0, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 14

tokens_id is 128, tokens_mask is 128, segment_id is 128, pos is 128, pos2 is 128, pcnn_mask is 128tokens ['western', 'b', '##lot', 'analyses', 'of', 'human', 'liver', 'cancer', 'specimens', 'showed', 'that', 'ca', '##m', '##ki', '##i', 'was', 'h', '##yper', '##ph', '##os', '##ph', '##ory', '##lated', 'in', 'liver', 'tumors', 'compared', 'with', 'the', 'paired', 'per', '##it', '##um', '##or', 'tissues', ',', 'which', 'supports', 'a', 'role', 'of', 'ca', '##m', '##ki', '##i', 'in', 'promoting', 'human', 'liver', 'cancer', 'progression', 'and', 'the', 'potential', 'clinical', 'use', 'of', 'be', '##rb', '##amine', 'for', 'liver', 'cancer', 'the', '##rap', '##ies', '.'], tokens_id [101, 2466, 171, 7841, 18460, 1104, 1769, 11911, 4182, 9985, 2799, 1115, 11019, 1306, 2293, 1182, 1108, 177, 24312, 7880, 2155, 7880, 4649, 6951, 1107, 11911, 24309, 3402, 1114, 1103, 13185, 1679, 2875, 1818, 1766, 14749, 117, 1134, 6253, 170, 1648, 1104, 11019, 1306, 2293, 1182, 1107, 7495, 1769, 11911, 4182, 161

tokens_id is 128, tokens_mask is 128, segment_id is 128, pos is 128, pos2 is 128, pcnn_mask is 128['conclusion', ':', 'distinct', 'genetic', 'profiles', 'and', 'der', '##eg', '##ulation', 'of', 'different', 'canonical', 'pathways', 'apply', 'to', 'l', '##ync', '##h', 'syndrome', 'and', 'f', '##cc', '##t', '##x', 'and', 'key', 'targets', 'here', '##in', 'may', 'be', 'relevant', 'to', 'pursue', 'for', 'refined', 'diagnostic', 'and', 'therapeutic', 'strategies', 'in', 'hereditary', 'color', '##ec', '##tal', 'cancer', '.']

tokens ['therefore', ',', 'the', 'present', 'study', 'concluded', 'that', 'me', '##lat', '##oni', '##n', 'activate', '##s', 'kiss', '##1', 'production', 'in', 'meta', '##static', 'breast', 'cancer', 'cells', ',', 'suggesting', 'that', 'me', '##lat', '##oni', '##n', 'activation', 'of', 'kiss', '##1', 'production', 'may', 'regulate', 'the', 'process', 'of', 'breast', 'cancer', 'meta', '##sta', '##sis', '.'], tokens_id [101, 3335, 117, 1103, 1675, 2025, 4803, 1115, 1143, 1

tokens ['micro', '##rna', '-', '145', 'media', '##tes', 'the', 'inhibitor', '##y', 'effect', 'of', 'ad', '##ip', '##ose', 'tissue', '-', 'derived', 's', '##trom', '##al', 'cells', 'on', 'pro', '##state', 'cancer', '.'], tokens_id [101, 17599, 11782, 118, 14151, 2394, 3052, 1103, 27558, 1183, 2629, 1104, 8050, 9717, 6787, 7918, 118, 4408, 188, 24655, 1348, 3652, 1113, 5250, 19596, 4182, 119, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], pos1 is [0, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

tokens ['more', 'important', ',', 'r', '##n', '##p', '##c', '##1', 'was', 'frequently', 'silence', '##d', 'in', 'breast', 'cancer', 'tissue', 'compared', 'to', 'adjacent', 'normal', 'breast', 'tissue', '.'], tokens_id [101, 1167, 1696, 117, 187, 1179, 1643, 1665, 1475, 1108, 3933, 3747, 1181, 1107, 7209, 4182, 7918, 3402, 1106, 4903, 2999, 7209, 7918, 119, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], pos1 is [0, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0

tokens ['here', ',', 'we', 'report', 'a', 'strong', 'association', 'of', 'the', 'number', 'of', 'm', '##uta', '##ted', 'genes', 'with', 'im', '##mu', '##no', '##his', '##to', '##chemical', 'and', 'p', '##am', '##50', 'sub', '##type', '##s', 'and', 't', '##umour', 'grade', 'in', 'breast', 'cancer', '.'], tokens_id [101, 1303, 117, 1195, 2592, 170, 2012, 3852, 1104, 1103, 1295, 1104, 182, 15012, 1906, 9077, 1114, 13280, 13601, 2728, 27516, 2430, 16710, 1105, 185, 2312, 11049, 4841, 15177, 1116, 1105, 189, 27226, 3654, 1107, 7209, 4182, 119, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], pos1 is [0, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 1

tokens_id is 128, tokens_mask is 128, segment_id is 128, pos is 128, pos2 is 128, pcnn_mask is 128tokens_id is 128, tokens_mask is 128, segment_id is 128, pos is 128, pos2 is 128, pcnn_mask is 128

tokens_id is 128, tokens_mask is 128, segment_id is 128, pos is 128, pos2 is 128, pcnn_mask is 128tokens ['the', 'nuclear', 'expression', 'of', 'est', '##rogen', 'receptor', '(', 'er', ')', 'and', 'pro', '##ges', '##tero', '##ne', 'receptor', '(', 'p', '##r', ')', 'in', 'normal', 'human', 'breast', 'tissue', 'is', 'maintained', 'in', 'ma', '##li', '##gnant', 'tissue', 'as', 'well', '.'], tokens_id [101, 1103, 4272, 2838, 1104, 12890, 26767, 10814, 113, 14044, 114, 1105, 5250, 7562, 25710, 1673, 10814, 113, 185, 1197, 114, 1107, 2999, 1769, 7209, 7918, 1110, 4441, 1107, 12477, 2646, 15454, 7918, 1112, 1218, 119, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0

tokens ['western', 'b', '##lot', 'analyses', 'of', 'human', 'liver', 'cancer', 'specimens', 'showed', 'that', 'ca', '##m', '##ki', '##i', 'was', 'h', '##yper', '##ph', '##os', '##ph', '##ory', '##lated', 'in', 'liver', 'tumors', 'compared', 'with', 'the', 'paired', 'per', '##it', '##um', '##or', 'tissues', ',', 'which', 'supports', 'a', 'role', 'of', 'ca', '##m', '##ki', '##i', 'in', 'promoting', 'human', 'liver', 'cancer', 'progression', 'and', 'the', 'potential', 'clinical', 'use', 'of', 'be', '##rb', '##amine', 'for', 'liver', 'cancer', 'the', '##rap', '##ies', '.'], tokens_id [101, 2466, 171, 7841, 18460, 1104, 1769, 11911, 4182, 9985, 2799, 1115, 11019, 1306, 2293, 1182, 1108, 177, 24312, 7880, 2155, 7880, 4649, 6951, 1107, 11911, 24309, 3402, 1114, 1103, 13185, 1679, 2875, 1818, 1766, 14749, 117, 1134, 6253, 170, 1648, 1104, 11019, 1306, 2293, 1182, 1107, 7495, 1769, 11911, 4182, 16147, 1105, 1103, 3209, 7300, 1329, 1104, 1129, 26281, 19577, 1111, 11911, 4182, 1103, 14543, 1905, 

['loss', '/', 'gain', 'of', 'function', 'ass', '##ays', 'performed', 'in', 'population', '-', 'specific', 'pro', '##state', 'cancer', 'cell', 'lines', 'confirmed', 'mi', '##r', '-', '133', '##a', '/', 'm', '##c', '##l', '##1', ',', 'mi', '##r', '-', '51', '##3', '##c', '/', 's', '##tat', '##1', ',', 'mi', '##r', '-', '96', '/', 'fox', '##o', '##3', '##a', ',', 'mi', '##r', '-', '145', '/', 'it', '##p', '##r', '##2', ',', 'and', 'mi', '##r', '-', '34', '##a', '/', 'pp', '##p', '##2', '##r', '##2', '##a', 'as', 'critical', 'mi', '##rna', '-', 'm', '##rna', 'pairing', '##s', 'driving', 'on', '##co', '##genesis', '.']tokens_id is 128, tokens_mask is 128, segment_id is 128, pos is 128, pos2 is 128, pcnn_mask is 128tokens ['the', 'expression', 'levels', 'of', 'the', 'mi', '##rna', '##s', 'and', 'f', '##hit', 'were', 'down', '##re', '##gu', '##lated', 'in', 'breast', 'cancer', 'tissue', '.'], tokens_id [101, 1103, 2838, 3001, 1104, 1103, 1940, 11782, 1116, 1105, 175, 17481, 1127, 1205, 1874, 

tokens ['using', 'advanced', 'statistical', 'methods', ',', 'we', 'found', 'that', 'expression', 'levels', 'of', 'several', 'of', 'nuclear', 'transport', 'genes', 'including', 'x', '##po', '##1', 'were', 'associated', 'with', 'poor', 'survival', 'and', 'predicted', 're', '##cu', '##rrence', 'of', 'ta', '##mo', '##xi', '##fen', '-', 'treated', 'breast', 'tumors', 'in', 'human', 'breast', 'cancer', 'gene', 'expression', 'data', 'sets', '.'], tokens_id [101, 1606, 3682, 11435, 4069, 117, 1195, 1276, 1115, 2838, 3001, 1104, 1317, 1104, 4272, 3936, 9077, 1259, 193, 5674, 1475, 1127, 2628, 1114, 2869, 8115, 1105, 10035, 1231, 10182, 21629, 1104, 27629, 3702, 8745, 13488, 118, 5165, 7209, 24309, 1107, 1769, 7209, 4182, 5565, 2838, 2233, 3741, 119, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], pos1 is [

tokens ['the', 'aim', 'of', 'this', 'study', 'was', 'to', 'character', '##ize', 'the', 'mechanisms', 'by', 'which', 'mi', '##r', '-', '145', 'der', '##eg', '##ulation', 'contribute', 'to', 'pro', '##state', 'cancer', 'progression', '.'], tokens_id [101, 1103, 6457, 1104, 1142, 2025, 1108, 1106, 1959, 3708, 1103, 10748, 1118, 1134, 1940, 1197, 118, 14151, 4167, 12606, 6856, 8681, 1106, 5250, 19596, 4182, 16147, 119, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], pos1 is [0, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

['we', 'examined', 'the', 'association', 'of', 'r', '##s', '##20', '##46', '##21', '##0', 'and', 'its', 'six', 'link', '##age', 'di', '##se', '##qui', '##li', '##bri', '##um', 's', '##n', '##ps', 'with', 'clinic', '##op', '##ath', '##ological', 'characteristics', ',', 'pro', '##gno', '##sis', ',', 'and', 'gene', 'expression', 'levels', 'of', 'es', '##r', '##1', 'and', 'the', 'c', '##6', '##orf', '##s', '(', 'c', '##6', '##orf', '##9', '##7', ':', 'cc', '##d', '##c', '##17', '##0', ',', 'c', '##6', '##orf', '##21', '##1', ',', 'c', '##6', '##orf', '##9', '##6', ':', 'r', '##m', '##nd', '##1', ')', 'in', '34', '##4', 'breast', 'cancer', 'tissue', 'samples', 'and', '253', 'corresponding', 'samples', 'of', 'adjacent', 'normal', 'tissue', '.']tokens_id is 128, tokens_mask is 128, segment_id is 128, pos is 128, pos2 is 128, pcnn_mask is 128tokens ['methods', ':', 'the', 'liver', 'cancer', 'cell', 'line', 's', '##m', '##cc', '-', '77', '##21', 'cells', 'and', 'the', 'normal', 'liver', 'cell',

tokens ['we', 'present', 'a', 'novel', 'tissue', 'sampling', 'simulation', 'model', 'and', 'demonstrate', 'its', 'application', 'on', 'k', '##i', '##6', '##7', 'assessment', 'in', 'breast', 'cancer', 'tissue', 'taking', 'in', '##tra', '##tum', '##oral', 'he', '##tero', '##gene', '##ity', 'into', 'account', '.'], tokens_id [101, 1195, 1675, 170, 2281, 7918, 18200, 14314, 2235, 1105, 10541, 1157, 4048, 1113, 180, 1182, 1545, 1559, 8670, 1107, 7209, 4182, 7918, 1781, 1107, 4487, 8928, 17536, 1119, 25710, 27054, 1785, 1154, 3300, 119, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], pos1 is [0, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142,

['fifty', 'one', 'patients', 'were', 'stage', 'i', '-', 'ii', 'and', '80', 'were', 'stage', 'ii', '##i', '-', 'i', '##v', '.']tokens ['excessive', 'il', '-', '6', 'has', 'been', 'demonstrated', 'in', 'primary', 'breast', 'tumors', 'and', 'breast', 'cancer', 'patient', 'se', '##ra', 'and', 'is', 'associated', 'with', 'poor', 'clinical', 'outcomes', 'in', 'breast', 'cancer', '.'], tokens_id [101, 12177, 14596, 118, 127, 1144, 1151, 7160, 1107, 2425, 7209, 24309, 1105, 7209, 4182, 5351, 14516, 1611, 1105, 1110, 2628, 1114, 2869, 7300, 13950, 1107, 7209, 4182, 119, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], pos1 is [0, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 

tokens ['further', ',', 'we', 'studied', 'the', 'expression', 'of', 'r', '##n', '##p', '##c', '##1', 'in', 'breast', 'breast', 'cancer', 'tissue', 'and', 'adjacent', 'normal', 'breast', 'tissue', 'by', 'quantitative', 'r', '##t', '-', 'p', '##c', '##r', '(', 'q', '##rt', '-', 'p', '##c', '##r', ')', 'and', 'western', 'b', '##lot', '.'], tokens_id [101, 1748, 117, 1195, 2376, 1103, 2838, 1104, 187, 1179, 1643, 1665, 1475, 1107, 7209, 7209, 4182, 7918, 1105, 4903, 2999, 7209, 7918, 1118, 25220, 187, 1204, 118, 185, 1665, 1197, 113, 186, 3740, 118, 185, 1665, 1197, 114, 1105, 2466, 171, 7841, 119, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], pos1 is [0, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 1

['the', 'first', 'stage', 'examined', '72', 'cases', 'of', 'invasive', 'duct', '##al', 'car', '##cin', '##oma', 'and', 'a', '##xi', '##llar', '##y', 'l', '##ymph', 'node', 'tissue', ',', '50', 'cases', 'of', 'breast', 'fi', '##bro', '##ade', '##no', '##ma', 'tissue', ',', 'and', '40', 'cases', 'of', 'normal', 'breast', 'tissue', '.']


['ca', '##usal', 'role', 'of', 'm', '##c', '##py', '##v', 'for', 'this', 'rare', 'and', 'aggressive', 'skin', 'cancer', 'is', 'suggested', 'by', 'mon', '##oc', '##lon', '##al', 'integration', 'and', 't', '##run', '##cation', 'of', 'large', 't', '(', 'l', '##t', ')', 'viral', 'anti', '##gen', 'in', 'm', '##cc', 'cells', '.']

tokens_id is 128, tokens_mask is 128, segment_id is 128, pos is 128, pos2 is 128, pcnn_mask is 128['the', 'positive', 'expression', 'rate', 'of', 'su', '##s', '##d', '##3', 'protein', 'was', '78', '%', 'in', 'breast', 'cancer', ',', 'higher', 'than', '2', '%', 'in', 'adjacent', 'normal', 'breast', 'tissue', 'breast', 'tissue', '(', '

tokens_id is 128, tokens_mask is 128, segment_id is 128, pos is 128, pos2 is 128, pcnn_mask is 128tokens_id is 128, tokens_mask is 128, segment_id is 128, pos is 128, pos2 is 128, pcnn_mask is 128['in', 'the', 'present', 'study', ',', 'a', '##b', '##ce', '##1', 'expression', 'was', 'assessed', 'in', 'breast', 'cancer', 'tissue', 'and', 'adjacent', 'normal', 'breast', 'tissue', 'using', 'im', '##mu', '##no', '##his', '##to', '##chemistry', '.']
['me', '##rk', '##el', 'cell', 'car', '##cin', '##oma', '(', 'm', '##cc', ')', 'is', 'a', 'rare', 'but', 'aggressive', 'ne', '##uro', '##end', '##oc', '##rine', 'car', '##cin', '##oma', 'of', 'the', 'skin', '.']

['the', 'absence', 'of', 'g', '##ilt', 'expression', 'increased', 'significantly', 'from', '2', '%', '(', '2', '/', '99', ')', 'in', 'non', '##can', '##cer', '##ous', 'breast', 'tissues', 'to', '15', '%', '(', '34', '/', '218', ')', 'in', 'breast', 'cancer', 'tissues', '(', 'p', '<', '0', ')', '.']


tokens ['the', 'absence', 'of', 'g', 

tokens ['these', 'findings', 'highlight', 'the', 'context', '-', 'dependent', 'effects', 'of', 'mi', '##r', '-', '200', 'in', 'breast', 'breast', 'cancer', 'meta', '##sta', '##sis', 'and', 'demonstrate', 'the', 'existence', 'of', 'a', 'm', '##oes', '##in', '-', 'dependent', 'pathway', ',', 'distinct', 'from', 'the', 'z', '##eb', '##1', '-', 'e', '-', 'ca', '##dh', '##eri', '##n', 'axis', ',', 'through', 'which', 'mi', '##r', '-', '200', 'can', 'regulate', 't', '##umour', 'cell', 'plastic', '##ity', 'and', 'meta', '##sta', '##sis', '.'], tokens_id [101, 1292, 9505, 13426, 1103, 5618, 118, 7449, 3154, 1104, 1940, 1197, 118, 2363, 1107, 7209, 7209, 4182, 27154, 8419, 4863, 1105, 10541, 1103, 3796, 1104, 170, 182, 19270, 1394, 118, 7449, 13548, 117, 4966, 1121, 1103, 195, 15581, 1475, 118, 174, 118, 11019, 17868, 9866, 1179, 9840, 117, 1194, 1134, 1940, 1197, 118, 2363, 1169, 16146, 189, 27226, 2765, 5828, 1785, 1105, 27154, 8419, 4863, 119, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0

tokens ['its', 'expression', 'was', 'higher', 'in', 'breast', 'tumor', 'tissue', 'than', 'normal', 'tissue', '(', 'p', '=', '1', '##x', '##10', '-', '4', ')', ',', 'and', 'its', 'expression', 'was', 'significantly', 'higher', 'in', 'her', '##2', 'positive', 'than', 'her', '##2', 'negative', 'breast', 'tumors', 'in', 'all', 'four', 'co', '##hor', '##ts', 'analyzed', '.'], tokens_id [101, 1157, 2838, 1108, 2299, 1107, 7209, 14601, 7918, 1190, 2999, 7918, 113, 185, 134, 122, 1775, 10424, 118, 125, 114, 117, 1105, 1157, 2838, 1108, 5409, 2299, 1107, 1123, 1477, 3112, 1190, 1123, 1477, 4366, 7209, 24309, 1107, 1155, 1300, 1884, 13252, 2145, 17689, 119, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], pos1 is [0, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139

['diabetes', 'me', '##lli', '##tus', 'is', 'one', 'of', 'the', 'biggest', 'public', 'health', 'concerns', 'worldwide', ',', 'which', 'includes', 'type', '1', 'diabetes', 'me', '##lli', '##tus', ',', 'type', '2', 'diabetes', 'me', '##lli', '##tus', ',', 'g', '##esta', '##tional', 'diabetes', 'me', '##lli', '##tus', ',', 'and', 'other', 'rare', 'forms', 'of', 'diabetes', 'me', '##lli', '##tus', '.']tokens_id is 128, tokens_mask is 128, segment_id is 128, pos is 128, pos2 is 128, pcnn_mask is 128['overall', ',', 'our', 'study', 'establishes', 'the', 'c', '##d', '##k', '##4', '/', '6', '-', 'dub', '##3', 'axis', 'as', 'an', 'important', 'regulatory', 'mechanism', 'of', 'b', 're', '##ast', 'cancer', 'meta', '##sta', '##sis', 'and', 'provides', 'a', 'rational', '##e', 'for', 'potential', 'therapeutic', 'interventions', 'in', 'the', 'treatment', 'of', 'b', 're', '##ast', 'cancer', 'meta', '##sta', '##sis', '.']tokens ['background', '&', 'aims', ':', 'liver', 'fi', '##bro', '##genesis', '-', '

tokens_id is 128, tokens_mask is 128, segment_id is 128, pos is 128, pos2 is 128, pcnn_mask is 128tokens ['diabetes', 'me', '##lli', '##tus', 'is', 'one', 'of', 'the', 'biggest', 'public', 'health', 'concerns', 'worldwide', ',', 'which', 'includes', 'type', '1', 'diabetes', 'me', '##lli', '##tus', ',', 'type', '2', 'diabetes', 'me', '##lli', '##tus', ',', 'g', '##esta', '##tional', 'diabetes', 'me', '##lli', '##tus', ',', 'and', 'other', 'rare', 'forms', 'of', 'diabetes', 'me', '##lli', '##tus', '.'], tokens_id [101, 17972, 1143, 6473, 4814, 1110, 1141, 1104, 1103, 4583, 1470, 2332, 5365, 4529, 117, 1134, 2075, 2076, 122, 17972, 1143, 6473, 4814, 117, 2076, 123, 17972, 1143, 6473, 4814, 117, 176, 16144, 15937, 17972, 1143, 6473, 4814, 117, 1105, 1168, 4054, 2769, 1104, 17972, 1143, 6473, 4814, 119, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

['the', 'studies', 'which', 'were', 'published', 'in', 'en', '##gli', '##sh', 'investigated', 'the', 'risk', 'of', 'development', 'of', 'd', '##m', 'in', 'women', 'with', 'previous', 'history', 'of', 'g', '##d', '##m', ',', 'reported', 'outcome', 'according', 'to', 'ethnicity', 'with', 'specific', 'criteria', 'of', 'reporting', 'd', '##m', 'and', 'g', '##d', '##m', ',', 'reported', 'development', 'of', 'diabetes', 'after', '6', 'month', 'of', 'delivery', 'in', 'women', 'with', 'g', '##d', '##m', 'during', 'pregnancy', 'were', 'included', '.']['f', '##b', '##ln', '-', '4', 'and', 'b', '##c', '##rp', 'genes', 'as', 'two', 'pro', '##gno', '##stic', 'markers', 'are', 'down', '##re', '##gu', '##lated', 'in', 'breast', 'cancer', 'tissue', 'tissue', '.']['recently', ',', 'micro', '##rna', '##s', '(', 'mi', '##rna', '##s', ')', ',', 'small', 'non', '-', 'coding', 'r', '##nas', 'that', 'regulate', 'gene', 'expression', 'post', '-', 'transcription', '##ally', ',', 'have', 'been', 'shown', 'to', 

tokens ['recently', ',', 'micro', '##rna', '##s', '(', 'mi', '##rna', '##s', ')', ',', 'small', 'non', '-', 'coding', 'r', '##nas', 'that', 'regulate', 'gene', 'expression', 'post', '-', 'transcription', '##ally', ',', 'have', 'been', 'shown', 'to', 'be', 'involved', 'in', 'breast', 'cancer', 'meta', '##sta', '##sis', '.'], tokens_id [101, 3055, 117, 17599, 11782, 1116, 113, 1940, 11782, 1116, 114, 117, 1353, 1664, 118, 19350, 187, 13146, 1115, 16146, 5565, 2838, 2112, 118, 15416, 2716, 117, 1138, 1151, 2602, 1106, 1129, 2017, 1107, 7209, 4182, 27154, 8419, 4863, 119, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], pos1 is [0, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 1

['here', ',', 'we', 'found', 'that', 'mi', '##r', '-', '50', '##3', '-', '3', '##p', 'was', 'over', '##ex', '##pressed', 'in', 'breast', 'cancer', 'tissue', 'and', 'plasma', 'compared', 'with', 'adjacent', 'normal', 'breast', 'tissue', 'and', 'with', 'plasma', 'from', 'healthy', 'individuals', '.']tokens ['recently', ',', 'specific', 'loss', 'of', 'e', '-', 'ca', '##dh', '##eri', '##n', 'in', 'liver', 'e', '##pit', '##hel', '##ial', 'cells', 'has', 'been', 'shown', 'to', 'favor', 'per', '##ip', '##ort', '##al', 'fi', '##bro', '##sis', ',', 'per', '##ip', '##ort', '##al', 'inflammation', 'and', 'liver', 'cancer', 'progression', ',', 'suggesting', 'that', 'e', '-', 'ca', '##dh', '##eri', '##n', 'is', 'a', 'central', 'liver', 'protector', '.'], tokens_id [101, 3055, 117, 2747, 2445, 1104, 174, 118, 11019, 17868, 9866, 1179, 1107, 11911, 174, 18965, 18809, 2916, 3652, 1144, 1151, 2602, 1106, 5010, 1679, 9717, 12148, 1348, 20497, 12725, 4863, 117, 1679, 9717, 12148, 1348, 24970, 1105, 11911

tokens ['me', '##rk', '##el', 'cell', 'p', '##oly', '##oma', '##virus', '(', 'm', '##c', '##v', ')', ',', 'a', 'previously', 'un', '##re', '##co', '##gni', '##zed', 'component', 'of', 'the', 'human', 'viral', 'skin', 'flora', ',', 'was', 'discovered', 'as', 'a', 'm', '##uta', '##ted', 'and', 'c', '##lon', '##ally', '-', 'integrated', 'virus', 'inserted', 'into', 'me', '##rk', '##el', 'cell', 'car', '##cin', '##oma', '(', 'm', '##cc', ')', 'genome', '##s', '.'], tokens_id [101, 1143, 4661, 1883, 2765, 185, 23415, 7903, 27608, 113, 182, 1665, 1964, 114, 117, 170, 2331, 8362, 1874, 2528, 22152, 5305, 6552, 1104, 1103, 1769, 14837, 2241, 16812, 117, 1108, 2751, 1112, 170, 182, 15012, 1906, 1105, 172, 4934, 2716, 118, 6576, 7942, 13137, 1154, 1143, 4661, 1883, 2765, 1610, 16430, 7903, 113, 182, 19515, 114, 15519, 1116, 119, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

tokens ['the', 'role', 'of', 'c', '##rk', '##l', 'in', 'breast', 'cancer', 'meta', '##sta', '##sis', ':', 'insights', 'from', 'systems', 'biology', '.'], tokens_id [101, 1103, 1648, 1104, 172, 4661, 1233, 1107, 7209, 4182, 27154, 8419, 4863, 131, 24180, 1121, 2344, 10256, 119, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], pos1 is [0, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

['t', '##ri', '##b', '##3', 'm', '##rna', 'expression', 'was', 'measured', 'in', 'breast', 'tumor', 'tissue', 'from', '247', 'patients', 'and', 'correlated', 'with', 'clinic', '##op', '##ath', '##ological', 'parameters', 'and', 'clinical', 'outcome', '.']tokens ['we', 'found', 'that', 'the', 'levels', 'of', 'mi', '##r', '-', '490', '-', '3', '##p', 'were', 'lower', 'in', 'the', 'breast', 'cancer', 'tissue', 'than', 'in', 'the', 'para', '##car', '##cin', '##oma', 'tissues', '.'], tokens_id [101, 1195, 1276, 1115, 1103, 3001, 1104, 1940, 1197, 118, 24680, 118, 124, 1643, 1127, 2211, 1107, 1103, 7209, 4182, 7918, 1190, 1107, 1103, 18311, 8766, 16430, 7903, 14749, 119, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], pos1 is [0, 111, 112, 113, 11

tokens_id is 128, tokens_mask is 128, segment_id is 128, pos is 128, pos2 is 128, pcnn_mask is 128tokens ['m', '##uri', '##ne', 'breast', 'cancer', 'tissue', '(', 'b', '##ct', ')', 'and', 'normal', 'm', '##uri', '##ne', 'breast', 'tissue', '(', 'b', '##t', ')', 'served', 'as', 'control', '.'], tokens_id [101, 182, 8212, 1673, 7209, 4182, 7918, 113, 171, 5822, 114, 1105, 2999, 182, 8212, 1673, 7209, 7918, 113, 171, 1204, 114, 1462, 1112, 1654, 119, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], pos1 is [0, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 

tokens_id is 128, tokens_mask is 128, segment_id is 128, pos is 128, pos2 is 128, pcnn_mask is 128tokens_id is 128, tokens_mask is 128, segment_id is 128, pos is 128, pos2 is 128, pcnn_mask is 128tokens ['the', 'use', 'of', 'targeted', 'anti', '-', 'notch', 'therapy', 'in', 'the', 'clinic', 'to', 'treat', 'liver', 'cancer', 'will', 'require', 'considerable', 're', '##fine', '##ment', 'of', 'our', 'current', 'knowledge', 'on', 'the', 'regulation', 'of', 'notch', 'signaling', 'components', 'and', 'their', 'effects', 'in', 'both', 'normal', 'and', 'ma', '##li', '##gnant', 'liver', 'cells', 'in', 'order', 'to', 'target', 'specific', 'notch', 'subunit', '##s', 'which', 'are', 'critical', 'to', 'liver', 'cancer', 'tumor', '##ige', '##nes', '##is', 'but', 'not', 'to', 'the', 'home', '##ost', '##asis', 'of', 'normal', 'cells', '.'], tokens_id [101, 1103, 1329, 1104, 9271, 2848, 118, 23555, 7606, 1107, 1103, 12257, 1106, 7299, 11911, 4182, 1209, 4752, 5602, 1231, 24191, 1880, 1104, 1412, 1954, 

tokens_id is 128, tokens_mask is 128, segment_id is 128, pos is 128, pos2 is 128, pcnn_mask is 128tokens ['we', 'find', 'in', 'a', 'x', '##eno', '##gra', '##ft', 'or', '##th', '##oto', '##pic', 'model', 'of', 'breast', 'breast', 'cancer', 'meta', '##sta', '##sis', 'that', 'e', '##ct', '##op', '##ic', 'expression', 'of', 'members', 'of', 'the', 'mi', '##r', '-', '200', '##b', '/', '200', '##c', '/', '42', '##9', ',', 'but', 'not', 'the', 'mi', '##r', '-', '141', '/', '200', '##a', ',', 'functional', 'groups', 'limits', 't', '##umour', 'cell', 'invasion', 'and', 'meta', '##sta', '##sis', '.'], tokens_id [101, 1195, 1525, 1107, 170, 193, 26601, 14867, 4964, 1137, 1582, 12355, 20437, 2235, 1104, 7209, 7209, 4182, 27154, 8419, 4863, 1115, 174, 5822, 4184, 1596, 2838, 1104, 1484, 1104, 1103, 1940, 1197, 118, 2363, 1830, 120, 2363, 1665, 120, 3565, 1580, 117, 1133, 1136, 1103, 1940, 1197, 118, 16308, 120, 2363, 1161, 117, 8458, 2114, 6263, 189, 27226, 2765, 4923, 1105, 27154, 8419, 4863, 119,

tokens_id is 128, tokens_mask is 128, segment_id is 128, pos is 128, pos2 is 128, pcnn_mask is 128tokens ['here', 'we', 'employed', 'a', 'quantitative', 'p', '##c', '##r', 'method', 'to', 'detect', 'her', '##2', 'expression', 'in', 'one', 'hundred', 'ninety', 'nine', 'formal', '##in', '-', 'fixed', 'and', 'para', '##ffin', '-', 'embedded', '(', 'f', '##f', '##pe', ')', 'breast', 'cancer', 'tissue', 'samples', 'from', 'the', 'patients', 'treated', 'over', 'two', 'years', 'at', 'the', 'yo', '##nse', '##i', 'university', 'se', '##verance', 'hospital', ',', 'republic', 'of', 'k', '##ore', '##a', '.'], tokens_id [101, 1303, 1195, 4071, 170, 25220, 185, 1665, 1197, 3442, 1106, 11552, 1123, 1477, 2838, 1107, 1141, 2937, 16696, 2551, 4698, 1394, 118, 4275, 1105, 18311, 16274, 118, 11783, 113, 175, 2087, 3186, 114, 7209, 4182, 7918, 8025, 1121, 1103, 4420, 5165, 1166, 1160, 1201, 1120, 1103, 26063, 19054, 1182, 2755, 14516, 24374, 2704, 117, 13911, 1104, 180, 4474, 1161, 119, 102, 0, 0, 0, 0, 0

['the', 'proposed', 'method', 'can', 'distinguish', 'and', 'class', '##ify', 'liver', 'his', '##top', '##ath', '##ological', 'images', 'as', 'abnormal', 'or', 'normal', 'with', 'high', 'accuracy', ',', 'thus', 'providing', 'support', 'for', 'the', 'early', 'diagnosis', 'of', 'liver', 'cancer', '.']tokens_id is 128, tokens_mask is 128, segment_id is 128, pos is 128, pos2 is 128, pcnn_mask is 128tokens ['finally', ',', 'for', 'in', 'v', '##ivo', 'studies', ',', 'magnetic', 'resonance', 'images', 'were', 'taken', 'in', 's', '##cid', 'mice', 'bearing', 'breast', 'cancer', 'tumor', 'pre', '-', 'and', 'post', '-', 'injection', ',', 'and', 'a', 'multi', '##mo', '##dal', 'na', '##nos', '##can', 'pet', '/', 'com', '##puted', 'to', '##mo', '##graphy', 'was', 'used', 'to', 'perform', 'pre', '##c', '##lini', '##cal', 'imaging', 'of', 'the', 'radio', '##la', '##bel', '##ed', 'na', '##no', '##par', '##tic', '##les', '.'], tokens_id [101, 1921, 117, 1111, 1107, 191, 15435, 2527, 117, 8364, 20370, 435

['t', '##rac', '##hom', '##ati', '##s', ')', 'infection', 'in', 'human', 'pro', '##state', 'e', '##pit', '##hel', '##ial', 'cells', 'using', 'an', 'in', 'v', '##it', '##ro', 'culture', 'system', 'in', 'which', 'human', 'and', '##rogen', '-', 'independent', 'p', '##c', '-', '3', 'pro', '##state', 'cancer', 'e', '##pit', '##hel', '##ial', 'cells', 'were', 'infected', 'with', 'c', '.']tokens_id is 128, tokens_mask is 128, segment_id is 128, pos is 128, pos2 is 128, pcnn_mask is 128


tokens ['results', ':', 'down', '##re', '##gu', '##lation', 'or', 'loss', 'of', 'e', '##pha', '##5', 'm', '##rna', 'or', 'protein', 'expression', 'was', 'detected', 'in', '28', 'of', '45', '(', '62', '%', ')', 'pro', '##state', 'car', '##cin', '##oma', '##s', ',', '2', 'of', '39', '(', '5', '%', ')', 'h', '##yper', '##p', '##lasia', '##s', ',', 'and', 'all', '6', 'pro', '##state', 'cancer', 'cell', 'lines', '.'], tokens_id [101, 2686, 131, 1205, 1874, 13830, 6840, 1137, 2445, 1104, 174, 20695, 1571, 182, 1178

tokens_id is 128, tokens_mask is 128, segment_id is 128, pos is 128, pos2 is 128, pcnn_mask is 128tokens ['conclusion', ':', 'the', 'positive', 'expression', 'rate', 'of', 's', '##tat', '##3', 'was', 'higher', 'in', 'breast', 'cancer', 'tissue', 'compared', 'to', 'normal', 'breast', 'tissue', ',', 'which', 'was', 'correlated', 'with', 'clinical', 'stage', ',', 'tumor', 'differentiation', ',', 'and', 'l', '##ymph', 'meta', '##sta', '##sis', '.'], tokens_id [101, 6593, 131, 1103, 3112, 2838, 2603, 1104, 188, 19756, 1495, 1108, 2299, 1107, 7209, 4182, 7918, 3402, 1106, 2999, 7209, 7918, 117, 1134, 1108, 27053, 1114, 7300, 2016, 117, 14601, 23510, 117, 1105, 181, 25698, 27154, 8419, 4863, 119, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], pos1 is [0, 113, 114, 115, 116, 11

['the', 'expression', 'of', 'sa', '##m', '##6', '##8', 'protein', 'in', 'breast', 'cancer', 'tissue', 'was', 'detected', 'by', 'im', '##mu', '##no', '##his', '##to', '##chemistry', '.']tokens_id is 128, tokens_mask is 128, segment_id is 128, pos is 128, pos2 is 128, pcnn_mask is 128tokens ['objective', ':', 'the', 'aim', 'of', 'this', 'study', 'was', 'to', 'investigate', 'the', 'expression', 'of', 'signal', 'trans', '##du', '##cer', 'and', 'act', '##iva', '##tor', 'of', 'transcription', '3', '(', 's', '##tat', '##3', ')', 'in', 'breast', 'cancer', 'tissue', 'tissue', 'and', 'normal', 'breast', 'tissue', 'in', 'breast', 'cancer', 'patients', 'and', 'whether', 'it', 'was', 'correlated', 'with', 'clinical', 'parameters', '.'], tokens_id [101, 7649, 131, 1103, 6457, 1104, 1142, 2025, 1108, 1106, 8242, 1103, 2838, 1104, 4344, 14715, 7641, 14840, 1105, 2496, 12416, 2772, 1104, 15416, 124, 113, 188, 19756, 1495, 114, 1107, 7209, 4182, 7918, 7918, 1105, 2999, 7209, 7918, 1107, 7209, 4182, 4420

['conclusions', ':', 'in', 'this', 'translation', '##al', 'study', ',', 'we', 'showed', 'a', 'lack', 'of', 'association', 'between', 'c', '##t', '##cs', 'and', 'expression', 'of', 'em', '##t', '-', 'in', '##ducing', 'transcription', 'factors', ',', 'twist', '##1', 'and', 's', '##lug', ',', 'in', 'breast', 'breast', 'tumor', 'tissue', '.']
['in', 'our', 'mouse', 'model', ',', 'ma', '##mma', '##ry', 'fi', '##bro', '##blast', '##s', 'di', '##sse', '##minated', 'to', 'sites', 'of', 'breast', 'breast', 'cancer', 'meta', '##sta', '##ses', ',', 'providing', 'another', 'mechanism', 'to', 'increase', 'levels', 'of', 'c', '##x', '##c', '##l', '##12', 'in', 'meta', '##static', 'environments', '.']

['these', 'findings', 'do', 'not', 'provide', 'strong', 'evidence', 'that', 'ma', '##mm', '##ographic', 'density', 'parameters', 'differential', '##ly', 'affect', 'specific', 'breast', 'cancer', 'tumor', 'characteristics', '.']tokens_id is 128, tokens_mask is 128, segment_id is 128, pos is 128, pos2 is

['more', '##over', ',', 'si', '##len', '##cing', 'c', '##d', '##5', '##4', 'greatly', 'reduced', 'the', 'tumor', '##ige', '##nes', '##is', 'of', 'pro', '##state', 'cancer', '##s', 'both', 'in', 'v', '##it', '##ro', 'and', 'in', 'v', '##ivo', 'and', 'significantly', 'extended', 'the', 'survival', 'time', 'of', 'tumor', '-', 'bearing', 'mice', 'in', 'a', 'pro', '##state', 'cancer', 'x', '##eno', '##gra', '##ft', 'model', '.']tokens ['the', 'functioning', 'of', 'a', 'connected', 'set', 'of', 'brain', 'structures', '-', 'pre', '##front', '##al', 'cortex', ',', 'hip', '##po', '##cam', '##pus', ',', 's', '##tri', '##at', '##um', ',', 'and', 'do', '##pa', '##mine', '##rg', '##ic', 'me', '##sen', '##ce', '##pha', '##lon', '-', 'is', 'reviewed', 'in', 'relation', 'to', 'two', 'important', 'distinction', '##s', ':', '(', 'a', ')', 'goal', '-', 'directed', 'as', 'opposed', 'to', 'habit', '##ual', 'behavior', 'and', '(', 'b', ')', 'model', '-', 'based', 'and', 'model', '-', 'free', 'learning', '.'

tokens ['instead', ',', 'we', 'found', 'that', 'trim', '##22', 'protein', 'level', 'co', '##rrel', '##ates', 'strongly', '(', 'r', '=', '0', ')', 'with', 'p', '##53', 'protein', 'level', 'in', 'normal', 'breast', 'tissue', ',', 'but', 'this', 'correlation', 'is', 'marked', '##ly', 'impaired', '(', 'r', '=', '0', ')', 'in', 'breast', 'cancer', 'tissue', 'tissue', ',', 'suggesting', 'that', 'there', 'is', 'some', 'defects', 'in', 'p', '##53', 'regulation', 'of', 'trim', '##22', 'gene', 'in', 'breast', 'cancer', '.'], tokens_id [101, 1939, 117, 1195, 1276, 1115, 13373, 20581, 4592, 1634, 1884, 18337, 5430, 5473, 113, 187, 134, 121, 114, 1114, 185, 24239, 4592, 1634, 1107, 2999, 7209, 7918, 117, 1133, 1142, 18741, 1110, 3597, 1193, 20606, 113, 187, 134, 121, 114, 1107, 7209, 4182, 7918, 7918, 117, 8783, 1115, 1175, 1110, 1199, 20705, 1107, 185, 24239, 8585, 1104, 13373, 20581, 5565, 1107, 7209, 4182, 119, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

tokens ['the', 'results', 'clarified', 'that', 'd', '##5', 's', '##tat', '##5', '##a', '(', '1', ')', 'behave', '##s', 'as', 'a', 'promoting', 'factor', 'to', 'the', 'cell', 'proliferation', 'of', 'm', '##c', '##f', '-', '10', '##a', 'and', 'm', '##c', '##f', '-', '7', ',', '(', '2', ')', 'induce', '##s', 'enhance', '##r', 'of', 'z', '##este', 'ho', '##mology', '2', '(', 'e', '##z', '##h', '##2', ')', 'expression', 'in', 'breast', 'e', '##pit', '##hel', '##ial', 'cells', ',', 'as', 'well', 'as', 'his', '##tone', '3', 'trim', '##eth', '##yla', '##tion', 'of', 'i', '##g', '##f', '##b', '##p', '-', '7', 'promoter', 'region', ',', 'and', '(', '3', ')', 'lower', 'i', '##g', '##f', '##b', '##p', '-', '7', 'expression', 'was', 'detected', 'in', 'breast', 'cancer', 'tissue', '.'], tokens_id [101, 1103, 2686, 22484, 1115, 173, 1571, 188, 19756, 1571, 1161, 113, 122, 114, 18492, 1116, 1112, 170, 7495, 5318, 1106, 1103, 2765, 23766, 1104, 182, 1665, 2087, 118, 1275, 1161, 1105, 182, 1665, 2087, 1

tokens ['34', '##8', 'pro', '##state', 'car', '##cin', '##oma', '##s', 'from', 'consecutive', 'radical', 'pro', '##state', '##ct', '##omi', '##es', ',', '29', 'cast', '##ration', '-', 're', '##fra', '##ctor', '##y', 'pro', '##state', 'cancer', ',', '46', 'meta', '##sta', '##ses', ',', 'and', '45', 'ben', '##ign', 'h', '##yper', '##p', '##lasia', '##s', 'were', 'i', 'mm', '##uno', '##his', '##to', '##chemical', '##ly', 'analyzed', 'for', 'mage', '-', 'c', '##2', '/', 'c', '##t', '##10', 'expression', 'using', 't', '##iss', 'u', '##e', 'micro', '##arra', '##ys', '.'], tokens_id [101, 3236, 1604, 5250, 19596, 1610, 16430, 7903, 1116, 1121, 4776, 8276, 5250, 19596, 5822, 18882, 1279, 117, 1853, 2641, 6108, 118, 1231, 27476, 9363, 1183, 5250, 19596, 4182, 117, 3993, 27154, 8419, 8830, 117, 1105, 2532, 26181, 11368, 177, 24312, 1643, 22992, 1116, 1127, 178, 2608, 26761, 27516, 2430, 16710, 1193, 17689, 1111, 27595, 118, 172, 1477, 120, 172, 1204, 10424, 2838, 1606, 189, 14788, 190, 1162, 175

['to', 'identify', 'regulatory', 'drivers', 'of', 'pro', '##state', 'cancer', 'm', 'al', '##ign', '##ancy', ',', 'we', 'have', 'assembled', 'genome', '-', 'wide', 'regulatory', 'networks', '(', 'interact', '##ome', '##s', ')', 'for', 'human', 'and', 'mouse', 'p', 'r', '##ost', '##ate', 'cancer', 'from', 'expression', 'profiles', 'of', 'human', 'tumors', 'and', 'of', 'genetically', 'engineered', 'mouse', 'models', ',', 'respectively', '.']['the', 'aim', 'of', 'this', 'study', 'was', 'to', 'assess', 'correlation', 'between', 'c', '##t', '##cs', 'and', 'expression', 'of', 'em', '##t', 'transcription', 'factors', 'twist', '##1', 'and', 's', '##lug', 'in', 'breast', 'tumor', 'tissue', 'tissue', '.']


tokens_id is 128, tokens_mask is 128, segment_id is 128, pos is 128, pos2 is 128, pcnn_mask is 128['overall', ',', 'this', 'me', '##chan', '##istic', 'analysis', 'of', 'the', 'tumor', '-', 'suppress', '##ive', 'function', 'of', 'p', '##t', '##p', '##n', '##23', 'in', 'breast', 'cancer', 'suppo

tokens ['c', '##d', '##15', '/', 'f', '##ut', '##4', '-', 'high', 'expressing', 'co', '##lon', 'cancer', 'cells', 'with', 'primary', 'resistance', 'to', 'c', '##et', '##ux', '##ima', '##b', 'or', 'be', '##va', '##ci', '##zu', '##ma', '##b', 'are', 'significantly', 'more', 'sensitive', 'to', 'me', '##k', 'inhibitor', '##s', 'than', 'c', '##d', '##15', '/', 'f', '##ut', '##4', '-', 'low', 'counterparts', '.'], tokens_id [101, 172, 1181, 16337, 120, 175, 3818, 1527, 118, 1344, 14819, 1884, 4934, 4182, 3652, 1114, 2425, 4789, 1106, 172, 2105, 5025, 8628, 1830, 1137, 1129, 2497, 6617, 10337, 1918, 1830, 1132, 5409, 1167, 7246, 1106, 1143, 1377, 27558, 1116, 1190, 172, 1181, 16337, 120, 175, 3818, 1527, 118, 1822, 15289, 119, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], pos1 is [0, 128, 129, 130, 131, 132, 13

tokens_id is 128, tokens_mask is 128, segment_id is 128, pos is 128, pos2 is 128, pcnn_mask is 128tokens_id is 128, tokens_mask is 128, segment_id is 128, pos is 128, pos2 is 128, pcnn_mask is 128tokens ['although', 'p', '##ac', '##lit', '##ax', '##el', '(', 'tax', '##ol', ')', 'is', 'an', 'active', 'ch', '##em', '##oth', '##era', '##pe', '##uti', '##c', 'agent', 'for', 'the', 'treatment', 'of', 'breast', 'cancer', ',', 'not', 'all', 'breast', 'tumors', 'are', 'sensitive', 'to', 'this', 'drug', '.'], tokens_id [101, 1780, 185, 7409, 12888, 7897, 1883, 113, 3641, 4063, 114, 1110, 1126, 2327, 22572, 5521, 12858, 5970, 3186, 16065, 1665, 3677, 1111, 1103, 3252, 1104, 7209, 4182, 117, 1136, 1155, 7209, 24309, 1132, 7246, 1106, 1142, 3850, 119, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

tokens ['results', ':', 'mi', '##r', '-', '32', 'was', 'frequently', 'over', '##ex', '##pressed', 'in', 'breast', 'breast', 'cancer', 'tissue', 'samples', 'and', 'cell', 'lines', 'as', 'was', 'demonstrated', 'by', 'q', '##rt', '-', 'p', '##c', '##r', '.'], tokens_id [101, 2686, 131, 1940, 1197, 118, 2724, 1108, 3933, 1166, 11708, 15716, 1107, 7209, 7209, 4182, 7918, 8025, 1105, 2765, 2442, 1112, 1108, 7160, 1118, 186, 3740, 118, 185, 1665, 1197, 119, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], pos1 is [0, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 

['in', 'the', 'present', 'study', ',', 'our', 'results', 'indicated', 'that', 'so', '##x', '##4', 'm', '##rna', 'and', 'protein', 'were', 'highly', 'expressed', 'in', 'breast', 'cancer', 'tissues', 'compared', 'with', 'adjacent', 'normal', 'ma', '##mma', '##ry', 'tissues', 'and', 'positively', 'correlated', 'with', 'clinical', 'stage', '(', 'i', '-', 'ii', 'vs', '.']['a', 'panel', 'of', 'established', 'human', 'cancer', 'cell', 'lines', ',', 'derived', 'from', 'g', '##lio', '##blast', '##oma', ',', 'co', '##lon', ',', 'and', 'breast', 'cancer', 'tissue', ',', 'was', 'used', 'to', 'evaluate', 'parameters', 'critical', 'for', 'effective', 'anti', '##can', '##cer', 'activity', '.']

['in', 'this', 'study', ',', 'the', 'skin', 'and', 'ultra', '##vio', '##let', 'neo', '##p', '##lasia', 'transplant', 'risk', 'assessment', 'ca', '##l', '##cula', '##tor', '(', 'sun', '##tra', '##c', ')', 'was', 'developed', 'to', 's', '##tra', '##ti', '##fy', 'patients', 'into', 'risk', 'groups', 'for', 'post'

tokens ['serum', 'am', '##yl', '##oid', 'a', 'expression', 'in', 'the', 'breast', 'cancer', 'tissue', 'tissue', 'is', 'associated', 'with', 'poor', 'pro', '##gno', '##sis', '.'], tokens_id [101, 23651, 1821, 7777, 7874, 170, 2838, 1107, 1103, 7209, 4182, 7918, 7918, 1110, 2628, 1114, 2869, 5250, 25566, 4863, 119, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], pos1 is [0, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 

tokens ['here', ',', 'we', 'used', 'a', 'p', '##ae', 'rat', 'model', 'to', 'analyze', 'messenger', 'r', '##na', '(', 'm', '##rna', ')', 'and', 'protein', 'expression', 'of', 'iron', 'home', '##ost', '##asis', 'genes', 'such', 'as', 'transfer', '##rin', 'receptor', '(', 't', '##f', '##r', ')', ',', 'di', '##valent', 'metal', 'transport', '##er', '(', 'd', '##m', '##t', '##1', ')', ',', 'f', '##er', '##rop', '##ort', '##in', '(', 'f', '##p', '##n', '##1', ')', ',', 'and', 'f', '##er', '##rit', '##in', '(', 'ft', ')', 'in', 'brain', 'areas', 'associated', 'with', 'memory', 'formation', 'such', 'as', 'the', 'pre', '##front', '##al', 'cortex', '(', 'p', '##f', '##c', ')', ',', 'ventral', 'te', '##gment', '##al', 'area', ',', 'and', 'hip', '##po', '##cam', '##pus', '.'], tokens_id [101, 1303, 117, 1195, 1215, 170, 185, 5024, 11631, 2235, 1106, 19774, 17957, 187, 1605, 113, 182, 11782, 114, 1105, 4592, 2838, 1104, 3926, 1313, 15540, 14229, 9077, 1216, 1112, 4036, 4854, 10814, 113, 189, 2087, 

tokens ['consistent', 'with', 'this', 'finding', ',', 'we', 'observe', 'that', 'm', 'el', '##ano', '##mas', 'with', 'an', 'ultra', '##vio', '##let', '-', 'induced', 'd', '##na', 'damage', 'mutation', 'signature', 'show', 'greatest', 'en', '##rich', '##ment', 'of', 'promoter', 'mutations', ',', 'whereas', 'c', 'an', '##cer', '##s', 'that', 'are', 'not', 'highly', 'dependent', 'on', 'ne', '##r', ',', 'such', 'as', 'co', '##lon', 'cancer', ',', 'show', 'no', 'sign', 'of', 'such', 'en', '##rich', '##ment', '.'], tokens_id [101, 8080, 1114, 1142, 4006, 117, 1195, 12326, 1115, 182, 8468, 7428, 7941, 1114, 1126, 18737, 17417, 5765, 118, 10645, 173, 1605, 3290, 17895, 8250, 1437, 4459, 4035, 10886, 1880, 1104, 17110, 17157, 117, 6142, 172, 1126, 14840, 1116, 1115, 1132, 1136, 3023, 7449, 1113, 24928, 1197, 117, 1216, 1112, 1884, 4934, 4182, 117, 1437, 1185, 2951, 1104, 1216, 4035, 10886, 1880, 119, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0

tokens ['we', 'tested', 'mi', '##r', '-', '490', '-', '3', '##p', 'expression', 'in', 'breast', 'cancer', 'tissue', 'and', 'para', '##car', '##cin', '##oma', 'tissue', 'using', 'reverse', 'transcription', '-', 'polymer', '##ase', 'chain', 'reaction', '.'], tokens_id [101, 1195, 7289, 1940, 1197, 118, 24680, 118, 124, 1643, 2838, 1107, 7209, 4182, 7918, 1105, 18311, 8766, 16430, 7903, 7918, 1606, 7936, 15416, 118, 21176, 6530, 4129, 3943, 119, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], pos1 is [0, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 

tokens ['m', '##uri', '##ne', 'breast', 'cancer', 'tissue', '(', 'b', '##ct', ')', 'and', 'normal', 'm', '##uri', '##ne', 'breast', 'tissue', '(', 'b', '##t', ')', 'served', 'as', 'control', '.'], tokens_id [101, 182, 8212, 1673, 7209, 4182, 7918, 113, 171, 5822, 114, 1105, 2999, 182, 8212, 1673, 7209, 7918, 113, 171, 1204, 114, 1462, 1112, 1654, 119, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], pos1 is [0, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

['therefore', ',', 'we', 'h', '##y', '##pot', '##hes', '##ized', 'that', '1', ',', '25', '(', 'oh', ')', '2d', 'may', 'also', 'in', '##hibit', 'expression', 'of', 'c', '##y', '##p', '##27', '##a', '##1', ',', 'thereby', 'reducing', '27', '##h', '##c', 'concentrations', 'in', 'the', 'blood', 'and', 'tissues', 'that', 'express', 'c', '##y', '##p', '##27', '##a', '##1', ',', 'including', 'breast', 'cancer', 'tissue', '.']tokens ['e', '##pi', '##gene', '##tic', 'regulation', 'of', 'gene', 'expression', 'in', 'the', 'brain', 'plays', 'a', 'crucial', 'role', 'in', 'response', 'to', 'long', '-', 'lasting', 'stress', 'and', 'chronic', 'pain', ',', 'and', 'micro', '##rna', 'im', '##bal', '##ance', 'in', 'the', 'pre', '##front', '##al', 'cortex', '(', 'p', '##f', '##c', ')', 'might', 'be', 'involved', 'in', 'central', 'se', '##ns', '##iti', '##zation', '.'], tokens_id [101, 174, 8508, 27054, 2941, 8585, 1104, 5565, 2838, 1107, 1103, 3575, 2399, 170, 10268, 1648, 1107, 2593, 1106, 1263, 118, 9810

tokens ['the', 'aim', 'of', 'this', 'study', 'was', 'to', 'assess', 'correlation', 'between', 'c', '##t', '##cs', 'and', 'expression', 'of', 'em', '##t', 'transcription', 'factors', 'twist', '##1', 'and', 's', '##lug', 'in', 'breast', 'breast', 'tumor', 'tissue', '.'], tokens_id [101, 1103, 6457, 1104, 1142, 2025, 1108, 1106, 15187, 18741, 1206, 172, 1204, 6063, 1105, 2838, 1104, 9712, 1204, 15416, 5320, 11079, 1475, 1105, 188, 16693, 1107, 7209, 7209, 14601, 7918, 119, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], pos1 is [0, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0

tokens ['a', 'decision', 'tree', 'and', 'his', '##to', '##gram', 'analysis', 'were', 'applied', 'to', 'class', '##ify', 'breast', 'tissue', 'via', 'quantitative', 'analysis', 'for', 'detected', 'lesions', ',', 'which', 'were', 'used', 'to', 'distinguish', 'between', 'three', 'categories', 'of', 'breast', 'tissue', ':', 'ma', '##li', '##gnant', 'tumors', '(', 'i', '.', 'e', '.', ',', 'central', 'and', 'peripheral', 'zone', ')', ',', 'c', '##ys', '##ts', ',', 'and', 'normal', 'breast', 'tissues', '.'], tokens_id [101, 170, 2383, 2780, 1105, 1117, 2430, 12139, 3622, 1127, 3666, 1106, 1705, 6120, 7209, 7918, 2258, 25220, 3622, 1111, 11168, 26052, 117, 1134, 1127, 1215, 1106, 10706, 1206, 1210, 6788, 1104, 7209, 7918, 131, 12477, 2646, 15454, 24309, 113, 178, 119, 174, 119, 117, 2129, 1105, 17963, 4834, 114, 117, 172, 6834, 2145, 117, 1105, 2999, 7209, 14749, 119, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 

tokens ['however', ',', 'as', 'we', 'demonstrate', 'here', ',', 'there', 'are', 'two', 'additional', 'roles', 'for', 'c', '##tl', '##a', '##4', 'in', 'cancer', ',', 'including', 'via', 'c', '##tl', '##a', '##4', 'over', '##ex', '##press', '##ion', 'in', 'diverse', 'b', '-', 'cell', 'l', '##ymph', '##oma', '##s', 'and', 'in', 'me', '##lan', '##oma', '-', 'associated', 'b', 'cells', '.'], tokens_id [101, 1649, 117, 1112, 1195, 10541, 1303, 117, 1175, 1132, 1160, 2509, 3573, 1111, 172, 26414, 1161, 1527, 1107, 4182, 117, 1259, 2258, 172, 26414, 1161, 1527, 1166, 11708, 11135, 1988, 1107, 7188, 171, 118, 2765, 181, 25698, 7903, 1116, 1105, 1107, 1143, 4371, 7903, 118, 2628, 171, 3652, 119, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], pos1 is [0, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100,

['ex', '##oso', '##me', '-', 'mediated', 'transfer', 'of', 'mi', '##r', '-', '222', 'is', 'sufficient', 'to', 'increase', 'tumor', 'ma', '##li', '##gna', '##ncy', 'in', 'me', '##lan', '##oma', '.']tokens ['while', 'the', 'ma', '##li', '##gnant', 'e', '##pit', '##hel', '##ial', 'cells', 'of', 'pan', '##cre', '##atic', 'cancer', 'show', 'limited', 'expression', ',', 'in', 'breast', 'cancer', 'tissue', 'm', '##uc', '##1', '-', 'a', '##rf', 'demonstrates', 'strong', 'nuclear', 'expression', '.'], tokens_id [101, 1229, 1103, 12477, 2646, 15454, 174, 18965, 18809, 2916, 3652, 1104, 13316, 13782, 7698, 4182, 1437, 2609, 2838, 117, 1107, 7209, 4182, 7918, 182, 21977, 1475, 118, 170, 11931, 17798, 2012, 4272, 2838, 119, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0

tokens ['however', ',', 'ch', '##ole', '##ster', '##ol', 'concentrations', 'within', 'the', 'brain', 'pre', '##front', '##al', 'cortex', 'and', 'hip', '##po', '##cam', '##pus', 'did', 'not', 'differ', 'among', 'diet', '##ary', 'groups', '.'], tokens_id [101, 1649, 117, 22572, 9016, 4648, 4063, 14759, 1439, 1103, 3575, 3073, 11949, 1348, 21284, 1105, 5110, 5674, 24282, 10306, 1225, 1136, 11271, 1621, 10211, 3113, 2114, 119, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], pos1 is [0, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 

tokens ['using', 'a', 'newly', 'produced', 'anti', '-', 'e', '##por', 'anti', '##body', 'that', 're', '##lia', '##bly', 'detect', '##s', 'the', 'full', '-', 'length', 'is', '##of', '##orm', 'of', 'the', 'e', '##por', 'we', 'show', 'that', 'breast', 'cancer', 'tissue', 'tissue', 'and', 'cells', 'express', 'the', 'e', '##por', 'protein', '.'], tokens_id [101, 1606, 170, 3599, 1666, 2848, 118, 174, 18876, 2848, 14637, 1115, 1231, 4567, 4999, 11552, 1116, 1103, 1554, 118, 2251, 1110, 10008, 24211, 1104, 1103, 174, 18876, 1195, 1437, 1115, 7209, 4182, 7918, 7918, 1105, 3652, 6848, 1103, 174, 18876, 4592, 119, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], pos1 is [0, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 12

['m', 'el', '##ano', '##ma', 'is', 'one', 'of', 'the', 'dead', '##liest', 'c', 'an', '##cer', '##s', ',', 'yet', 'the', 'cells', 'of', 'origin', 'and', 'mechanisms', 'of', 'tumor', 'initiation', 'remain', 'unclear', '.']tokens ['this', 'study', 'aimed', 'at', 'assessing', 'the', 're', '##gene', '##rative', 'effect', 'of', 'p', '-', 't', '##yr', '##oso', '##l', 'in', 'trans', '##ient', 'global', 'cerebral', 'is', '##che', '##mia', 'modeled', 'in', 'adult', 'male', 'w', '##ista', '##r', 'rats', 'by', 're', '##versible', 'o', '##cc', '##lusion', 'of', 'the', 'three', 'major', 'vessels', 'originating', 'from', 'the', 'a', '##ort', '##ic', 'arch', 'and', 'supplying', 'the', 'blood', 'to', 'the', 'brain', '.', 'p', '-', 't', '##yr', '##oso', '##l', 'was', 'administered', 'in', '##tra', '##per', '##ito', '##nea', '##lly', 'in', 'a', 'dose', 'of', '20', 'mg', '/', 'kg', 'over', '10', 'days', 'after', 'surgery', '.'], tokens_id [101, 1142, 2025, 5850, 1120, 23529, 1103, 1231, 27054, 15306, 2629

tokens_id is 128, tokens_mask is 128, segment_id is 128, pos is 128, pos2 is 128, pcnn_mask is 128['interesting', '##ly', ',', 'el', '##a', '-', 'my', '##c', ':', 'par', '##p', '-', '1', '(', '-', '/', '-', ')', 'mice', 'displayed', 'fewer', 'duct', '##al', 't', '##umour', '##s', 'than', 'their', 'el', '##a', '-', 'my', '##c', ':', 'par', '##p', '-', '1', '(', '+', '/', '+', ')', 'counterparts', ',', 'suggesting', 'that', 'par', '##p', '-', '1', 'participates', 'in', 'promoting', 'a', '##cin', '##ar', '-', 'to', '-', 'duct', '##al', 'meta', '##p', '##lasia', ',', 'a', 'key', 'event', 'in', 'pan', '##cre', '##atic', 'cancer', 'initiation', '.']
['mi', '##r', '-', '124', 'expression', 'in', 'breast', 'cancer', 'tissue', 'was', 'measured', 'by', 'quantitative', 'real', '-', 'time', 'p', '##c', '##r', '(', 'q', '##rt', '-', 'p', '##c', '##r', ')', '.']tokens ['purpose', ':', 'disco', '##rda', '##nces', 'between', 'the', 'est', '##rogen', 'receptor', '(', 'er', ')', ',', 'pro', '##ges', '##

tokens ['some', 'of', 'the', 'largest', 'un', '##res', '##ol', '##ved', 'issues', 'are', 'pro', '##state', 'cancer', 'multi', '##fo', '##cal', '##ity', ',', 'limitations', 'of', 'current', 'bio', '##psy', 'strategies', ',', 'sub', '##op', '##ti', '##mal', 'staging', 'by', 'accepted', 'imaging', 'm', '##oda', '##lities', ',', 'less', 'than', 'robust', 'prediction', 'models', 'for', 'in', '##do', '##lent', 'pro', '##state', 'cancer', '##s', ',', 'and', 'safety', 'and', 'efficiency', 'of', 'the', 'established', 'cu', '##rative', 'the', '##rap', '##ies', 'following', 'focal', 'therapy', 'for', 'pro', '##state', 'cancer', '.'], tokens_id [101, 1199, 1104, 1103, 2026, 8362, 4894, 4063, 5790, 2492, 1132, 5250, 19596, 4182, 4321, 14467, 7867, 1785, 117, 13004, 1104, 1954, 25128, 12685, 10700, 117, 4841, 4184, 3121, 7435, 16772, 1118, 3134, 14377, 182, 16848, 16652, 117, 1750, 1190, 17351, 20770, 3584, 1111, 1107, 2572, 13147, 5250, 19596, 4182, 1116, 117, 1105, 3429, 1105, 8096, 1104, 1103, 16

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

