Skip to content

Commit

Permalink
Merge pull request #27 from jenchen1398/master
Browse files Browse the repository at this point in the history
Fix Date Bug in I2B2 2006 Preprocessing
  • Loading branch information
EmilyAlsentzer committed Aug 25, 2020
2 parents 169542d + 5916042 commit a9d9169
Showing 1 changed file with 22 additions and 1 deletion.
23 changes: 22 additions & 1 deletion downstream_tasks/i2b2_preprocessing/i2b2_2006_deid/to_conll.py
Expand Up @@ -17,7 +17,6 @@
for tag in phi_tags:
line = line.replace(tag[0], '__phi__').strip()


# Walk through sentence
phi_ind = 0
for w in line.split():
Expand All @@ -29,8 +28,30 @@
for t in toks[1:]:
print(t, 'I-%s'%tag)
phi_ind += 1
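# The next two elif branches handle date edge cases where the __phi__ placeholder is fused to untagged characters within the same whitespace-delimited word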
elif w.startswith('__phi__'):
# Handles examples in the following format, where untagged text trails the PHI span:
# <PHI TYPE="DATE">01/01</PHI>/1995 or <PHI TYPE="DATE">01-01</PHI>-95
phi = phi_tags[phi_ind]
tag = phi[1]
toks = phi[2].split()
print(toks[0], 'B-%s'%tag)
if w[7:8] == '/' or w[7:8] == '-':
print(w[8:], 'O') # remove the / or - in the year
else:
print(w[7:], 'O')
phi_ind += 1
elif w.endswith('__phi__'):
# Handles examples where untagged text precedes the PHI span, e.g. 1995<PHI TYPE="DATE">0101</PHI>
phi = phi_tags[phi_ind]
tag = phi[1]
toks = phi[2].split()
print(w[:-7], 'O')
print(toks[0], 'B-%s'%tag)
phi_ind += 1
else:
print(w, 'O')
print()
i+=1


0 comments on commit a9d9169

Please sign in to comment.