"""NegEx-style negation tagging: rule preparation, sentence tagger, demo."""
import ast
import re


def sortRules(ruleList):
    """Return the rules sorted longest-first, each with a compiled regex.

    Every rule is a tab-delimited string: the trigger phrase, two tab
    characters, then the bracketed tag (for example ``[PREN]``).  Each rule
    is stripped and split on tabs; the trigger words are joined with
    ``\\s+`` and wrapped in word boundaries to build a case-insensitive
    pattern, which is appended as the last element of the split rule.

    ruleList -- iterable of rule strings; it is not modified
    Returns a list of lists ``[trigger, '', tag, compiled_pattern]`` ordered
    by descending length of the original rule string.

    Trigger words go into the pattern unescaped, so regex metacharacters in
    a trigger are interpreted as regex syntax.
    """
    sortedList = []
    # sorted() instead of list.sort(): the caller's list keeps its order.
    for rule in sorted(ruleList, key=len, reverse=True):
        stripped = rule.strip()
        if not stripped:
            # A blank line has neither trigger nor tag; negTagger would
            # fail on it when indexing the tag.
            continue
        parts = stripped.split('\t')
        trig = r'\s+'.join(parts[0].split())
        parts.append(re.compile(r'\b(' + trig + r')\b', re.IGNORECASE))
        sortedList.append(parts)
    return sortedList


class negTagger(object):
    """Take a sentence and tag negation terms and negated phrases.

    Keyword arguments:
    sentence -- string to be tagged
    phrases  -- list of phrases to check for negation (None means none)
    rules    -- list of negation trigger rules from the sortRules function
                (None means none)
    negP     -- tag 'possible' terms as well (default = True)
    """

    # Every tag that can prefix a trigger token.  While scanning for one
    # trigger type, a token carrying any *other* tag ends the current scope.
    _TAGS = ('[CONJ]', '[PSEU]', '[PREN]', '[POST]', '[PREP]', '[POSP]')

    def __init__(self, sentence='', phrases=None, rules=None,
                 negP=True):
        self.__sentence = sentence
        self.__phrases = phrases if phrases is not None else []
        self.__rules = rules if rules is not None else []
        self.__negTaggedSentence = ''
        self.__scopesToReturn = []
        self.__negationFlag = None

        filler = '_'

        # Wrap every trigger occurrence in its tag, e.g.
        # "negative for" -> " [PREN]negative_for[PREN] ".  The filler keeps
        # a multi-word trigger together as one whitespace-delimited token.
        for rule in self.__rules:
            tag = rule[2].strip()
            marked = (' ' + tag + re.sub(r'\s+', filler, rule[0].strip())
                      + tag + ' ')
            # A callable replacement inserts the text literally (no
            # backslash or group-reference processing).
            self.__sentence = rule[3].sub(lambda _m, text=marked: text,
                                          self.__sentence)

        # Wrap every whole-word phrase occurrence in [PHRASE] ... [PHRASE].
        for phrase in self.__phrases:
            words = [re.escape(word) for word in phrase.split()]
            if not words:
                # An empty phrase would compile to r'\b\b' and match an
                # empty string at every word boundary.
                continue
            reP = re.compile(r'\b' + r'\W+'.join(words) + r'\b',
                             re.IGNORECASE)
            # sub() touches only the matched spans, so substrings of longer
            # words are left alone and every case variant is tagged.
            self.__sentence = reP.sub(
                lambda m: ('[PHRASE]'
                           + re.sub(r'\s+', filler, m.group(0).strip())
                           + '[PHRASE]'),
                self.__sentence)

        # Exchange the [PHRASE] ... [PHRASE] tags for [NEGATED] ... [NEGATED]
        # based on PREN and POST rules and, if negP is True, for
        # [POSSIBLE] ... [POSSIBLE] based on PREP and POSP as well.
        # Because [PREN] is checked first it takes precedence over [POST];
        # [POST] takes precedence over [PREP], and [PREP] over [POSP].
        aScopes = []
        tokens = self.__sentence.split()
        tokens = self._tagScope(tokens, '[PREN]', '[NEGATED]', aScopes, False)
        tokens = self._tagScope(tokens, '[POST]', '[NEGATED]', aScopes, True)
        self.__negTaggedSentence = ' '.join(tokens)

        if negP:
            tokens = self._tagScope(tokens, '[PREP]', '[POSSIBLE]',
                                    aScopes, False)
            tokens = self._tagScope(tokens, '[POSP]', '[POSSIBLE]',
                                    aScopes, True)
            self.__negTaggedSentence = ' '.join(tokens)

        if '[NEGATED]' in self.__negTaggedSentence:
            self.__negationFlag = 'negated'
        elif '[POSSIBLE]' in self.__negTaggedSentence:
            self.__negationFlag = 'possible'
        else:
            self.__negationFlag = 'affirmed'

        # Note: this turns every filler character back into a space,
        # including underscores that were already in the input sentence.
        self.__negTaggedSentence = self.__negTaggedSentence.replace(filler,
                                                                    ' ')

        # Report each scope without its trigger tokens.
        triggerTags = ('[PREN]', '[PREP]', '[POST]', '[POSP]')
        for line in aScopes:
            kept = [token for token in line.split()
                    if token[:6] not in triggerTags]
            self.__scopesToReturn.append(' '.join(kept))

    @staticmethod
    def _tagScope(tokens, trigger, replacement, scopes, backward):
        """Retag [PHRASE] tokens lying in the scope of one trigger type.

        tokens      -- sentence tokens in reading order (not modified)
        trigger     -- tag opening a scope, e.g. '[PREN]'
        replacement -- tag substituted for '[PHRASE]' inside a scope
        scopes      -- list that receives the text of each scope found
        backward    -- scan right-to-left (for triggers that follow the
                       phrase they affect) instead of left-to-right

        A scope opens at a trigger token, is suspended by a token carrying
        any other tag, and also excludes the token directly before the next
        trigger of the same type.  Returns the new tokens in reading order.
        """
        ordered = tokens[::-1] if backward else list(tokens)
        lastIndex = len(ordered) - 1
        active = False      # a trigger of this type has been seen
        blocked = False     # the scope is currently suspended
        portion = ''
        result = []

        for i, token in enumerate(ordered):
            prefix = token[:6]
            if prefix == trigger:
                active = True
                blocked = False
            elif prefix in negTagger._TAGS:
                blocked = True

            # The next token starts a fresh scope: close the current one.
            if i < lastIndex and ordered[i + 1][:6] == trigger:
                blocked = True
                if portion.strip():
                    scopes.append(portion.strip())
                portion = ''

            if active and not blocked:
                token = token.replace('[PHRASE]', replacement)
                # Keep the scope text in reading order either way.
                if backward:
                    portion = token + ' ' + portion
                else:
                    portion = portion + ' ' + token

            result.append(token)

        if portion.strip():
            scopes.append(portion.strip())
        if backward:
            result.reverse()
        return result

    def getNegTaggedSentence(self):
        """Return the sentence with trigger and phrase tags inserted."""
        return self.__negTaggedSentence

    def getNegationFlag(self):
        """Return 'negated', 'possible' or 'affirmed'."""
        return self.__negationFlag

    def getScopes(self):
        """Return the list of scope strings with trigger tokens removed."""
        return self.__scopesToReturn

    def __str__(self):
        """Return tagged sentence, flag and scopes joined by tabs."""
        text = self.__negTaggedSentence
        text += '\t' + self.__negationFlag
        text += '\t' + '\t'.join(self.__scopesToReturn)
        return text


def parse_sent(text, nlp, tok, neg_words):
    """Append '_NEG' to words that a dependency parse marks as negated.

    text      -- sentence to process
    nlp       -- object whose parse(text) returns a string that
                 ast.literal_eval turns into a dict holding
                 ['sentences'][0]['dependencies']
    tok       -- callable returning the tokens of text
    neg_words -- container of negation cue tokens

    The parser is only invoked when at least one token is in neg_words.
    For each dependency whose first element is 'neg', every occurrence of
    its second element in text gets the '_NEG' suffix.
    """
    tokens = tok(text)

    if any(w in neg_words for w in tokens):
        parsed = ast.literal_eval(nlp.parse(text))
        for dep in parsed['sentences'][0]['dependencies']:
            if dep[0] == 'neg':
                text = text.replace(dep[1], dep[1] + '_NEG')

    return text


def split_sent(text):
    """Split str(text) on runs of ':', '?' and '.'; delimiters are dropped."""
    return re.split(r'[:?.]+', str(text))


def negate(text, irules, conditions):
    """Tag negated conditions in every sentence of text.

    text       -- free text; split into sentences by split_sent
    irules     -- rules as returned by sortRules
    conditions -- phrases to look for

    Each sentence is tagged by negTagger with negP=False and the tagged
    sentences are joined with single spaces.  The split consumes the
    delimiters, and a text ending in a delimiter yields a final empty
    fragment, hence a trailing space in the result.
    """
    tagged = []

    for s in split_sent(text):
        # Cheap pre-filter; case-insensitive like the tagger's own matching.
        lowered = s.lower()
        cond = [c for c in conditions if c.lower() in lowered]

        tagger = negTagger(sentence=s, phrases=cond, rules=irules,
                           negP=False)
        tagged.append(tagger.getNegTaggedSentence())

    return ' '.join(tagged)


if __name__ == '__main__':
    # pandas is needed only for the demo, so it is imported here.
    import pandas as pd

    rules = pd.read_csv('./negex_triggers.txt', sep='\t', header=None)
    rules = list(rules[0] + '\t\t' + rules[2])
    irules = sortRules(rules)

    conditions = ['cough', 'headache']

    sentence = 'the patient is negative for cough, and headache.'

    print(negate(sentence, irules, conditions))
[PREN] +not see [PREN] +not to be [PREN] +patient was not [PREN] +rather than [PREN] +resolved [PREN] +test for [PREN] +to exclude [PREN] +unremarkable for [PREN] +with no [PREN] +without [PREN] +without any evidence of [PREN] +without evidence [PREN] +without indication of [PREN] +without sign of [PREN] +rules out [PREN] +rules him out [PREN] +rules her out [PREN] +rules the patient out [PREN] +rules out for [PREN] +rules him out for [PREN] +rules her out for [PREN] +rules the patient out for [PREN] +ruled out [PREN] +ruled him out [PREN] +ruled her out [PREN] +ruled the patient out [PREN] +ruled out for [PREN] +ruled him out for [PREN] +ruled her out for [PREN] +ruled the patient out for [PREN] +ruled out against [PREN] +ruled him out against [PREN] +ruled her out against [PREN] +ruled the patient out against [PREN] +did rule out [PREN] +did rule out for [PREN] +did rule out against [PREN] +did rule him out [PREN] +did rule her out [PREN] +did rule the patient out [PREN] +did rule him out for [PREN] +did rule her out for [PREN] +did rule him out against [PREN] +did rule her out against [PREN] +did rule the patient out for [PREN] +did rule the patient out against [PREN] +can rule out [PREN] +can rule out for [PREN] +can rule out against [PREN] +can rule him out [PREN] +can rule her out [PREN] +can rule the patient out [PREN] +can rule him out for [PREN] +can rule her out for [PREN] +can rule the patient out for [PREN] +can rule him out against [PREN] +can rule her out against [PREN] +can rule the patient out against [PREN] +adequate to rule out [PREN] +adequate to rule him out [PREN] +adequate to rule her out [PREN] +adequate to rule the patient out [PREN] +adequate to rule out for [PREN] +adequate to rule him out for [PREN] +adequate to rule her out for [PREN] +adequate to rule the patient out for [PREN] +adequate to rule the patient out against [PREN] +sufficient to rule out [PREN] +sufficient to rule him out [PREN] +sufficient to rule her out [PREN] +sufficient
to rule the patient out [PREN] +sufficient to rule out for [PREN] +sufficient to rule him out for [PREN] +sufficient to rule her out for [PREN] +sufficient to rule the patient out for [PREN] +sufficient to rule out against [PREN] +sufficient to rule him out against [PREN] +sufficient to rule her out against [PREN] +sufficient to rule the patient out against [PREN] +rule out [PREP] +r/o [PREP] +ro [PREP] +rule him out [PREP] +rule her out [PREP] +rule the patient out [PREP] +rule out for [PREP] +rule him out for [PREP] +rule her out for [PREP] +rule the patient out for [PREP] +be ruled out for [PREP] +should be ruled out for [PREP] +ought to be ruled out for [PREP] +may be ruled out for [PREP] +might be ruled out for [PREP] +could be ruled out for [PREP] +will be ruled out for [PREP] +can be ruled out for [PREP] +must be ruled out for [PREP] +is to be ruled out for [PREP] +what must be ruled out is [PREP] +unlikely [POST] +free [POST] +was ruled out [POST] +is ruled out [POST] +are ruled out [POST] +have been ruled out [POST] +has been ruled out [POST] +did not rule out [POSP] +not ruled out [POSP] +not been ruled out [POSP] +being ruled out [POSP] +be ruled out [POSP] +should be ruled out [POSP] +ought to be ruled out [POSP] +may be ruled out [POSP] +might be ruled out [POSP] +could be ruled out [POSP] +will be ruled out [POSP] +can be ruled out [POSP] +must be ruled out [POSP] +is to be ruled out [POSP] +but [CONJ] +however [CONJ] +nevertheless [CONJ] +yet [CONJ] +though [CONJ] +although [CONJ] +still [CONJ] +aside from [CONJ] +except [CONJ] +apart from [CONJ] +secondary to [CONJ] +as the cause of [CONJ] +as the source of [CONJ] +as the reason of [CONJ] +as the etiology of [CONJ] +as the origin of [CONJ] +as the cause for [CONJ] +as the source for [CONJ] +as the reason for [CONJ] +as the etiology for [CONJ] +as the origin for [CONJ] +as the secondary cause of [CONJ] +as the secondary source of [CONJ] +as the secondary reason of [CONJ] +as the secondary etiology of
[CONJ] +as the secondary origin of [CONJ] +as the secondary cause for [CONJ] +as the secondary source for [CONJ] +as the secondary reason for [CONJ] +as the secondary etiology for [CONJ] +as the secondary origin for [CONJ] +as a cause of [CONJ] +as a source of [CONJ] +as a reason of [CONJ] +as a etiology of [CONJ] +as a cause for [CONJ] +as a source for [CONJ] +as a reason for [CONJ] +as a etiology for [CONJ] +as a secondary cause of [CONJ] +as a secondary source of [CONJ] +as a secondary reason of [CONJ] +as a secondary etiology of [CONJ] +as a secondary origin of [CONJ] +as a secondary cause for [CONJ] +as a secondary source for [CONJ] +as a secondary reason for [CONJ] +as a secondary etiology for [CONJ] +as a secondary origin for [CONJ] +as an cause of [CONJ] +as an source of [CONJ] +as an reason of [CONJ] +as an etiology of [CONJ] +as an origin of [CONJ] +as an cause for [CONJ] +as an source for [CONJ] +as an reason for [CONJ] +as an etiology for [CONJ] +as an origin for [CONJ] +as an secondary cause of [CONJ] +as an secondary source of [CONJ] +as an secondary reason of [CONJ] +as an secondary etiology of [CONJ] +as an secondary origin of [CONJ] +as an secondary cause for [CONJ] +as an secondary source for [CONJ] +as an secondary reason for [CONJ] +as an secondary etiology for [CONJ] +as an secondary origin for [CONJ] +cause of [CONJ] +cause for [CONJ] +causes of [CONJ] +causes for [CONJ] +source of [CONJ] +source for [CONJ] +sources of [CONJ] +sources for [CONJ] +reason of [CONJ] +reason for [CONJ] +reasons of [CONJ] +reasons for [CONJ] +etiology of [CONJ] +etiology for [CONJ] +trigger event for [CONJ] +origin of [CONJ] +origin for [CONJ] +origins of [CONJ] +origins for [CONJ] +other possibilities of [CONJ]