training_set = Tot setul de date, este vectorul care contine toate documentele
document = Element din training_set. Fiecare document contine "data", "annotations" si "predictions"


In [73]:
import re
import json


Parse Training and Testing data from JSON

In [74]:
def parse_json(file_path):

  # Step 2: Open the file in read mode
  try:
    with open(file_path, "r") as json_file:
      # Step 3: Load the JSON data using json.load()
      parsed_file = json.load(json_file)
  except FileNotFoundError:
    print(f"Error: File not found at {file_path}")
  else:
    print("JSON data parsed successfully!")
    # Step 4: Access and process the data
    # (See examples below based on data structure)
  return parsed_file

In [75]:
training_set = parse_json("./train_data.json")
testing_set = parse_json("./test_data.json")

JSON data parsed successfully!
JSON data parsed successfully!


In [76]:
print(len(training_set))
print(len(testing_set))

254
64


In [77]:
predictions = [document["predictions"] for document in training_set]
texts = [document["data"]["text"] for document in training_set]
test_texts = [document["data"]["text"] for document in testing_set]

Extract terms given by CUTEXT

In [78]:
def extract_terms_from_file(file_path):
    terms = []
    with open(file_path, 'r') as file:
        for line in file:
            if line.startswith("Term:"):
                term = line.split("Term:")[1].strip()
                terms.append(term)
    return terms

In [79]:
def parse_terms(extracted_terms):
  new_terms = []
  for term in extracted_terms:
    if term[0].isalpha() and term[-1].isalpha() and "**" not in term and "(" not in term and ")" not in term and len(term)>3:
      new_terms.append(term)
  return new_terms

In [80]:
file_path = "./terms_raw.txt"
# Extract terms from the file
cutext_terms = extract_terms_from_file(file_path)

In [81]:
print(len(cutext_terms))

21554


In [82]:
cutext_terms = parse_terms(cutext_terms)

Extract NEG, UNC, NSCO and USCO from Training Data

In [83]:
# Gets a list of tuples representing character offsets and returns list of words
def get_words(text, offsets):
  words = []
  for start, end in offsets:
    #words.append(text[start:end-1])
    #words.append(text[start-1:end])
    if text[start-1].isalpha():
      s=start-1
    else:
      s=start
    if text[end-1].isalpha():
      e=end
    else:
      e=end-1
    words.append(text[s:e])
  return words

In [84]:
# Parses a document and returns 4 lists of tuples representing words
def find_cues_and_scopes(document):
  neg_postitions_pairs = [(result_element["value"]["start"], result_element["value"]["end"]) for result_element in document["predictions"][0]["result"] if "NEG" in result_element["value"]["labels"]]
  unc_postitions_pairs = [(result_element["value"]["start"], result_element["value"]["end"]) for result_element in document["predictions"][0]["result"] if "UNC" in result_element["value"]["labels"]]
  nsco_postitions_pairs = [(result_element["value"]["start"], result_element["value"]["end"]) for result_element in document["predictions"][0]["result"] if "NSCO" in result_element["value"]["labels"]]
  usco_postitions_pairs = [(result_element["value"]["start"], result_element["value"]["end"]) for result_element in document["predictions"][0]["result"] if "USCO" in result_element["value"]["labels"]]
  neg_words = get_words(document["data"]["text"], neg_postitions_pairs)
  unc_words = get_words(document["data"]["text"], unc_postitions_pairs)
  nsco_words = get_words(document["data"]["text"], nsco_postitions_pairs)
  usco_words = get_words(document["data"]["text"], usco_postitions_pairs)
  return neg_words, unc_words, nsco_words, usco_words

In [85]:
def create_cue_sets(document):

  neg_pre_cues = []
  neg_post_cues = []
  unc_pre_cues = []
  unc_post_cues = []

  neg_postitions_pairs = [(result_element["value"]["start"], result_element["value"]["end"]) for result_element in document["predictions"][0]["result"] if "NEG" in result_element["value"]["labels"]]
  unc_postitions_pairs = [(result_element["value"]["start"], result_element["value"]["end"]) for result_element in document["predictions"][0]["result"] if "UNC" in result_element["value"]["labels"]]
  nsco_postitions_pairs = [(result_element["value"]["start"], result_element["value"]["end"]) for result_element in document["predictions"][0]["result"] if "NSCO" in result_element["value"]["labels"]]
  usco_postitions_pairs = [(result_element["value"]["start"], result_element["value"]["end"]) for result_element in document["predictions"][0]["result"] if "USCO" in result_element["value"]["labels"]]


  neg_words = get_words(document["data"]["text"], neg_postitions_pairs)
  unc_words = get_words(document["data"]["text"], unc_postitions_pairs)
  nsco_words = get_words(document["data"]["text"], nsco_postitions_pairs)
  usco_words = get_words(document["data"]["text"], usco_postitions_pairs)


  neg_tpl = [(neg_words[i], neg_postitions_pairs[i][0], neg_postitions_pairs[i][1]) for i in range(len(neg_words))]
  unc_tpl = [(unc_words[i], unc_postitions_pairs[i][0], unc_postitions_pairs[i][1]) for i in range(len(unc_words))]
  nsco_tpl = [(nsco_words[i], nsco_postitions_pairs[i][0], nsco_postitions_pairs[i][1]) for i in range(len(nsco_words))]
  usco_tpl = [(usco_words[i], usco_postitions_pairs[i][0], usco_postitions_pairs[i][1]) for i in range(len(usco_words))]
  
  neg_tpl = sorted(neg_tpl, key=lambda x: x[1])
  unc_tpl = sorted(unc_tpl, key=lambda x: x[1])
  nsco_tpl = sorted(nsco_tpl, key=lambda x: x[1])
  usco_tpl = sorted(usco_tpl, key=lambda x: x[1])

  for i,j in zip(range(len(neg_tpl)), range(len(nsco_tpl))):
    if neg_tpl[i][1] - nsco_tpl[j][2] > 10:
        i -= 1
        continue
    elif nsco_tpl[j][1] - neg_tpl[i][2] > 10:
        j -= 1
        continue
    if neg_tpl[i][1] > nsco_tpl[i][1]:
      neg_post_cues.append(neg_tpl[i][0])
    else:
      neg_pre_cues.append(neg_tpl[i][0])

  for i,j in zip(range(len(unc_tpl)), range(len(usco_tpl))):
    if unc_tpl[i][1] - usco_tpl[j][2] > 10:
        i -= 1
        continue
    elif usco_tpl[j][1] - unc_tpl[i][2] > 10:
        j -= 1
        continue
    if unc_tpl[i][1] > usco_tpl[i][1]:
      unc_post_cues.append(unc_tpl[i][0])
    else:
      unc_pre_cues.append(unc_tpl[i][0])

  return neg_pre_cues, neg_post_cues, unc_pre_cues, unc_post_cues

In [86]:
# pre = apar inainte de scope
neg_pre_cues = set()
neg_post_cues = set()
unc_pre_cues = set()
unc_post_cues = set()

neg_pre_dict = {}
neg_post_dict = {}
unc_pre_dict = {}
unc_post_dict = {}

for document in training_set:
    neg_pre, neg_post, unc_pre, unc_post = create_cue_sets(document)
    for word in neg_pre:
        if word in neg_pre_dict:
            neg_pre_dict[word] += 1
        else:
            neg_pre_dict[word] = 1
    for word in neg_post:
        if word in neg_post_dict:
            neg_post_dict[word] += 1
        else:
            neg_post_dict[word] = 1
    for word in unc_pre:
        if word in unc_pre_dict:
            unc_pre_dict[word] += 1
        else:
            unc_pre_dict[word] = 1
    for word in unc_post:
        if word in unc_post_dict:
            unc_post_dict[word] += 1
        else:
            unc_post_dict[word] = 1
    neg_pre_cues.update(neg_pre)
    neg_post_cues.update(neg_post)
    unc_pre_cues.update(unc_pre)
    unc_post_cues.update(unc_post)


neg_pre_blacklist = set()
neg_post_blacklist = set()
unc_pre_blacklist = set()
unc_post_blacklist = set()

for word in neg_pre_cues:
    if word in neg_post_cues:
        if neg_pre_dict[word] > neg_post_dict[word]:
            neg_post_blacklist.add(word)
        else:
            neg_pre_blacklist.add(word)

for word in unc_pre_cues:
    if word in unc_post_cues:
        if unc_pre_dict[word] > unc_post_dict[word]:
            unc_post_blacklist.add(word)
        else:
            unc_pre_blacklist.add(word)

neg_pre_cues = neg_pre_cues - neg_pre_blacklist
neg_post_cues = neg_post_cues - neg_post_blacklist
unc_pre_cues = unc_pre_cues - unc_pre_blacklist
unc_post_cues = unc_post_cues - unc_post_blacklist




In [87]:
for neg in neg_pre_cues:
  if neg in neg_post_cues:
    print("E in ambele:", neg)
    

In [88]:
NEG = set()
UNC = set()
NSCO = set()
USCO = set()

num_nsco = 0
num_usco = 0
for document in training_set:
  neg_words, unc_words, nsco_words, usco_words = find_cues_and_scopes(document)
  nsco_words_set = set(nsco_words)
  usco_words_set = set(usco_words)
  num_nsco += len(nsco_words)
  num_usco += len(usco_words)

  NEG.update(neg_words)
  UNC.update(unc_words)
  NSCO.update(nsco_words)
  USCO.update(usco_words)

# Removing spaces and punctation signs from the start and end of each string
NEG = {word.strip(" ,.!?;)") for word in NEG}
UNC = {word.strip(" ,.!?);") for word in UNC}
NSCO = {word.strip(" ,.!?;)") for word in NSCO}
USCO = {word.strip(" ,.!?);") for word in USCO}

# Remove negation from UNC
for word in NEG:
  if word in UNC:
    UNC.remove(word)

Combine USCO and NSCO in SCOPE_words

In [89]:
ALL_SCOPES = NSCO.union(USCO)

# A set with all individual words from the scopes
SCOPE_words = set()         # ['erc', '(29/05/18)', 'ser', 'visibles', 'extratono', 'inicia', 'valor', 'frialdad', 'medicamentoses', 'neoformativo']
for scope in ALL_SCOPES:
  SCOPE_words.update(scope.split())

print("NEG_UNC words before processing: ", len(SCOPE_words))

# Remove all symbols and numbers from the set
SCOPE_words = {word for word in SCOPE_words if word.isalpha()}
SCOPE_words = list(SCOPE_words)

NEG_UNC words before processing:  3184


Combine SCOPE_words with extracted_terms from CUTEXT

In [90]:
extracted_terms = list(set(cutext_terms+SCOPE_words))

In [91]:
print(len(extracted_terms))


19959


In [92]:
extracted_terms.sort(key=len,reverse=True)

Prepare REGEX

In [93]:
neg_pre_cues = {word.strip(" *,.!?;)") for word in neg_pre_cues}
neg_post_cues = {word.strip(" *,.!?;)") for word in neg_post_cues}
unc_pre_cues = {word.strip(" *,.!?;)") for word in unc_pre_cues}
unc_post_cues = {word.strip(" *,.!?;)") for word in unc_post_cues}

In [94]:
NEG_pre_pattern = "|".join(neg_pre_cues)
UNC_pre_pattern = "|".join(unc_pre_cues)
NEG_post_pattern = "|".join(neg_post_cues)
UNC_post_pattern = "|".join(unc_post_cues)

In [95]:
NEG_pattern = "|".join(NEG)
UNC_pattern = "|".join(UNC)
#SCOPE pattern with CUTEXT + NSCO+USCO
SCOPE_pattern = "|".join(extracted_terms)

In [96]:
#SCOPE pattern just for CUTEXT
SCOPE_pattern_CUTEXT = "|".join(cutext_terms)

In [97]:
regex_neg_pre=rf"\b({NEG_pattern})\b\s\w*\s*\w*\s*\w*\s*\w*\s*\w*\s*\b({SCOPE_pattern}\b)"
regex_neg_pos=rf"\b({SCOPE_pattern})\b\s\b({NEG_pattern})\b"
regex_unc_pre=rf"\b({UNC_pattern})\b\s\w*\s*\w*\s*\w*\s*\w*\s*\w*\s*\b({SCOPE_pattern})\b"
regex_unc_pos=rf"\b({SCOPE_pattern})\b\s\w*\s*\w*\s*\w*\s*\w*\s*\w*\s*\b({UNC_pattern})\b"


In [98]:
regex_neg_pre =rf"\b({NEG_pattern})\b\s+((?:\b(?:{SCOPE_pattern})\b\s*){{0,5}})"
regex_unc_pre=rf"\b({UNC_pattern})\b\s+((?:\b(?:{SCOPE_pattern})\b\s*){{0,5}})"

In [99]:
regex_neg_pre=rf"\b({NEG_pattern})\b\s*((?:\b(?:{SCOPE_pattern_CUTEXT})\b\s*){{0,7}})"
regex_unc_pre=rf"\b({UNC_pattern})\b\s*((?:\b(?:{SCOPE_pattern_CUTEXT})\b\s*){{0,7}})"

In [108]:
regex_neg_pre =rf"\b({NEG_pre_pattern})\b\s*((?:\b(?:{SCOPE_pattern})\b\s*){{0,5}})"
regex_unc_pre=rf"\b({UNC_pre_pattern})\b\s*((?:\b(?:{SCOPE_pattern})\b\s*){{0,5}})"

In [109]:
regex_neg_pos =rf"\b({SCOPE_pattern})\b\s*({NEG_post_pattern})\b"
regex_unc_pos =rf"\b({SCOPE_pattern})\b\s*({UNC_post_pattern})\b"

In [107]:
print(regex_neg_pos[324000:324100])

r|ya|º|s|r|t|i|m|u|o|e|n|l|a|v|b|y)\b\s*\b*(negatiu|negativas|inespecifico|atipicos|suspendido|negat


Make Predictions

In [110]:
predictions = []
for i in range(len(test_texts)):
  dict = {"NEG":set(),"NSCO":set(),"UNC":set(),"USCO":set()}
  predictions.append(dict)


for id, test_text in enumerate(test_texts):
  neg_scopes_pre_matches = re.finditer(regex_neg_pre, test_text)
  neg_scopes_pos_matches = re.finditer(regex_neg_pos, test_text)
  unc_scopes_pre_matches = re.finditer(regex_unc_pre, test_text)
  unc_scopes_pos_matches = re.finditer(regex_unc_pos, test_text)

  if neg_scopes_pre_matches:
    for match in neg_scopes_pre_matches:
        #print("Whole match:", match.group(0))
        # Get the matched word and its starting/ending positions
        matched_word = match.group(1)
        start_pos = match.start(1)
        end_pos = match.end(1)+1
        #print(f"Found '{matched_word}' at positions ({start_pos}, {end_pos})")

        predictions[id]["NEG"].add((start_pos,end_pos,matched_word))

        # # Get the scope word
        scope_word = match.group(2)
        sc_start_pos = end_pos
        sc_end_pos = match.end(2)+1

        predictions[id]["NSCO"].add((sc_start_pos,sc_end_pos,scope_word))

        #print(f"Found scope '{scope_word}' at positions ({sc_start_pos}, {sc_end_pos})")
  
  if neg_scopes_pos_matches:
    for match in neg_scopes_pos_matches:
        #print("Whole match:", match.group(0))
        # Get the matched word and its starting/ending positions
        scope_word = match.group(1)
        sc_start_pos = match.start()
        sc_end_pos = match.end(1)+1


       #print(f"Found '{scope_word}' at positions ({sc_start_pos}, {sc_end_pos})")
        # # Get the scope word
        matched_word = match.group(2)
        start_pos = sc_end_pos
        end_pos = match.end(2)+1


        predictions[id]["NEG"].add((start_pos,end_pos,matched_word))
        predictions[id]["NSCO"].add((sc_start_pos,sc_end_pos,scope_word))

        #print(f"Found scope '{match_word}' at positions ({start_pos}, {end_pos})")
  
  if unc_scopes_pre_matches:
    for match in unc_scopes_pre_matches:
        #print("Whole match:", match.group(0))
        # Get the matched word and its starting/ending positions
        matched_word = match.group(1)
        start_pos = match.start()
        end_pos = match.end(1)+1
        #print(f"Found '{matched_word}' at positions ({start_pos}, {end_pos})")

        predictions[id]["UNC"].add((start_pos,end_pos,matched_word))


        # # Get the scope word
        scope_word = match.group(2)
        sc_start_pos = end_pos
        sc_end_pos = match.end(2)+1
        #print(f"Found scope '{scope_word}' at positions ({sc_start_pos}, {sc_end_pos})")

        predictions[id]["USCO"].add((sc_start_pos,sc_end_pos,scope_word))
    
    if unc_scopes_pos_matches:
      for match in unc_scopes_pos_matches:
          #print("Whole match:", match.group(0))
          # Get the matched word and its starting/ending positions
          scope_word = match.group(1)
          sc_start_pos = match.start()
          sc_end_pos = match.end(1)+1
          #print(f"Found '{scope_word}' at positions ({sc_start_pos}, {sc_end_pos})")
          # # Get the scope word
          matched_word = match.group(2)
          start_pos = sc_end_pos
          end_pos = match.end(2)+1
          #print(f"Found scope '{matched_word}' at positions ({start_pos}, {end_pos})")

          predictions[id]["UNC"].add((start_pos,end_pos,matched_word))
          predictions[id]["USCO"].add((sc_start_pos,sc_end_pos,scope_word))
      

Sort the text predictions by starting point

In [111]:
for dict in predictions:
    for key,value in dict.items():

      sorted_value=sorted(list(value), key=lambda x: x[0])
      dict[key] = sorted_value

Get ground thruth from testing_set

In [None]:
def get_gt_format(document):
    neg_predictions, unc_predictions, nsco_predictions, usco_predictions = [], [], [], []
    text = document["data"]["text"]
    for result_element in document["predictions"][0]["result"]:
        start = result_element["value"]["start"]
        end = result_element["value"]["end"]
        if "NEG" in result_element["value"]["labels"]:
            neg_predictions.append((start, end, text[start:end]))
        if "UNC" in result_element["value"]["labels"]:
            unc_predictions.append((start, end, text[start:end]))
        if "NSCO" in result_element["value"]["labels"]:
            nsco_predictions.append((start, end, text[start:end]))
        if "USCO" in result_element["value"]["labels"]:
            usco_predictions.append((start, end, text[start:end]))

    return neg_predictions, unc_predictions, nsco_predictions, usco_predictions

In [None]:
# FORMAT : (NEG, START, END, WORD)
def get_ground_truth(document):
    neg_results, unc_results, nsco_results, usco_results = get_gt_format(document)

    neg_results_sorted = sorted(neg_results, key=lambda x: x[0])
    unc_results_sorted = sorted(unc_results, key=lambda x: x[0])
    nsco_results_sorted = sorted(nsco_results, key=lambda x: x[0])
    usco_results_sorted = sorted(usco_results, key=lambda x: x[0])

    ground_truth_dict = {"NEG": neg_results_sorted, "UNC": unc_results_sorted, "NSCO": nsco_results_sorted, "USCO": usco_results_sorted}

    return ground_truth_dict



get_ground_truth(testing_set[0])

{'NEG': [(395, 398, 'no '),
  (499, 505, 'niega '),
  (1111, 1119, 'negativo'),
  (1141, 1144, 'no '),
  (1163, 1166, 'no '),
  (1194, 1203, 'negativos'),
  (2118, 2122, 'sin ')],
 'UNC': [],
 'NSCO': [(398, 422, 'alergias medicamentosas '),
  (505, 521, 'habitos toxicos '),
  (1107, 1111, 'vih '),
  (1144, 1150, 'inmune'),
  (1166, 1172, 'immune'),
  (1174, 1194, 'lues vih, vhb y vhc '),
  (2122, 2133, 'incidencias')],
 'USCO': []}

In [None]:
# List of dictionaries of GT docuemnts in the test set
ground_truths = [get_ground_truth(document) for document in testing_set]


Calculate Metrics

In [None]:
def calculate_metrics(predictions,ground_truths):
  precision = {"NEG":0,"NSCO":0,"UNC":0,"USCO":0}
  recall = {"NEG":0,"NSCO":0,"UNC":0,"USCO":0}
  f1 = {"NEG":0,"NSCO":0,"UNC":0,"USCO":0}
  tp = {"NEG":0,"NSCO":0,"UNC":0,"USCO":0}
  num_of_predictions = {"NEG":0,"NSCO":0,"UNC":0,"USCO":0}
  num_of_ground_truths = {"NEG":0,"NSCO":0,"UNC":0,"USCO":0}
  for d1,d2 in zip(predictions,ground_truths):

    #print(d1["UNC"])
    #print(d2["UNC"])
    for key in d1:
      #print(key)
      for elem in d1[key]:
        for elem2 in d2[key]:
          if abs(elem[0]-elem2[0]) <= 1 and abs(elem[1]-elem2[1]) <=1:
            tp[key]+=1
            break

      num_of_predictions[key]+=len(d1[key])
      num_of_ground_truths[key]+=len(d2[key])

  for key in precision:
    precision[key] = tp[key]/num_of_predictions[key]
    recall[key] = tp[key]/num_of_ground_truths[key]
    f1[key] = 2*precision[key]*recall[key]/(precision[key]+recall[key])


  return precision, recall, f1


In [116]:
precision, recall, f1 = calculate_metrics(predictions,ground_truths)

In [None]:
print(precision)
print(recall)
print(f1)

{'NEG': 0.927487352445194, 'NSCO': 0.5185497470489039, 'UNC': 0.6666666666666666, 'USCO': 0.2542372881355932}
{'NEG': 0.9717314487632509, 'NSCO': 0.5726256983240223, 'UNC': 0.9007633587786259, 'USCO': 0.3488372093023256}
{'NEG': 0.9490940465918896, 'NSCO': 0.5442477876106194, 'UNC': 0.7662337662337663, 'USCO': 0.29411764705882354}


In [None]:
print(precision)
print(recall)
print(f1)

{'NEG': 0.927487352445194, 'NSCO': 0.5185497470489039, 'UNC': 0.6666666666666666, 'USCO': 0.2542372881355932}
{'NEG': 0.9717314487632509, 'NSCO': 0.5726256983240223, 'UNC': 0.9007633587786259, 'USCO': 0.3488372093023256}
{'NEG': 0.9490940465918896, 'NSCO': 0.5442477876106194, 'UNC': 0.7662337662337663, 'USCO': 0.29411764705882354}


CUTEXT


In [None]:
print(precision)
print(recall)
print(f1)

{'NEG': 0.927487352445194, 'NSCO': 0.5185497470489039, 'UNC': 0.6666666666666666, 'USCO': 0.2542372881355932}
{'NEG': 0.9717314487632509, 'NSCO': 0.5726256983240223, 'UNC': 0.9007633587786259, 'USCO': 0.3488372093023256}
{'NEG': 0.9490940465918896, 'NSCO': 0.5442477876106194, 'UNC': 0.7662337662337663, 'USCO': 0.29411764705882354}


In [None]:
print(precision)
print(recall)
print(f1)

{'NEG': 0.9320948110185778, 'NSCO': 0.28590192644483364, 'UNC': 0.14193548387096774, 'USCO': 0.02972972972972973}
{'NEG': 1.2853356890459364, 'NSCO': 0.6080074487895717, 'UNC': 1.1755725190839694, 'USCO': 0.2558139534883721}
{'NEG': 1.0805792796138134, 'NSCO': 0.38892197736748063, 'UNC': 0.2532894736842105, 'USCO': 0.053268765133171914}


In [113]:
print(predictions[0])

{'NEG': [(395, 398, 'no'), (499, 505, 'niega'), (1111, 1120, 'negativo'), (1141, 1144, 'no'), (1163, 1166, 'no'), (1194, 1204, 'negativos'), (1313, 1322, 'negativo'), (2118, 2122, 'sin')], 'NSCO': [(398, 433, 'alergias medicamentosas conocidas '), (505, 541, 'habitos toxicos medicacio habitual '), (1107, 1111, 'vih'), (1144, 1151, 'inmune'), (1166, 1173, 'immune'), (1190, 1194, 'vhc'), (1309, 1313, 'sgb'), (2122, 2134, 'incidencias')], 'UNC': [(395, 398, 'no'), (1141, 1144, 'no'), (1163, 1166, 'no'), (2118, 2122, 'sin'), (3460, 3466, 'puede')], 'USCO': [(398, 433, 'alergias medicamentosas conocidas '), (1144, 1151, 'inmune'), (1166, 1173, 'immune'), (2122, 2134, 'incidencias'), (3466, 3467, '')]}


In [117]:
print(precision)
print(recall)
print(f1)

{'NEG': 0.9413886384129847, 'NSCO': 0.5870153291253382, 'UNC': 0.11307767944936087, 'USCO': 0.04228121927236971}
{'NEG': 0.9222614840989399, 'NSCO': 0.6061452513966481, 'UNC': 0.8778625954198473, 'USCO': 0.3333333333333333}
{'NEG': 0.9317269076305221, 'NSCO': 0.5964269354099863, 'UNC': 0.20034843205574912, 'USCO': 0.075043630017452}
