In [None]:
%pip install Levenshtein

Collecting Levenshtein
  Downloading Levenshtein-0.25.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.3 kB)
Collecting rapidfuzz<4.0.0,>=3.8.0 (from Levenshtein)
  Downloading rapidfuzz-3.9.7-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Downloading Levenshtein-0.25.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (177 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m177.4/177.4 kB[0m [31m1.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading rapidfuzz-3.9.7-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.4/3.4 MB[0m [31m20.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: rapidfuzz, Levenshtein
Successfully installed Levenshtein-0.25.1 rapidfuzz-3.9.7


In [None]:
import re
import Levenshtein

# Writing the necessary functions

In [None]:
# One of the graph databases stores the state codes as entities. Since this was not specified
# in the prompt, the base LLM extracted the full names of the states instead.
# We do not count this as an error (the instruction was never given explicitly), so the
# mapping below is used to turn every full state name into its code before scoring.

# The mapping was generated with the following prompt (quoted verbatim):
# prompt: Write a python dictionary in which the keys are all the states of the United States and the value is their corresponding ufficial codes. Example: {"california": "CA", ...}

# Full state name -> two-letter code (insertion order is alphabetical by name)
us_state_abbrev = dict([
    ('Alabama', 'AL'), ('Alaska', 'AK'), ('Arizona', 'AZ'), ('Arkansas', 'AR'),
    ('California', 'CA'), ('Colorado', 'CO'), ('Connecticut', 'CT'), ('Delaware', 'DE'),
    ('Florida', 'FL'), ('Georgia', 'GA'), ('Hawaii', 'HI'), ('Idaho', 'ID'),
    ('Illinois', 'IL'), ('Indiana', 'IN'), ('Iowa', 'IA'), ('Kansas', 'KS'),
    ('Kentucky', 'KY'), ('Louisiana', 'LA'), ('Maine', 'ME'), ('Maryland', 'MD'),
    ('Massachusetts', 'MA'), ('Michigan', 'MI'), ('Minnesota', 'MN'), ('Mississippi', 'MS'),
    ('Missouri', 'MO'), ('Montana', 'MT'), ('Nebraska', 'NE'), ('Nevada', 'NV'),
    ('New Hampshire', 'NH'), ('New Jersey', 'NJ'), ('New Mexico', 'NM'), ('New York', 'NY'),
    ('North Carolina', 'NC'), ('North Dakota', 'ND'), ('Ohio', 'OH'), ('Oklahoma', 'OK'),
    ('Oregon', 'OR'), ('Pennsylvania', 'PA'), ('Rhode Island', 'RI'), ('South Carolina', 'SC'),
    ('South Dakota', 'SD'), ('Tennessee', 'TN'), ('Texas', 'TX'), ('Utah', 'UT'),
    ('Vermont', 'VT'), ('Virginia', 'VA'), ('Washington', 'WA'), ('West Virginia', 'WV'),
    ('Wisconsin', 'WI'), ('Wyoming', 'WY'),
])

# Lower-cased copy (both names and codes), matching the lower-cased text produced by the extractors
us_state_abbrev_lower = {}
for state_name, state_code in us_state_abbrev.items():
    us_state_abbrev_lower[state_name.lower()] = state_code.lower()

In [None]:
# Regex helper: strips every run of whitespace that is not inside round or square brackets

def remove_spaces_outside_brackets(text):
  """Return `text` with the whitespace outside (...) and [...] removed.

  A whitespace run is kept only when it is followed, with no bracket
  character in between, by a closing ")" or "]"; every other whitespace
  run (spaces, tabs, newlines) is deleted.
  """
  outside_whitespace = re.compile(r'\s+(?![^\(\)\[\]]*[\)\]])')
  return outside_whitespace.sub('', text)

In [None]:
# Extracts the distinct triples from a ground-truth response
def extract_triples(text):
  """Return the unique "(head)-[relation]->(tail)" triples found in `text`.

  The text is first stripped of whitespace outside brackets and lower-cased.
  Duplicates are dropped through a set, so the order of the returned list
  is not defined.
  """
  triple_regex = re.compile(r'\(.*?\)-\[.*?\]->\(.*?\)')
  normalized = remove_spaces_outside_brackets(text).lower()
  unique_triples = set(triple_regex.findall(normalized))
  return list(unique_triples)

# Splits triples from the responses generated by the LLM
def extract_triples_from_responses(text):
  """Return the unique "(head)-[relation]->(tail)" triples found in an LLM response.

  On top of the normalization applied to the ground truth (whitespace outside
  brackets removed, lower-casing), the response is cleaned as follows:
    * every occurrence of " bill" is dropped (dataset-specific clean-up; the
      rationale is not visible in this notebook),
    * single and double quotes are removed,
    * full US state names are replaced by their lower-cased codes.

  Duplicates are dropped through a set, so the order of the returned list
  is not defined.
  """
  text = remove_spaces_outside_brackets(text).lower()
  text = text.replace(" bill", "")
  text = text.replace('"', '').replace("'", "")
  # Replace the longest names first: otherwise "virginia" is rewritten before
  # "west virginia" is seen, which yields "west va" instead of "wv".
  # NOTE(review): this is still a plain substring replacement, so a state name
  # embedded in a longer word (e.g. "indianapolis") is rewritten as well.
  for state_name in sorted(us_state_abbrev_lower, key=len, reverse=True):
    text = text.replace(state_name, us_state_abbrev_lower[state_name])
  pattern = r'\(.*?\)-\[.*?\]->\(.*?\)'
  return list(set(re.findall(pattern, text)))

# Returns the number of matches divided by the number of original triples
# How many of the ground truth triples have been extracted from the LLM?
def recall(GT_triples, Ex_triples, threshold=0.95):
  """Fraction of ground-truth triples that have a match among the extracted ones.

  Args:
    GT_triples: ground-truth triples (strings).
    Ex_triples: triples extracted from the LLM response (strings).
    threshold: a pair counts as a match when its Levenshtein ratio is
      strictly greater than this value (default 0.95).

  Returns:
    matches / len(GT_triples), or 0 when there are no ground-truth triples.
  """
  if len(GT_triples) == 0:
    return 0
  # Each ground-truth triple counts at most once, however many extracted triples resemble it
  matches = sum(
    1
    for GT_triple in GT_triples
    if any(Levenshtein.ratio(GT_triple, Ex_triple) > threshold for Ex_triple in Ex_triples)
  )
  return matches/len(GT_triples)

# Returns the number of matches divided by the number of extracted triples
# How many of the extracted triples are correct?
def precision(GT_triples, Ex_triples, threshold=0.95):
  """Fraction of extracted triples that have a match among the ground-truth ones.

  Args:
    GT_triples: ground-truth triples (strings).
    Ex_triples: triples extracted from the LLM response (strings).
    threshold: a pair counts as a match when its Levenshtein ratio is
      strictly greater than this value (default 0.95).

  Returns:
    matches / len(Ex_triples), or 0 when nothing was extracted.
  """
  if len(Ex_triples) == 0:
    return 0
  # Each extracted triple counts at most once, however many ground-truth triples resemble it
  matches = sum(
    1
    for Ex_triple in Ex_triples
    if any(Levenshtein.ratio(Ex_triple, GT_triple) > threshold for GT_triple in GT_triples)
  )
  return matches/len(Ex_triples)

# 4 Original Databases

In [None]:
# importing the module
import json

# Opening JSON file (explicit UTF-8 so that parsing does not depend on the platform's default encoding)
with open('responses.json', encoding='utf-8') as json_file:
    data = json.load(json_file)

In [None]:
# The loaded value is itself a JSON-encoded string, so it is decoded a second time.
# The guard keeps this cell re-runnable: once `data` is decoded it is no longer a
# string and json.loads would raise a TypeError.
if isinstance(data, str):
  data = json.loads(data)

## Non Finetuned LLM

In [None]:
# Score every sample: one [recall, precision] pair per entry, using the "NF_triples" responses
results = []
for sample in data:
  gold_triples = extract_triples(sample["triples"])
  predicted_triples = extract_triples_from_responses(sample["NF_triples"])
  sample_recall = recall(gold_triples, predicted_triples)
  sample_precision = precision(gold_triples, predicted_triples)
  results.append([sample_recall, sample_precision])

In [None]:
# Average over the number of scored samples instead of a hard-coded 400,
# so the figures stay correct if the dataset size changes.
sample_count = len(results)

#Average precision (index 1 of each [recall, precision] pair)
tot = 0
for el in results:
  tot += el[1]
print("Average precision: " + str(tot/sample_count if sample_count else 0.0))

#Average recall (index 0 of each [recall, precision] pair)
tot = 0
for el in results:
  tot += el[0]
print("Average recall: " + str(tot/sample_count if sample_count else 0.0))

Average precision: 0.25342442279942273
Average recall: 0.23680127511377527


## Non Finetuned LLM with Few Shot Prompting

In [None]:
# Score every sample: one [recall, precision] pair per entry, using the "FS_triples" responses
results = []
for sample in data:
  gold_triples = extract_triples(sample["triples"])
  predicted_triples = extract_triples_from_responses(sample["FS_triples"])
  sample_recall = recall(gold_triples, predicted_triples)
  sample_precision = precision(gold_triples, predicted_triples)
  results.append([sample_recall, sample_precision])

In [None]:
# Average over the number of scored samples instead of a hard-coded 400,
# so the figures stay correct if the dataset size changes.
sample_count = len(results)

#Average precision (index 1 of each [recall, precision] pair)
tot = 0
for el in results:
  tot += el[1]
print("Average precision: " + str(tot/sample_count if sample_count else 0.0))

#Average recall (index 0 of each [recall, precision] pair)
tot = 0
for el in results:
  tot += el[0]
print("Average recall: " + str(tot/sample_count if sample_count else 0.0))

Average precision: 0.6261810619935617
Average recall: 0.5409542653983442


## Finetuned LLM

In [None]:
# Score every sample: one [recall, precision] pair per entry, using the "FT_triples" responses
results = []
for sample in data:
  gold_triples = extract_triples(sample["triples"])
  predicted_triples = extract_triples_from_responses(sample["FT_triples"])
  sample_recall = recall(gold_triples, predicted_triples)
  sample_precision = precision(gold_triples, predicted_triples)
  results.append([sample_recall, sample_precision])

In [None]:
# Average over the number of scored samples instead of a hard-coded 400,
# so the figures stay correct if the dataset size changes.
sample_count = len(results)

#Average precision (index 1 of each [recall, precision] pair)
tot = 0
for el in results:
  tot += el[1]
print("Average precision: " + str(tot/sample_count if sample_count else 0.0))

#Average recall (index 0 of each [recall, precision] pair)
tot = 0
for el in results:
  tot += el[0]
print("Average recall: " + str(tot/sample_count if sample_count else 0.0))

Average precision: 0.8115761097562568
Average recall: 0.7673880448864003


# 2 External Databases

In [None]:
# importing the module
import json

# Opening JSON file (explicit UTF-8 so that parsing does not depend on the platform's default encoding)
with open('responses_external_dbs.json', encoding='utf-8') as json_file:
    data = json.load(json_file)

In [None]:
# The loaded value is itself a JSON-encoded string, so it is decoded a second time.
# The guard keeps this cell re-runnable: once `data` is decoded it is no longer a
# string and json.loads would raise a TypeError.
if isinstance(data, str):
  data = json.loads(data)

## Non Finetuned LLM

In [None]:
# Score every sample: one [recall, precision] pair per entry, using the "NF_triples" responses
results_NF = []
for sample in data:
  gold_triples = extract_triples(sample["triples"])
  predicted_triples = extract_triples_from_responses(sample["NF_triples"])
  sample_recall = recall(gold_triples, predicted_triples)
  sample_precision = precision(gold_triples, predicted_triples)
  results_NF.append([sample_recall, sample_precision])

In [None]:
# The scores of this section are stored in `results_NF` (filled by the cell above).
# Reading `results` here would average whatever an earlier section left behind.
# The divisor is the number of scored samples rather than a hard-coded 200.
sample_count = len(results_NF)

#Average precision (index 1 of each [recall, precision] pair)
tot = 0
for el in results_NF:
  tot += el[1]
print("Average precision: " + str(tot/sample_count if sample_count else 0.0))

#Average recall (index 0 of each [recall, precision] pair)
tot = 0
for el in results_NF:
  tot += el[0]
print("Average recall: " + str(tot/sample_count if sample_count else 0.0))

Average precision: 0.09162961760461762
Average recall: 0.07744950800833154


## Non Finetuned LLM with Few Shot Prompting

In [None]:
# Score every sample: one [recall, precision] pair per entry, using the "FS_triples" responses
results = []
for sample in data:
  gold_triples = extract_triples(sample["triples"])
  predicted_triples = extract_triples_from_responses(sample["FS_triples"])
  sample_recall = recall(gold_triples, predicted_triples)
  sample_precision = precision(gold_triples, predicted_triples)
  results.append([sample_recall, sample_precision])

In [None]:
# Average over the number of scored samples instead of a hard-coded 200,
# so the figures stay correct if the dataset size changes.
sample_count = len(results)

#Average precision (index 1 of each [recall, precision] pair)
tot = 0
for el in results:
  tot += el[1]
print("Average precision: " + str(tot/sample_count if sample_count else 0.0))

#Average recall (index 0 of each [recall, precision] pair)
tot = 0
for el in results:
  tot += el[0]
print("Average recall: " + str(tot/sample_count if sample_count else 0.0))

Average precision: 0.7213288378288375
Average recall: 0.5671780984538338


## Finetuned LLM

In [None]:
# Score every sample: one [recall, precision] pair per entry, using the "FT_triples" responses
results_FT = []
for sample in data:
  gold_triples = extract_triples(sample["triples"])
  predicted_triples = extract_triples_from_responses(sample["FT_triples"])
  sample_recall = recall(gold_triples, predicted_triples)
  sample_precision = precision(gold_triples, predicted_triples)
  results_FT.append([sample_recall, sample_precision])

In [None]:
# The scores of this section are stored in `results_FT` (filled by the cell above).
# Reading `results` here would average whatever an earlier section left behind.
# The divisor is the number of scored samples rather than a hard-coded 200.
sample_count = len(results_FT)

#Average precision (index 1 of each [recall, precision] pair)
tot = 0
for el in results_FT:
  tot += el[1]
print("Average precision: " + str(tot/sample_count if sample_count else 0.0))

#Average recall (index 0 of each [recall, precision] pair)
tot = 0
for el in results_FT:
  tot += el[0]
print("Average recall: " + str(tot/sample_count if sample_count else 0.0))

Average precision: 0.3764262265512265
Average recall: 0.3150033801982332


## Finetuned LLM with Few Shot Prompting

In [None]:
# Score every sample: one [recall, precision] pair per entry, using the "FT_FS_triples" responses
results = []
for sample in data:
  gold_triples = extract_triples(sample["triples"])
  predicted_triples = extract_triples_from_responses(sample["FT_FS_triples"])
  sample_recall = recall(gold_triples, predicted_triples)
  sample_precision = precision(gold_triples, predicted_triples)
  results.append([sample_recall, sample_precision])

In [None]:
# Average over the number of scored samples instead of a hard-coded 200,
# so the figures stay correct if the dataset size changes.
sample_count = len(results)

#Average precision (index 1 of each [recall, precision] pair)
tot = 0
for el in results:
  tot += el[1]
print("Average precision: " + str(tot/sample_count if sample_count else 0.0))

#Average recall (index 0 of each [recall, precision] pair)
tot = 0
for el in results:
  tot += el[0]
print("Average recall: " + str(tot/sample_count if sample_count else 0.0))

Average precision: 0.6884821706071703
Average recall: 0.550634228108493


## Finetuned LLM 2 (step 50)

In [None]:
# Score every sample: one [recall, precision] pair per entry, using the "FSFT_triples_50" responses
results = []
for sample in data:
  gold_triples = extract_triples(sample["triples"])
  predicted_triples = extract_triples_from_responses(sample["FSFT_triples_50"])
  sample_recall = recall(gold_triples, predicted_triples)
  sample_precision = precision(gold_triples, predicted_triples)
  results.append([sample_recall, sample_precision])

In [None]:
# Average over the number of scored samples instead of a hard-coded 200,
# so the figures stay correct if the dataset size changes.
sample_count = len(results)

#Average precision (index 1 of each [recall, precision] pair)
tot = 0
for el in results:
  tot += el[1]
print("Average precision: " + str(tot/sample_count if sample_count else 0.0))

#Average recall (index 0 of each [recall, precision] pair)
tot = 0
for el in results:
  tot += el[0]
print("Average recall: " + str(tot/sample_count if sample_count else 0.0))

Average precision: 0.7182154385824464
Average recall: 0.6641798773122303


## Finetuned LLM 2 (step 150)

In [None]:
# Score every sample: one [recall, precision] pair per entry, using the "FSFT_triples_150" responses
results = []
for sample in data:
  gold_triples = extract_triples(sample["triples"])
  predicted_triples = extract_triples_from_responses(sample["FSFT_triples_150"])
  sample_recall = recall(gold_triples, predicted_triples)
  sample_precision = precision(gold_triples, predicted_triples)
  results.append([sample_recall, sample_precision])

In [None]:
# Average over the number of scored samples instead of a hard-coded 200,
# so the figures stay correct if the dataset size changes.
sample_count = len(results)

#Average precision (index 1 of each [recall, precision] pair)
tot = 0
for el in results:
  tot += el[1]
print("Average precision: " + str(tot/sample_count if sample_count else 0.0))

#Average recall (index 0 of each [recall, precision] pair)
tot = 0
for el in results:
  tot += el[0]
print("Average recall: " + str(tot/sample_count if sample_count else 0.0))

Average precision: 0.6798755133755132
Average recall: 0.619570431202784


## Finetuned LLM 2 (step 300)

In [None]:
# Score every sample: one [recall, precision] pair per entry, using the "FSFT_triples_300" responses
results = []
for sample in data:
  gold_triples = extract_triples(sample["triples"])
  predicted_triples = extract_triples_from_responses(sample["FSFT_triples_300"])
  sample_recall = recall(gold_triples, predicted_triples)
  sample_precision = precision(gold_triples, predicted_triples)
  results.append([sample_recall, sample_precision])

In [None]:
# Average over the number of scored samples instead of a hard-coded 200,
# so the figures stay correct if the dataset size changes.
sample_count = len(results)

#Average precision (index 1 of each [recall, precision] pair)
tot = 0
for el in results:
  tot += el[1]
print("Average precision: " + str(tot/sample_count if sample_count else 0.0))

#Average recall (index 0 of each [recall, precision] pair)
tot = 0
for el in results:
  tot += el[0]
print("Average recall: " + str(tot/sample_count if sample_count else 0.0))

Average precision: 0.5134541222666222
Average recall: 0.45341587987176213
