In [1]:
import json

# Load the golden-truth triples (data1) and the model predictions (data2).
with open('test_triples_conll04.json', 'r') as file:
    data1 = json.load(file)

with open('pred_conll04.json', 'r') as file:
    data2 = json.load(file)

# Report how many entries each file holds.
length_data1 = len(data1)
length_data2 = len(data2)
print(f"Length of the golden truth JSON file: {length_data1}")
print(f"Length of the prediction JSON file: {length_data2}")

# Index both lists by their 'id' attribute (a repeated id keeps its last entry).
data1_dict = {item['id']: item for item in data1}
data2_dict = {item['id']: item for item in data2}

# IDs present in both files.
common_ids = set(data1_dict.keys()) & set(data2_dict.keys())

# Walk the ids in golden-file order rather than in set order, so the two output
# files are aligned with each other and identical from one run to the next.
ordered_common_ids = [item_id for item_id in data1_dict if item_id in common_ids]

# Extract the entries shared by both files.
common_data1 = [data1_dict[item_id] for item_id in ordered_common_ids]
common_data2 = [data2_dict[item_id] for item_id in ordered_common_ids]

# Save the shared entries to the files used by the evaluation cells.
with open('Finaltest.json', 'w') as file:
    json.dump(common_data1, file, indent=4)

with open('Finalpred.json', 'w') as file:
    json.dump(common_data2, file, indent=4)

# Report the files that were actually written.
print("Created new JSON files with common data points: 'Finaltest.json' and 'Finalpred.json'")

Length of the golden truth JSON file: 288
Length of the prediction JSON file: 288
Created new JSON files with common data points: 'common_file1.json' and 'common_file2.json'


In [3]:
import json

# Read the model predictions.
with open('pred_conll04.json', 'r') as file:
    pred_data = json.load(file)

# Gather every 'triples' value that is a plain string rather than a list.
string_triples = [
    entry['triples']
    for entry in pred_data
    if 'triples' in entry and isinstance(entry['triples'], str)
]
string_triples_count = len(string_triples)

# Report how many such entries exist, then list them one by one.
print(f"Number of entries with 'triples' as a string: {string_triples_count}")

for position, text in enumerate(string_triples, start=1):
    print(f"String {position}: {text}")

Number of entries with 'triples' as a string: 26
String 1:  {"action": "extract_text_tripleets", "action_input": "There was no mention of the ` iron triangle ` of members of Congress, the news media and special interest groups who, in a speech to political appointees in Washington on Dec. 13, Reagan claimed had prevented his administration from balancing the federal budget."}
String 2:  {"action": "extract_text_triplets", "action_input": "Aguadilla is a city in Puerto Rico."}

Or if you meant to provide the triplet for the statement "Aguadilla is in Puerto Rico", then the response would be:
{"action": "extract_text_triplets", "action_input": "Aguadilla is in Puerto Rico."}
String 3:  {"action": "extract_text_triplets", "action\_input": "The old bugaboo is where this baby is going to hit," " said John Jamison, a National Weather Service meteorologist in Galveston."}

This will extract subject-verb-object triplets from the given text, such as ["John Jamison", "said", "was going to hit"] 

In [4]:
# Hallucination rate: share of predictions whose 'triples' value is a plain
# string instead of a list of triples. Both figures are copied from the cell
# outputs above (26 string entries, 288 predictions).
malformed_predictions = 26
total_predictions = 288
Hallucination = malformed_predictions / total_predictions

In [5]:
# Display the hallucination rate computed in the previous cell.
Hallucination

0.09027777777777778

In [6]:
import json

# Load the golden-truth triples (data1) and the model predictions (data2).
with open('test_triples_conll04.json', 'r') as file:
    data1 = json.load(file)

with open('pred_conll04.json', 'r') as file:
    data2 = json.load(file)

# IDs of prediction entries whose 'triples' value is a plain string.
# A set keeps the membership tests below constant-time.
ids_to_remove = {
    entry['id']
    for entry in data2
    if 'triples' in entry and isinstance(entry['triples'], str)
}

# Drop those ids from both the golden truth and the predictions.
filtered_data1 = [entry for entry in data1 if entry['id'] not in ids_to_remove]
filtered_data2 = [entry for entry in data2 if entry['id'] not in ids_to_remove]

# NOTE: these are the same paths the common-ID cell writes to, so running this
# cell replaces the files that cell produced.
with open('Finaltest.json', 'w') as file:
    json.dump(filtered_data1, file, indent=4)

with open('Finalpred.json', 'w') as file:
    json.dump(filtered_data2, file, indent=4)

# Report the files that were actually written.
print("Entries with 'triples' as a string have been removed. New files created: 'Finaltest.json' and 'Finalpred.json'")

Entries with 'triples' as a string have been removed. New files created: 'filtered_file1.json' and 'filtered_file2.json'


In [1]:
import json

# Function to calculate precision, recall, and F1 score
def calculate_scores(tp, total_golden, total_prediction):
    """Return (precision, recall, f1) for the given counts.

    Args:
        tp: Number of true positives.
        total_golden: Number of golden items (recall denominator).
        total_prediction: Number of predicted items (precision denominator).

    Returns:
        A 3-tuple (precision, recall, f1). Any score whose denominator is
        not positive is reported as 0.
    """
    precision = 0
    if total_prediction > 0:
        precision = tp / total_prediction

    recall = 0
    if total_golden > 0:
        recall = tp / total_golden

    score_sum = precision + recall
    if score_sum > 0:
        f1 = 2 * (precision * recall) / score_sum
    else:
        f1 = 0

    return precision, recall, f1

# Function to process the files and calculate the scores, considering extras
def evaluate_predictions_corrected(golden_file, prediction_file):
    """Compare predicted triples with golden triples and report micro/macro scores.

    Both files are read as JSON lists of objects carrying an 'id' and a
    'triples' list whose elements are JSON objects. Triples are matched per
    'id'; two triples are equal when they hold the same key/value pairs,
    regardless of the order in which the keys appear in the file.

    Args:
        golden_file: Path to the JSON file with the golden-truth triples.
        prediction_file: Path to the JSON file with the predicted triples.

    Returns:
        A dict with the keys:
            'micro': precision/recall/f1 computed from the pooled counts.
            'macro': precision and recall averaged over the golden ids, with
                f1 taken as the harmonic mean of those two averages.
            'true_positives': matched triples summed over all golden ids.
            'extras': predicted triples that have no golden match.
    """
    with open(golden_file, 'r') as f:
        golden_data = json.load(f)
    with open(prediction_file, 'r') as f:
        prediction_data = json.load(f)

    def _triples_by_id(items):
        # frozenset (not tuple) of the items, so that the same triple written
        # with a different key order still compares equal.
        return {
            item['id']: {frozenset(triple.items()) for triple in item['triples']}
            for item in items
        }

    golden_dict = _triples_by_id(golden_data)
    prediction_dict = _triples_by_id(prediction_data)

    # One pass over the golden ids: pool the true positives for the micro
    # scores and accumulate the per-id precision/recall for the macro scores.
    # The pooled count has its own name so the per-id count cannot clobber it.
    total_tp = 0
    precision_sum = 0
    recall_sum = 0
    for item_id, golden_triples in golden_dict.items():
        prediction_triples = prediction_dict.get(item_id, set())
        item_tp = len(golden_triples & prediction_triples)
        total_tp += item_tp
        precision, recall, _ = calculate_scores(item_tp, len(golden_triples), len(prediction_triples))
        precision_sum += precision
        recall_sum += recall

    # Predicted triples without a golden counterpart; an id absent from the
    # golden data contributes all of its triples.
    extras = sum(
        len(prediction_triples - golden_dict.get(item_id, set()))
        for item_id, prediction_triples in prediction_dict.items()
    )

    # Micro scores from the pooled counts.
    total_golden = sum(len(triples) for triples in golden_dict.values())
    total_prediction = sum(len(triples) for triples in prediction_dict.values())
    precision_micro, recall_micro, f1_micro = calculate_scores(total_tp, total_golden, total_prediction)

    # Macro scores; an empty golden file yields zeros instead of dividing by zero.
    total_items = len(golden_dict)
    precision_macro = precision_sum / total_items if total_items > 0 else 0
    recall_macro = recall_sum / total_items if total_items > 0 else 0
    f1_macro = 2 * (precision_macro * recall_macro) / (precision_macro + recall_macro) if (precision_macro + recall_macro) > 0 else 0

    return {
        'micro': {
            'precision': precision_micro,
            'recall': recall_micro,
            'f1': f1_micro
        },
        'macro': {
            'precision': precision_macro,
            'recall': recall_macro,
            'f1': f1_macro
        },
        'true_positives': total_tp,
        'extras': extras
    }

# Example usage: call evaluate_predictions_corrected(golden_file, prediction_file), as done in the next cell.



In [2]:
# Score the filtered predictions against the filtered golden truth, then
# print the micro- and macro-averaged results.
scores = evaluate_predictions_corrected('Finaltest.json', 'Finalpred.json')
for label, key in (("Micro Scores:", 'micro'), ("Macro Scores:", 'macro')):
    print(label, scores[key])

Micro Scores: {'precision': 0.042328042328042326, 'recall': 0.042440318302387266, 'f1': 0.0423841059602649}
Macro Scores: {'precision': 0.05216284987277353, 'recall': 0.04961832061068702, 'f1': 0.0508587786259542}


In [3]:
import json

# Function to calculate precision, recall, and F1 score
def calculate_scores(tp, total_golden, total_prediction):
    """Compute precision, recall and F1 from raw counts.

    Args:
        tp: Number of true positives.
        total_golden: Number of golden items (recall denominator).
        total_prediction: Number of predicted items (precision denominator).

    Returns:
        A 3-tuple (precision, recall, f1); a score is 0 whenever its
        denominator is not positive.
    """
    def _ratio_or_zero(numerator, denominator):
        # Guarded division shared by all three scores.
        return numerator / denominator if denominator > 0 else 0

    precision = _ratio_or_zero(tp, total_prediction)
    recall = _ratio_or_zero(tp, total_golden)
    f1 = _ratio_or_zero(2 * (precision * recall), precision + recall)
    return precision, recall, f1

# Function to process the files and calculate the scores, considering extras
def evaluate_predictions_corrected(golden_file, prediction_file):
    """Compare predicted triples with golden triples and report pooled scores.

    Both files are read as JSON lists of objects carrying an 'id' and a
    'triples' list whose elements are JSON objects. Triples are matched per
    'id'; two triples are equal when they hold the same key/value pairs,
    regardless of the order in which the keys appear in the file.

    Note: an earlier cell defines a function with this same name that returns
    nested 'micro'/'macro' dicts; running this cell replaces it.

    Args:
        golden_file: Path to the JSON file with the golden-truth triples.
        prediction_file: Path to the JSON file with the predicted triples.

    Returns:
        A dict with 'precision', 'recall' and 'f1' computed from the pooled
        counts, plus 'true_positives' (matched triples over all golden ids)
        and 'extras' (predicted triples that have no golden match).
    """
    with open(golden_file, 'r') as f:
        golden_data = json.load(f)
    with open(prediction_file, 'r') as f:
        prediction_data = json.load(f)

    def _triples_by_id(items):
        # frozenset (not tuple) of the items, so that the same triple written
        # with a different key order still compares equal.
        return {
            item['id']: {frozenset(triple.items()) for triple in item['triples']}
            for item in items
        }

    golden_dict = _triples_by_id(golden_data)
    prediction_dict = _triples_by_id(prediction_data)

    # True positives: triples found in both sets for the same id.
    tp = sum(
        len(golden_triples & prediction_dict.get(item_id, set()))
        for item_id, golden_triples in golden_dict.items()
    )

    # Predicted triples without a golden counterpart; an id absent from the
    # golden data contributes all of its triples.
    extras = sum(
        len(prediction_triples - golden_dict.get(item_id, set()))
        for item_id, prediction_triples in prediction_dict.items()
    )

    # Pooled (micro-averaged) scores.
    total_golden = sum(len(triples) for triples in golden_dict.values())
    total_prediction = sum(len(triples) for triples in prediction_dict.values())
    precision, recall, f1 = calculate_scores(tp, total_golden, total_prediction)

    return {
        'precision': precision,
        'recall': recall,
        'f1': f1,
        'true_positives': tp,
        'extras': extras
    }

# Example usage: call evaluate_predictions_corrected(golden_file, prediction_file), as done in the next cell.


In [4]:
# Score the filtered predictions against the filtered golden truth.
golden_path, prediction_path = 'Finaltest.json', 'Finalpred.json'
scores = evaluate_predictions_corrected(golden_path, prediction_path)


In [5]:
# Display the score dict returned by the evaluation in the previous cell.
scores

{'precision': 0.042328042328042326,
 'recall': 0.042440318302387266,
 'f1': 0.0423841059602649,
 'true_positives': 16,
 'extras': 362}