In [1]:
import json

# Load the generated predictions (JSON Lines: one JSON object per line).
# The encoding is pinned to UTF-8 so decoding does not depend on the
# platform's default locale, and whitespace-only lines are skipped because
# json.loads raises JSONDecodeError on an empty document.
predictions_file_path = 'generated_predictions.jsonl'
with open(predictions_file_path, 'r', encoding='utf-8') as file:
    predictions = [json.loads(line) for line in file if line.strip()]

# Display the first few entries to understand the structure
predictions[:5]


[{'label': '{"is_fraudulent": false}', 'predict': '{"is_fraudulent": false}'},
 {'label': '{"is_fraudulent": false}', 'predict': '{"is_fraudulent": false}'},
 {'label': '{"is_fraudulent": false}', 'predict': '{"is_fraudulent": false}'},
 {'label': '{"is_fraudulent": true}', 'predict': '{"is_fraudulent": true}'},
 {'label': '{"is_fraudulent": false}', 'predict': '{"is_fraudulent": false}'}]

In [6]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

def _as_int_flag(raw_json):
    """Decode a JSON string and return its `is_fraudulent` value cast to int."""
    return int(json.loads(raw_json)['is_fraudulent'])


# Ground truth and model output, each as a list of 0/1 integers
# (the boolean flag is cast to int before being handed to the metric functions).
labels = [_as_int_flag(entry['label']) for entry in predictions]
predicts = [_as_int_flag(entry['predict']) for entry in predictions]

# Evaluate the four classification metrics on the same (labels, predicts) pair
accuracy, precision, recall, f1 = [
    metric(labels, predicts)
    for metric in (accuracy_score, precision_score, recall_score, f1_score)
]

# Full-precision values first, then the rounded per-metric summary
print(f'Metrics: {accuracy}, {precision}, {recall}, {f1}\n')

print(f'Accuracy: {accuracy:.2f}')
print(f'Precision: {precision:.2f}')
print(f'Recall: {recall:.2f}')
print(f'F1 Score: {f1:.2f}')

Metrics: 0.9937219730941704, 0.9863013698630136, 0.9664429530201343, 0.9762711864406781

Accuracy: 0.99
Precision: 0.99
Recall: 0.97
F1 Score: 0.98


## Validate predictions, accounting for invalid JSON

In [7]:
# Re-load the generated predictions file as raw text lines so that malformed
# records can be counted one by one instead of aborting the whole load.
# NOTE(review): this rebinds `predictions` from parsed dicts to raw strings, so
# re-running the earlier metrics cell after this one will fail. The name is
# kept unchanged in case a later cell reads it — confirm and rename if not.
predictions_file_path = 'generated_predictions.jsonl'
with open(predictions_file_path, 'r', encoding='utf-8') as file:
    predictions = [line for line in file]


def _read_fraud_flag(raw_json):
    """Return the boolean `is_fraudulent` value stored in a JSON string.

    Returns None when the decoded value is not an object, lacks the
    `is_fraudulent` key, or holds a non-boolean under that key.
    Raises json.JSONDecodeError when `raw_json` is not valid JSON and
    TypeError when `raw_json` is not a str/bytes value at all.
    """
    decoded = json.loads(raw_json)
    if isinstance(decoded, dict) and isinstance(decoded.get('is_fraudulent'), bool):
        return decoded['is_fraudulent']
    return None


# Initialize counters
valid_responses = 0
invalid_json_structure = 0
invalid_json_content = 0

# Labels and predictions (as 0/1 integers) for the valid records only
valid_labels = []
valid_predicts = []

# Classify every line as valid, structurally invalid, or invalid in content
for line in predictions:
    try:
        prediction = json.loads(line)
        label_flag = _read_fraud_flag(prediction['label'])
        # The prediction is only inspected once the label is usable, which
        # mirrors the short-circuit of a combined "label and predict" check.
        predict_flag = None
        if label_flag is not None:
            predict_flag = _read_fraud_flag(prediction['predict'])
    except json.JSONDecodeError:
        # The line itself, or one of its embedded JSON strings, does not parse
        invalid_json_structure += 1
        continue
    except (KeyError, TypeError):
        # Parsable JSON with the wrong shape: the record is not an object,
        # 'label'/'predict' is missing, or a field is not a JSON string
        invalid_json_content += 1
        continue

    if label_flag is None or predict_flag is None:
        # Missing `is_fraudulent` key or a non-boolean value
        invalid_json_content += 1
    else:
        valid_responses += 1
        valid_labels.append(int(label_flag))
        valid_predicts.append(int(predict_flag))

# Metrics are only defined when at least one record was valid
if valid_responses > 0:
    accuracy_valid = accuracy_score(valid_labels, valid_predicts)
    precision_valid = precision_score(valid_labels, valid_predicts)
    recall_valid = recall_score(valid_labels, valid_predicts)
    f1_valid = f1_score(valid_labels, valid_predicts)
else:
    accuracy_valid = precision_valid = recall_valid = f1_valid = None

valid_responses, invalid_json_structure, invalid_json_content, accuracy_valid, precision_valid, recall_valid, f1_valid



(1115,
 0,
 0,
 0.9937219730941704,
 0.9863013698630136,
 0.9664429530201343,
 0.9762711864406781)

In [9]:
# Record counts from the validation pass
print(f'Valid responses: {valid_responses}')
print(f'Invalid JSON structures: {invalid_json_structure}')
print(f'Invalid JSON content: {invalid_json_content}')

# Each metric is None when there were no valid responses; report that
# explicitly instead of trying to format None as a float.
if accuracy_valid is None:
    print('No valid responses to calculate accuracy')
else:
    print(f'Accuracy for valid responses: {accuracy_valid:.2f}')

if precision_valid is None:
    print('No valid responses to calculate precision')
else:
    print(f'Precision for valid responses: {precision_valid:.2f}')

if recall_valid is None:
    print('No valid responses to calculate recall')
else:
    print(f'Recall for valid responses: {recall_valid:.2f}')

if f1_valid is None:
    print('No valid responses to calculate F1 score')
else:
    print(f'F1 Score for valid responses: {f1_valid:.2f}')

Valid responses: 1115
Invalid JSON structures: 0
Invalid JSON content: 0
Accuracy for valid responses: 0.99
Precision for valid responses: 0.99
Recall for valid responses: 0.97
F1 Score for valid responses: 0.98
