In [1]:
import os
import sys

# Resolve the parent of the current working directory and put it on the module
# search path so that modules located there can be imported from this notebook.
notebook_dir = os.getcwd()
root = os.path.abspath(os.path.join(notebook_dir, os.pardir))
# Guard the append so that re-running this cell does not stack duplicate
# entries onto sys.path.
if root not in sys.path:
    sys.path.append(root)

In [2]:
import os
import json
import pandas as pd
import plotly.graph_objects as go
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score

# Read results from JSON files in results directory
results_dir = os.path.join(root, "results")
results_files = [f for f in os.listdir(results_dir) if f.endswith('.json')]
if not results_files:
    # max() on an empty list fails with an opaque "empty sequence" ValueError;
    # raise an error that names the directory that was searched instead.
    raise FileNotFoundError(f"No .json result files found in {results_dir}")

# Select the file with the greatest ctime.
# NOTE(review): os.path.getctime is the creation time on Windows but the last
# metadata-change time on Unix; confirm this is the intended meaning of "latest".
latest_result = max(results_files, key=lambda x: os.path.getctime(os.path.join(results_dir, x)))

# Open with an explicit encoding so parsing does not depend on the platform's
# default text encoding.
with open(os.path.join(results_dir, latest_result), 'r', encoding='utf-8') as f:
    data = json.load(f)


def _to_label_name(value):
    """Return "Not Misinfo" when `value` equals 1, otherwise "Misinfo"."""
    return "Not Misinfo" if value == 1 else "Misinfo"


# Extract predictions and actuals as display names.
# NOTE(review): the two lists are paired by position, which assumes that
# data['results']['runs'] and data['results']['examples'] are listed in the
# same order -- confirm against the format of the results file.
predictions = [_to_label_name(run['outputs']['label']) for run in data['results']['runs']]
actuals = [_to_label_name(example['outputs']['label']) for example in data['results']['examples']]

if len(predictions) != len(actuals):
    raise ValueError(
        f"{latest_result}: found {len(predictions)} runs but {len(actuals)} examples; "
        "predictions and actuals cannot be paired"
    )

In [3]:



# Compute confusion matrix
# The class order is fixed once and shared by the matrix and the heatmap axes.
# Without an explicit `labels=`, confusion_matrix orders classes by sorted label
# value ('Misinfo' before 'Not Misinfo'), which would not match the axis labels
# used below and would mislabel every cell of the plot.
labels = ['Not Misinfo', 'Misinfo']
cm = confusion_matrix(actuals, predictions, labels=labels)

# Create heatmap (rows of cm are true labels, columns are predicted labels)
fig = go.Figure(data=go.Heatmap(
    z=cm,
    x=labels,
    y=labels,
    text=cm,
    texttemplate="%{text}",
    textfont={"size": 16},
    colorscale='Blues'
))

# Update layout
fig.update_layout(
    title='Misinformation Detection Confusion Matrix',
    xaxis_title='Predicted Label',
    yaxis_title='True Label',
    width=700,
    height=600
)

# Calculate metrics, treating 'Misinfo' as the positive class
metrics = {
    'Accuracy': accuracy_score(actuals, predictions),
    'Precision': precision_score(actuals, predictions, pos_label='Misinfo'),
    'Recall': recall_score(actuals, predictions, pos_label='Misinfo'),
    'F1 Score': f1_score(actuals, predictions, pos_label='Misinfo')
}

# Display metrics
print("\nPerformance Metrics:")
for metric, value in metrics.items():
    print(f"{metric}: {value:.3f}")

# Show interactive plot
fig.show()



Performance Metrics:
Accuracy: 0.740
Precision: 0.900
Recall: 0.540
F1 Score: 0.675
