In [12]:
import pandas as pd
import matplotlib.pyplot as plt
import json
import re

# Load the evaluated statements from the JSON file.
# JSON text is UTF-8 by specification and the statements contain non-ASCII
# characters (e.g. curly quotes), so the encoding is pinned explicitly instead
# of being left to the platform's locale default.
with open('LIAR-Evaluated.json', 'r', encoding='utf-8') as f:
    data = json.load(f)

# Convert the parsed JSON into a pandas DataFrame
df = pd.DataFrame(data)

def clean_percentage(value, default=0):
    """Extract a numeric certainty from a raw cell value.

    Args:
        value: The raw value. A string is searched for its first unsigned
            decimal number (e.g. "75%" -> 75.0, "about 12.5" -> 12.5). An int
            or float is passed through as a float, so values that are already
            numeric (or already cleaned) are preserved.
        default: Returned when no number can be recovered: a string without
            digits, None, NaN, a bool, or any other type. Defaults to 0.

    Returns:
        The extracted number as a float, or `default`.
    """
    if isinstance(value, str):
        match = re.search(r'\d+(?:\.\d+)?', value)
        if match:
            return float(match.group())
        return default

    # bool is a subclass of int, but True/False is not a certainty value.
    if isinstance(value, (int, float)) and not isinstance(value, bool):
        number = float(value)
        # NaN is the only float that is not equal to itself; treat it as missing.
        if number == number:
            return number

    return default

# Turn both raw certainty columns into plain numbers
df['true certainty1'] = df['true certainty1'].map(clean_percentage)
df['true certainty2'] = df['true certainty2'].map(clean_percentage)

# Absolute gap between the two certainties
certainty_gap = df['true certainty1'] - df['true certainty2']
df['certainty difference'] = certainty_gap.abs()
del certainty_gap

# Flag the rows where the prediction equals the ground truth
df['correct'] = df['prediction'].eq(df['truth'])

# Lower-case the classification labels of both statements
df['classification1'] = df['classification1'].str.lower()
df['classification2'] = df['classification2'].str.lower()

print(df.head())


                                          statement1 classification1   
0  An image of empty grocery store shelves shows ...           false  \
1  “Raising the debt limit is paying our old debt...            true   
2  People who are vaccinated for the coronavirus ...           false   
3  Dr. Anthony Fauci and U.S. health officials pl...           false   
4  Says Dr. Anthony Fauci said he will not “take ...           false   

                                        explanation1 true certainty1   
0  \n\nWhile the statement suggests a causal rela...              75  \
1  The statement accurately reflects the purpose ...              90   
2  While vaccination significantly reduces the ch...              90   
3  The statement seems to make a speculative clai...              10   
4  There is no verifiable evidence or credible so...              80   

                                          statement2 classification2   
0  “Europe COMPLETELY BANS the Moderna vaccine fo...          

In [None]:
# The 'classification status' column is assigned in a later cell of this
# notebook. Derive it here when it is absent so this cell also runs top to
# bottom on a fresh kernel; the labelling rule is the same one used there.
if 'classification status' not in df.columns:
    df['classification status'] = 'One True One False'
    df.loc[(df['classification1'] == 'true') & (df['classification2'] == 'true'), 'classification status'] = 'Both True'
    df.loc[(df['classification1'] == 'false') & (df['classification2'] == 'false'), 'classification status'] = 'Both False'

# value_counts() only lists labels that actually occur, so .get(..., 0.0) is
# used to report 0% for an absent label instead of raising KeyError.
classification_counts = df['classification1'].value_counts(normalize=True) * 100
print("Percentage of 'true' classifications in 'classification1':", classification_counts.get('true', 0.0))

classification_counts = df['classification2'].value_counts(normalize=True) * 100
print("Percentage of 'true' classifications in 'classification2':", classification_counts.get('true', 0.0))

classification_counts = df['classification status'].value_counts(normalize=True) * 100
print("Percentage of 'One True One False' classifications:", classification_counts.get('One True One False', 0.0))
print("Percentage of 'Both True' classifications:", classification_counts.get('Both True', 0.0))
print("Percentage of 'Both False' classifications:", classification_counts.get('Both False', 0.0))


In [None]:


def plot_accuracy_by(frame, group_col, title, xlabel):
    """Draw a bar chart of the percentage of correct predictions per group.

    Args:
        frame: DataFrame containing a 'correct' column and `group_col`.
        group_col: Name of the column whose values define the bars.
        title: Title of the figure.
        xlabel: Label of the x axis.
    """
    # The mean of 'correct' is a fraction in [0, 1]; scale it so the bars
    # agree with the 'Percentage Correct' axis label.
    # observed=False keeps categorical groups that have no rows and states
    # explicitly the default that newer pandas versions warn about.
    accuracy_pct = frame.groupby(group_col, observed=False)['correct'].mean() * 100

    fig, ax = plt.subplots(figsize=(10, 6))
    accuracy_pct.plot(kind='bar', ax=ax)
    ax.set_title(title)
    ax.set_xlabel(xlabel)
    ax.set_ylabel('Percentage Correct')

    # Add the percentage value on top of each bar
    for bar in ax.patches:
        height = bar.get_height()
        # A group without rows has a NaN mean; there is nothing to annotate.
        if height != height:
            continue
        ax.annotate(f'{height:.1f}%', (bar.get_x() + bar.get_width() / 2., height), ha='center', va='center', xytext=(0, 10), textcoords='offset points')

    plt.show()


# Label each row by how its two statements were classified
df['classification status'] = 'One True One False'
df.loc[(df['classification1'] == 'true') & (df['classification2'] == 'true'), 'classification status'] = 'Both True'
df.loc[(df['classification1'] == 'false') & (df['classification2'] == 'false'), 'classification status'] = 'Both False'

# Plot the accuracy by classification status
plot_accuracy_by(df, 'classification status', 'Accuracy by Classification Status', 'Classification Status')

# Define bins for the certainty difference.
# The top edge is open-ended: using the column maximum as the last edge raises
# ValueError (bins must increase monotonically) whenever the maximum is <= 20.
bins = [0, 10, 20, float('inf')]
labels = ['<10%', '10-20%', '>20%']
# include_lowest=True keeps rows whose difference is exactly 0; with the
# default they fall outside the first (0, 10] interval and become NaN.
df['certainty difference bin'] = pd.cut(df['certainty difference'], bins=bins, labels=labels, include_lowest=True)

# Plot the accuracy by certainty difference
plot_accuracy_by(df, 'certainty difference bin', 'Accuracy by Certainty Difference', 'Certainty Difference')

In [6]:
# Share of each classification status across all rows, in percent
classification_counts = df['classification status'].value_counts(normalize=True).mul(100)
print(classification_counts)


classification status
Both False            68.316832
One True One False    30.693069
Both True              0.990099
Name: proportion, dtype: float64
