In [1]:
import json
import pandas as pd
import os
import re

## Read Data

US Presidential Inaugural Speeches Data: https://www.kaggle.com/datasets/adhok93/presidentialaddress

In [2]:
# The first CSV column is read as the index and then discarded, leaving a
# fresh 0..n-1 integer index on the frame.
df = pd.read_csv(
    "data/inaug_speeches.csv",
    encoding="latin1",
    index_col=[0],
).reset_index(drop=True)

In [3]:
# Show the last five rows of the loaded frame.
df.tail()

Unnamed: 0,Name,Inaugural Address,Date,text
53,George W. Bush,First Inaugural Address,"Saturday, January 20, 2001","President Clinton, distinguished g..."
54,George W. Bush,Second Inaugural Address,"Thursday, January 20, 2005","Vice President Cheney, Mr. Chief J..."
55,Barack Obama,First Inaugural Address,"Tuesday, January 20, 2009",My fellow citizens: I stand h...
56,Barack Obama,Second Inaugural Address,"Monday, January 21, 2013","Vice President Biden, Mr. Chief Ju..."
57,Donald J. Trump,Inaugural Address,"Friday, January 20, 2017","Chief Justice Roberts, President Ca..."


## GPT Checks

In [4]:
def clean_text(text):
    """Normalise a piece of text into a single line of plain ASCII.

    Steps, applied in order: strip URLs (any run of non-space characters
    starting at ``http``), e-mail-like tokens and hashtags; turn newlines
    into spaces; drop every non-ASCII character; collapse whitespace runs
    into single spaces and trim both ends.

    Args:
        text: The string to clean.

    Returns:
        The cleaned, single-spaced ASCII string.
    """
    # Order matters: each pattern runs on the output of the previous one.
    for pattern in (r"http\S+", r"\S+@\S+", r"#\S+"):
        text = re.sub(pattern, "", text)

    # Newlines become spaces so words on adjacent lines stay separated.
    text = text.replace("\n", " ")

    # Non-ASCII characters are silently discarded (not transliterated).
    ascii_only = text.encode("ascii", "ignore").decode()

    # split() with no argument splits on any whitespace run and ignores
    # leading/trailing whitespace, so the join yields single-spaced text.
    return " ".join(ascii_only.split())

In [5]:
# Overwrite the "text" column with its cleaned version (clean_text applied per row).
df["text"] = df["text"].apply(clean_text)

In [6]:
# Preview the first 200 characters of the last row's text.
df["text"].iloc[-1][:200]

'Chief Justice Roberts, President Carter, President Clinton, President Bush, President Obama, fellow Americans and people of the world, thank you. We, the citizens of America, are now joined in a great'

In [7]:
# Endpoints of the AI-content detectors that were used for the GPT checks.
# NOTE(review): none of these variables is referenced by the cells visible in
# this notebook; the detector responses are loaded from the results/ directory.

# zerogpt.com
zerogpt_url = "https://api.zerogpt.com/api/detect/detectText"

# https://writer.com/ai-content-detector/
writer_url = "https://writer.com/wp-admin/admin-ajax.php"

# https://contentatscale.ai/ai-content-detector/
contentatscale_url = "https://contentatscale.ai/ai-content-detector/"

# https://crossplag.com/ai-content-detector/
crossplag_url = (
    "https://j1o8u6du62.execute-api.eu-central-1.amazonaws.com/production/detect"
)

## Aggregating results

In [8]:
# Load each saved detector response, keyed by file name (the cells below look
# them up as results["crossplag.json"], results["writer.json"], ...).
results = {}
# sorted() makes the load order (and dict insertion order) deterministic.
for file in sorted(os.listdir("results")):
    # os.listdir returns every entry, including hidden files and
    # sub-directories (e.g. .DS_Store, .ipynb_checkpoints); only JSON files
    # can be parsed, so skip everything else instead of crashing.
    if not file.endswith(".json"):
        continue
    # Read as UTF-8 explicitly rather than relying on the platform default.
    with open(os.path.join("results", file), "r", encoding="utf-8") as f:
        results[file] = json.load(f)

In [9]:
# Start from an empty frame; the following cells add one column per detector.
ai_detected_df = pd.DataFrame()

In [10]:
# crossplag: take "aiIndex" (nested under "dataToreturn") as a float for
# every key in the saved response.
crossplag_results = {
    idx: float(response["dataToreturn"]["aiIndex"])
    for idx, response in results["crossplag.json"].items()
}
# from_dict(orient="index") turns the dict keys into the row index.
ai_detected_df["crossplag"] = pd.DataFrame.from_dict(crossplag_results, orient="index")

In [11]:
# writer: each entry is indexed at [0] to reach its "score"; the stored
# value is 1 - score.
# NOTE(review): presumably "score" is a human-written likelihood, hence the
# inversion — confirm against the API response.
writer_results = {
    idx: 1 - entries[0]["score"]
    for idx, entries in results["writer.json"].items()
}
ai_detected_df["writer"] = pd.DataFrame.from_dict(writer_results, orient="index")

In [12]:
# zerogpt: "fakePercentage" (under "data") is divided by 100 to turn the
# percentage into a fraction.
zerogpt_results = {
    idx: response["data"]["fakePercentage"] / 100
    for idx, response in results["zerogpt.json"].items()
}
ai_detected_df["zerogpt"] = pd.DataFrame.from_dict(zerogpt_results, orient="index")

In [13]:
# contentatscale: "score" is parsed as a float, scaled from a percentage to a
# fraction, and stored as 1 - fraction.
# NOTE(review): presumably "score" is a human-written percentage, hence the
# inversion — confirm against the API response.
contentatscale_results = {
    idx: 1 - float(response["score"]) / 100
    for idx, response in results["contentatscale.json"].items()
}
ai_detected_df["contentatscale"] = pd.DataFrame.from_dict(
    contentatscale_results,
    orient="index",
)

In [14]:
# Keys loaded from JSON objects are strings; cast the index to int so it can
# line up with df's integer row index in the join below.
ai_detected_df.index = ai_detected_df.index.astype(int)

## Joining both dataframes and grouping by name

In [15]:
# Left-join the detector scores onto the speaker names by row index.
agg_df = df[["Name"]].join(ai_detected_df, how="left")

In [16]:
# Show the first five rows of the joined frame.
agg_df.head()

Unnamed: 0,Name,crossplag,writer,zerogpt,contentatscale
0,George Washington,0.0,0.022695,0.0,0.0
1,George Washington,0.0,0.005919,0.0,0.0
2,John Adams,0.68,0.108756,0.0272,0.0
3,Thomas Jefferson,0.84,0.006442,0.0,0.0
4,Thomas Jefferson,0.0,0.009407,0.0,0.0


In [17]:
# Collapse to one row per "Name", keeping the column-wise maximum of each
# detector score across that name's rows; "Name" becomes the index.
max_grouped_df = agg_df.groupby("Name").max()

In [18]:
# Summary statistics of the per-name maximum scores.
max_grouped_df.describe()

Unnamed: 0,crossplag,writer,zerogpt,contentatscale
count,39.0,39.0,39.0,39.0
mean,0.645385,0.026563,0.126051,0.00359
std,0.254381,0.07102,0.13415,0.022418
min,0.0,0.000519,0.0,0.0
25%,0.59,0.001606,0.0425,0.0
50%,0.68,0.007392,0.0914,0.0
75%,0.825,0.017877,0.15,0.0
max,1.0,0.434982,0.6064,0.14


In [19]:
# Rank the names within each detector column, add the ranks up per name, and
# sort ascending (smallest rank sum first).
# NOTE(review): "Name" is already the index after groupby, so iloc[:, 1:]
# leaves the first score column (crossplag) out of the ranking — confirm this
# exclusion is intended and not an off-by-one.
sorted_agg_df = (
    max_grouped_df.iloc[:, 1:]
    .rank()
    .sum(axis=1)
    .sort_values()
)

In [20]:
# The ten names with the smallest rank sums, shown with their maximum scores.
max_grouped_df.loc[sorted_agg_df.index[:10]]

Unnamed: 0_level_0,crossplag,writer,zerogpt,contentatscale
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Warren G. Harding,0.66,0.000519,0.0,0.0
Rutherford B. Hayes,0.6,0.001293,0.0425,0.0
Calvin Coolidge,0.84,0.000929,0.0577,0.0
Lyndon Baines Johnson,0.0,0.000794,0.0691,0.0
Benjamin Harrison,0.66,0.003233,0.009,0.0
John Quincy Adams,0.64,0.002494,0.0187,0.0
James Knox Polk,0.66,0.000773,0.0876,0.0
Thomas Jefferson,0.84,0.009407,0.0,0.0
James Monroe,0.65,0.001981,0.0627,0.0
Grover Cleveland,0.75,0.002827,0.055,0.0


In [21]:
# The ten names with the largest rank sums, shown with their maximum scores.
max_grouped_df.loc[sorted_agg_df.index[-10:]]

Unnamed: 0_level_0,crossplag,writer,zerogpt,contentatscale
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Bill Clinton,0.85,0.007606,0.187,0.0
Ronald Reagan,0.83,0.007387,0.3198,0.0
Theodore Roosevelt,0.5,0.012925,0.1541,0.0
Franklin D. Roosevelt,0.86,0.015986,0.1616,0.0
Donald J. Trump,0.66,0.008172,0.5522,0.0
John F. Kennedy,0.69,0.031327,0.1448,0.0
James Buchanan,0.88,0.016679,0.2433,0.0
Barack Obama,0.89,0.065729,0.228,0.0
Harry S. Truman,0.64,0.045691,0.3132,0.0
George W. Bush,0.87,0.019075,0.1044,0.14


In [22]:
# Names whose highest score from any detector is at least 0.75.
max_grouped_df.loc[max_grouped_df.max(axis=1) >= 0.75]

Unnamed: 0_level_0,crossplag,writer,zerogpt,contentatscale
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Andrew Jackson,0.78,0.007392,0.0375,0.0
Barack Obama,0.89,0.065729,0.228,0.0
Bill Clinton,0.85,0.007606,0.187,0.0
Calvin Coolidge,0.84,0.000929,0.0577,0.0
Dwight D. Eisenhower,0.78,0.066864,0.0425,0.0
Franklin D. Roosevelt,0.86,0.015986,0.1616,0.0
George W. Bush,0.87,0.019075,0.1044,0.14
Grover Cleveland,0.75,0.002827,0.055,0.0
Herbert Hoover,0.78,0.008473,0.0504,0.0
James A. Garfield,0.81,0.000594,0.1448,0.0
