In [None]:
import os
from collections import Counter
from getpass import getpass

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy.stats as stats
import seaborn as sns
from scipy.stats import ttest_ind

In [None]:
# !pip install --upgrade openai

In [None]:
# Load the four prediction tables, one CSV per (model, test set) pairing.
(
    fox_model_on_fox_text,
    fox_model_on_cnn_text,
    cnn_model_on_cnn_text,
    cnn_model_on_fox_text,
) = (
    pd.read_csv(csv_name)
    for csv_name in (
        'FOX_model_on_FOX_test_100.csv',
        'FOX_model_on_CNN_test_100.csv',
        'CNN_model_on_CNN_test_100.csv',
        'CNN_model_on_FOX_test_100.csv',
    )
)

In [None]:
# Add a column holding the character length of each predicted title.
for _pred_frame in (
    fox_model_on_fox_text,
    fox_model_on_cnn_text,
    cnn_model_on_cnn_text,
    cnn_model_on_fox_text,
):
    _pred_frame['pred_title_length'] = _pred_frame['pred_titles'].str.len()

In [None]:
# Take the OpenAI API key from the OPENAI_API_KEY environment variable; if it is
# unset, prompt for it without echoing, so the secret is never typed into (and
# saved with) the notebook source.
key = os.environ.get("OPENAI_API_KEY") or getpass("OpenAI API key: ")

In [None]:
import os

# Expose the key to this process's environment under the OPENAI_API_KEY name.
os.environ.update(OPENAI_API_KEY=key)

In [None]:
import openai
from openai import OpenAI

In [None]:
# Build the shared API client. The environment is queried by the variable's
# *name*: passing the key value itself to os.environ.get() would search for a
# variable named after the secret and yield api_key=None.
client = OpenAI(
    api_key=os.environ.get("OPENAI_API_KEY"),
)

In [None]:
def ask_chatgpt(prompt, model="gpt-4", temperature=1):
    """Ask the chat model for a one-word political-bias label for a headline.

    Args:
        prompt: Headline text to classify; interpolated into the user message.
        model: Model name passed to the chat-completions endpoint.
        temperature: Sampling temperature passed to the endpoint.

    Returns:
        The response object returned by ``client.chat.completions.create``,
        unmodified.
    """
    # Adjacent string literals are concatenated by the parser. The earlier
    # backslash continuations *inside* the literals copied each following
    # line's indentation into the prompt text sent to the model.
    system_message = (
        "You are ChatGPT, a helpful assistant "
        "who performs political bias sentiment analysis, classifying headlines "
        "as either 'liberal', 'conservative', or 'neutral'."
    )
    user_message = (
        "In one word, all lower-case, classify the political bias sentiment "
        f"based on this headline: {prompt}"
    )

    response = client.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": system_message},
            {"role": "user", "content": user_message},
        ],
        temperature=temperature,
    )
    return response

In [None]:
# First three predicted titles from fox_model_on_fox_text, kept as sample inputs.
gpt_test0, gpt_test1, gpt_test2 = (
    fox_model_on_fox_text['pred_titles'][row_label] for row_label in range(3)
)

In [None]:
# Display the first sample headline.
gpt_test0

In [None]:
# fox_model_on_fox_text
# fox_model_on_cnn_text
# cnn_model_on_cnn_text
# cnn_model_on_fox_text

In [None]:
# Classify the three sample headlines, one ask_chatgpt call each, in order.
response0, response1, response2 = map(
    ask_chatgpt, (gpt_test0, gpt_test1, gpt_test2)
)

# #print(response0)

In [None]:
# response0

In [None]:
# from pprint import pprint
# pprint(response.model_dump())

In [None]:
def get_chat_response_content(response):
    """Return the text of the first choice of a chat-completion response.

    Args:
        response: Object with a ``choices`` sequence whose items carry a
            ``message`` with a ``content`` attribute.

    Returns:
        The first choice's message content, or the string
        ``"No response generated."`` when there are no choices, the first
        choice has no message, or that message's content is ``None``.
    """
    fallback = "No response generated."

    if not response.choices:
        return fallback

    message = response.choices[0].message
    # A message with content=None used to leak None to callers, which then
    # failed when treating the result as a string.
    if not message or message.content is None:
        return fallback

    return message.content

# Extract the reply text from each sample response and print one per line.
content0, content1, content2 = map(
    get_chat_response_content, (response0, response1, response2)
)
print(content0, content1, content2, sep="\n")

In [None]:
def gpt_pipeline(LLM):
    """Classify every entry of ``LLM['pred_titles']``.

    Each title is sent through ``ask_chatgpt`` and the reply text is pulled
    out with ``get_chat_response_content``.

    Returns:
        A list of reply strings, in the same order as the titles.
    """
    return [
        get_chat_response_content(ask_chatgpt(headline))
        for headline in LLM['pred_titles']
    ]

In [None]:
# The four prediction frames, listed in the same order as `model_names`.
all_models = [
    fox_model_on_fox_text,
    fox_model_on_cnn_text,
    cnn_model_on_cnn_text,
    cnn_model_on_fox_text,
]

In [None]:
# Result-dictionary keys, positionally matched to the frames in `all_models`.
model_names = [
    'fox_model_on_fox_text',
    'fox_model_on_cnn_text',
    'cnn_model_on_cnn_text',
    'cnn_model_on_fox_text',
]

In [None]:
# First three rows of each prediction frame.
all_models_sample = [
    frame[:3]
    for frame in (
        fox_model_on_fox_text,
        fox_model_on_cnn_text,
        cnn_model_on_cnn_text,
        cnn_model_on_fox_text,
    )
]

In [None]:
def bootstrap_gpt_outputs(models_lst,model_names,num_iter):
    """Classify every model's titles ``num_iter`` times and tally the labels.

    The same titles are re-classified on every iteration (nothing is
    resampled), so any run-to-run variation comes from the classifier's
    replies.

    Args:
        models_lst: Sequence of inputs accepted by ``gpt_pipeline``.
        model_names: Names positionally matched to ``models_lst``; they become
            the keys of the returned dictionary.
        num_iter: Number of full classification passes to run.

    Returns:
        ``{name: {'conservative': [...], 'liberal': [...], 'neutral': [...]}}``
        where each list holds one count per iteration. Replies outside those
        three labels are tallied as 'other' and not reported.
    """
    tracked = ('conservative', 'liberal', 'neutral')
    results = {name: {category: [] for category in tracked} for name in model_names}

    for _ in range(num_iter):
        for i, model in enumerate(models_lst):
            counts = {'conservative': 0, 'liberal': 0, 'neutral': 0, 'other': 0}

            for response in gpt_pipeline(model):
                # Tolerate surrounding whitespace, a trailing period and any
                # capitalisation, e.g. " Neutral." -> "neutral".
                category = str(response).strip().rstrip('.').lower()
                if category not in counts:
                    # The earlier code indexed the label string here instead
                    # of the tally, raising TypeError on any unexpected reply.
                    category = 'other'
                counts[category] += 1

            for category in tracked:
                results[model_names[i]][category].append(counts[category])

    return results

In [None]:
# Run 20 full classification passes over all four prediction frames.
bootstrapped_results = bootstrap_gpt_outputs(
    models_lst=all_models,
    model_names=model_names,
    num_iter=20,
)

In [None]:
#bootstrapped_results

In [None]:
# Tabulate the nested results: outer keys become columns, inner keys the index.
bootstrapped_results_df = pd.DataFrame(data=bootstrapped_results)

In [None]:
# Persist the tabulated results next to the notebook.
bootstrapped_results_df.to_csv(path_or_buf='bootstrapped_results_df.csv')

In [None]:
# Display order: both models on the fox text first, then both on the cnn text.
new_order = [
    'fox_model_on_fox_text', 'cnn_model_on_fox_text',
    'fox_model_on_cnn_text', 'cnn_model_on_cnn_text',
]

In [None]:
# Reorder the columns to follow `new_order`.
bootstrapped_results_df = bootstrapped_results_df.loc[:, new_order]

In [None]:
# Reshape the nested {model/text: {category: [count per run]}} results into one
# long-format frame with a row per (model/text, category, run).
dfs = [
    pd.DataFrame({
        'Model_Text': pairing,
        'Category': label,
        'Values': run_counts,
    })
    for pairing, by_label in bootstrapped_results.items()
    for label, run_counts in by_label.items()
]
df = pd.concat(dfs, ignore_index=True)

summary_stats = df.groupby(['Model_Text', 'Category']).agg(['mean', 'std', 'median'])
new_labels = ['fox_m_fox_t', 'cnn_m_fox_t', 'fox_m_cnn_t', 'cnn_m_cnn_t']

# Box plot of the per-run counts, grouped by model/text and coloured by category.
fig, ax = plt.subplots(figsize=(10, 6))
sns.boxplot(data=df, x='Model_Text', y='Values', hue='Category', order=new_order, ax=ax)
# Swap the long pairing names for the short tick labels (same order as new_order).
ax.set_xticks(range(len(new_labels)))
ax.set_xticklabels(new_labels)
ax.set_title('Distribution of Sentiment Categories across Models and Texts')
fig.savefig("LLM_boxplot.png")
plt.show()

In [None]:
# One histogram panel per model/text pairing (2 panels per row), one hue per category.
g = sns.FacetGrid(
    data=df,
    col="Model_Text",
    col_order=new_order,
    col_wrap=2,
    hue="Category",
    height=4,
    aspect=1.5,
)
g.map(sns.histplot, "Values", kde=False, bins=10)

g.add_legend()

# Title each panel with its pairing name; label the x axis "Counts" and the y axis "Frequency".
g.set_titles(template="{col_name}")
g.set_axis_labels(x_var="Counts", y_var="Frequency")
plt.show()
g.savefig("LLM_histogram_plot.png")

In [None]:
# Mean / standard deviation / median of the per-run counts for every (pairing, category).
summary_stats = df.groupby(['Model_Text', 'Category']).agg(['mean', 'std', 'median'])

# One-way ANOVA per model/text pairing: do the per-run counts differ between categories?
anova_results = {}
for pairing, pairing_rows in df.groupby('Model_Text', sort=False):
    # One sample of counts per category, in order of first appearance.
    samples = [
        category_rows['Values'].values
        for _, category_rows in pairing_rows.groupby('Category', sort=False)
    ]
    anova_results[pairing] = stats.f_oneway(*samples)

summary_stats, anova_results

In [None]:
# Comparisons to run: for each source text, the two result sets to test against
# each other. `texts` is defined in this cell so the preview below works on a
# fresh kernel instead of depending on a cell further down the notebook.
texts = [('fox_text', 'fox_model_on_fox_text', 'cnn_model_on_fox_text'),
         ('cnn_text', 'fox_model_on_cnn_text', 'cnn_model_on_cnn_text')]

# Print each comparison: the text label, then the two result-set names.
for text, model1, model2 in texts:
    print(text)
    print(model1, model2)

In [None]:
results = {}
# (text label, first result set, second result set) for each comparison.
texts = [
    ('fox_text', 'fox_model_on_fox_text', 'cnn_model_on_fox_text'),
    ('cnn_text', 'fox_model_on_cnn_text', 'cnn_model_on_cnn_text'),
]

# Independent two-sample t-test on the per-run counts, per text and category.
for text_label, first_name, second_name in texts:
    first_counts = bootstrapped_results[first_name]
    second_counts = bootstrapped_results[second_name]

    per_category = {}
    for label in ('conservative', 'liberal', 'neutral'):
        outcome = ttest_ind(first_counts[label], second_counts[label])
        per_category[label] = {
            't-statistic': outcome.statistic,
            'p-value': outcome.pvalue,
        }
    results[text_label] = per_category

results

In [None]:
# Flatten the nested t-test results into one row per (text, category).
# The inner loop variable is deliberately not named `stats`: that would rebind
# the `scipy.stats` alias and break `stats.f_oneway` if the ANOVA cell is re-run.
rows = [
    {
        'Text': text,
        'Category': category,
        'T-statistic': test_result['t-statistic'],
        'P-value': test_result['p-value'],
    }
    for text, categories in results.items()
    for category, test_result in categories.items()
]

results_df = pd.DataFrame(rows)
results_df

In [None]:
# Persist the t-test table next to the notebook.
results_df.to_csv(path_or_buf='LLM_results_df.csv')