# Load in the data - X-Fact

In [7]:
import pandas as pd
import os

# Every file path used by this notebook is relative, so the working directory is
# switched to the project's data folder first (this is the workaround for the
# "no such directory" error). Paste the path of your own data folder here.
DATA_DIR = r"C:\Users\annal\OneDrive\Desktop\llm-misinfo-eval\data"
os.chdir(DATA_DIR)

# Tab-separated X-Fact training file; lines that cannot be parsed are skipped.
XFACT_TRAIN_TSV = r"x_fact_dataset/x-fact-including-en/train.all.tsv"
df = pd.read_csv(XFACT_TRAIN_TSV, delimiter="\t", on_bad_lines="skip")

# Get results from Cohere

In [8]:
import cohere
from dotenv import load_dotenv

# Load the variables stored in the local .env file, then build the Cohere client
# from the API key found under "Cohere_API_Key".
COHERE_ENV_FILE = r"C:\Users\annal\OneDrive\Desktop\llm-misinfo-eval\cohere.env"
load_dotenv(COHERE_ENV_FILE)
key = os.environ.get("Cohere_API_Key")
co = cohere.Client(key)

In [9]:
import time
from tqdm import trange

# Number of claims to send to Cohere. Capped at the dataset size so that a file
# with fewer rows cannot trigger an out-of-range lookup.
n_claims = min(1000, len(df))

# One record per successful request: the raw response text, the row label in
# `df`, and the gold label of that row.
cohere_resp = []
# Rows whose request raised CohereAPIError, kept so the drop-outs can be
# inspected afterwards instead of vanishing silently.
cohere_skipped = []

for i in trange(n_claims):
    claim = df.loc[i, "claim"]
    try:
        response = co.chat(
            message=f"Rate the truthfulness of the following statement: {claim} Provide a score from 0 to 100, where 0 represents definitively false and 100 represents definitively true. Do not provide any explanations, only respond with the numerical score.",
            connectors=[{"id": "web-search"}],
        )
    except cohere.CohereAPIError as err:
        # Observed cause so far: too many tokens in the query, so such claims are
        # bypassed. Alternatively: the default is a model with max 4096 tokens,
        # but we could go up to 8192 (however, those models are less stable).
        cohere_skipped.append({"index": i, "error": str(err)})
    else:
        cohere_resp.append(
            {"cohere_resp": response.text, "index": i, "label": df.loc[i, "label"]}
        )
    # Throttle after every request, including failed ones, so that a run of
    # errors does not turn into back-to-back calls.
    time.sleep(1)

  0%|          | 1/1000 [00:15<4:12:06, 15.14s/it]


KeyboardInterrupt: 

In [None]:
import termcolor

# Turn the list of per-claim response records into a DataFrame; each record
# carries the keys "cohere_resp", "index" and "label".
df_results0 = pd.DataFrame(cohere_resp)
# Print the column/dtype summary, then show the last rows as the cell output.
df_results0.info()
df_results0.tail()

# Disabled inspection loop: it colour-printed the claim language, the true label
# and the raw zero-shot response for every row. Kept commented out.
# for i in range(df_results0.shape[0]):
#     print(termcolor.colored("Claim language: ", "green")+df["language"][i])
#     true_label = df_results0["label"][i]
#     print(termcolor.colored("True label: ","magenta")+ termcolor.colored(true_label,"magenta"))
#     print(termcolor.colored("Zero-Shot Response: ", "cyan")+df_results0["cohere_resp"][i])
#     print("\n---\n")

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 996 entries, 0 to 995
Data columns (total 3 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   cohere_resp  996 non-null    object
 1   index        996 non-null    int64 
 2   label        987 non-null    object
dtypes: int64(1), object(2)
memory usage: 23.5+ KB


Unnamed: 0,cohere_resp,index,label
991,85,995,partly true/misleading
992,95,996,partly true/misleading
993,95,997,false
994,"This statement is mostly true, albeit with som...",998,complicated/hard to categorise
995,84,999,partly true/misleading


# Saving Cohere's Responses to CSV
We don't want to repeat the calls to Cohere every time we do more analysis on the same data, so the responses are saved to disk.

In [None]:
# Cache the responses on disk (comma-separated, UTF-8, without the DataFrame
# index) so the Cohere calls do not have to be repeated.
df_results0.to_csv(
    path_or_buf="cohere_results",
    index=False,
    sep=",",
    encoding="utf-8",
)

# Recovering Old Results

In [10]:
# Reload the cached responses instead of querying Cohere again; malformed
# lines are skipped.
df_results0 = pd.read_csv("cohere_results", sep=",", on_bad_lines="skip")

# Overall Characteristics and Performance

# Giving Cohere a little help?
In most cases where Cohere doesn't provide a numerical answer, it provides an answer with some explanation (which we didn't want) and the score, usually in a sentence like "I give this claim a score of...". Let's try to use that data:

In [9]:
import re


def cohere_tv_help(row, threshold=50):
    """Reduce a Cohere response to "true", "false" or "unknown".

    The response is first read as a bare number. If that fails, the first
    "score of <number>" phrase in the text is used instead, which covers
    answers that wrap the score in an explanation.

    Parameters
    ----------
    row : mapping or pandas.Series
        Must provide the key "cohere_resp" holding the raw response.
    threshold : float, default 50
        Scores strictly greater than this value map to "true"; everything
        else (including a score equal to the threshold) maps to "false".

    Returns
    -------
    str
        "true" or "false" when a score could be extracted, "unknown" when the
        response is missing or contains no recognisable score.
    """
    resp = row["cohere_resp"]

    # A missing response (None, or the NaN that an empty CSV cell becomes on
    # reload) carries no score. Without this guard float(nan) succeeds and the
    # row would be silently classified as "false".
    if pd.isna(resp):
        return "unknown"

    try:
        score = float(resp)
    except (TypeError, ValueError):
        # Only the first occurrence is used; the optional fractional part keeps
        # decimals such as "50.5" instead of truncating them.
        found = re.search(r"score of (\d+(?:\.\d+)?)", str(resp))
        if found is None:
            return "unknown"
        score = float(found.group(1))

    # Text such as "nan" parses as a float but is not a usable score.
    if pd.isna(score):
        return "unknown"

    return "true" if score > threshold else "false"


# Classify every response row-wise and store the verdict in a new column.
df_results0["cohere_simple_helped"] = df_results0.apply(
    cohere_tv_help, axis="columns"
)


def match_tv_helped(row):
    """Return 1 when the "helped" verdict equals the simplified truth label, else 0.

    Compares row["simple_truth"] with row["cohere_simple_helped"].
    """
    agrees = row["simple_truth"] == row["cohere_simple_helped"]
    return 1 if agrees else 0


# 1 where the "helped" verdict agrees with the simplified truth label, else 0.
df_results0["match_helped"] = df_results0.apply(match_tv_helped, axis=1)

# Per-language mean of the 0/1 match columns, i.e. accuracy with and without help.
# .mean() replaces .agg(np.mean): pandas emits a FutureWarning for the callable
# form, and the method form does not depend on numpy being imported.
df_simple = df_results0.groupby("language")[["match_helped", "match"]].mean()

df_simple.head(
    50
)  # we don't appear to be seeing any meaningful change in accuracy - but was there a change in understanding?

  df_simple = df_results0.groupby("language")[["match_helped", "match"]].agg(np.mean)


Unnamed: 0_level_0,match_helped,match
language,Unnamed: 1_level_1,Unnamed: 2_level_1
ar,0.434783,0.434783
de,0.394737,0.394737
hi,0.473684,0.45614
id,0.384615,0.384615
it,0.436364,0.436364
ka,0.449438,0.438202
pl,0.507042,0.507042
pt,0.477987,0.468553
ro,0.533333,0.5
sr,0.375,0.375


In [15]:
# looking at how well cohere understood the task and responded as it was supposed to
def task_understood_help(row):
    """Return 1 when a verdict was extracted for the row, 0 when it is "unknown".

    Reads row["cohere_simple_helped"].
    """
    return 0 if row["cohere_simple_helped"] == "unknown" else 1


# 1 where a score could be extracted after help, 0 where the verdict is "unknown".
df_results0["understood_help"] = df_results0.apply(task_understood_help, axis=1)

# Per-language means of the 0/1 columns: share of usable answers and accuracy,
# each before and after help.
# .mean() replaces .agg(np.mean): pandas emits a FutureWarning for the callable
# form, and the method form does not depend on numpy being imported.
df_understood = df_results0.groupby("language")[
    ["understood", "understood_help", "match", "match_helped"]
].mean()

df_understood.head(
    50
)  # we can now see a change in understanding! so we are seeing that for some languages (ka, pl, pt, ro, tr), there is a change in understanding (ie we extracted a score), but no change in accuracy (ie that score was wrong)
# in fact, all languages that had to be "helped" were wrong in their predictions
# NOTE(review): in the table rendered under this cell, match_helped is higher than
# match for hi, ka, pt and ro, which does not agree with the "no change in
# accuracy" reading above - re-check the interpretation against a fresh run.
# df_understood.plot.bar("understood_help", title="Task Understanding by language after help")

  df_understood = df_results0.groupby("language")[["understood", "understood_help", "match", "match_helped"]].agg(np.mean)


Unnamed: 0_level_0,understood,understood_help,match,match_helped
language,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
ar,0.913043,0.913043,0.434783,0.434783
de,1.0,1.0,0.394737,0.394737
hi,0.894737,0.912281,0.45614,0.473684
id,0.931624,0.931624,0.384615,0.384615
it,0.945455,0.963636,0.436364,0.436364
ka,0.842697,0.865169,0.438202,0.449438
pl,0.901408,0.929577,0.507042,0.507042
pt,0.955975,0.977987,0.468553,0.477987
ro,0.933333,0.966667,0.5,0.533333
sr,0.9375,0.9375,0.375,0.375
