In [5]:
import re
import os
import io
import sys
import requests
import pandas as pd
from tqdm import tqdm
from openai import OpenAI
from dotenv import load_dotenv
from colorama import Fore, Style, init
from googleapiclient.discovery import build

# Load variables from a .env file (if any) before the keys are read below.
load_dotenv()

# Credentials for the Google Custom Search tool.
GOOGLE_API_KEY = os.environ.get('GOOGLE_API_KEY')
GOOGLE_CSE_ID = os.environ.get('GOOGLE_CSE_ID')
# Credential for the Google Fact Check Tools request (variable name is lower-case).
google_fact_check_api_key = os.environ.get('google_fact_check_api_key')
# Credential for the OpenAI client shared by every FactChecker call.
API_KEY = os.environ.get('OPENAI_API_KEY')
client = OpenAI(api_key=API_KEY)

# Initialize colorama
init()

class FactChecker:
    def __init__(self):
        self.messages = []
        self.tools_used = False
        self.available_functions = {
            "call_google": self.call_google,
            "call_google_fact_check": self.call_google_fact_check
        }
    
    def call_google(self, query, **kwargs):
        try:
            service = build(serviceName="customsearch",
                          version="v1",
                          developerKey=GOOGLE_API_KEY,
                          static_discovery=False)
            res = service.cse().list(q=query, cx=GOOGLE_CSE_ID, **kwargs).execute()
            res_items = res["items"]
            res_snippets = [r['snippet'] for r in res_items]
            self.tools_used = True
            return str(res_snippets)
        except Exception as e:
            print(Fore.RED + f"Error: {str(e)}")
            print(Style.RESET_ALL)
            return f"Error in Google search: {str(e)}"

    def call_google_fact_check(self, query):
        try:
            url = f"https://factchecktools.googleapis.com/v1alpha1/claims:search?key={google_fact_check_api_key}&query={query}"
            response = requests.get(url)
            if response.status_code == 200:
                data = response.json()
                self.tools_used = True
                if not data.get("claims"):
                    self.tools_used = False  # if no claims are found, set tools_used to False
                    return "No claims found for this query."
                else:
                    result = []
                    for claim in data.get("claims", []):
                        text = claim.get("text", "N/A")
                        rating = claim['claimReview'][0].get('textualRating', 'N/A') if claim.get('claimReview') else 'No rating available'
                        review_url = claim['claimReview'][0].get('url', 'N/A') if claim.get('claimReview') else 'No review URL available'
                        result.append(f"Claim: {text}\nRating: {rating}\nReview URL: {review_url}")
                    return "\n".join(result)
            else:
                return f"Error: {response.status_code} - {response.text}"
        except Exception as e:
            print(Fore.RED + f"Error: {str(e)}")
            print(Style.RESET_ALL)
            return f"Error in fact check: {str(e)}"
        
    def extract_sub_questions(self, article):
        """抽取子問題的prompt"""
        prompt = f"""
        Please analyze the following article and break it down into 2-3 key factual claims 
        that need to be verified. Format them as numbered questions:
        
        Article: {article}
        
        Please list the sub-questions in this format:
        1. [First question]
        2. [Second question]
        3. [Third question]
        """
        
        messages = [
            {"role": "system", "content": "You are an expert at breaking down articles into key verifiable claims."},
            {"role": "user", "content": prompt}
        ]
        
        response = client.chat.completions.create(
            model='gpt-3.5-turbo',
            messages=messages
        )
        
        return response.choices[0].message.content
    
    def verify_sub_question(self, sub_question, article):
        """驗證單個子問題"""
        self.messages = []
        self.tools_used = False
    
        sys_prompt = """
        You are a fact-checking specialist. Verify the specific claim by:
        1. Thinking about what you already know and what you need to verify
        2. Choose the most appropriate tool to find the information:
            - call_google: For general information and recent events
            - call_google_fact_check: For verifying specific controversial claims
        3. Analyzing the search results
        4. Providing a conclusion
    
        You must use this format:
        Thought: [Your reasoning]
        Action: [call_google or call_google_fact_check]: [search query]
    
        After receiving an observation:
        Thought: [Analysis combining your knowledge and the new information]
        Action: [Another search if needed]
        or
        Answer: [Your final conclusion]
    
        Available actions:
        call_google: [search term]
        call_google_fact_check: [seldom key word about the claim(e.g. if you are verifying "COVID-19 is a hoax", you can use "COVID-19")]

        """
        user_prompt = f"Sub-question: {sub_question}\nArticle context: {article}\n\nPlease verify this claim using both your knowledge and external sources when needed."
    
        self.messages = [
            {"role": "system", "content": sys_prompt},
            {"role": "user", "content": user_prompt}
        ]
    
        return self._run_verification_loop()

    def _run_verification_loop(self):
        action_re = re.compile('^Action: (\w+): (.*)$')
        answer_re = re.compile("Answer: ")
        count = 0
    
        while True:
            response = client.chat.completions.create(
                model='gpt-3.5-turbo',
                messages=self.messages
            )
        
            response_msg = response.choices[0].message.content
            self.messages.append({"role": "assistant", "content": response_msg})
        
            # check if tools have been used
            if answer_re.search(response_msg) and self.tools_used:
                print(Fore.YELLOW + response_msg)
                print(Style.RESET_ALL)
                return self._extract_answer(response_msg)
            elif answer_re.search(response_msg):
                print(Fore.YELLOW + "Answer found but tools have not been used. Searching tools now.")
                print(Style.RESET_ALL)

            print(Fore.GREEN + response_msg)
            print(Style.RESET_ALL)
        
            actions = [action_re.match(a) for a in response_msg.split("\n") if action_re.match(a)]
            if actions:
                action, action_input = actions[0].groups()
                try:
                    print(Fore.CYAN + f" -- running {action} {action_input}")
                    print(Style.RESET_ALL)
                
                    observation = self.available_functions[action](action_input)
                    self.tools_used = True
                
                    if "No claims found" in observation:
                        self.tools_used = False

                    print(Fore.BLUE + f"Observation: {observation}")
                    print(Style.RESET_ALL)
                    self.messages.append({"role": "user", "content": "Observation: " + observation})
            
                except Exception as e:
                    print(Fore.RED + f"Error: {e}")
                    print(Style.RESET_ALL)
                    continue
        
            # no action detected
            else:
                count += 1
                if count == 3:
                    print(Fore.RED + "No action is detected. The answer is Unknown.")
                    return None
                print(Fore.RED + "No action is detected. Continue...")
                self.tools_used = False
                continue
    
    def _extract_answer(self, response_msg):
        """從回應中提取答案"""
        lines = response_msg.split('\n')
        for line in lines:
            if line.startswith('Answer:'):
                try:
                    answer = int(line.replace('Answer:', '').strip())
                    return answer
                except ValueError:
                    continue
        return None
    

    def _execute_action(self, action, action_input):
        """執行搜尋動作"""
        if action in self.available_functions:
            print(Fore.CYAN + f" -- running {action} {action_input}")
            print(Style.RESET_ALL)
            result = self.available_functions[action](action_input)
            print(Fore.BLUE + f"Observation: {result}")
            print(Style.RESET_ALL)
            return result
        else:
            print(Fore.RED + f"Unknown action: {action}")
            print(Style.RESET_ALL)
            return f"Unknown action: {action}"
                
    def combine_results(self, results, article):
        """綜合所有子問題的結果"""
        prompt = f"""
        Based on the verification results of all sub-questions:
        {results}
    
        Original article:
        {article}
    
        Please provide a final conclusion about whether the article contains any misleading information.
        First provide your detailed reasoning, then end with either:
        Final Answer: 1 (if misleading)
        Final Answer: 0 (if accurate)
        """
    
        messages = [
            {"role": "system", "content": "You are a fact-checking expert providing final conclusions based on detailed analysis of multiple claims."},
            {"role": "user", "content": prompt}
        ]
    
        response = client.chat.completions.create(
            model='gpt-3.5-turbo',
            messages=messages
        )
    
        return response.choices[0].message.content

    def verify_article(self, article):
        """主要驗證流程"""
        print(Fore.CYAN + "Extracting sub-questions..." + Style.RESET_ALL)
        sub_questions = self.extract_sub_questions(article)
        print(Fore.YELLOW + "Sub-questions extracted:\n" + sub_questions + Style.RESET_ALL)
        
        results = []
        for q in sub_questions.split("\n"):
            if q.strip() and q[0].isdigit():
                print(Fore.CYAN + f"\nVerifying: {q}" + Style.RESET_ALL)
                result = self.verify_sub_question(q, article)
                results.append(f"Question: {q}\nResult: {result}")
        
        print(Fore.CYAN + "\nCombining results..." + Style.RESET_ALL)
        final_conclusion = self.combine_results(results, article)
        print(Fore.YELLOW + "\nFinal conclusion:\n" + final_conclusion + Style.RESET_ALL)
        
        return final_conclusion
    
    def _clean_ansi_codes(self, text):
        """移除文字中的 ANSI 顏色代碼"""
        ansi_escape = re.compile(r'\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])')
        return ansi_escape.sub('', text)
    
    def process_article_batch(self, input_file, output_file, num_files=None):
        """處理整個Excel文件中的文章"""
        log_file = "fact_checking_process.log"
        
        try:
            with open(log_file, 'w', encoding='utf-8') as f:
                try:
                    df = pd.read_csv(input_file, encoding='utf-8-sig')
                except UnicodeDecodeError:
                    df = pd.read_csv(input_file, encoding='latin1')
        
                if num_files is not None:
                    df_to_process = df.head(num_files)
                else:
                    df_to_process = df
            
                df['fact_check_response'] = ''
                df['Simplified Answer'] = ''

                df.to_csv(output_file, index=False, encoding='utf-8-sig')
                print("開始處理文章...")
        
                for idx, row in tqdm(df_to_process.iterrows(), total=len(df_to_process), desc="Processing articles"):
                    
                    article = row['synthetic_misinformation']    # LLMFake 
                    # article = row['text']   # mmsoc_gossipcop_text_labels-100 

                    # 捕獲所有輸出到一個字符串中
                    output_capture = io.StringIO()
                    original_stdout = sys.stdout
                    sys.stdout = output_capture

                    try:
                        result = self.verify_article(article)
                    finally:
                        sys.stdout = original_stdout

                    detailed_output = output_capture.getvalue()
                    cleaned_output = self._clean_ansi_codes(detailed_output)    
                    simplified_answer = self._extract_simplified_answer(result)

                    f.write(detailed_output)
                    f.flush()

                    current_df = pd.read_csv(output_file, encoding='utf-8-sig')

                    current_df.at[idx, 'fact_check_response'] = cleaned_output
                    current_df.at[idx, 'Simplified Answer'] = simplified_answer
            
                    current_df.to_csv(output_file, index=False, encoding='utf-8-sig')

                print(f"\nAll articles processed. Results saved to {output_file}", file=original_stdout)
        
        except Exception as e:
            print(f"Error processing batch: {str(e)}", file=original_stdout)
            raise e
  
    def _extract_simplified_answer(self, response):
        """從回應中提取簡化的答案 (1 或 0)"""
        try:
            match = re.search(r"Final Answer:\s*(\d)", response)
            if match:
                return int(match.group(1))
        
            digits = re.findall(r'[01]', response)
            if digits:
                return int(digits[-1])
            else:
                return "Unknown"
        except Exception:
            return "Error"

    

## 分析整個檔案

In [None]:
def main():
    """Fact-check the first 100 rows of the LLMFake PolitiFact CSV.

    Any exception from the batch run is printed in red and the process exits
    with status 1.
    """
    # For LLMFake files the article text is in the `synthetic_misinformation`
    # column (see process_article_batch).
    source_csv = "LLMFake_politifact-100.csv"
    result_csv = "react_child_task_LLMFake_politifact-100.csv"
    article_limit = 100

    fact_checker = FactChecker()

    try:
        fact_checker.process_article_batch(source_csv, result_csv, num_files=article_limit)
    except Exception as e:
        print(Fore.RED + f"Error in main execution: {str(e)}" + Style.RESET_ALL)
        sys.exit(1)


if __name__ == "__main__":
    main()

開始處理文章...


Sub-questions extracted:
1. Is Senator Schumer immune to the kind of sexual accusations that many congressmen in Washington have been accused of recently?
   
2. Has the author made hasty posts in the past that might have been questionable, but clarifies that they were not made out of ill will? 

Verifying: 1. Is Senator Schumer immune to the kind of sexual accusations that many congressmen in Washington have been accused of recently?
Thought: This claim suggests that Senator Schumer may be immune to the kind of sexual accusations that other congressmen in Washington have faced recently. To verify this claim, I will need to look up any specific accusations or controversies related to Senator Schumer and compare them to the claims made in the article. I will also check for any recent news about Schumer and sexual misconduct allegations. 
Action: call_google_fact_check: Senator Schumer sexual misconduct allegations

 -- running call_google_fact_check Senator Schumer sexual misconduct all


All articles processed. Results saved to react_child_task_LLMFake_politifact-100.csv


### Analyze a single article

In [4]:
# Run the full pipeline on one hard-coded article: extract sub-questions, verify
# each one, and combine the findings. `result` holds the text returned by
# verify_article (the final conclusion); progress is printed while it runs.
checker = FactChecker()
article = """Taylor Swift: Why She Didn’t Have A Fourth Of July Party Again & Is ‘Happier’ Because Of It Taylor Swift opted to skip her Fourth of July bash for the second year in a row to go snorkeling with boyfriend Joe Alwyn. Here’s why she’s ‘happier’ with a more low-key celebration.

Taylor Swift, 28, has thrown some pretty epic Fourth of July parties in years past, but she’s forwent the epic celebration in 2017, and now again in 2018. This year, she instead spent her time off from touring to go to Turks and Caicos with boyfriend Joe Alwyn, 27.

“Taylor likes keeping things a little more low key nowadays because it works for her to have alone time and she is enjoying the time she needs and has with Joe,” a source close to the singer tells HollywoodLife EXCLUSIVELY. “It is very important for her for that to be private.”

The 10-time Grammy winner has been keeping her relationship under wraps. The pair have only been photographed out together a handful of times despite dating for well over a year. Taylor also rescinded from the spotlight in 2016, and has been living more privately ever since. Our insider adds that the “Delicate” hitmaker intends on keeping her life this way. “She is not interested in all the craziness from previous years. She has matured to this way of life and it fits and suits her so well,” our source explains. “She is happy and healthy and she’ll definitely continue this moving forward.”

Taymerica might be a thing of the past, but fans were still delighted when paparazzi photos of their idol surfaced online on July 5. The images showed Taylor swimming and snorkeling in the ocean with Joe, and they looked incredibly happy together. The “Shake It Off” songstress also paid tribute to her favorite holiday by rocking a red and white striped high-waisted bikini. Don’t worry Swifties, the Old Taylor is still alive and well."""
result = checker.verify_article(article)

Extracting sub-questions...
Sub-questions extracted:
1. Why did Taylor Swift opt to skip her Fourth of July party for the second year in a row and go snorkeling with boyfriend Joe Alwyn instead?
2. Is it true that Taylor Swift is preferring a more low-key lifestyle nowadays and values privacy in her relationship with Joe Alwyn?
3. Is Taylor Swift intentionally stepping back from the spotlight and aiming to maintain a more private and mature way of life moving forward?

Verifying: 1. Why did Taylor Swift opt to skip her Fourth of July party for the second year in a row and go snorkeling with boyfriend Joe Alwyn instead?
Thought: This claim involves Taylor Swift opting to skip her Fourth of July party for the second year in a row to go snorkeling with her boyfriend Joe Alwyn instead. To verify this claim, I will search for information on Taylor Swift's recent activities and statements to see if she indeed chose a more low-key celebration this year.

Action: call_google: Taylor Swift Four

### Calculate Accuracy

In [None]:
import pandas as pd

file_name = "react_child_task_LLMFake_politifact-100"

data = pd.read_csv(f'{file_name}.csv')

# Normalize the predictions to text. When the column contains blanks pandas
# parses it as float, so a plain astype(str) would yield '1.0' / '0.0' and those
# rows would be dropped from the accuracy below; map every numeric spelling of
# 0 and 1 to '0' / '1' and leave all other values as their string form.
answer_as_number = pd.to_numeric(data['Simplified Answer'], errors='coerce')
is_binary = answer_as_number.isin([0, 1])
binary_text = answer_as_number.where(is_binary, 0).astype(int).astype(str)
data['Simplified Answer'] = data['Simplified Answer'].astype(str).where(~is_binary, binary_text)

# Overall accuracy (only rows whose prediction is 0 or 1 are considered)
valid_predictions = data[data['Simplified Answer'].isin(['0', '1'])]
is_correct = valid_predictions['label'] == valid_predictions['Simplified Answer'].astype(int)
overall_accuracy = is_correct.mean()

# Accuracy for label=1 (fake news)
label_1_data = valid_predictions[valid_predictions['label'] == 1]
correct_label_1 = is_correct[valid_predictions['label'] == 1]
accuracy_label_1 = correct_label_1.mean()
mistakes_label_1 = len(label_1_data) - correct_label_1.sum()

# Accuracy for label=0 (real news)
label_0_data = valid_predictions[valid_predictions['label'] == 0]
correct_label_0 = is_correct[valid_predictions['label'] == 0]
accuracy_label_0 = correct_label_0.mean()
mistakes_label_0 = len(label_0_data) - correct_label_0.sum()

# Count the "Unknown" and error outcomes. The error marker is compared
# case-insensitively because the extraction step writes "Error", not "ERROR".
uncertain_count = len(data[data['Simplified Answer'] == 'Unknown'])
error_count = len(data[data['Simplified Answer'].str.upper() == 'ERROR'])

print(f"檔案名稱: {file_name}.csv")
print(f"總體準確率: {overall_accuracy:.2f}")
print(f"label=1（假新聞）的準確率: {accuracy_label_1:.2f}, 錯誤數量: {mistakes_label_1}")
print(f"label=0（真新聞）的準確率: {accuracy_label_0:.2f}, 錯誤數量: {mistakes_label_0}")
print(f"Uncertain 的數量: {uncertain_count}")
print(f"ERROR 的數量: {error_count}")
