### Setup

In [1]:
import os
import pickle
from abc import ABC, abstractmethod
from typing import Any, Dict, Iterable, List, Optional

import gradio as gr
import numpy as np
import openai
import pandas as pd
from sentence_transformers import SentenceTransformer, util
from tqdm import tqdm

# Read the OpenAI API key from the environment rather than hardcoding it.
# os.getenv returns None when OPENAI_API_KEY is unset, so api_key may be None here.
openai.api_key = os.getenv("OPENAI_API_KEY")

# Data

In [3]:
# Location of the claims JSON file, relative to the notebook's working directory.
dataset_path = "../data/dubawa_claims.json"

# Parse the JSON file into a DataFrame; later cells read it through `claims`.
claims = pd.read_json(path_or_buf=dataset_path)

In [4]:
# Bare expression: the notebook renders the loaded DataFrame as this cell's output.
claims

Unnamed: 0,claimReviewed,text,url
0,A no-interest loan is being offered to vulnera...,Claim: A WhatsApp broadcast message claims tha...,https://dubawa.org/true-fgs-no-interest-geep-l...
1,Dino Melaye's G-Wagon SUV riddled with bullets...,Claim: Senator Dino Melaye shared a photo of h...,https://dubawa.org/photo-shared-by-dino-melaye...
2,"Akufo-Addo received a malaria vaccine, not cor...","Claim: Akufo-Addo received a malaria vaccine, ...",https://ghana.dubawa.org/ayarigas-claims-on-ak...
3,Viral news reports claim a Ghanaian counselor ...,Claim: Viral news reports claim a Ghanaian cou...,https://ghana.dubawa.org/no-scientific-evidenc...
4,Social media users are claiming that Alex Tell...,Claim: Social media users are claiming that Al...,https://ghana.dubawa.org/ucl-alcantara-not-tel...
...,...,...,...
970,Do not consume alcohol for 45 days after the C...,Claim: Do not consume alcohol for 45 days afte...,https://ghana.dubawa.org/a-45-days-post-covid-...
971,That a black man named Philip Emeagwali,CLAIM: The Creator of the Internet was a black...,https://dubawa.org/philip-emeagwali-is-not-the...
972,"A claim that Buhari Sacked CBN Governor, Godwi...","CLAIM: “Buhari Sacked CBN Governor, Godwin Eme...",https://dubawa.org/buhari-did-not-sack-the-cbn...
973,"Local spirits, like akpeteshie, can be used in...",Since the first two cases of COVID-19 in Ghana...,https://ghana.dubawa.org/drinking-akpeteshie-o...


# QA

In [5]:
import json
import logging
from abc import abstractmethod

import openai
from tenacity import (
    retry,
    retry_if_exception_type,
    stop_after_attempt,
    wait_random_exponential,
)

# Logger named after the current module; the LLM wrapper classes in this cell
# log prompts and answers through it at INFO level.
logger = logging.getLogger(__name__)


class LLM(ABC):
    """Abstract base class for text-generation back-ends.

    Subclasses must implement :meth:`completion`. Deriving from ``ABC`` is what
    makes ``@abstractmethod`` effective: without it the decorator is inert and
    the base class could be instantiated by mistake.
    """

    @abstractmethod
    def completion(self, *args, **kwargs):
        """Generate text.

        Concrete subclasses define the actual signature and return the
        generated text.
        """
        raise NotImplementedError

    @retry(
        # Randomised exponential back-off configured with min=1 and max=20.
        wait=wait_random_exponential(min=1, max=20),
        # Give up after the third attempt.
        stop=stop_after_attempt(3),
        # Retry only on rate-limit / service-unavailable errors; any other
        # exception is not matched by this predicate.
        retry=(
            retry_if_exception_type(openai.error.RateLimitError)
            | retry_if_exception_type(openai.error.ServiceUnavailableError)
        ),
    )
    def completion_with_retry(self, *args, **kwargs):
        """Call :meth:`completion` with the same arguments, under the retry policy above.

        Returns whatever :meth:`completion` returns.

        NOTE(review): ``reraise`` is not set on the decorator, so tenacity's
        default behaviour applies once the attempts are exhausted -- confirm
        that callers expect that.
        """
        return self.completion(*args, **kwargs)


class GPT(LLM):
    """LLM wrapper that generates text through ``openai.Completion.create``."""

    def __init__(
        self,
        model: str = "text-davinci-003",
        temperature=0.7,
        max_tokens=None,
        top_p=1,
        frequency_penalty=0,
        presence_penalty=0,
        n=1,
        logit_bias=None,
    ):
        """Store the request parameters that are sent with every completion call.

        Args:
            model: Name of the model to query.
            temperature, max_tokens, top_p, frequency_penalty,
            presence_penalty, n: Forwarded unchanged to
                ``openai.Completion.create``.
            logit_bias: Optional mapping forwarded as ``logit_bias``. ``None``
                (the default) means an empty mapping.
        """
        self.params = {
            "model": model,
            "temperature": temperature,
            "max_tokens": max_tokens,
            "top_p": top_p,
            "frequency_penalty": frequency_penalty,
            "presence_penalty": presence_penalty,
            "n": n,
            # A fresh dict per instance: a literal `{}` default in the signature
            # would be one shared object, so mutating it through one instance's
            # params would silently affect every other instance.
            "logit_bias": {} if logit_bias is None else logit_bias,
        }

    def completion(self, prompt):
        """Send ``prompt`` to the completion endpoint and return the generated text.

        Only the first choice of the response is returned, even when ``n`` > 1.
        The prompt and the answer are logged at INFO level.
        """
        # Lazy %-style arguments: the message is only formatted if it is emitted.
        logger.info("PROMPT: %s", prompt)
        response = openai.Completion.create(prompt=prompt, **self.params)
        text = response.choices[0].text
        logger.info("ANSWER: %s", text)
        return text


class ChatGPT(LLM):
    """LLM wrapper that generates text through ``openai.ChatCompletion.create``."""

    def __init__(
        self,
        model: str = "gpt-3.5-turbo",
        temperature=0.7,
        max_tokens=None,
        top_p=1,
        frequency_penalty=0,
        presence_penalty=0,
        n=1,
        logit_bias=None,
    ):
        """Store the request parameters that are sent with every chat call.

        Args:
            model: Name of the chat model to query.
            temperature, max_tokens, top_p, frequency_penalty,
            presence_penalty, n: Forwarded unchanged to
                ``openai.ChatCompletion.create``.
            logit_bias: Optional mapping forwarded as ``logit_bias``. ``None``
                (the default) means an empty mapping.
        """
        self.params = {
            "model": model,
            "temperature": temperature,
            "max_tokens": max_tokens,
            "top_p": top_p,
            "frequency_penalty": frequency_penalty,
            "presence_penalty": presence_penalty,
            "n": n,
            # A fresh dict per instance: a literal `{}` default in the signature
            # would be one shared object, so mutating it through one instance's
            # params would silently affect every other instance.
            "logit_bias": {} if logit_bias is None else logit_bias,
        }

    def completion(
        self, prompt, system_message="You are a helpful assistant.", history=None
    ):
        """Send ``prompt`` as a user message and return the reply text.

        Args:
            prompt: Content of the user message appended last.
            system_message: Content of a leading system message. It is only
                added when ``history`` is empty or ``None``.
            history: Optional sequence of earlier message dicts. When non-empty
                it is placed first and ``system_message`` is NOT added.

        Returns:
            The ``content`` of the first choice's message, even when ``n`` > 1.
        """
        messages = []
        if history:
            # Extend a new list so the caller's history object is not mutated.
            messages.extend(history)
        elif system_message:
            messages.append({"role": "system", "content": system_message})
        messages.append({"role": "user", "content": prompt})

        logger.info("MESSAGES: %s", json.dumps(messages, indent=4))
        response = openai.ChatCompletion.create(messages=messages, **self.params)
        text = response.choices[0].message["content"]
        logger.info("ANSWER: %s", text)
        return text

In [6]:
# Chat-model wrapper built with its defaults (model "gpt-3.5-turbo", temperature 0.7).
llm = ChatGPT()

### Prompts

In [7]:
# Prompt template for question generation. `{article}` is filled in with the
# article text via str.format; the prompt ends on "Question:" so the model's
# reply is the question itself.
INTERESTING_Q_TEMPLATE = '''Read the following fact-checking article carefully and write a question asking for information that can be answered within:

Article:"""{article}"""
Question:'''

In [8]:
# Draw a small sample of articles to generate questions for.
# A fixed random_state makes the sample identical on every Restart & Run All;
# without it each run picks different rows. min() keeps the call valid when
# the dataset has fewer rows than the requested sample size.
SAMPLE_SIZE = 10
SAMPLE_SEED = 42
article_sample = claims.sample(
    n=min(SAMPLE_SIZE, len(claims)), random_state=SAMPLE_SEED
).copy()

In [9]:
def _generate_question(article_text):
    """Fill the prompt template with ``article_text`` and return the LLM's reply."""
    filled_prompt = INTERESTING_Q_TEMPLATE.format(article=article_text)
    return llm.completion_with_retry(filled_prompt)


# One LLM call per sampled article; the replies are stored in a new "Q" column.
article_sample["Q"] = article_sample["text"].apply(_generate_question)

In [13]:
# Persist the sampled articles together with their generated "Q" column.
article_sample.to_csv(path_or_buf="../data/interesting_questions.csv")