In [9]:
import os
import requests
import together
from langchain.llms.base import LLM

class TogetherLLM(LLM):
    """LangChain ``LLM`` wrapper that sends prompts to the Together inference endpoint.

    Every field below can be overridden through the constructor, e.g.
    ``TogetherLLM(model=..., temperature=..., max_tokens=...)``.
    """

    model: str = "togethercomputer/llama-2-13b-chat"
    """model endpoint to use"""

    # ``.get`` instead of ``os.environ[...]``: a missing variable must not make the
    # class definition itself raise KeyError, because the key may also be supplied
    # explicitly through the constructor. Its presence is validated in ``_call``.
    together_api_key: str = os.environ.get('TOGETHERAI_API_KEY', '')
    """Together API key"""

    temperature: float = 0.0
    """What sampling temperature to use."""

    max_tokens: int = 512
    """The maximum number of tokens to generate in the completion."""

    max_retries: int = 10
    """How many times a failing request is attempted before giving up."""

    request_timeout: float = 120.0
    """Seconds to wait for the HTTP request before treating the attempt as failed."""

    @property
    def _llm_type(self) -> str:
        """Return type of LLM."""
        return "together"

    def _call(
        self,
        prompt: str,
        **kwargs,
    ) -> str:
        """Call the Together endpoint and return the generated text.

        Args:
            prompt: Fully rendered prompt to send to the model.
            **kwargs: Extra arguments passed by LangChain; accepted and ignored.

        Returns:
            The ``text`` of the first choice in the endpoint's JSON response.

        Raises:
            ValueError: If no API key is configured.
            Exception: If every attempt fails; the last underlying error is
                attached as ``__cause__``.
        """
        if not self.together_api_key:
            raise ValueError(
                "No Together API key configured: set the TOGETHERAI_API_KEY environment "
                "variable or pass together_api_key=... to TogetherLLM."
            )

        endpoint = 'https://api.together.xyz/inference'
        payload = {
            "prompt": prompt,
            "model": self.model,
            "temperature": self.temperature,
            "max_tokens": self.max_tokens
        }
        headers = {
            "Authorization": f"Bearer {self.together_api_key}",
            "User-Agent": "<YOUR_APP_NAME>"
        }

        last_error = None
        for attempt in range(self.max_retries):
            try:
                # Without a timeout a stalled connection would block the notebook forever.
                res = requests.post(endpoint, json=payload, headers=headers,
                                    timeout=self.request_timeout)
                # Surface HTTP errors (auth, rate limit, ...) as such instead of as an
                # opaque KeyError when the error body has no 'output' field.
                res.raise_for_status()
                return res.json()['output']['choices'][0]['text']
            except Exception as e:
                print(e)
                last_error = e

        raise Exception(f"Request did not succeed with prompt = {prompt}") from last_error


In [10]:
import json
import pandas as pd
from langchain import PromptTemplate
import re

from sklearn.metrics import (
    accuracy_score,
    precision_score,
    recall_score,
    f1_score,
    confusion_matrix,
    classification_report
)

class Utility:
    """Stateless helpers: prompt construction, response parsing, metrics and CSV I/O."""

    # Chat markup tokens wrapped around each turn / system message of the prompt.
    B_CHAT, E_CHAT = "<s>", "</s>"
    B_INST, E_INST = "[INST]", "[/INST]"
    B_SYS, E_SYS = "<<SYS>>\n", "\n<</SYS>>\n\n"

    @staticmethod
    def get_prompt(system_message, user_message, input_variables, history):
        """Build a ``PromptTemplate`` for a (possibly multi-turn) chat prompt.

        Args:
            system_message: Text placed between ``B_SYS`` and ``E_SYS`` inside the
                first turn of the conversation.
            user_message: Template text of the final (unanswered) user turn.
            input_variables: Placeholder names passed through to ``PromptTemplate``.
            history: Sequence of ``(query, response)`` pairs rendered as completed
                turns before the final user turn. May be empty.

        Returns:
            A ``PromptTemplate`` whose template is the concatenated conversation.
        """
        system_prompt = f"{Utility.B_SYS}{system_message}{Utility.E_SYS}"

        prompt_template_items = []

        for index, (query, response) in enumerate(history):
            prompt_template_items += [Utility.B_CHAT, Utility.B_INST, " "]
            if index == 0:
                # The system prompt is embedded only in the very first turn.
                prompt_template_items.append(system_prompt)
            prompt_template_items += [query, " ", Utility.E_INST, " ", response, Utility.E_CHAT]

        # Final turn: left open (no response, no E_CHAT) for the model to complete.
        prompt_template_items += [Utility.B_CHAT, Utility.B_INST, " "]
        if not history:
            prompt_template_items.append(system_prompt)
        prompt_template_items += [user_message, " ", Utility.E_INST]

        prompt_template = "".join(prompt_template_items)
        return PromptTemplate(template=prompt_template, input_variables=input_variables)

    @staticmethod
    def extract_type_from_response(response, search_term):
        """Map a free-text model response to ``'#'`` (is ``search_term``) or ``'@'`` (is not).

        Negative phrasings are checked first, because a sentence such as
        "the tweet is not X" would otherwise also satisfy a looser positive pattern.
        If no phrase pattern matches, the last literal ``'@'`` or ``'#'`` character
        in the response is returned.

        Args:
            response: Raw text returned by the model.
            search_term: Category name to look for; matched case-insensitively and
                literally (regex metacharacters in it are escaped).

        Returns:
            ``'@'`` or ``'#'``.

        Raises:
            Exception: If neither a known phrase nor an ``'@'``/``'#'`` is found.
        """
        # Escape the term so characters like "(" or "+" are matched literally instead
        # of being interpreted as (possibly invalid) regex syntax.
        term = re.escape(search_term.lower())
        lowered_response = response.lower()

        does_not_include_patterns = [
            rf"""that tweet as it is not {term}""",
            rf"""(the|this|the given) tweet.*is not {term}""",
            rf"""i.*classify (it|the tweet) as not {term}""",
            rf"""(?:the|this|the given)?\s*(?:tweet|it) does not fall (into|under) the (?:category of )?{term}""",
            rf"""it is therefore classified as not {term}""",
            rf"""i would classify the (given )?tweet as not {term}"""
        ]

        includes_patterns = [
            rf"""(the|this|the given) tweet.*is {term}""",
            rf"""i.*classify (it|the tweet) as {term}""",
            rf"""(?:the|this|the given)?\s*(?:tweet|it) (falls into|falls under|under) the (?:category of )?{term}""",
            rf"""this is a {term} claim as it is based on""",
            rf"""the tweet is classified as {term}""",
            rf"""this claim can be verified.*{term}""",
            rf"""therefore, it can be classified as {term}""",
            rf"""this is a direct statement about.*{term}""",
            rf"""so it falls under the category of {term}""",
            rf"""the tweet is reporting on a {term} fact""",
            rf"""this tweet contains a direct statement about.*{term}"""
        ]

        for pattern in does_not_include_patterns:
            if re.search(pattern, lowered_response):
                print("pattern does not include = ", pattern)
                return '@'

        for pattern in includes_patterns:
            if re.search(pattern, lowered_response):
                print("pattern include = ", pattern)
                return '#'

        # Fallback: scan from the end for an explicit marker character.
        for c in response[::-1]:
            if c == '@' or c == '#':
                print("Inside last search!")
                return c

        raise Exception(f"No @ or # found in response = {response}")

    @staticmethod
    def extract_number(response):
        """Extract the binary verdict (0 or 1) from a model response.

        ``Category`` calls ``Utility.extract_number`` on every chain response, and the
        system prompts in this file instruct the model to finish with "Return 1" or
        "Return 0". The last such explicit "return <digit>" statement wins; if there
        is none, the last standalone ``0``/``1`` in the text is used.

        Args:
            response: Raw text returned by the model.

        Returns:
            ``0`` or ``1`` as ``int``, or ``None`` when no verdict can be found
            (callers treat ``None`` as "retry").
        """
        text = str(response)

        # A digit counts only when it is not part of a longer number/word
        # ("10", "1.5", "COVID19"); a sentence-ending period after it is fine.
        explicit = re.findall(r"\breturn(?:s|ed|ing)?\W{0,3}([01])(?!\w|\.\d)", text,
                              flags=re.IGNORECASE)
        if explicit:
            return int(explicit[-1])

        standalone = re.findall(r"(?<![\w.\-])([01])(?!\w|\.\d)", text)
        if standalone:
            return int(standalone[-1])

        return None

    @staticmethod
    def calculate_metrics(ground_truth, predicted):
        """Compute weighted precision/recall/F1, accuracy and the confusion matrix.

        Args:
            ground_truth: Sequence of true labels.
            predicted: Sequence of predicted labels, aligned with ``ground_truth``.

        Returns:
            Dict with ``"Accuracy"``, ``"Precision"``, ``"Recall"`` and ``"F1"`` as
            percentages (weighted averages from ``classification_report``) plus
            ``"Confusion Matrix"`` as returned by ``confusion_matrix``.
        """
        clsf_report = classification_report(y_true = ground_truth, y_pred = predicted, output_dict=True)
        cf_matrix = confusion_matrix(ground_truth, predicted)

        weighted = clsf_report['weighted avg']
        accuracy = accuracy_score(ground_truth, predicted)

        return {
            "Accuracy": accuracy * 100,
            "Precision": weighted['precision'] * 100,
            "Recall": weighted['recall'] * 100,
            "F1": weighted['f1-score'] * 100,
            "Confusion Matrix": cf_matrix
        }

    @staticmethod
    def get_tweet_data(file_name):
        """Read the tweet CSV at ``file_name`` into a DataFrame, using column 0 as index."""
        return pd.read_csv(file_name, index_col=0)

    @staticmethod
    def write_prediction_output(tweet_objects, file_name_to_write):
        """Write ``tweet_objects`` to ``file_name_to_write`` as CSV, replacing any existing file."""
        if os.path.exists(file_name_to_write):
            os.remove(file_name_to_write)

        tweet_objects.to_csv(file_name_to_write)


In [11]:
class Default(dict):
    """``dict`` for ``str.format_map`` that leaves unknown placeholders in place."""

    def __missing__(self, key):
        # A key that was not supplied is rendered back as its own "{key}"
        # placeholder, so partially formatted templates keep their remaining fields.
        return "{{{}}}".format(key)

In [14]:
from langchain import LLMChain

class Category:
    """Base tweet classifier for a single category.

    This class reads, but never assigns, the following attributes; a subclass must
    set them before classification runs:
    ``verifiable_claim_chain_system_message``,
    ``non_verifiable_claim_chain_system_message``,
    ``arbitrer_claim_chain_system_message`` and ``tweet_objects`` (a DataFrame).
    """
    INPUT_VARIABLES=["delimiter", "tweet"]

    CATEGORY_DESCRIPTIONS = {1: "Scientifically Verifiable"}

    DELIMITER = "```"

    def __init__(self, category_type, llm):
        """Store the category number and the LLM; start with an empty chat history."""
        self.category_type = category_type

        self.llm = llm
        self.history = []

    def does_tweet_fall_into_category(self, tweet, was_previous_classification_correct=None):
        """Classify one tweet with two opposing chains and, if needed, an arbiter.

        The "verifiable" chain answers 1 when the tweet belongs to the category; the
        "non-verifiable" chain answers 1 when it does not. Different numbers
        therefore mean the two chains agree, and the verifiable chain's number is
        returned. Equal numbers mean they contradict each other, so both full
        responses are handed to an arbiter chain whose verdict is returned.

        Args:
            tweet: Tweet text to classify.
            was_previous_classification_correct: ``None`` for no feedback, otherwise
                a bool that selects a user message telling the model whether its
                previous classification was correct.

        Returns:
            Whatever ``Utility.extract_number`` yields for the deciding response.
        """
        print("Inside does_tweet_fall_into_category function")

        if was_previous_classification_correct is None:
            user_message = """
             Classify the following tweet:
             Tweet: {delimiter} {tweet} {delimiter}
             """
        else:
            if was_previous_classification_correct:
                user_message = """
                 Your previous classification was correct. Now, classify the following tweet:
                 Tweet: {delimiter} {tweet} {delimiter}
                 """
            else:
                user_message = """
                 Your previous classification was incorrect. Now, classify the following tweet:
                 Tweet: {delimiter} {tweet} {delimiter}
                 """

        verifiable_claim_chain_prompt = Utility.get_prompt(self.verifiable_claim_chain_system_message,
                                                           user_message, Category.INPUT_VARIABLES, [])

        non_verifiable_claim_chain_prompt = Utility.get_prompt(self.non_verifiable_claim_chain_system_message,
                                                               user_message, Category.INPUT_VARIABLES, [])

        verifiable_claim_chain = LLMChain(llm = self.llm,
                                          prompt = verifiable_claim_chain_prompt)

        non_verifiable_claim_chain = LLMChain(llm = self.llm,
                                              prompt = non_verifiable_claim_chain_prompt)

        input_values = {"tweet": tweet, "delimiter": Category.DELIMITER}

        verifiable_claim_chain_response = verifiable_claim_chain.run(input_values)
        non_verifiable_claim_chain_response = non_verifiable_claim_chain.run(input_values)

        verifiable_claim_chain_response_number = Utility.extract_number(verifiable_claim_chain_response)
        non_verifiable_claim_chain_response_number = Utility.extract_number(non_verifiable_claim_chain_response)

        # The chains answer opposite questions, so differing numbers mean agreement.
        if verifiable_claim_chain_response_number != non_verifiable_claim_chain_response_number:
            return verifiable_claim_chain_response_number

        arbitrer_claim_chain_user_message = """
        Response1: {response1}
        Response2: {response2}
        """

        arbitrer_claim_chain_input_variables = ["response1", "response2"]

        arbitrer_claim_chain_prompt = Utility.get_prompt(self.arbitrer_claim_chain_system_message,
                                                         arbitrer_claim_chain_user_message,
                                                         arbitrer_claim_chain_input_variables,
                                                         self.history)

        arbitrer_claim_input_values = {"response1": verifiable_claim_chain_response,
                                       "response2": non_verifiable_claim_chain_response}

        arbitrer_claim_chain = LLMChain(llm = self.llm,
                                        prompt = arbitrer_claim_chain_prompt)

        arbitrer_response = arbitrer_claim_chain.run(arbitrer_claim_input_values)

        return Utility.extract_number(arbitrer_response)

    def generate_cat_metrics(self, output_file_name, tweet_content_column="polished_text",
                             start_index=0, end_index=200):
        """Classify a range of tweets, store the predictions and return metrics.

        Rows are addressed by their index *label* throughout. ``tweet_objects`` may
        have had rows dropped, in which case labels and positions differ, so mixing
        a label-based presence check with positional access reads the wrong rows.

        Args:
            output_file_name: CSV path the updated ``tweet_objects`` is written to.
            tweet_content_column: Column holding the tweet text.
            start_index: First row label to process (inclusive).
            end_index: Last row label to process (inclusive). Labels in the range
                that are absent from ``tweet_objects`` are skipped.

        Returns:
            The dict produced by ``Utility.calculate_metrics`` for the processed rows.

        Raises:
            Exception: If a row in the range already has a prediction, if a row's
                ground truth cannot be read as an int, or if no prediction could be
                obtained for a tweet after 10 attempts.
        """
        ground_truths = []
        predicted_outputs = []

        print("<======= Generating metrics for category type =", self.category_type, "=======>")
        print()

        category_type_prediction_column_name = f"predicted_{self.category_type}"
        ground_truth_column_name = f"cat{self.category_type}"
        row_labels = self.tweet_objects.index

        # Create the prediction column if needed (-1 means "not computed yet");
        # otherwise refuse to overwrite rows that already carry a prediction.
        if category_type_prediction_column_name not in self.tweet_objects:
            self.tweet_objects[category_type_prediction_column_name] = -1
        else:
            for index in range(start_index, end_index + 1):
                if index not in row_labels:
                    continue

                if self.tweet_objects.loc[index, category_type_prediction_column_name] != -1:
                    raise Exception(
                        "Some of the indices for the specified range have already been computed."
                    )

        was_previous_classification_correct = None

        for index in range(start_index, end_index + 1):
            if index not in row_labels:
                continue

            print("Processing tweet with index# =", index)
            tweet = self.tweet_objects.loc[index, tweet_content_column]
            print("Tweet content = ", tweet)

            # Reading the label is deterministic, so retrying cannot help; fail loudly
            # rather than silently reusing the previous row's ground truth.
            try:
                ground_truth = int(self.tweet_objects.loc[index, ground_truth_column_name])
            except (KeyError, TypeError, ValueError) as error:
                raise Exception(
                    f"Could not read ground truth '{ground_truth_column_name}' for index# = {index}"
                ) from error

            # Reset per row so a failed tweet can never inherit the previous prediction.
            predicted_output = None
            last_error = None

            for attempt in range(10):
                try:
                    predicted_output = self.does_tweet_fall_into_category(tweet,
                                                                          was_previous_classification_correct)
                except Exception as error:
                    last_error = error
                    continue

                if predicted_output is not None:
                    break

            if predicted_output is None:
                print("None for index# = ", index, "and tweet content =", tweet)
                raise Exception("Did not get predicted output for tweet") from last_error

            # Progress report on the rows processed so far (skipped while still empty).
            if index > 0 and index % 5 == 0 and ground_truths:
                print("Metrics till now =", Utility.calculate_metrics(ground_truths, predicted_outputs))

            ground_truths.append(ground_truth)
            predicted_outputs.append(predicted_output)

            was_previous_classification_correct = (ground_truth == predicted_output)

            self.tweet_objects.loc[index, category_type_prediction_column_name] = predicted_output

            print("Ground truth =", ground_truth, "Predicted output =", predicted_output)
            print("Finished Processing tweet with index# =", index)
            print()

        print("<======= Finished generating metrics for claim existence =======>")

        print("Ground truths = ", ground_truths)
        print("Predictions = ", predicted_outputs)

        Utility.write_prediction_output(self.tweet_objects, output_file_name)

        return Utility.calculate_metrics(ground_truths, predicted_outputs)


In [15]:
class Category1(Category):
    """Category 1 ("scientifically verifiable") classifier.

    Builds the three system prompts consumed by ``Category`` (verifiable chain,
    non-verifiable chain, arbiter) and loads the tweet data to classify.
    """
    CATEGORY_TYPE = 1
    # Not referenced anywhere else in the visible code.
    CATEGORY_DESCRIPTION = ""
    
    def __init__(self, llm, input_file_name):
        """Create the prompts and load the tweets, then initialise the base class.

        Args:
            llm: Language model handed on to ``Category.__init__``.
            input_file_name: CSV path passed to ``generate_system_prompt_for_category1``.
        """
        # These attributes are read by the Category methods, which do not set them.
        self.verifiable_claim_chain_system_message, \
        self.non_verifiable_claim_chain_system_message, \
        self.arbitrer_claim_chain_system_message, \
        self.tweet_objects = Category1.generate_system_prompt_for_category1(input_file_name)
        
        super().__init__(Category1.CATEGORY_TYPE, llm)
        
    @staticmethod
    def generate_system_prompt_for_category1(input_file_name):
        """Build the category 1 system prompts and load the filtered tweet data.

        Args:
            input_file_name: CSV path read through ``Utility.get_tweet_data``.

        Returns:
            A 4-tuple ``(verifiable_claim_chain_system_message,
            non_verifiable_claim_chain_system_message,
            arbitrer_claim_chain_system_message, filtered_tweet_objects)`` where the
            last element is the loaded DataFrame with the rows listed in
            ``category1_indices`` and ``non_category_1_indices`` dropped.
        """
        # Row labels removed from the loaded data below.
        # NOTE(review): presumably the rows of the example tweets quoted in the prompts,
        # excluded so the model is not evaluated on its own examples — confirm.
        category1_indices = [22, 56, 71]
        non_category_1_indices = [21, 61, 90]

        # The two example blocks are only interpolated into the arbiter prompt.
        tweet_examples_of_category1 = """
        Some examples of tweets that ARE scientifically verifiable (expected response 1):
            a) " ::people_holding_hands:: We can now meet our family and friends outdoors in a group of 6, or 2 households ::leftright_arrow:: Its important that when we do, we follow social distancing guidance ::backhand_index_pointing_right:: This will help to stop the spread of COVID19 as we take the next step out of lockdown LetsDoItForLancashire "
            b) ": BREAKING: Dozens of cops in Massachusetts have resigned in protest of the vaccine mandates. TO WISH THEM GOOD RIDDA"
            c) ": BREAKING Syria president and first lady test positive for COVID19: presidency AFP"
        """
        
        tweet_examples_of_non_category1 = """
        Some examples of tweets ARE NOT scientifically verifiable (expected response 0):
            a) " : The ones calling for lockdown, without risk or injury to themselves, should pay up."
            b) ": Can you catch coronavirus from handling cash? A new study says the risk is low"
            c) ": I wouldnt trust anything this man touches. NoVaccineForMe"
        """
        
        # Prompt for the chain arguing FOR the category: "Return 1" means the tweet
        # is scientifically verifiable.
        verifiable_claim_chain_system_message = """
        Imagine you're a COVID-19 tweets classifier. You will determine whether tweets fall into scientifically verifiable claim category using the following guidelines:

            I) Direct statements about the COVID-19 virus, its origin, its transmission, prevention methods, or symptoms etc ARE scientifically verifiable. For example:
                - Example 1: "Masks don't work against COVID-19."
                - Example 2: "The government needs to get to the bottom of COVID-19 origin and Chinese involvement."

            II) Opinionated, anecdotal, or hearsay claims about COVID-19 topics MAY BE scientifically verifiable:
                - Example 1: "Talked to a friend who believes the virus started from bats in a wet market. Sounds plausible." (Hearsay)
                - Example 2: "Got my vaccine yesterday and I feel great! Proof that it works" (Anecdote)
                - Example 3: "Based on my research, I'm convinced the virus started from bats in a wet market." (Opinion)
                - Example 4: "Don't forget to practice social distancing. It will keep everyone safe." (Opinion)

            III) Reports on cases, deaths, or someone testing positive ARE scientifically verifiable. For example:
                - Example 1: "Justin Bieber has tested positive for COVID19"
                - Example 3: "Almost 2000 people have died from COVID-19 in Brazil"

        For a given tweet, step through each of the points. If there's a match with one of the points, return 1. If there's no match with any of the points, return 0. Your response should contain the entire process of stepping through the guidelines.

        Output Instructions:
                - Tweet = "Yall can try that damn vaccine on yourselves first! Im not trying to turn into an anamorph"
                    - point I) doesn't match as this is not a direct statement about COVID-19 topics.
                    - point II) matches as the tweet contains an opinion about the vaccine (a COVID-19 topic)
                        - The tweet is scientifically verifiable. Return 1.

                - Tweet = "Yo jus dont use Amazon this lockdown, well were all gettin burnt 2 the ground, bezos just rackin n not payin tax to help the nhs, schools, carework, nothing.. if u feed the beast yr just helpin them destroy us.."
                    - point I) doesn't match as there's no direct statement about COVID-19 topics
                    - point II) doesn't match as there's no opinions about COVID-19 topics
                    - point III) doesn't match as there's no report
                    - The tweet is not scientifically verifiable. Return 0.
        """
        
        # Prompt for the chain arguing AGAINST the category: here "Return 1" means the
        # tweet is NOT scientifically verifiable (inverse of the prompt above).
        non_verifiable_claim_chain_system_message = """
        Imagine you're a COVID-19 tweets classifier. You will determine whether tweets do not fall into scientifically verifiable claim category using the following guidelines:

            I) Observational statements ARE NOT scientifically verifiable. For example:
                - Example 1: "Many people aren't wearing masks"
                - Example 2: "People aren't really following social distancing, apparently"

            II) Impacts of COVID-19 on fields other than science - Business, Law, Histoy, Politics, Operations etc ARE NOT scientifically verifiable. For example:
                - Example 1: "We haven't been able to open our restaurant as COVID-19 has impacted us to operate at full capacity." (COVID-19's effect on Business)
                - Example 2: "COVID-19 disrupted global supply chains, leading to shortages of essential goods and a rise in production costs." (COVID-19's effect on Operations)
                - Example 3: "Legal disputes over lease agreements surged during the pandemic, particularly where tenants were unable to meet their rental obligations due to lockdowns" (COVID-19's effect on Law)
                - Example 4: "The pandemic triggered geopolitical tensions, with countries competing for access to limited vaccine supplies and engaging in 'vaccine diplomacy'." (COVID-19's effect on Politics)
                - Example 5: "Film and television production faced long hiatuses, and when resumed, had to adapt to strict health protocols." (COVID-19's effect on Entertainment)

            III) Second-hand opinions or queries ARE NOT scientifically verifiable. For example:
                - Example 1: "My neighbor says that social distancing is just a way to keep us apart and isolated. Thoughts?" (The opinion is not author's, but his neighbor's)
                - Example 2: "They have said we need more social distancing even after vaccines. I don't understand why." (The first part is not author's saying.)

            IV) Asking questions without a direct claim ARE NOT scientifically verifiable. For example:
                - Example 1: "Why do we still need to wear masks after vaccination?"
                - Example 2: "I'm surprised. Weren't they all vaccinated at the company conference?"

            V) Instructions, Information, notifications or announcements that do not contain opinions about COVID-19 topics ARE NOT scientifically verifiable. For examples:
                - Example 1: "You need to wear masks and follow social distancing to get on buses, trains or planes" (Instructions)
                - Example 2: "Due to social distancing, our restaurant hasn't been able to operate at full capacity" (Dispatching Information)
                - Example 3: "Travel advisory: If you're returning from a hotspot, you need to self-quarantine for 14 days." (Announcement)
                - Example 4: "You will receive communication if you are eligible for the vaccine." (Notification)
                - Example 5: "Get your free COVID-19 test by just walking in a clinic today" (Announcement)
                - Example 6: "A hospital is using a new software to track COVID-19 cases."

            VI) Political, Business or Legal motive behind COVID-19 topics ARE NOT scientifically verifiable. For example:
                - Example 1: "The Trump administration could have sped up the vaccine development process. If it was a democratic president, they would have dont it."
                - Example 2: "The pharmaceutical companies were not pressured politically by the government to deliver vaccines."

            VII) Phrases like "Read the whole story here", "Full version", "This story from", "Live Video", "How it became", "Here's a quick look" etc. means it is a news reporting. These tweets DO NOT contain scientifically verifiable claim.

        For a given tweet, step through each of the points. If there's a match with one of the points, return 1. If there's no match with any of the points, return 0. Your response should contain the entire process of stepping through the guidelines.

        Output Instructions:
        - Tweet = "Yall can try that damn vaccine on yourselves first! Im not trying to turn into an anamorph"
            - point I) doesn't match as this is not an observation.
            - point II) doesn't match as the tweet doesn't mention impact of non-scientific fields
            - point III) doesn't match as the tweet doesn't contain second-hand opinions
            - point IV) doesn't match as the tweet doesn't pose a question
            - point V) doesn't match as the tweet doesn't contain instructions, info, notification or annoucements that do not contain opinion
            - point VI) doesn't match as there's no motive
            - point VII) doesn't match as there's no such phrase
            - The tweet is scientifically verifiable. Return 0.

        - Tweet = "Yo jus dont use Amazon this lockdown, well were all gettin burnt 2 the ground, bezos just rackin n not payin tax to help the nhs, schools, carework, nothing.. if u feed the beast yr just helpin them destroy us.."
            - point I) doesn't match as this is not an observation.
            - point II) matches as the tweet mentions the impact of Amazon's predatory business policies on everyday life.
            - The tweet is not scientifically verifiable. Return 1.
        """
        
        # Arbiter prompt: receives both chain responses and decides between them.
        # format_map with Default fills in the two example blocks defined above.
        arbitrer_claim_chain_system_message = """
        Imagine you're a COVID-19 tweets classifier. You will receive two responses for each tweet - one being the argument that the tweet is scientifically verifiable, and other that the tweet is not scientifically verifiable. Your job is to decide which one is correct.

        Give your reasoning which one of the two responses is more accurate using the following guidelines:
            I) Direct statements about the COVID-19 virus, its origin, its transmission, prevention methods, or symptoms etc ARE scientifically verifiable.
            II) Opinionated, anecdotal, or hearsay claims about COVID-19 topics MAY BE scientifically verifiable.
            III) Reports on cases, deaths, or someone testing positive ARE scientifically verifiable.
            IV) Observational statements ARE NOT scientifically verifiable. Focus on the difference between observations and opinions. Opinions have claims. Observations do not have claims.
            V) Impacts of COVID-19 on fields other than science - Business, Law, Histoy, Politics, Operations etc ARE NOT scientifically verifiable.
            VI) Second-hand opinions or queries ARE NOT scientifically verifiable.
            VII) Asking questions without a direct claim ARE NOT scientifically verifiable.
            VIII) Instructions, Information, notifications or announcements that do not contain opinions about COVID-19 topics ARE NOT scientifically verifiable.
            IX) Political, Business or Legal motive behind COVID-19 topics ARE NOT scientifically verifiable.
            X) Phrases like "Read the whole story here", "Full version", "This story from", "Live Video", "How it became", "Here's a quick look" etc. means it is a news reporting. These tweets DO NOT contain scientifically verifiable claim.
            
        {tweet_examples_of_category1}
        
        {tweet_examples_of_non_category1}

        If the tweet is scientifically verifiable, return 1. Otherwise, return 0.
        """.format_map(Default(tweet_examples_of_non_category1=tweet_examples_of_non_category1, \
                               tweet_examples_of_category1=tweet_examples_of_category1))
        
        tweet_objects = Utility.get_tweet_data(input_file_name)
        # Drop the listed rows by index label before returning the data.
        indices_to_ignore = category1_indices + non_category_1_indices
        filtered_tweet_objects = tweet_objects.drop(indices_to_ignore)
        
        return verifiable_claim_chain_system_message, \
                non_verifiable_claim_chain_system_message, \
                arbitrer_claim_chain_system_message, \
                filtered_tweet_objects

In [16]:
# Model used for every chain in this run: the 70B chat model, sampled at a low
# temperature, with room for the long step-by-step answers the prompts request.
llm = TogetherLLM(model="togethercomputer/llama-2-70b-chat", temperature=0.15, max_tokens=1500)

# Tweets are read from the first CSV; the frame with the added prediction column
# is written to the second.
input_file_name = "tweets - original.csv"
output_file_name = "updated_tweets_cat_1-70b.csv"

In [17]:
# Build the category 1 classifier: this loads the input CSV and prepares the prompts.
cat1 = Category1(llm=llm, input_file_name=input_file_name)