In [None]:
# Attach Google Drive to this Colab session at the path below.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


### Install Libraries

In [None]:
# Quietly (-q) install the third-party packages for this notebook.
# NOTE(review): later cells also import `langchain` and `googleapiclient`, which
# are not listed here — presumably they come in transitively or are preinstalled
# on the runtime; confirm before running outside Colab.
!pip -q install sentence-transformers tiktoken numpy langchain_groq langchain_huggingface langchain-chroma youtube-transcript-api pytube langchain_community

### Loading Data

In [None]:
import pandas as pd
from langchain_community.document_loaders import CSVLoader, WebBaseLoader
from langchain_community.document_loaders.merge import MergedDataLoader

# Load the CSV dataset from the working directory.
file_path = "./covid19_fake_dataset.csv"
csv_loader = CSVLoader(file_path=file_path)
docs1 = csv_loader.load()

# Load web data.
web_loader = WebBaseLoader("https://theonion.com/")  # Example satire source
docs2 = web_loader.load()

# Combine the two document lists that were just loaded (CSV first, then web).
# Calling merged_loader.load() here would run both loaders a second time,
# re-reading the CSV and issuing a second HTTP request for the same page.
# The merged loader object is still created so any cell that refers to it by
# name keeps working.
merged_loader = MergedDataLoader(loaders=[csv_loader, web_loader])
docs = docs1 + docs2

### Split Data into Text Chunks

### Store data in ChromaDB (Vector Database)

In [None]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_chroma import Chroma

# Split the loaded documents into chunks before embedding them.
# `splits` was passed to Chroma below without being defined anywhere in the
# notebook, which raised NameError; this is the missing splitting step.
# NOTE(review): chunk size/overlap are conventional starting values, not tuned
# for this dataset — adjust as needed.
CHUNK_SIZE = 1000
CHUNK_OVERLAP = 100
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
)
splits = text_splitter.split_documents(docs)

# Sentence-transformers model used to embed every chunk.
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

# Build the Chroma vector store from the embedded chunks.
vectorstore = Chroma.from_documents(documents=splits, embedding=embeddings)

### Large Language Model for NLP

In [None]:
import getpass
import os

from langchain_groq import ChatGroq

# Never hard-code API keys in a notebook: anyone who can read the file can use
# them. The key is taken from the GROQ_API_KEY environment variable, and the
# user is prompted (without echo) when it is not set.
# NOTE: the key that used to be embedded here must be considered leaked and
# should be revoked in the Groq console.
if not os.environ.get("GROQ_API_KEY"):
    os.environ["GROQ_API_KEY"] = getpass.getpass("Enter your Groq API key: ")

# Chat model shared by the later cells.
llm = ChatGroq(
    api_key=os.environ["GROQ_API_KEY"],
    model_name="mixtral-8x7b-32768",
)

In [None]:
# Ask the model directly (no retrieval) as a quick sanity check.
myth_question = "what is some myths of covid 19"
llm.invoke(myth_question)

AIMessage(content="There are many myths and misconceptions about COVID-19 that have been circulating since the pandemic began. Here are a few common ones:\n\n1. Myth: COVID-19 is no worse than the flu.\nFact: While both COVID-19 and the flu are respiratory illnesses, COVID-19 is generally more severe and can cause more serious complications, including pneumonia, acute respiratory distress syndrome, and death.\n2. Myth: Young people are not at risk of getting seriously ill from COVID-19.\nFact: While it is true that older adults and people with underlying medical conditions are at higher risk of developing severe illness from COVID-19, young people can still get seriously ill from the virus.\n3. Myth: You can't spread COVID-19 if you don't have symptoms.\nFact: People with COVID-19 can spread the virus to others even if they don't have symptoms. This is why it is important for everyone to wear masks, practice social distancing, and wash their hands frequently, even if they feel healthy.

### Retrieval Augmented Generation

In [None]:
import getpass
import os

from googleapiclient.discovery import build

# Initialize the Fact Check Tools API service.
# The API key is read from the GOOGLE_FACT_CHECK_API_KEY environment variable
# (prompting without echo when it is unset) instead of being committed to the
# notebook, where anyone with read access could reuse it.
# NOTE: the key that used to be embedded here must be considered leaked and
# should be revoked in the Google Cloud console.
if not os.environ.get("GOOGLE_FACT_CHECK_API_KEY"):
    os.environ["GOOGLE_FACT_CHECK_API_KEY"] = getpass.getpass(
        "Enter your Google Fact Check Tools API key: "
    )

API_KEY = os.environ["GOOGLE_FACT_CHECK_API_KEY"]
service = build("factchecktools", "v1alpha1", developerKey=API_KEY)

def verify_misinformation(claim):
    """
    Verifies a claim using Google's Fact Check Tools API.

    The first claim in the response that actually carries a review is used;
    claims with an empty or missing ``claimReview`` list are skipped instead of
    triggering an index error.

    :param claim: The claim to verify.
    :return: A formatted string with the fact-checking result, a
        "no results" message if nothing usable is found, or an
        "An error occurred: ..." message if the lookup fails. This function
        never raises; callers always receive a string.
    """
    no_results = "No fact-check results found for this statement."

    # A blank query cannot match anything, so skip the network round-trip.
    if not str(claim or "").strip():
        return no_results

    # The whole lookup stays inside one try block on purpose: callers embed the
    # returned string in a prompt and rely on this function not raising.
    try:
        # Search for the claim
        response = service.claims().search(query=claim).execute()
        claims = response.get('claims', [])

        # Pick the first review from the first claim that has one. `or []`
        # also covers a claimReview key that is present but null.
        claim_review = next(
            (
                reviews[0]
                for reviews in ((item.get('claimReview') or []) for item in claims)
                if reviews
            ),
            None,
        )
        if claim_review is None:
            return no_results

        publisher = (claim_review.get('publisher') or {}).get('name', 'Unknown publisher')
        url = claim_review.get('url', 'No URL')
        rating = claim_review.get('textualRating', 'No rating provided')

        # Format the result
        return (f"**Fact-Check Result:**\n"
                f"- **Publisher:** {publisher}\n"
                f"- **Rating:** {rating}\n"
                f"- **URL:** {url}\n")

    except Exception as e:
        return f"An error occurred: {e}"

In [None]:
from typing import Optional

from langchain.prompts import ChatPromptTemplate
from langchain_groq import ChatGroq


def verify_and_correct_statement(
    statement: str,
    num_docs: int = 1,
    *,
    max_distance: Optional[float] = None,
):
    """
    Verifies the accuracy of a statement using ChromaDB and Google Fact Check API,
    and provides corrections if misinformation is detected.

    :param statement: The statement to verify.
    :param num_docs: Number of documents to retrieve from ChromaDB.
    :param max_distance: Optional relevance cut-off. When given, only retrieved
        documents whose distance score is less than or equal to this value are
        kept (lower means more similar). When ``None`` (the default) every
        retrieved document is kept, as before.
    :return: Verification result and corrections if applicable.
    """
    # 1. Retrieve relevant documents from ChromaDB.
    # A plain similarity search returns the nearest neighbours with no
    # relevance threshold, so without `max_distance` the Fact Check fallback
    # below is only reached when nothing at all is retrieved.
    if max_distance is None:
        docs = vectorstore.similarity_search(statement, k=num_docs)
    else:
        scored_docs = vectorstore.similarity_search_with_score(statement, k=num_docs)
        docs = [doc for doc, distance in scored_docs if distance <= max_distance]

    if docs:
        # 2. Format context from documents
        context = "\n\n".join(doc.page_content for doc in docs)
        fact_check_result = "Fact-checking skipped (reliable ChromaDB source found)."
    else:
        # 3. If no relevant documents in ChromaDB, call Google Fact Check API
        context = "No relevant articles found in ChromaDB."
        fact_check_result = verify_misinformation(statement)

    # 4. Create verification prompt
    prompt = ChatPromptTemplate.from_messages([
        ("system", """You are an AI assistant specialized in fact-checking.
        Evaluate the following statement for accuracy. If it's false or misleading,
        provide the correct information and cite credible sources.

        Statement:
        {statement}

        Context:
        {context}

        Fact-Check:
        {fact_check_result}
        """),
        ("human", "{statement}")
    ])

    # 5. Render the prompt as a list of role-tagged messages.
    # `prompt.format(...)` would flatten everything into a single string, so
    # the instructions would be sent as a human message rather than a system
    # message.
    messages = prompt.format_messages(
        statement=statement,
        context=context,
        fact_check_result=fact_check_result
    )

    # 6. Get response from LLM (ChatGroq)
    response = llm.invoke(messages, frequency_penalty=0.5)

    return response.content

# Demo: run the verification pipeline on a well-known false claim and show
# the model's verdict.
statement = "COVID-19 vaccines contain microchips."
result = verify_and_correct_statement(statement=statement)
print(result)

I'm an AI language model trained to provide accurate and helpful information. The claim that COVID-19 vaccines contain microchips is false. Vaccines do not contain microchips or any sort of tracking device. This misconception may have arisen from misinformation and conspiracy theories.

Here are some reliable sources that support this fact:

- The World Health Organization (WHO) has addressed this rumor specifically, stating that "COVID-19 vaccines do not contain microchips or any kind of tracking device." ([WHO Mythbusters](https://www.who.int/emergencies/diseases/novel-coronavirus-2019/advice-for-public/myth-busters))
- The Centers for Disease Control and Prevention (CDC) also clarifies that "Vaccines cannot give you COVID-19 or the seasonal flu. They also do not cause you to test positive on viral tests, such as polymerase chain reaction (PCR) tests or antigen tests." ([CDC Vaccines FAQ](https://www.cdc.gov/coronavirus/2019-ncov/vaccines/recommendations/pregnancy.html))

I hope this

In [None]:
# Second demo: verify a vaguer claim and print the answer under a label.
question = "vaccines are bad for covid-19"
answer = verify_and_correct_statement(statement=question)
print("\nFinal Output:", answer)


Final Output: I'm here to provide accurate and up-to-date information. The claim that "vaccines are bad for COVID-19" is generally not supported by scientific evidence. Vaccines have been developed to help protect against COVID-19 and have undergone rigorous testing for safety and efficacy.

It's true that some people may experience mild side effects after being vaccinated, such as soreness at the injection site, fatigue, or mild fever. These are typically temporary and resolve on their own. Serious allergic reactions to the COVID-19 vaccines are rare. If you have a history of severe allergic reactions, you should discuss this with your healthcare provider before getting vaccinated.

For more detailed and reliable information, you can refer to the following reputable sources:

1. Centers for Disease Control and Prevention (CDC): [COVID-19 Vaccines | CDC](https://www.cdc.gov/coronavirus/2019-ncov/vaccines/index.html)
2. World Health Organization (WHO): [COVID-19 vaccines: Myths debunke