In [18]:
pip install ollama

Note: you may need to restart the kernel to use updated packages.


In [19]:
import pandas as pd
import ollama
import json
from tqdm import tqdm
import re
from sklearn.model_selection import train_test_split
from datasets import load_dataset

In [7]:
# Load your dataset
from datasets import load_dataset

# Gated/private datasets need authentication first: run `huggingface-cli login`.
dataset = load_dataset(path="Koni99/fake-news-testset")

In [16]:
# Index the row first, then the column: only one record is read instead of the whole "text" column.
dataset["test"][0]["text"]

'Specter of Trump Loosens Tongues, if Not Purse Strings, in Silicon Valley - The New York Times PALO ALTO, Calif.  —   After years of scorning the political process, Silicon Valley has leapt into the fray. The prospect of a President Donald J. Trump is pushing the tech community to move beyond its traditional role as donors and to embrace a new existence as agitators and activists. A distinguished venture capital firm emblazoned on its corporate home page an earthy   epithet. One prominent tech chieftain says the consequences of Mr. Trump’s election would “range between disastrous and terrible. ” Another compares him to a dictator. And nearly 150 tech leaders signed an open letter decrying Mr. Trump and his campaign of “anger” and “bigotry. ” Not quite all the action is  . Peter Thiel, a founder of PayPal and Palantir who was the first outside investor in Facebook, spoke at the Republican convention in July. The New York Times reported on Saturday that Mr. Thiel is giving $1. 25 millio

In [5]:
 # Build the fact-checking prompt and send it to a model served by Ollama.

def get_prediction_ollama(model, article):
    """
    Ask an Ollama-served model whether a news article is real or fake.

    :param model: name of the model to query, e.g. "mistral" for Mistral-7B
    :param article: the news article text to be analyzed (a string)
    :return: the text content of the model's reply, exactly as returned
             (it is not parsed or validated here)
    """
    impersonator = "You are an expert AI fact checker trained to detect fake news."
    instructor = "Is this article real or fake news? Analyze the given text in detail as a fact checker would. Explain your reasoning step-by-step. Check for misleading information, false claims, biased language. If the news article is real, respond with 1 for 'True', if fake, respond with 0 for 'False'."
    cloze_prompt = "Structure your answer as a dictionary object with the two keys 'rationale' for your analytic reasoning and 'prediction' for your final prediction. The values for 'prediction' are binary. Either 1 for a real article or 0 for fake news. Do not add anything else. Your output dictionary should look like this: { rationale: <your_rationale>, prediction: <your_prediction [0,1]> }."
    prompt = f"{instructor} {cloze_prompt}"

    # The leading space keeps the instructions from running straight into
    # the article introduction ("...}.Here is the article...").
    user_message = prompt + " Here is the article for you to analyze: " + article

    messages = [
        {'role': 'system', 'content': impersonator},
        {'role': 'user', 'content': user_message},
    ]

    # Create the response using Ollama.
    response = ollama.chat(model=model, messages=messages)
    return response['message']['content']

In [20]:
#example
# Row-first indexing fetches a single record instead of the whole "text" column.
print(get_prediction_ollama("llama2", dataset["test"][1]["text"]))

{ rationale: Here is my analysis of the article as a fact checker:

1. The source of the article is not provided, which could indicate that it may not be a credible source.
2. The article mentions "terrorists" without providing any context or evidence to support the claim. This lack of specificity could imply that the article is spreading propaganda or misinformation.
3. The article references "insurgents" attempting to break into Aleppo, but it does not provide any information on who these insurgents are or what their goals are. This lack of context makes it difficult to verify the claims made in the article.
4. The article mentions that Russia has increased its intelligence activities in Syria, which could be a legitimate reason for Russia's actions. However, without additional information, it is impossible to determine if these activities are lawful or ethical.
5. The article mentions that Russia will use "new weapons" during upcoming attacks on terrorists. This claim is not verifie

In [69]:
# Row-first indexing fetches a single record instead of the whole "text" column.
print(get_prediction_ollama("mistral", dataset["test"][1]["text"]))

 { rationale: "The article reports on a legitimate event - the passage of disaster aid, government funding, and debt ceiling increase legislation in the Senate. The sources cited are reputable (Reuters), and the information presented aligns with other credible news outlets. The language is factual, and there is no apparent misinformation or false claims. The article does mention some political positions and disagreements but does not take a biased stance.", prediction: 1 }


In [1]:
# Row-first indexing fetches a single record instead of the whole "text" column.
print(get_prediction_ollama("phi", dataset["test"][1]["text"]))

NameError: name 'get_prediction_ollama' is not defined

In [5]:
def get_completion(messages, model="mistral"):
    """
    Send a chat conversation to an Ollama model and return the reply text.

    :param messages: list of {'role': ..., 'content': ...} dicts, passed to ollama.chat unchanged
    :param model: name of the model to query; defaults to "mistral", the value
                  that was previously hard-coded, so existing callers are unaffected
    :return: the 'content' field of the message in the chat response
    """
    response = ollama.chat(model=model, messages=messages)
    return response['message']['content']

In [6]:
# Query the model for one article and extract the prediction from the raw model output
def _extract_prediction(parsed, text):
    """Return the binary prediction (0 or 1) found in a model reply, else "Check manually!"."""
    if isinstance(parsed, dict):
        try:
            candidate = int(parsed.get("prediction"))
        except (TypeError, ValueError, OverflowError):
            # Key missing or value not convertible: fall through to the text search.
            candidate = None
        if candidate in (0, 1):
            return candidate

    # Fallback for replies that are not strict JSON: the key may be bare or quoted,
    # capitalized or not, and the value may itself be quoted. Only a lone 0 or 1 is
    # accepted, so "prediction: 10" is not misread as 1.
    match = re.search(r'["\']?[pP]rediction["\']?\s*:\s*["\']?([01])\b', text)
    if match:
        return int(match.group(1))
    return "Check manually!"


def get_prediction(article):
    """
    Ask the model (via get_completion) to classify one article and extract its verdict.

    :param article: the news article text to be analyzed (a string)
    :return: tuple (response, model_prediction) where
             - response is the parsed JSON value if the reply was valid JSON,
               otherwise the reply exactly as returned by get_completion;
             - model_prediction is 1 (real), 0 (fake), or the string
               "Check manually!" when no binary prediction could be found.
    """
    impersonator = "You are an expert AI fact checker trained to detect fake news."
    instructor = "Is this article real or fake news? Analyze the given text in detail as a fact checker would. Explain your reasoning step-by-step. Check for misleading information, false claims, biased language. If the news article is real, respond with 1 for 'True', if fake, respond with 0 for 'False'."
    cloze_prompt = "Structure your answer as a JSON object with the two keys 'rationale' for your analytic reasoning and 'prediction' for your final prediction. The values for 'prediction' are binary. Either 1 for a real article or 0 for fake news. In 'rationale' you provide a string with your detailed reasoning process that led you to the decision you took. Remember if the article contains misleading information, false claims, biased language or any other hints of disinformation your prediction should be 0 for fake news, else 1 for true news. To summarize the JSON object looks like this: {'rationale': <your_analytic_reasoning_process>, 'prediction': <your_prediction[0,1]>} "
    prompt = f"{instructor} {cloze_prompt}"
    system_message = impersonator
    # The space keeps the last word of the article from running into the instructions.
    user_message = "Here is the article for you to analyze: " + article + " " + prompt

    messages = [
        {'role': 'system',
         'content': system_message},
        {'role': 'user',
         'content': user_message},
    ]
    response = get_completion(messages)
    text = str(response)

    parsed = None
    try:
        parsed = json.loads(text)
        # Valid JSON: hand the parsed value back to the caller.
        response = parsed
    except json.JSONDecodeError:
        # Not strict JSON: keep the reply as received.
        pass

    model_prediction = _extract_prediction(parsed, text)
    return response, model_prediction

In [32]:
# Returns the final predictions and the model responses for all rows, retrying failed requests
def extend_data(dataframe, max_retries=5):
    """
    Run get_prediction on every entry of dataframe['text'], retrying on errors.

    :param dataframe: any object whose 'text' item is an iterable of article texts
    :param max_retries: how many times get_prediction is attempted per article
    :return: tuple (model_predictions, raw_response) of two lists with one entry
             per article; both hold the string "Error" for an article whose
             attempts all raised an exception
    """
    # Read the column once and iterate over it directly. Subscripting
    # dataframe['text'] inside the loop repeats the column lookup for every row
    # (potentially costly for column-oriented containers) and indexes by label
    # rather than by position.
    texts = list(dataframe['text'])

    model_predictions = []
    raw_response = []
    for i, article in enumerate(tqdm(texts)):
        for attempt in range(max_retries):
            try:
                response, prediction = get_prediction(article)
            except Exception as e:
                # Best effort: report the failure and try the same article again.
                print(f"Error: {e}. Attempt {attempt+1} of {max_retries}. Retrying...")
            else:
                model_predictions.append(prediction)
                raw_response.append(response)
                break
        else:
            # Reached only when no attempt succeeded (the inner loop never hit `break`).
            print(f"Max retries exceeded for index {i}. Request failed.")
            model_predictions.append("Error")
            raw_response.append("Error")
    return model_predictions, raw_response

In [33]:
# Extend the dataframe with the predictions and model responses, then save it as a CSV file
def mistral_testset(dataframe, filename):
    """
    Add the model outputs to the data as two new columns and write the result to CSV.

    :param dataframe: data handed to extend_data and to pd.DataFrame
    :param filename: destination of the CSV file (written with the index column)
    :return: None
    """
    predictions, raw = extend_data(dataframe)
    new_columns = {
        "model_prediction": predictions,
        "raw_model_response": raw,
    }
    # Build the frame, attach both columns and save, all in one chain.
    pd.DataFrame(dataframe).assign(**new_columns).to_csv(filename, index=True)