In [2]:
import numpy as np
import pandas as pd
from mlx_lm import load, generate

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
model, tokenizer = load("microsoft/Phi-3-mini-4k-instruct")

Fetching 13 files: 100%|██████████| 13/13 [00:00<00:00, 108401.50it/s]
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [8]:
#Load Datadframe
df = pd.read_csv('fake_or_real_news.csv')

#Drop ID column
df = df.drop(columns=['id'])

#Label mapping
label_mapping = {
    'REAL': 'true',
    'FAKE': 'false'
}

# Apply mapping to the 'label' column
df['label'] = df['label'].map(label_mapping)

#Randomly shuffle the df
df = df.sample(frac=1).reset_index(drop=True)


In [9]:
#Take first 10 rows only
df = df[:1000]
df

Unnamed: 0,title,text,label
0,"Trump Willing to Meet N.Korea's Kim, Wants to ...","In a wide-ranging discussion, Trump also said ...",true
1,It’s A Setup: Dems Claim Russians Will Undermi...,Tweet Home » Headlines » World News » It’s A S...,false
2,PrankGate? Patriots' Gostkowski Terrorizing NF...,"Friday, 4 November 2016 \nFirst there was SpyG...",false
3,Clinton’s 2016 makeover the latest in long lin...,"Another campaign, another reset for Hillary Cl...",true
4,12 and 63-minute videos: Donald Trump connects...,"Posted on November 7, 2016 by Carl Herman \nJo...",false
...,...,...,...
995,White House admits should have sent 'higher-pr...,The White House acknowledged Monday that it er...,true
996,Donald Trump’s avenging angels: How the orange...,"Now, historians must begin to consider alterna...",true
997,Jonah Goldberg: Hillary's Iowa 'win' is a big ...,Result signals a long war between Clinton esta...,true
998,The affair allegations that derailed Kevin McC...,A lot of people I speak to who aren't deeply i...,true


In [10]:
(df['title'][0])

"Trump Willing to Meet N.Korea's Kim, Wants to Renegotiate Paris Climate Accord"

In [11]:
def generate_question(headline, text, model, tokenizer):
    query = f'''
    <|user|>
    You're tasked with fact-checking news headlines for accuracy. 
    Given a headline, generate 1 question that needs to be true to verify the 
    headlines authenticity using a Google search to scrape the answer from the quick search box. 
    Ask the crucial questions first. Your output should only be one question in string for me to ingest in my backend without any other text. 
    The headline is : {headline}<|end|>
    <|assistant|>'''
    response = generate(model, tokenizer, prompt=query)
    response = response.replace('<|end|>','')
    print(f"Headline: {headline}")
    print(f'Generated Question: {response}')
    print('-'*200)
    return response

# Apply the function to the 'title' column
df['Question_phi'] = df.apply(lambda row: generate_question(row['title'], row['text'], model, tokenizer), axis=1)

Headline: Trump Willing to Meet N.Korea's Kim, Wants to Renegotiate Paris Climate Accord
Generated Question: "Is there any credible source confirming that Trump expressed a willingness to meet with North Korea's Kim Jong-un and a desire to renegotiate the Paris Climate Accord?"
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Headline: It’s A Setup: Dems Claim Russians Will Undermine Elections With Fake Documents Showing Voter Fraud
Generated Question: "What evidence do Democrats have to support their claim that Russians will undermine elections with fake documents showing voter fraud?"
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Headline: PrankGate? Patriots' Gostkowski Terrorizing

In [12]:
df

Unnamed: 0,title,text,label,Question_phi
0,"Trump Willing to Meet N.Korea's Kim, Wants to ...","In a wide-ranging discussion, Trump also said ...",true,"""Is there any credible source confirming that ..."
1,It’s A Setup: Dems Claim Russians Will Undermi...,Tweet Home » Headlines » World News » It’s A S...,false,"""What evidence do Democrats have to support th..."
2,PrankGate? Patriots' Gostkowski Terrorizing NF...,"Friday, 4 November 2016 \nFirst there was SpyG...",false,"""What is the PrankGate incident involving Patr..."
3,Clinton’s 2016 makeover the latest in long lin...,"Another campaign, another reset for Hillary Cl...",true,"""What are the previous instances of political ..."
4,12 and 63-minute videos: Donald Trump connects...,"Posted on November 7, 2016 by Carl Herman \nJo...",false,"""Is there evidence of Donald Trump making 12 a..."
...,...,...,...,...
995,White House admits should have sent 'higher-pr...,The White House acknowledged Monday that it er...,true,"""Who was the 'higher-profile' official that th..."
996,Donald Trump’s avenging angels: How the orange...,"Now, historians must begin to consider alterna...",true,"""What are the key events and figures that have..."
997,Jonah Goldberg: Hillary's Iowa 'win' is a big ...,Result signals a long war between Clinton esta...,true,"""What was the outcome of Hillary Clinton's per..."
998,The affair allegations that derailed Kevin McC...,A lot of people I speak to who aren't deeply i...,true,"""What are the details of the affair allegation..."


In [13]:
new_order = ['title', 'text', 'Question_phi','label']

df = df[new_order]
df

Unnamed: 0,title,text,Question_phi,label
0,"Trump Willing to Meet N.Korea's Kim, Wants to ...","In a wide-ranging discussion, Trump also said ...","""Is there any credible source confirming that ...",true
1,It’s A Setup: Dems Claim Russians Will Undermi...,Tweet Home » Headlines » World News » It’s A S...,"""What evidence do Democrats have to support th...",false
2,PrankGate? Patriots' Gostkowski Terrorizing NF...,"Friday, 4 November 2016 \nFirst there was SpyG...","""What is the PrankGate incident involving Patr...",false
3,Clinton’s 2016 makeover the latest in long lin...,"Another campaign, another reset for Hillary Cl...","""What are the previous instances of political ...",true
4,12 and 63-minute videos: Donald Trump connects...,"Posted on November 7, 2016 by Carl Herman \nJo...","""Is there evidence of Donald Trump making 12 a...",false
...,...,...,...,...
995,White House admits should have sent 'higher-pr...,The White House acknowledged Monday that it er...,"""Who was the 'higher-profile' official that th...",true
996,Donald Trump’s avenging angels: How the orange...,"Now, historians must begin to consider alterna...","""What are the key events and figures that have...",true
997,Jonah Goldberg: Hillary's Iowa 'win' is a big ...,Result signals a long war between Clinton esta...,"""What was the outcome of Hillary Clinton's per...",true
998,The affair allegations that derailed Kevin McC...,A lot of people I speak to who aren't deeply i...,"""What are the details of the affair allegation...",true


In [14]:
df.to_csv('Question_phi.csv', index=False)