In [None]:
# IMPORTANT: RUN THIS CELL IN ORDER TO IMPORT YOUR KAGGLE DATA SOURCES,
# THEN FEEL FREE TO DELETE THIS CELL.
# NOTE: THIS NOTEBOOK ENVIRONMENT DIFFERS FROM KAGGLE'S PYTHON
# ENVIRONMENT SO THERE MAY BE MISSING LIBRARIES USED BY YOUR
# NOTEBOOK.
import kagglehub
# Download the 'soumya069/dataset' Kaggle dataset through kagglehub and keep
# the value it returns in `soumya069_dataset_path`.
# NOTE(review): the later read_csv cells use the hard-coded
# "/kaggle/input/dataset/..." paths instead of this variable — confirm both
# point at the same files in the environment where the notebook runs.
soumya069_dataset_path = kagglehub.dataset_download('soumya069/dataset')

print('Data source import complete.')


In [None]:
!pip install transformers==<4.57.1>

In [None]:
!pip install scikit-learn

In [None]:
!pip install --upgrade transformers

In [None]:
import pandas as pd
import transformers
from sklearn.model_selection import train_test_split
import torch
from transformers import AutoTokenizer, AutoModelForMaskedLM,Trainer, TrainingArguments
from transformers import EarlyStoppingCallback

In [None]:
# Load the fake-news articles from the Kaggle input directory.
fake_df=pd.read_csv("/kaggle/input/dataset/Fake.csv")

In [None]:
# (rows, columns) of the fake-news frame.
fake_df.shape

In [None]:
# Tag every row of this frame with the string label "fake".
fake_df['label']="fake"

In [None]:
# Inspect five random rows.
fake_df.sample(5)

In [None]:
# Load the real-news articles from the Kaggle input directory and show the
# frame's (rows, columns).
true_df=pd.read_csv("/kaggle/input/dataset/True.csv")
true_df.shape

In [None]:
# Tag every row of this frame with the string label "true".
true_df['label']="true"

In [None]:
# Inspect five random rows.
true_df.sample(5)

In [None]:
# Stack the real and fake articles into a single frame. ignore_index=True gives
# the result a fresh 0..n-1 index; without it each half keeps its own 0-based
# row labels, so low-numbered labels occur twice and label-based lookups such
# as df.loc[i] return two rows instead of one.
df = pd.concat([true_df, fake_df], ignore_index=True)

In [None]:
# (rows, columns) of the combined frame.
df.shape

In [None]:
# Inspect five random rows of the combined frame.
df.sample(5)

In [None]:
# Report per-column null counts and the number of fully duplicated rows.
# NOTE(review): these are only printed — no visible cell removes nulls or
# duplicates before the train/test split; confirm that is intended.
print("NULL values-->",df.isnull().sum())
print("Duplicated values-->",df.duplicated().sum())

In [None]:
# Show the first row (positional) of the combined frame.
df.iloc[0]

In [None]:
df['label'].value_counts() # the dataset is balanced

In [None]:
# Count articles per subject within each label.
df.groupby('label')['subject'].value_counts()

In [None]:
# Print the body and the title of the first row to compare the two fields.
print("text->",df.iloc[0]['text'])
print("title->",df.iloc[0]['title'])

In [None]:
# Unnecessary columns -> title, subject, date: only the article body and its
# label are used from here on. Re-assigning with errors='ignore' (instead of
# inplace=True) keeps the cell safe to re-run; the in-place drop raised
# KeyError on a second execution because the columns were already gone.
df = df.drop(columns=['title', 'subject', 'date'], errors='ignore')


In [None]:
# Inspect five random rows after the column drop.
df.sample(5)

In [None]:
# Character length of every article, longest first.
df['text'].str.len().sort_values(ascending=False)

In [None]:
# Encode the string labels as class ids: fake -> 0, true -> 1.
# Series.map replaces every value that is missing from the mapping with NaN, so
# executing the unguarded cell a second time wiped the whole label column.
# Mapping only while the column is still non-numeric makes the cell idempotent.
if not pd.api.types.is_numeric_dtype(df['label']):
    df['label'] = df['label'].map({'fake': 0, "true": 1})

In [None]:
# Lower-case every article body. Text passed to the model at inference time
# should be normalised the same way.
df['text']=df['text'].str.lower()

In [None]:
# 80/20 split with a fixed seed (42) for repeatability.
# NOTE(review): the split is not stratified, and the 20% hold-out is later
# passed to the Trainer as its eval_dataset — confirm no separate test set is
# expected.
X_train,X_test,y_train,y_test=train_test_split(df['text'],df['label'],test_size=0.2,random_state=42)

In [None]:
# Number of training examples.
X_train.shape

In [None]:
# Use the GPU when torch reports one, otherwise fall back to the CPU; the bare
# name on the last line displays the choice.
device="cuda" if torch.cuda.is_available() else "cpu"
device

In [None]:
# Hugging Face checkpoint name shared by the tokenizer and the classifier.
model_ckpt="microsoft/deberta-v3-base"

# Load the tokenizer that belongs to that checkpoint.
tokenizer=AutoTokenizer.from_pretrained(model_ckpt)

In [None]:
from transformers import AutoModelForSequenceClassification

In [None]:
# Load the checkpoint with a 2-class sequence-classification head and move it
# to the selected device.
# NOTE(review): despite the variable name, `model_ckpt` is
# "microsoft/deberta-v3-base", so this is a DeBERTa model, not BigBird.
model_bigbird=AutoModelForSequenceClassification.from_pretrained(model_ckpt,num_labels=2).to(device)

In [None]:
# Print the maximum sequence length the tokenizer reports.
print("model_max_length:", tokenizer.model_max_length)

In [None]:
# Tokenize the training and hold-out texts (truncated/padded, at most 512
# tokens), in that order.
# NOTE(review): no later visible cell reads train_enc / val_enc — NewsDataset
# tokenizes the same texts again — so this pass looks redundant; confirm before
# removing it.
train_enc, val_enc = (
    tokenizer(list(split_texts), truncation=True, padding=True, max_length=512)
    for split_texts in (X_train, X_test)
)

In [None]:
from torch.utils.data import Dataset

class NewsDataset(Dataset):
    """Torch dataset that tokenizes a collection of texts once, up front.

    Args:
        texts: Iterable of raw strings (list, tuple, pandas Series, ...).
        labels: Iterable of integer class ids aligned position-by-position
            with ``texts``.
        tokenizer: Callable invoked as
            ``tokenizer(texts, truncation=True, padding=True, max_length=...)``
            that returns a mapping from feature name to one sequence per text.
        max_length: Truncation length forwarded to the tokenizer.

    Raises:
        ValueError: If ``texts`` and ``labels`` differ in length.
    """

    def __init__(self, texts, labels, tokenizer, max_length=512):
        # Materialise both inputs as plain lists so __getitem__ always indexes
        # by position. Indexing a pandas Series with an integer looks the value
        # up by index *label*, which raises KeyError (or returns the wrong row)
        # for the shuffled index a train/test split produces.
        texts = list(texts)
        labels = list(labels)
        if len(texts) != len(labels):
            raise ValueError(
                f"texts and labels must have the same length, "
                f"got {len(texts)} and {len(labels)}"
            )

        self.encodings = tokenizer(
            texts,
            truncation=True,
            padding=True,
            max_length=max_length
        )
        self.labels = labels

    def __getitem__(self, idx):
        """Return the features and the label of sample ``idx`` as tensors."""
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        # The key is "labels" (plural) and the dtype is long, as in the
        # original implementation.
        item["labels"] = torch.tensor(self.labels[idx], dtype=torch.long)
        return item

    def __len__(self):
        """Number of samples in the dataset."""
        return len(self.labels)


In [None]:
# Convert the split outputs to plain Python lists, which is what the tokenizer
# and NewsDataset consume. list(...) accepts a pandas Series as well as an
# existing list, so re-running this cell is harmless; the previous .tolist()
# calls raised AttributeError on a second run because lists have no .tolist().
X_train = list(X_train)
X_test = list(X_test)
y_train = list(y_train)
y_test = list(y_test)

In [None]:
# Build the training and validation datasets; NewsDataset tokenizes each split
# in its constructor using the default max_length.
train_dataset = NewsDataset(X_train, y_train, tokenizer)
val_dataset = NewsDataset(X_test, y_test, tokenizer)


In [None]:
# Inspect one encoded training sample (feature tensors plus "labels").
train_dataset[1]

In [None]:
# Fine-tuning configuration.
# - Evaluation and checkpointing both run once per epoch. The former
#   eval_steps=500 / save_steps=500 only apply to the "steps" strategies, so
#   they were silently ignored and are dropped to avoid implying a 500-step
#   cadence.
# - load_best_model_at_end falls back to the evaluation loss when no metric is
#   named; the choice is now spelled out (lower is better) so checkpoint
#   selection and early stopping are explicit.
# - fp16 mixed precision needs a CUDA device; enabling it unconditionally makes
#   TrainingArguments fail on a CPU-only runtime, so it follows
#   torch.cuda.is_available().
training_args = TrainingArguments(
    output_dir="./bert_fake_news_results",
    eval_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
    metric_for_best_model="eval_loss",
    greater_is_better=False,
    num_train_epochs=4,

    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    learning_rate=2e-5,
    weight_decay=0.01,
    warmup_ratio=0.1,
    logging_dir="./logs",
    fp16=torch.cuda.is_available(),
    report_to="none",
    logging_steps=100,
)


In [None]:
import numpy as np
from sklearn.metrics import accuracy_score

def compute_metrics(eval_pred):
    """Compute classification accuracy for a Trainer evaluation pass.

    Args:
        eval_pred: A ``(predictions, label_ids)`` pair. ``predictions`` is the
            logits array, or a tuple whose first element is the logits array
            (the form used when the model returns extra outputs besides
            logits).

    Returns:
        dict: ``{"accuracy": <float>}`` — the fraction of samples whose
        arg-max class equals the label.
    """
    logits, labels = eval_pred
    # With additional model outputs the predictions arrive as a tuple; calling
    # argmax on that tuple would fail or mix unrelated arrays, so keep only the
    # logits, which come first.
    if isinstance(logits, tuple):
        logits = logits[0]
    predictions = np.argmax(logits, axis=-1)
    # Cast to a built-in float so the metric serialises cleanly in logs.
    return {"accuracy": float(accuracy_score(labels, predictions))}

In [None]:
# Assemble the Trainer from the model, the training arguments, both datasets
# and the accuracy metric.
# Early stopping counts consecutive evaluations without improvement. Training
# runs for 4 epochs with one evaluation per epoch, so the previous patience of
# 5 could never be reached and the callback was a no-op; a patience of 2 lets
# it actually stop a run that has stopped improving.
trainer = Trainer(
    model=model_bigbird,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    compute_metrics=compute_metrics,
    callbacks=[EarlyStoppingCallback(early_stopping_patience=2)],
)


In [None]:
# Run fine-tuning with the configuration assembled above.
trainer.train()

In [None]:
# Save the fine-tuned model weights and config to a local directory.
# NOTE(review): this directory is "fk_new_dect_model" while the tokenizer goes
# to "fk_news_dect_tokenizer" ("new" vs "news") — confirm which spelling the
# loading code expects before renaming either.
model_bigbird.save_pretrained("fk_new_dect_model")

In [None]:
# Save the tokenizer files to their own directory.
tokenizer.save_pretrained("fk_news_dect_tokenizer")

In [None]:
# Switch the model to evaluation mode before running inference.
model_bigbird.eval()

In [None]:
from transformers import pipeline

In [None]:
text="""WASHINGTON (Reuters) -
The head of a conservative Republican faction in the U.S. Congress, who voted this month for a huge expansion of the national debt
to pay for tax cuts, called himself a “fiscal conservative” on Sunday and urged budget restraint in 2018. In keeping with a sharp
pivot under way among Republicans, U.S. Representative Mark Meadows, speaking on CBS’ “Face the Nation,” drew a hard line on
federal spending, which lawmakers are bracing to do battle over in January. When they return from the holidays on Wednesday,
lawmakers will begin trying to pass a federal budget in a fight likely to be linked to other issues, such as immigration policy,
even as the November congressional election campaigns approach in which Republicans will seek to keep control of Congress.
President Donald Trump and his Republicans want a big budget increase in military spending, while Democrats also want proportional
increases for non-defense “discretionary” spending on programs that support education, scientific research, infrastructure,
public health and environmental protection. “The (Trump) administration has already been willing to say:
‘We’re going to increase non-defense discretionary spending ... by about 7 percent,’” Meadows,
chairman of the small but influential House Freedom Caucus, said on the program.
“Now, Democrats are saying that’s not enough, we need to give the government a pay raise of 10 to 11 percent.
For a fiscal conservative, I don’t see where the rationale is. ... Eventually you run out of other people’s money,” he said.
Meadows was among Republicans who voted in late December for their party’s debt-financed tax overhaul, which is expected to balloon the
federal budget deficit and add about $1.5 trillion over 10 years to the $20 trillion national debt.
“It’s interesting to hear Mark talk about fiscal responsibility,” Democratic U.S. Representative Joseph Crowley said on CBS.
Crowley said the Republican tax bill would require the  United States to borrow $1.5 trillion, to be paid off by future generations,
to finance tax cuts for corporations and the rich. “This is one of the least ... fiscally responsible bills we’ve ever seen passed
in the history of the House of Representatives. I think we’re going to be paying for this for many, many years to come,” Crowley said.
Republicans insist the tax package, the biggest U.S. tax overhaul in more than 30 years,  will boost the economy and job growth.
House Speaker Paul Ryan, who also supported the tax bill, recently went further than Meadows, making clear in a radio interview that
welfare or “entitlement reform,” as the party often calls it, would be a top Republican priority in 2018. In Republican parlance,
“entitlement” programs mean food stamps, housing assistance, Medicare and Medicaid health insurance for the elderly, poor and disabled,
as well as other programs created by Washington to assist the needy. Democrats seized on Ryan’s early December remarks, saying they showed Republicans would try to pay for their tax overhaul by seeking spending cuts for social programs. But the goals of House Republicans may have to take a back seat to the Senate, where the votes of some Democrats will be needed to approve a budget and prevent a government shutdown. Democrats will use their leverage in the Senate, which Republicans narrowly control, to defend both discretionary non-defense programs and social spending, while tackling the issue of the “Dreamers,” people brought illegally to the country as children. Trump in September put a March 2018 expiration date on the Deferred Action for Childhood Arrivals, or DACA, program, which protects the young immigrants from deportation and provides them with work permits. The president has said in recent Twitter messages he wants funding for his proposed Mexican border wall and other immigration law changes in exchange for agreeing to help the Dreamers. Representative Debbie Dingell told CBS she did not favor linking that issue to other policy objectives, such as wall funding. “We need to do DACA clean,” she said.  On Wednesday, Trump aides will meet with congressional leaders to discuss those issues. That will be followed by a weekend of strategy sessions for Trump and Republican leaders on Jan. 6 and 7, the White House said. Trump was also scheduled to meet on Sunday with Florida Republican Governor Rick Scott, who wants more emergency aid. The House has passed an $81 billion aid package after hurricanes in Florida, Texas and Puerto Rico, and wildfires in California. The package far exceeded the $44 billion requested by the Trump administration. The Senate has not yet voted on the aid."""

In [None]:
# Display the device string chosen earlier ("cuda" or "cpu").
device

In [None]:
text1="""donald trump just couldn t wish all americans a happy new year and leave it at that. instead, he had to give a shout out to his enemies, haters and  the very dishonest fake news media.  the former reality show star had just one job to do and he couldn t do it. as our country rapidly grows stronger and smarter, i want to wish all of my friends, supporters, enemies, haters, and even the very dishonest fake news media, a happy and healthy new year,  president angry pants tweeted.  2018 will be a great year for america! as our country rapidly grows stronger and smarter, i want to wish all of my friends, supporters, enemies, haters, and even the very dishonest fake news media, a happy and healthy new year. 2018 will be a great year for america!  donald j. trump (@realdonaldtrump) december 31, 2017trump s tweet went down about as welll as you d expect.what kind of president sends a new year s greeting like this despicable, petty, infantile gibberish? only trump! his lack of decency won t even allow him to rise above the gutter long enough to wish the american citizens a happy new year!  bishop talbert swan (@talbertswan) december 31, 2017no one likes you  calvin (@calvinstowell) december 31, 2017your impeachment would make 2018 a great year for america, but i ll also accept regaining control of congress.  miranda yaver (@mirandayaver) december 31, 2017do you hear yourself talk? when you have to include that many people that hate you you have to wonder? why do the they all hate me?  alan sandoval (@alansandoval13) december 31, 2017who uses the word haters in a new years wish??  marlene (@marlene399) december 31, 2017you can t just say happy new year?  koren pollitt (@korencarpenter) december 31, 2017here s trump s new year s eve tweet from 2016.happy new year to all, including to my many enemies and those who have fought me and lost so badly they just don t know what to do. love!  donald j. trump (@realdonaldtrump) december 31, 2016this is nothing new for trump. 
he s been doing this for years.trump has directed messages to his  enemies  and  haters  for new year s, easter, thanksgiving, and the anniversary of 9/11. pic.twitter.com/4fpae2kypa  daniel dale (@ddale8) december 31, 2017trump s holiday tweets are clearly not presidential.how long did he work at hallmark before becoming president?  steven goodine (@sgoodine) december 31, 2017he s always been like this . . . the only difference is that in the last few years, his filter has been breaking down.  roy schulze (@thbthttt) december 31, 2017who, apart from a teenager uses the term haters?  wendy (@wendywhistles) december 31, 2017he s a fucking 5 year old  who knows (@rainyday80) december 31, 2017so, to all the people who voted for this a hole thinking he would change once he got into power, you were wrong! 70-year-old men don t change and now he s a year older.photo by andrew burton/getty images."""

In [None]:
import torch.nn.functional as F

In [None]:
# Classify a single article (text1) with the fine-tuned model.
#
# The preprocessing must mirror training: every training article was
# lower-cased and tokenized with max_length=512. The earlier version of this
# cell passed the text through unchanged and truncated at 256 tokens, so the
# model saw inputs unlike anything it was trained on (a cased article, or only
# half of the context).
model_bigbird.eval()  # make sure dropout is off even if this cell runs alone

inputs = tokenizer(
    text1.lower(),
    return_tensors="pt",
    truncation=True,
    padding=True,
    max_length=512,
)
# Send the tensors to wherever the model actually lives, so the two cannot end
# up on different devices.
inputs = {k: v.to(model_bigbird.device) for k, v in inputs.items()}

with torch.no_grad():
    outputs = model_bigbird(**inputs)
    logits = outputs.logits
    probs = F.softmax(logits, dim=1)
    predicted_class_id = torch.argmax(probs, dim=1).item()
    confidence = probs[0][predicted_class_id].item()


# Position = class id used during training (0 -> fake, 1 -> true).
labels = ["Fake","true"]
predicted_label = labels[predicted_class_id]

print(f"Prediction: {predicted_label} ({confidence*100:.2f}% confidence)")

In [None]:
# Show the text of the first row whose label is 0 (the id assigned to "fake").
df[df['label']==0].iloc[0]['text']