In [1]:
#Import necessary libraries:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import PassiveAggressiveClassifier
from sklearn.metrics import accuracy_score, confusion_matrix

In [2]:
# Load the news dataset from CSV into a DataFrame.
# NOTE(review): this is an absolute, machine-specific Windows path; the
# notebook will only run where D:\news.csv exists.
df = pd.read_csv(filepath_or_buffer="D:\\news.csv")

In [3]:
# Show the dataset dimensions and preview the first rows.
# A notebook cell only auto-displays its LAST expression, so a bare
# `df.shape` in the middle of the cell is silently discarded; print it
# explicitly and let `df.head()` render as the cell's rich output.
print(df.shape)
df.head()

Unnamed: 0.1,Unnamed: 0,title,text,label
0,8476,Chandrayan 3 landing on moon's south pole,August,REAL
1,10294,Recession is likely in 2024 and last about 10 ...,Forbes,REAL
2,3608,Indian Cricket team won two times in World Cup,India,REAL
3,10142,Karolina Bielawskalost as miss world in 2022,Europe,FAKE
4,875,The Battle of New York: Why This Primary Matters,It's primary day in New York and front-runners...,REAL


In [4]:
# Pull the target column ('label') out of the frame and preview it
labels = df['label']
labels.head()

0    REAL
1    REAL
2    REAL
3    FAKE
4    REAL
Name: label, dtype: object

In [5]:
# Hold out 20% of the rows for testing; the 'text' column is the input and
# `labels` is the target. The fixed random_state makes the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    df['text'],
    labels,
    test_size=0.2,
    random_state=7,
)

In [6]:
# Missing texts would break vectorization, so swap NaN for empty strings
# in both the training and the test inputs.
x_train = x_train.fillna(value='')
x_test = x_test.fillna(value='')

# TF-IDF vectorizer: drop English stop words and ignore terms that occur
# in more than 70% of the documents.
tfidf_vectorizer = TfidfVectorizer(max_df=0.7, stop_words='english')

In [7]:
# Learn the vocabulary and IDF weights from the training texts only, then
# reuse that fitted vectorizer to encode the test texts.
tfidf_train = tfidf_vectorizer.fit_transform(raw_documents=x_train)
tfidf_test = tfidf_vectorizer.transform(raw_documents=x_test)

In [8]:
# Initialize a PassiveAggressiveClassifier and fit it to the TF-IDF training
# matrix.
# The classifier shuffles the training samples between epochs by default, so
# without a seed every run can yield a different model (and different
# accuracy / confusion-matrix numbers). Pin random_state (same seed as the
# train/test split) so Restart Kernel -> Run All reproduces the results.
pac = PassiveAggressiveClassifier(max_iter=50, random_state=7)
pac.fit(tfidf_train, y_train)

In [9]:
# Run the fitted classifier on the held-out TF-IDF matrix
y_pred = pac.predict(tfidf_test)

# Fraction of test labels predicted correctly, reported as a percentage
score = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f'Accuracy: {round(score*100, 2)}%')

Accuracy: 88.0%


Accuracy: 88.0%


In [11]:
# Confusion matrix of true vs. predicted labels; `labels` fixes the
# row/column order to FAKE first, REAL second.
confusion_matrix_result = confusion_matrix(
    y_true=y_test,
    y_pred=y_pred,
    labels=['FAKE', 'REAL'],
)
# Heading and matrix on separate lines
print("Confusion Matrix:", confusion_matrix_result, sep="\n")

Confusion Matrix:
[[17  3]
 [ 3 27]]
