# Fake News Detection

## Importing the necessary libraries

In [None]:
import numpy as np
import pandas as pd
import itertools
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import PassiveAggressiveClassifier
from sklearn.metrics import accuracy_score, confusion_matrix

## Load the dataset

In [None]:
# Read the news dataset from a CSV file given by a path relative to the
# current working directory.
dataset_path = 'news.csv'
df = pd.read_csv(dataset_path)

## Data exploration

In [None]:
# Report the dataset dimensions as (rows, columns). A notebook cell only
# renders its final expression, so a bare `df.shape` placed before
# `df.head()` would be evaluated and silently discarded; print it instead.
print(df.shape)
# Preview the first rows; as the last expression of the cell this is rendered.
df.head()

## Handle NaN values

In [None]:
# Fill missing values in the 'text' column with an empty string; this column
# is what gets split and vectorized further down.
text_column = df['text']
df['text'] = text_column.fillna('')

## Get the labels

In [None]:
# Select the target column. Bracket indexing matches how the 'text' column is
# accessed elsewhere in this notebook and, unlike attribute access, cannot be
# shadowed by a DataFrame attribute or method of the same name.
labels = df['label']
# Preview the first few labels (rendered as the cell's last expression).
labels.head()

## Split the dataset into training and testing sets

In [None]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split
# identical from run to run.
article_text = df['text']
x_train, x_test, y_train, y_test = train_test_split(
    article_text,
    labels,
    test_size=0.2,
    random_state=7,
)

## Initialize a TfidfVectorizer and vectorize the training and test text

In [None]:
# Build TF-IDF features: English stop words are removed, and max_df=0.7 drops
# terms that occur in more than 70% of the documents (per scikit-learn's
# max_df semantics for float values).
tfidf_vectorizer = TfidfVectorizer(
    stop_words='english',
    max_df=0.7,
)

# Fit the vectorizer on the training text only, then reuse that fitted
# vectorizer to transform the test text.
tfidf_train = tfidf_vectorizer.fit_transform(x_train)
tfidf_test = tfidf_vectorizer.transform(x_test)

## Initialize and train a PassiveAggressiveClassifier

In [None]:
# Train a passive-aggressive linear classifier on the TF-IDF features.
# The estimator shuffles the training data between epochs, so without a seed
# the reported accuracy drifts from run to run; fixing random_state (the same
# seed used for the train/test split) makes the result reproducible.
pac = PassiveAggressiveClassifier(max_iter=50, random_state=7)
pac.fit(tfidf_train, y_train)

## Predict on the test set and calculate accuracy

In [None]:
# Predict labels for the held-out TF-IDF features.
y_pred = pac.predict(tfidf_test)

# Fraction of test samples whose predicted label equals the true label.
score = accuracy_score(y_true=y_test, y_pred=y_pred)

# Report the accuracy as a percentage rounded to two decimal places.
print(f'Accuracy: {round(score*100, 2)}%')

## Build a confusion matrix

In [None]:
# Tabulate true vs. predicted labels, with both axes ordered FAKE then REAL.
# This stays the cell's last expression so the notebook renders the matrix.
class_order = ['FAKE', 'REAL']
confusion_matrix(y_test, y_pred, labels=class_order)