# Fake News Detection

## Libraries

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import spacy
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import LinearSVC
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix

## Loading Data

In [None]:
# Read the dataset from a CSV file in the working directory and preview the first rows.
df = pd.read_csv("fake_or_real_news.csv")
df.head()

In [None]:
# Count how many rows carry each distinct value of the "label" column.
df["label"].value_counts()

The data is balanced: the two classes (`FAKE` and `REAL`) occur in roughly equal numbers.

---

## Data Preprocessing

### Drop the id column

In [None]:
# Remove the "id" column from the frame in place.
df.drop(columns=["id"], inplace=True)

### Join the title and the text columns into one column

In [None]:
# Merge headline and body into a single text field. Missing values are
# replaced with empty strings first, so that one absent title or body does
# not turn the whole concatenation into NaN (which would break the later
# text-processing step that expects a string).
df["text"] = df["title"].fillna("") + " " + df["text"].fillna("")

### Drop title column

In [None]:
# Remove the "title" column from the frame in place.
df.drop(columns=["title"], inplace=True)

### Convert labels to binary

In [None]:
# Encode the target as integers: FAKE -> 1, REAL -> 0.
_label_to_int = {"FAKE": 1, "REAL": 0}
_encoded = df["label"].map(_label_to_int)
# Series.map yields NaN for any value missing from the mapping; fail loudly
# here instead of silently handing NaN labels to the classifier later on.
if _encoded.isna().any():
    _unexpected = sorted(df.loc[_encoded.isna(), "label"].astype(str).unique())
    raise ValueError(f"Unexpected label values: {_unexpected}")
df["is_fake"] = _encoded

### Text preprocessing

In [None]:
# Notebook shell command: download the spaCy model "en_core_web_lg",
# which is loaded by name in the following cell.
!spacy download en_core_web_lg

In [None]:
# Load the English pipeline once and reuse it for every document.
# The dependency parser and named-entity recognizer are disabled because the
# preprocessing only reads lemmas and the stop-word / punctuation / whitespace
# flags; skipping those components speeds up processing without affecting
# the attributes that are actually used.
nlp = spacy.load("en_core_web_lg", disable=["parser", "ner"])

def preprocess(text):
    """Return *text* as a space-separated string of lowercased lemmas.

    The text is run through the module-level spaCy pipeline ``nlp``; tokens
    flagged as stop words, punctuation or whitespace are discarded, and the
    lemma of every remaining token is lowercased.
    """
    lemmas = (
        tok.lemma_.lower()
        for tok in nlp(text)
        if not (tok.is_stop or tok.is_punct or tok.is_space)
    )
    return ' '.join(lemmas)

In [None]:
# Replace every document in the "text" column with its preprocessed form.
df["text"] = df["text"].map(preprocess)

In [None]:
# Preview the first rows of the frame after preprocessing.
df.head()

---

## Splitting Data

In [None]:
feature = df["text"]
label = df["is_fake"]
# Hold out 20% of the rows for testing. The seed is fixed so the split (and
# therefore the reported metrics) is reproducible across runs, and the split
# is stratified so both sets keep the same proportion of each class.
X_train, X_test, y_train, y_test = train_test_split(
    feature, label, test_size=0.2, random_state=42, stratify=label
)

## Feature Engineering

In [None]:
# Learn the vocabulary and IDF weights from the training documents only,
# then project both splits into that same TF-IDF feature space.
vectorizer = TfidfVectorizer()
vectorizer.fit(X_train)
X_train, X_test = vectorizer.transform(X_train), vectorizer.transform(X_test)

## Model Training

In [None]:
# Train a linear support-vector classifier on the TF-IDF features.
# The seed is pinned so that repeated runs produce the same fitted model.
model = LinearSVC(random_state=42)
model.fit(X_train, y_train)

---

## Model Evaluation

### Classification Report

In [None]:
# Predict labels for the held-out set and print the classification report
# comparing them with the true labels.
y_pred = model.predict(X_test)
print(classification_report(y_true=y_test, y_pred=y_pred))

### Confusion Matrix

In [None]:
# Print the confusion matrix of true versus predicted labels on the test set.
print(confusion_matrix(y_true=y_test, y_pred=y_pred))

---