<a href="https://colab.research.google.com/github/Abin1401/Abin1401/blob/main/Fake_News_Detection.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import LinearSVC
from sklearn.metrics import accuracy_score
import warnings

# Suppress ConvergenceWarning
warnings.filterwarnings("ignore", category=UserWarning, module='sklearn')

# Load the data from the provided CSV files
true_news = pd.read_csv('/content/Fake.csv')
fake_news = pd.read_csv('/content/True.csv')

# Ensure the column names for text data are correct
assert 'text' in true_news.columns, "Text column missing in True.csv"
assert 'text' in fake_news.columns, "Text column missing in Fake.csv"

# Add a 'label' column to each dataset
true_news['label'] = 'REAL'
fake_news['label'] = 'FAKE'

# Combine the datasets into one
data = pd.concat([true_news, fake_news], ignore_index=True)

# Convert labels to binary (REAL = 0, FAKE = 1)
data['label'] = data['label'].apply(lambda x: 0 if x == "REAL" else 1)

# Split data into features and labels
x, y = data['text'], data['label']

# Split data into training and test sets
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

# Vectorize text data using TF-IDF
vectorizer = TfidfVectorizer(stop_words="english", max_df=0.7)
x_train_vectorized = vectorizer.fit_transform(x_train)
x_test_vectorized = vectorizer.transform(x_test)

# Train a Linear SVM model with explicit `dual` parameter
clf = LinearSVC(max_iter=10000, dual=False)  # Set `dual` explicitly to False
clf.fit(x_train_vectorized, y_train)

# Evaluate the model
y_pred = clf.predict(x_test_vectorized)
accuracy = accuracy_score(y_test, y_pred)
print(f"Model accuracy: {accuracy * 100:.2f}%")

# Predict on the 10th test sample
text_sample = x_test.iloc[9]  # 10th item in zero-indexed
vectorized_text = vectorizer.transform([text_sample])
prediction = clf.predict(vectorized_text)
prediction_label = "FAKE" if prediction[0] == 1 else "REAL"
print(f"Prediction for the 10th text: {prediction_label}")

# Output the actual label of the 10th test sample
actual_label = "FAKE" if y_test.iloc[9] == 1 else "REAL"
print(f"Actual label of the 10th text: {actual_label}")

# Example Numpy Array Output
array = np.array([1], dtype=np.int64)
print(array)


Model accuracy: 99.44%
Prediction for the 10th text: FAKE
Actual label of the 10th text: FAKE
[1]
