In [2]:
import pandas as pd
import pickle
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import PassiveAggressiveClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix

# Load the datasets
true_news = pd.read_csv(r'C:\Users\Meghna Ghosh\Downloads\True.csv')
fake_news = pd.read_csv(r'C:\Users\Meghna Ghosh\Downloads\Fake.csv')

# Add a label column to each dataset
true_news['label'] = 1  # Real news label
fake_news['label'] = 0  # Fake news label

# Combine the datasets
news_dataset = pd.concat([true_news, fake_news])

# Drop unnecessary columns if present
news_dataset = news_dataset[['text', 'label']]

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(news_dataset['text'], news_dataset['label'], test_size=0.2, random_state=42)

# Initialize the TfidfVectorizer and fit_transform on the training data
vectorizer = TfidfVectorizer(stop_words='english', max_df=0.7)
X_train_tfidf = vectorizer.fit_transform(X_train)

# Initialize and train the PassiveAggressiveClassifier
model = PassiveAggressiveClassifier(max_iter=50)
model.fit(X_train_tfidf, y_train)

# Transform the test data and evaluate the model
X_test_tfidf = vectorizer.transform(X_test)
y_pred = model.predict(X_test_tfidf)

# Print the accuracy and confusion matrix
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy * 100:.2f}%')
print('Confusion Matrix:')
print(confusion_matrix(y_test, y_pred))

# Define the prediction function
def predict_fake_news(input_text):
    input_data = vectorizer.transform([input_text])  # Use vectorizer instead of vector
    prediction = model.predict(input_data)
    return prediction[0]

# Get user input
input_text = str(input("Enter the news headline or article content to test: "))

if input_text:
    pred = predict_fake_news(input_text)
    if pred == 0:
        print('The News is Fake')
    else:
        print('The News Is Real')

Accuracy: 99.44%
Confusion Matrix:
[[4621   29]
 [  21 4309]]
Enter the news headline or article content to test:  Trump Said Some INSANELY Racist Stuff Inside The Oval Office, And Witnesses Back It Up
The News is Fake


In [3]:
import pandas as pd
import pickle
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import PassiveAggressiveClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix

# Load the datasets
true_news = pd.read_csv(r'C:\Users\Meghna Ghosh\Downloads\True.csv')
fake_news = pd.read_csv(r'C:\Users\Meghna Ghosh\Downloads\Fake.csv')

# Add a label column to each dataset
true_news['label'] = 1  # Real news label
fake_news['label'] = 0  # Fake news label

# Combine the datasets
news_dataset = pd.concat([true_news, fake_news])

# Drop unnecessary columns if present
news_dataset = news_dataset[['text', 'label']]

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(news_dataset['text'], news_dataset['label'], test_size=0.2, random_state=42)

# Initialize the TfidfVectorizer and fit_transform on the training data
vectorizer = TfidfVectorizer(stop_words='english', max_df=0.7)
X_train_tfidf = vectorizer.fit_transform(X_train)

# Initialize and train the PassiveAggressiveClassifier
model = PassiveAggressiveClassifier(max_iter=50)
model.fit(X_train_tfidf, y_train)

# Transform the test data and evaluate the model
X_test_tfidf = vectorizer.transform(X_test)
y_pred = model.predict(X_test_tfidf)

# Print the accuracy and confusion matrix
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy * 100:.2f}%')
print('Confusion Matrix:')
print(confusion_matrix(y_test, y_pred))

# Define the prediction function
def predict_fake_news(input_text):
    input_data = vectorizer.transform([input_text])  # Use vectorizer instead of vector
    prediction = model.predict(input_data)
    return prediction[0]

# Get user input
input_text = str(input("Enter the news headline or article content to test: "))

if input_text:
    pred = predict_fake_news(input_text)
    if pred == 0:
        print('The News is Fake')
    else:
        print('The News Is Real')

Accuracy: 99.43%
Confusion Matrix:
[[4619   31]
 [  20 4310]]
Enter the news headline or article content to test: New York governor questions the constitutionality of federal tax overhaul
The News Is Real


In [5]:
import joblib

joblib.dump(model, 'model.joblib')
joblib.dump(tfidf_vectorizer, 'vectorizer.joblib')

NameError: name 'tfidf_vectorizer' is not defined

In [6]:
import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import accuracy_score, confusion_matrix
import joblib

# Load the datasets
true_news = pd.read_csv(r'C:\Users\Meghna Ghosh\Downloads\True.csv')
fake_news = pd.read_csv(r'C:\Users\Meghna Ghosh\Downloads\Fake.csv')

# Add a label column to each dataset
true_news['label'] = 1  # Real news label
fake_news['label'] = 0  # Fake news label

# Combine the datasets
news_dataset = pd.concat([true_news, fake_news]).reset_index(drop=True)

# Preprocess the dataset
X = news_dataset['text']
y = news_dataset['label']

# Split the data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Vectorize the text data
tfidf_vectorizer = TfidfVectorizer(stop_words='english', max_df=0.7)
X_train_vec = tfidf_vectorizer.fit_transform(X_train)
X_test_vec = tfidf_vectorizer.transform(X_test)

# Define the parameter grid for hyperparameter tuning
param_grid = {'C': np.logspace(-4, 4, 50), 'penalty': ['l1', 'l2']}

# Initialize GridSearchCV with Logistic Regression
clf = GridSearchCV(LogisticRegression(random_state=0, solver='liblinear'), param_grid, cv=5, verbose=0, n_jobs=-1)

# Fit the model
best_model = clf.fit(X_train_vec, y_train)

# Predict on the test set
y_pred = best_model.predict(X_test_vec)

# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)

# Confusion Matrix
conf_matrix = confusion_matrix(y_test, y_pred)

# Print results
print(f"Accuracy: {accuracy * 100:.2f}%")
print("Confusion Matrix:")
print(conf_matrix)

# Save the model and vectorizer
joblib.dump(best_model, 'model.joblib')
joblib.dump(tfidf_vectorizer, 'vectorizer.joblib')

# Define the prediction function
def prediction(input_text):
    input_data = tfidf_vectorizer.transform([input_text])
    prediction = best_model.predict(input_data)
    return prediction[0]

# Example usage
input_text = str(input("Enter the news headline or article content to test: "))
if input_text:
    pred = prediction(input_text)
    if pred == 0:
        print('The News is Fake')
    else:
        print('The News is Real')


Accuracy: 99.53%
Confusion Matrix:
[[6966   30]
 [  33 6441]]
Enter the news headline or article content to test: New York governor questions the constitutionality of federal tax overhaul
The News is Real
