In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
import re
import string

In [None]:
# Read the two source CSVs from the current working directory.
# Rows from Fake.csv are later labelled class 0 and rows from True.csv class 1.
data_fake = pd.read_csv('Fake.csv')
data_true = pd.read_csv('True.csv')

In [None]:
# Preview the first rows of the frame loaded from Fake.csv.
data_fake.head()

In [None]:
# Preview the first rows of the frame loaded from True.csv.
data_true.head()

In [None]:
# Add the target column: 0 for every row of data_fake, 1 for every row of data_true.
data_fake["class"] = 0
data_true["class"] = 1

In [None]:
# Display (rows, columns) of both frames as a tuple of two shape tuples.
data_fake.shape, data_true.shape

In [None]:
# Number of trailing rows of each dataset held out for manual spot-checks.
MANUAL_TEST_ROWS = 10

# Hold out the last rows of the fake-news frame, then remove them from the
# training data. Slicing by position (instead of hard-coded index labels)
# keeps the hold-out and the removal in sync for any dataset size.
# `.copy()` detaches the hold-out from its parent frame so that later column
# assignments on it do not raise SettingWithCopyWarning.
data_fake_manual_testing = data_fake.tail(MANUAL_TEST_ROWS).copy()
data_fake = data_fake.iloc[:-MANUAL_TEST_ROWS]

# Same for the real-news frame. The hold-out must come from `data_true`:
# taking it from `data_fake` would label fake articles as real and would
# discard the removed real rows without keeping them for testing.
data_true_manual_testing = data_true.tail(MANUAL_TEST_ROWS).copy()
data_true = data_true.iloc[:-MANUAL_TEST_ROWS]



In [None]:
# Label the hold-out frames explicitly: 0 = fake, 1 = true.
# Both frames originate from `.tail()` slices; assigning a column on such a
# slice in place can raise SettingWithCopyWarning, so build new frames with
# `assign` instead ("class" is a Python keyword, hence the dict unpacking).
data_fake_manual_testing = data_fake_manual_testing.assign(**{"class": 0})
data_true_manual_testing = data_true_manual_testing.assign(**{"class": 1})

In [None]:
# Preview the hold-out frame that was labelled class 0.
data_fake_manual_testing.head()

In [None]:
# Preview the hold-out frame that was labelled class 1.
data_true_manual_testing.head()

In [None]:
# Stack the fake-news rows on top of the real-news rows (row-wise concat,
# original row labels are kept), then preview the first ten rows.
data_merge = pd.concat(objs=(data_fake, data_true), axis="index")
data_merge.head(n=10)

In [None]:
# List the column labels of the merged frame.
data_merge.columns

In [None]:
# Keep only the columns used for modelling by discarding the title, subject
# and date columns; `data_merge` itself is left untouched.
data = data_merge.drop(columns=['title', 'subject', 'date'])

In [None]:
# Count missing values per column.
data.isnull().sum()

In [None]:
# Shuffle every row (frac=1) so the stacked fake and true blocks are
# interleaved. A fixed random_state makes the shuffle, and everything trained
# on it, reproducible under Restart & Run All.
data = data.sample(frac=1, random_state=42)
data.head()

In [None]:
# Replace the shuffled row labels with a fresh 0..n-1 index; drop=True
# discards the old labels instead of keeping them as an 'index' column.
data = data.reset_index(drop=True)

In [None]:
# List the column labels that remain in the modelling frame.
data.columns

In [None]:
# Preview the first rows of the re-indexed modelling frame.
data.head()

In [None]:
def wordopt(text):
    """Normalize raw article text before TF-IDF vectorization.

    Steps, in order:
      1. lowercase the text;
      2. remove square-bracketed spans such as "[video]";
      3. remove URLs ("http://...", "https://...", "www....");
      4. remove HTML-like tags ("<...>");
      5. replace every remaining non-word character with a space;
      6. remove leftover punctuation (only "_" survives step 5);
      7. remove every token that contains a digit.

    URL and tag removal must run before step 5: once ":", "/", "." and
    "<", ">" have been turned into spaces those patterns can no longer match.

    Parameters:
    - text (str): Raw text to clean.

    Returns:
    - str: The cleaned text. Whitespace is not collapsed, so runs of spaces
      may remain where characters were removed.
    """
    text = text.lower()
    # Raw strings keep the regex escapes (\[, \S, \w, ...) from being
    # interpreted as invalid Python string escapes.
    text = re.sub(r'\[.*?\]', '', text)
    text = re.sub(r'https?://\S+|www\.\S+', '', text)
    text = re.sub(r'<.*?>+', '', text)
    # Newlines are non-word characters too, so they become spaces here and no
    # separate newline-stripping step is needed.
    text = re.sub(r'\W', ' ', text)
    text = re.sub('[%s]' % re.escape(string.punctuation), '', text)
    text = re.sub(r'\w*\d\w*', '', text)
    return text

In [None]:
# Run the wordopt cleaner over every article, element by element, and store
# the cleaned strings back in the 'text' column.
data['text'] = data['text'].map(wordopt)

In [None]:
# Features are the cleaned article text; the target is the 0/1 class label.
x, y = data['text'], data['class']

In [None]:
# Hold out 25% of the rows for evaluation. A fixed random_state keeps the
# split, and therefore the reported metrics, reproducible across runs.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.25, random_state=42
)

In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer

# Create a TF-IDF vectorizer with default settings.
vectorization = TfidfVectorizer()

# Fit on the training text only and transform it in one step, so nothing
# from the test split influences the fitted vectorizer.
xv_train = vectorization.fit_transform(x_train)

# Transform the test text with the already-fitted vectorizer (no refit).
xv_test = vectorization.transform(x_test)


In [None]:
from sklearn.linear_model import LogisticRegression
# Train a logistic regression classifier with default hyperparameters on the
# TF-IDF features of the training split.
LR = LogisticRegression()
LR.fit(xv_train, y_train)

In [None]:
# Predict class labels (0 = fake, 1 = true) for the held-out test features.
pred_lr = LR.predict(xv_test)

In [None]:
# Score the classifier on the held-out test split (mean accuracy for sklearn classifiers).
LR.score(xv_test, y_test)

In [None]:
# Print the per-class metrics report comparing true test labels with the predictions.
print(classification_report(y_test,pred_lr))

In [None]:
import joblib

# Persist the trained classifier to the working directory.
joblib.dump(LR, 'news_model.pkl')

# Persist the fitted TF-IDF vectorizer alongside it; inference needs the same
# fitted vectorizer that produced the training features.
joblib.dump(vectorization, 'tfidf_vectorizer.pkl')

print("Model and vectorizer saved successfully!")


In [None]:
# Load the persisted classifier and TF-IDF vectorizer.
# NOTE: joblib.load unpickles the file; only load artifacts you created yourself.
model = joblib.load('news_model.pkl')
vectorizer = joblib.load('tfidf_vectorizer.pkl')

# Example usage
user_input = input("Enter a news article: ")
# Clean the input with the same wordopt function that was applied to the
# training text; feeding raw text would give the model tokens (digits,
# URL fragments, bracketed spans) it never saw during training.
vectorized_input = vectorizer.transform([wordopt(user_input)])
prediction = model.predict(vectorized_input)
# Class 1 was assigned to the true-news rows, class 0 to the fake-news rows.
result = "True News" if prediction[0] == 1 else "Fake News"
print(f"The article is classified as: {result}")


In [None]:
import joblib

# Function to classify user input
def classify_news(user_text, LR, vectorization):
    """
    Classify a news article as 'True News' or 'Fake News'.

    The text is cleaned with the notebook's `wordopt` function, converted to
    TF-IDF features with `vectorization`, and passed to `LR.predict`.

    Parameters:
    - user_text (str): The input news article to classify.
    - LR: The trained Logistic Regression model.
    - vectorization: The fitted TF-IDF vectorizer used to transform text.

    Returns:
    - str: "True News" when the predicted class is 1, otherwise "Fake News".
    """
    # Apply the same cleaning that was applied to the training text.
    cleaned_text = wordopt(user_text)

    # Keep the sparse matrix returned by `transform`: the model was trained
    # and evaluated on sparse TF-IDF features, and densifying a
    # vocabulary-sized row with `.toarray()` only costs memory.
    vectorized_text = vectorization.transform([cleaned_text])

    prediction = LR.predict(vectorized_text)

    return "True News" if prediction[0] == 1 else "Fake News"

# Function to load the model and vectorizer
def load_model_and_vectorizer(model_path='news_model.pkl', vectorizer_path='tfidf_vectorizer.pkl'):
    """
    Restore the persisted classifier and TF-IDF vectorizer with joblib.

    Parameters:
    - model_path (str): Path to the saved Logistic Regression model.
    - vectorizer_path (str): Path to the saved TF-IDF vectorizer.

    Returns:
    - tuple: (model, vectorizer) when both files load, or (None, None) when
      either file is missing. A status message is printed in both cases.

    Note: joblib.load unpickles the files, so only load trusted artifacts.
    """
    try:
        # The model is loaded first, then the vectorizer; a missing file in
        # either call lands in the handler below.
        classifier = joblib.load(model_path)
        tfidf = joblib.load(vectorizer_path)
    except FileNotFoundError:
        print("Model or vectorizer file not found. Please check the paths.")
        return None, None
    else:
        print("Model and vectorizer loaded successfully!")
        return classifier, tfidf

# Restore the persisted artifacts; both names are None when a file is missing.
LR, vectorization = load_model_and_vectorizer()

# Prompt for an article only when both artifacts were restored.
if not (LR is None or vectorization is None):
    user_input = input("Enter a news article: ")
    result = classify_news(user_text=user_input, LR=LR, vectorization=vectorization)
    print(f"The article is classified as: {result}")
