In [None]:
import pandas as pd
import nltk
import string
import joblib
import csv
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer, WordNetLemmatizer
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# Download the NLTK corpora that the stop-word list and the lemmatizer rely on.
for _resource in ('stopwords', 'wordnet'):
    nltk.download(_resource)

# Module-level helpers shared by every call to preprocess_text.
stop_words = set(stopwords.words('english'))
stemmer, lemmatizer = PorterStemmer(), WordNetLemmatizer()

# Text normalisation applied to every article before vectorisation.
def preprocess_text(text):
    """Normalise raw article text into a space-separated string of tokens.

    Steps, in order: lowercase, strip ASCII punctuation, split on whitespace,
    drop English stop words, then stem and lemmatize each remaining token.

    Args:
        text: The raw text. Any non-string value (NaN, numbers, None) is
            treated as empty text.

    Returns:
        The processed tokens joined by single spaces; "" when nothing is left.
    """
    # Non-string input is equivalent to an empty document.
    if not isinstance(text, str):
        return ""

    punctuation_free = text.lower().translate(
        str.maketrans('', '', string.punctuation)
    )

    kept = []
    for token in punctuation_free.split():
        if token in stop_words:
            continue
        # Stem first, then lemmatize the resulting stem.
        kept.append(lemmatizer.lemmatize(stemmer.stem(token)))
    return " ".join(kept)

# Load both corpora with pandas' default quote handling.
# The previous quoting=csv.QUOTE_NONE made every comma inside a quoted article
# start a new column; such rows were then silently dropped by
# on_bad_lines='skip', leaving little usable text to train on.
# NOTE(review): assumes True.csv / Fake.csv quote fields that contain commas
# or newlines, as standard CSV writers do — confirm against the actual files.
true_df = pd.read_csv('True.csv', encoding='utf-8', on_bad_lines='skip')
fake_df = pd.read_csv('Fake.csv', encoding='utf-8', on_bad_lines='skip')

# Labels: 0 = real news, 1 = fake news (predict_news relies on this mapping).
true_df['label'] = 0
fake_df['label'] = 1

# Combine both datasets into a single frame with a fresh index.
df = pd.concat([true_df, fake_df], ignore_index=True)

# Replace missing article bodies with empty strings and force a string dtype.
df['text'] = df['text'].fillna('').astype(str)

# Apply the same normalisation that is used at prediction time.
df['text'] = df['text'].apply(preprocess_text)

# Hold out 20% of the rows for evaluation; the seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    df['text'], df['label'], test_size=0.2, random_state=42
)

# Convert text to numeric TF-IDF features. The vectorizer is fitted on the
# training split only, so the test split does not influence the vocabulary.
vectorizer = TfidfVectorizer()
X_train_tfidf = vectorizer.fit_transform(X_train)
X_test_tfidf = vectorizer.transform(X_test)

# Random forest classifier, seeded so the reported accuracy is reproducible.
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train_tfidf, y_train)

accuracy = accuracy_score(y_test, model.predict(X_test_tfidf))
print(f'Model Accuracy: {accuracy}')

# Persist the fitted model and vectorizer so they can be reused later.
joblib.dump(model, 'model.pkl')
joblib.dump(vectorizer, 'vectorizer.pkl')

# Reload the saved artifacts; the prediction code below uses these objects.
# joblib files are pickle-based: only load files from a trusted source.
model = joblib.load('model.pkl')
vectorizer = joblib.load('vectorizer.pkl')

# Classify a single article with the loaded model and vectorizer.
def predict_news(news_text):
    """Classify one news article as real or fake.

    The text goes through preprocess_text, is turned into features by the
    module-level vectorizer and is then scored by the module-level model.

    Args:
        news_text: The raw article text.

    Returns:
        "Fake News" when the predicted label equals 1, otherwise "Real News".
    """
    cleaned = preprocess_text(news_text)
    # The vectorizer expects an iterable of documents, hence the one-item list.
    features = vectorizer.transform([cleaned])
    label = model.predict(features)[0]

    if label == 1:
        return "Fake News"
    return "Real News"

# Prompt for one article on standard input and print the predicted class.
news_article = input("Enter the news article: ")
result = predict_news(news_article)
print(f"\nPrediction: {result}")


[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


Model Accuracy: 0.5125233146815881
Enter the news article: Pope Francis used his annual Christmas Day message to rebuke Donald Trump without even mentioning his name. The Pope delivered his message just days after members of the United Nations condemned Trump s move to recognize Jerusalem as the capital of Israel. The Pontiff prayed on Monday for the  peaceful coexistence of two states within mutually agreed and internationally recognized borders. We see Jesus in the children of the Middle East who continue to suffer because of growing tensions between Israelis and Palestinians,  Francis said.  On this festive day, let us ask the Lord for peace for Jerusalem and for all the Holy Land. Let us pray that the will to resume dialogue may prevail between the parties and that a negotiated solution can finally be reached. The Pope went on to plead for acceptance of refugees who have been forced from their homes, and that is an issue Trump continues to fight against. Francis used Jesus for whic