In [11]:
import numpy as np
import pandas as pd
import re
import string

from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report

In [12]:
#Initialize the datasets
fake_data = pd.read_csv("Fake.csv")
real_data = pd.read_csv("True.csv")

#Create labels for the datasets
fake_data['label'] = "FAKE"
real_data['label'] = "REAL"

#combine the two datasets
data = pd.concat([fake_data,real_data]).reset_index(drop=True)

#Shuffling to randomize the data to avoid biased results
data = data.sample(frac = 1)

#Preprocess data to clean it of unnecessary characters such as punctuations,urls,etc that may confuse the model during training
def preprocess(text):
    text = text.lower() #converts all characters to lowercase for uniformity
    text = re.sub('\[.*?\]','',text) #remove non-alphabetics
    text = re.sub("\\W"," ",text)
    text = re.sub('https?://S+/www\.\S+','',text) #remove urls
    text = re.sub('[%s]' % re.escape(string.punctuation),'',text) #remove punctuations
    text = re.sub('<.*?>+','',text) #remove HTML tags
    text = re.sub('\w*\d\w*','',text) #remove words with digits
    text = re.sub('\n','',text) #remove new lines
    
    return text

#Apply the preprocessing
data['text'] = data['text'].apply(preprocess)

x,y = data['text'], data['label']

#split the dataset for training and testing
x_train,x_test,y_train,y_test = train_test_split(x,y, test_size = 0.2)

#Initialize the vectorizer
vectorizer = TfidfVectorizer(stop_words = "english", max_df = 0.7)

#vectorize the split sets
x_train_vectorized = vectorizer.fit_transform(x_train)
x_test_vectorized = vectorizer.transform(x_test)


In [13]:
#The LogisticRegression classifier
from sklearn.linear_model import LogisticRegression 
LR = LogisticRegression()
LR.fit(x_train_vectorized, y_train)
LR.score(x_test_vectorized, y_test)

0.984521158129176

In [14]:
#Define a function that will produce the predictions given a text input
def Detect(news):
    news_txt = {'text':[news]} #create a dictionary to prepare input for format conversion to a dataframe
    news_data = pd.DataFrame(news_txt) 
    news_data['text'] = news_data['text'].apply(preprocess)
    new_x_test = news_data['text']
    new_x_test_vectorized = vectorizer.transform(new_x_test)
    
    #classifier prediction
    predict_lr = LR.predict(new_x_test_vectorized)
    
    result = "\n\nLR Prediction: {}".format(
           predict_lr[0]
    )
    
    return result

News = str(input())
print(Detect(News))

 Sheriff David Clarke Becomes An Internet Joke For Threatening To Poke People â€˜In The Eyeâ€™,"On Friday, it was revealed that former Milwaukee Sheriff David Clarke, who was being considered for Homeland Security Secretary in Donald Trump s administration, has an email scandal of his own.In January, there was a brief run-in on a plane between Clarke and fellow passenger Dan Black, who he later had detained by the police for no reason whatsoever, except that maybe his feelings were hurt. Clarke messaged the police to stop Black after he deplaned, and now, a search warrant has been executed by the FBI to see the exchanges.Clarke is calling it fake news even though copies of the search warrant are on the Internet. I am UNINTIMIDATED by lib media attempts to smear and discredit me with their FAKE NEWS reports designed to silence me,  the former sheriff tweeted.  I will continue to poke them in the eye with a sharp stick and bitch slap these scum bags til they get it. I have been attacked 

In [15]:
News = str(input())
print(Detect(News))

On Monday, a senior government official sparked outrage after making controversial remarks during a public event. The official, who remains unnamed, reportedly made disparaging comments about minority communities, prompting widespread condemnation from civil rights groups and political opponents.  According to witnesses at the event, the official claimed that certain minority groups were responsible for societal unrest and economic downturns. These remarks were met with immediate backlash on social media, with many users calling for the official's resignation and condemning the divisive rhetoric.  In response to the criticism, the official doubled down on their statements, stating that they would not be intimidated by media attempts to smear their reputation with false accusations. They vowed to continue advocating for policies they believe will strengthen national security and restore economic prosperity.  Despite the controversy, supporters of the official rallied behind them, citing