# Imports

In [1]:
import pandas as pd
import numpy as np
import nltk
import re
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from nltk.stem.porter import PorterStemmer
from nltk.tokenize import word_tokenize,sent_tokenize
import category_encoders as ce
from sklearn.model_selection import train_test_split

In [2]:
def remove_label(df, label):
    x = df.drop(label, axis=1)
    y = df[label].copy()
    return (x,y)



In [4]:
nltk.download('stopwords','wordnet')

[nltk_data] Downloading package stopwords to wordnet...
[nltk_data]   Unzipping corpora\stopwords.zip.


True

In [6]:
true = pd.read_csv('True.csv')
false = pd.read_csv('Fake.csv')
sarc = pd.read_json('Sarcasm_Headlines_Dataset_v2.json', lines=True)
real = pd.read_json('News_Category_Dataset_v3.json', lines=True)

# Data Preprocessing

In [8]:
true.head()

Unnamed: 0,title,text,subject,date
0,"As U.S. budget fight looms, Republicans flip t...",WASHINGTON (Reuters) - The head of a conservat...,politicsNews,"December 31, 2017"
1,U.S. military to accept transgender recruits o...,WASHINGTON (Reuters) - Transgender people will...,politicsNews,"December 29, 2017"
2,Senior U.S. Republican senator: 'Let Mr. Muell...,WASHINGTON (Reuters) - The special counsel inv...,politicsNews,"December 31, 2017"
3,FBI Russia probe helped by Australian diplomat...,WASHINGTON (Reuters) - Trump campaign adviser ...,politicsNews,"December 30, 2017"
4,Trump wants Postal Service to charge 'much mor...,SEATTLE/WASHINGTON (Reuters) - President Donal...,politicsNews,"December 29, 2017"


In [9]:
false.head()

Unnamed: 0,title,text,subject,date
0,Donald Trump Sends Out Embarrassing New Year’...,Donald Trump just couldn t wish all Americans ...,News,"December 31, 2017"
1,Drunk Bragging Trump Staffer Started Russian ...,House Intelligence Committee Chairman Devin Nu...,News,"December 31, 2017"
2,Sheriff David Clarke Becomes An Internet Joke...,"On Friday, it was revealed that former Milwauk...",News,"December 30, 2017"
3,Trump Is So Obsessed He Even Has Obama’s Name...,"On Christmas day, Donald Trump announced that ...",News,"December 29, 2017"
4,Pope Francis Just Called Out Donald Trump Dur...,Pope Francis used his annual Christmas Day mes...,News,"December 25, 2017"


In [10]:
sarc.head()

Unnamed: 0,is_sarcastic,headline,article_link
0,1,thirtysomething scientists unveil doomsday clo...,https://www.theonion.com/thirtysomething-scien...
1,0,dem rep. totally nails why congress is falling...,https://www.huffingtonpost.com/entry/donna-edw...
2,0,eat your veggies: 9 deliciously different recipes,https://www.huffingtonpost.com/entry/eat-your-...
3,1,inclement weather prevents liar from getting t...,https://local.theonion.com/inclement-weather-p...
4,1,mother comes pretty close to using word 'strea...,https://www.theonion.com/mother-comes-pretty-c...


In [11]:
real.head()

Unnamed: 0,link,headline,category,short_description,authors,date
0,https://www.huffpost.com/entry/covid-boosters-...,Over 4 Million Americans Roll Up Sleeves For O...,U.S. NEWS,Health experts said it is too early to predict...,"Carla K. Johnson, AP",2022-09-23
1,https://www.huffpost.com/entry/american-airlin...,"American Airlines Flyer Charged, Banned For Li...",U.S. NEWS,He was subdued by passengers and crew when he ...,Mary Papenfuss,2022-09-23
2,https://www.huffpost.com/entry/funniest-tweets...,23 Of The Funniest Tweets About Cats And Dogs ...,COMEDY,"""Until you have a dog you don't understand wha...",Elyse Wanshel,2022-09-23
3,https://www.huffpost.com/entry/funniest-parent...,The Funniest Tweets From Parents This Week (Se...,PARENTING,"""Accidentally put grown-up toothpaste on my to...",Caroline Bologna,2022-09-23
4,https://www.huffpost.com/entry/amy-cooper-lose...,Woman Who Called Cops On Black Bird-Watcher Lo...,U.S. NEWS,Amy Cooper accused investment firm Franklin Te...,Nina Golgowski,2022-09-22


In [6]:
true['is_true'] = 1

In [7]:
true['is_true'].head()

0    1
1    1
2    1
3    1
4    1
Name: is_true, dtype: int64

In [8]:
false['is_true'] = 0

In [9]:
news = pd.concat([true,false], ignore_index= 'True')

In [10]:
news.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 44898 entries, 0 to 44897
Data columns (total 5 columns):
 #   Column   Non-Null Count  Dtype 
---  ------   --------------  ----- 
 0   title    44898 non-null  object
 1   text     44898 non-null  object
 2   subject  44898 non-null  object
 3   date     44898 non-null  object
 4   is_true  44898 non-null  int64 
dtypes: int64(1), object(4)
memory usage: 1.7+ MB


In [11]:
news.head()

Unnamed: 0,title,text,subject,date,is_true
0,"As U.S. budget fight looms, Republicans flip t...",WASHINGTON (Reuters) - The head of a conservat...,politicsNews,"December 31, 2017",1
1,U.S. military to accept transgender recruits o...,WASHINGTON (Reuters) - Transgender people will...,politicsNews,"December 29, 2017",1
2,Senior U.S. Republican senator: 'Let Mr. Muell...,WASHINGTON (Reuters) - The special counsel inv...,politicsNews,"December 31, 2017",1
3,FBI Russia probe helped by Australian diplomat...,WASHINGTON (Reuters) - Trump campaign adviser ...,politicsNews,"December 30, 2017",1
4,Trump wants Postal Service to charge 'much mor...,SEATTLE/WASHINGTON (Reuters) - President Donal...,politicsNews,"December 29, 2017",1


In [35]:
news.drop('date', axis = 1)

Unnamed: 0,title,text,subject,is_true
0,"As U.S. budget fight looms, Republicans flip t...",WASHINGTON (Reuters) - The head of a conservat...,politicsNews,1
1,U.S. military to accept transgender recruits o...,WASHINGTON (Reuters) - Transgender people will...,politicsNews,1
2,Senior U.S. Republican senator: 'Let Mr. Muell...,WASHINGTON (Reuters) - The special counsel inv...,politicsNews,1
3,FBI Russia probe helped by Australian diplomat...,WASHINGTON (Reuters) - Trump campaign adviser ...,politicsNews,1
4,Trump wants Postal Service to charge 'much mor...,SEATTLE/WASHINGTON (Reuters) - President Donal...,politicsNews,1
...,...,...,...,...
44893,McPain: John McCain Furious That Iran Treated ...,21st Century Wire says As 21WIRE reported earl...,Middle-east,0
44894,JUSTICE? Yahoo Settles E-mail Privacy Class-ac...,21st Century Wire says It s a familiar theme. ...,Middle-east,0
44895,Sunnistan: US and Allied ‘Safe Zone’ Plan to T...,Patrick Henningsen 21st Century WireRemember ...,Middle-east,0
44896,How to Blow $700 Million: Al Jazeera America F...,21st Century Wire says Al Jazeera America will...,Middle-east,0


In [36]:
news['subject'].value_counts()

subject
politicsNews       11272
worldnews          10145
News                9050
politics            6841
left-news           4459
Government News     1570
US_News              783
Middle-east          778
Name: count, dtype: int64

In [40]:
news['is_true'].value_counts()

is_true
0    23481
1    21417
Name: count, dtype: int64

In [12]:
news['combined'] = news['title'] + ' ' + news['text']
news['combined'].head()

0    As U.S. budget fight looms, Republicans flip t...
1    U.S. military to accept transgender recruits o...
2    Senior U.S. Republican senator: 'Let Mr. Muell...
3    FBI Russia probe helped by Australian diplomat...
4    Trump wants Postal Service to charge 'much mor...
Name: combined, dtype: object

In [13]:
print(news['combined'][0])

As U.S. budget fight looms, Republicans flip their fiscal script WASHINGTON (Reuters) - The head of a conservative Republican faction in the U.S. Congress, who voted this month for a huge expansion of the national debt to pay for tax cuts, called himself a “fiscal conservative” on Sunday and urged budget restraint in 2018. In keeping with a sharp pivot under way among Republicans, U.S. Representative Mark Meadows, speaking on CBS’ “Face the Nation,” drew a hard line on federal spending, which lawmakers are bracing to do battle over in January. When they return from the holidays on Wednesday, lawmakers will begin trying to pass a federal budget in a fight likely to be linked to other issues, such as immigration policy, even as the November congressional election campaigns approach in which Republicans will seek to keep control of Congress. President Donald Trump and his Republicans want a big budget increase in military spending, while Democrats also want proportional increases for non-

# Reloading Processed Data

In [4]:
news = pd.read_csv('combined news.csv', index_col='id')
news.head()

Unnamed: 0_level_0,title,text,subject,date,is_true,combined,lemmed,stemmed
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
0,"As U.S. budget fight looms, Republicans flip t...",WASHINGTON (Reuters) - The head of a conservat...,politicsNews,"December 31, 2017",1,"As U.S. budget fight looms, Republicans flip t...",usa budget fight loom republican flip fiscal s...,usa budget fight loom republican flip fiscal s...
1,U.S. military to accept transgender recruits o...,WASHINGTON (Reuters) - Transgender people will...,politicsNews,"December 29, 2017",1,U.S. military to accept transgender recruits o...,usa military accept transgender recruit monday...,usa militari accept transgend recruit monday p...
2,Senior U.S. Republican senator: 'Let Mr. Muell...,WASHINGTON (Reuters) - The special counsel inv...,politicsNews,"December 31, 2017",1,Senior U.S. Republican senator: 'Let Mr. Muell...,senior usa republican senator let mr mueller j...,senior usa republican senat let mr mueller job...
3,FBI Russia probe helped by Australian diplomat...,WASHINGTON (Reuters) - Trump campaign adviser ...,politicsNews,"December 30, 2017",1,FBI Russia probe helped by Australian diplomat...,fbi russia probe helped australian diplomat ti...,fbi russia probe help australian diplomat tip ...
4,Trump wants Postal Service to charge 'much mor...,SEATTLE/WASHINGTON (Reuters) - President Donal...,politicsNews,"December 29, 2017",1,Trump wants Postal Service to charge 'much mor...,trump want postal service charge much amazon s...,trump want postal servic charg much amazon shi...


# Word thingies

In [8]:
port_stem = PorterStemmer()
def stemming(content):
    stemmed_content = re.sub(r'U.S.', 'usa', content)
    stemmed_content = re.sub('[^a-zA-Z0-9]', ' ', stemmed_content)
    stemmed_content = stemmed_content.lower()
    stemmed_content = stemmed_content.lower()
    stemmed_content = stemmed_content.split()
    stemmed_content = [port_stem.stem(word) for word in stemmed_content if not word in stopwords.words('english')]
    stemmed_content = ' '.join(stemmed_content)

    return stemmed_content


lemmatizer = WordNetLemmatizer()
def lemming(content):
    lemmed_content = re.sub(r'U.S.', 'usa', content)
    lemmed_content = re.sub('[^a-zA-Z0-9]', ' ', lemmed_content)
    lemmed_content = lemmed_content.lower()
    lemmed_content = lemmed_content.split()
    lemmed_content = [lemmatizer.lemmatize(word) for word in lemmed_content if not word in stopwords.words('english')]
    lemmed_content = ' '.join(lemmed_content)
    return lemmed_content


In [18]:
news['stemmed'] =news['combined'].apply(stemming) 

In [17]:
news['lemmed'] = news['combined'].apply(lemming)

In [20]:
news.head()

Unnamed: 0,title,text,subject,date,is_true,combined,lemmed,stemmed
0,"As U.S. budget fight looms, Republicans flip t...",WASHINGTON (Reuters) - The head of a conservat...,politicsNews,"December 31, 2017",1,"As U.S. budget fight looms, Republicans flip t...",usa budget fight loom republican flip fiscal s...,usa budget fight loom republican flip fiscal s...
1,U.S. military to accept transgender recruits o...,WASHINGTON (Reuters) - Transgender people will...,politicsNews,"December 29, 2017",1,U.S. military to accept transgender recruits o...,usa military accept transgender recruit monday...,usa militari accept transgend recruit monday p...
2,Senior U.S. Republican senator: 'Let Mr. Muell...,WASHINGTON (Reuters) - The special counsel inv...,politicsNews,"December 31, 2017",1,Senior U.S. Republican senator: 'Let Mr. Muell...,senior usa republican senator let mr mueller j...,senior usa republican senat let mr mueller job...
3,FBI Russia probe helped by Australian diplomat...,WASHINGTON (Reuters) - Trump campaign adviser ...,politicsNews,"December 30, 2017",1,FBI Russia probe helped by Australian diplomat...,fbi russia probe helped australian diplomat ti...,fbi russia probe help australian diplomat tip ...
4,Trump wants Postal Service to charge 'much mor...,SEATTLE/WASHINGTON (Reuters) - President Donal...,politicsNews,"December 29, 2017",1,Trump wants Postal Service to charge 'much mor...,trump want postal service charge much amazon s...,trump want postal servic charg much amazon shi...


In [19]:
news.to_csv('combined news')

In [70]:
news.drop(['text','date'], axis = 1)

Unnamed: 0,title,subject,is_true,combined,stemmed,lemmed
0,"As U.S. budget fight looms, Republicans flip t...",politicsNews,1,"As U.S. budget fight looms, Republicans flip t...",usa budget fight loom republican flip fiscal s...,usa budget fight loom republican flip fiscal s...
1,U.S. military to accept transgender recruits o...,politicsNews,1,U.S. military to accept transgender recruits o...,usa militari accept transgend recruit monday p...,usa military accept transgender recruit monday...
2,Senior U.S. Republican senator: 'Let Mr. Muell...,politicsNews,1,Senior U.S. Republican senator: 'Let Mr. Muell...,senior usa republican senat let mr mueller job...,senior usa republican senator let mr mueller j...
3,FBI Russia probe helped by Australian diplomat...,politicsNews,1,FBI Russia probe helped by Australian diplomat...,fbi russia probe help australian diplomat tip ...,fbi russia probe helped australian diplomat ti...
4,Trump wants Postal Service to charge 'much mor...,politicsNews,1,Trump wants Postal Service to charge 'much mor...,trump want postal servic charg much amazon shi...,trump want postal service charge much amazon s...
...,...,...,...,...,...,...
44893,McPain: John McCain Furious That Iran Treated ...,Middle-east,0,McPain: John McCain Furious That Iran Treated ...,mcpain john mccain furiou iran treat us sailor...,mcpain john mccain furious iran treated u sail...
44894,JUSTICE? Yahoo Settles E-mail Privacy Class-ac...,Middle-east,0,JUSTICE? Yahoo Settles E-mail Privacy Class-ac...,justic yahoo settl e mail privaci class action...,justice yahoo settle e mail privacy class acti...
44895,Sunnistan: US and Allied ‘Safe Zone’ Plan to T...,Middle-east,0,Sunnistan: US and Allied ‘Safe Zone’ Plan to T...,sunnistan us alli safe zone plan take territor...,sunnistan u allied safe zone plan take territo...
44896,How to Blow $700 Million: Al Jazeera America F...,Middle-east,0,How to Blow $700 Million: Al Jazeera America F...,blow 700 million al jazeera america final call...,blow 700 million al jazeera america finally ca...


# Splitting

In [52]:
Y = news.is_true.values
X = news.lemmed.values

In [53]:
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size = 0.3, random_state = 0)
X_train

array(['breaking finally new wikileaks email going dump email latest wikileaks email evidence smoke hillary concerned place middle fire fbi finally take appropriate action give hillary treatment every american would receive caught major cover destruction evidence subpoena hacker friendly private email server appear penetrated foreign intelligence agency clinton company aaron burr bradley manning edward snowden betrayed public trust failure hold accountable yet proof clinton law matter legal system always treat kid glove frontpagein today 25th wikileaks release hacked podesta email one notable highlight march 2 2015 exchange john podesta clinton aide cheryl mill clinton campaign chair say another matter sound like lanny going dump email email may indicate intent sent day nyt story revealing existence hillary email server hillary press conference addressing time stunning revelation personal email account server home mill response think got new nick name unclear lanny referred infamous fo

In [54]:
y_train

array([0, 1, 0, ..., 0, 0, 1], dtype=int64)

## Vectorising

In [55]:
vector = TfidfVectorizer()
Nvectorize = CountVectorizer(ngram_range=(1,2))
X_train = vector.fit_transform(X_train)
X_test = vector.transform(X_test)

In [42]:

X_train

<1x1 sparse matrix of type '<class 'numpy.float64'>'
	with 1 stored elements in Compressed Sparse Row format>

# Model Stuff

In [56]:

from sklearn.linear_model import LogisticRegression

model = LogisticRegression(max_iter = 1000)
model.fit(X_train, y_train)

In [58]:
from sklearn.metrics import accuracy_score 
X_train_prediction = model.predict(X_train)
training_data_accuracy = accuracy_score(y_train, X_train_prediction)
print('Accuracy on Training Data: ', training_data_accuracy)

Accuracy on Training Data:  0.9919498536337025


In [60]:
X_test_prediction = model.predict(X_test)
testing_data_accuracy = accuracy_score(y_test, X_test_prediction)
print('Accuracy on Test Data: ', testing_data_accuracy)

Accuracy on Test Data:  0.9876020786933927


In [73]:
import pickle
filename = 'Content-Truth.sav'
pickle.dump(model,open(filename, 'wb'))
filename2 = 'TFIDF.sav'
pickle.dump(vector, open(filename2, 'wb'))

In [71]:
tester = 'Barron Shows Up On Trump’s Doorstep Claiming To Be His Son PALM BEACH, FL—Nervously standing on the porch with a duffel bag slung over his shoulder, Barron Trump reportedly showed up on former President Donald Trump’s doorstep Thursday claiming to be his son. “Excuse me, sir, I’m sorry to bother you, but I believe you might be my father,” said the younger Trump, who explained that his name was Barron and his mother’s name was Melania, and who did not appear to notice as the 77-year-old Trump slowly edged back inside the house. “Growing up, my mother always told me that my dad died a long, long time ago and said not to ask any questions. I think you met her in New York? I was born in 2006. Anyway, one day she let it slip that you were still out there, so after I turned 18, I decided to go down to the county clerk’s office and request my birth certificate—and, well, your name was on it. You are Donald J. Trump, correct? I thought maybe we could just talk for a bit, if that’s okay.” At press time, the elder Trump had slammed the door and threatened legal action if Barron ever showed up again. '

In [None]:
load_model = pickle.load(open('LR-Lemm-TFIDF.sav', 'rb'))
load_vector = pickle.load(open('TFIDF.sav', 'rb'))




In [9]:
import pickle
import re
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer

def load_model(model_file):
    with open(model_file, 'rb') as file:
        model = pickle.load(file)
    return model

def load_vectorizer(vector_file):
    with open(vector_file, 'rb') as file:
        vector = pickle.load(file)
    return vector

def preprocess_text(text):
    lemmatizer = WordNetLemmatizer()
    text = re.sub(r'U.S.', 'usa', text)
    text = re.sub('[^a-zA-Z]', ' ', text)
    text = text.lower()
    text = text.split()
    text = [lemmatizer.lemmatize(word) for word in text if not word in stopwords.words('english')]
    text = ' '.join(text)
    return text

def predict_reality(text, model, vector):

    preprocessed_text = preprocess_text(text)
    vectorised_text = vector.transform([preprocessed_text])
    reality = model.predict(vectorised_text)
    return reality[0]


In [10]:
def main(text):
    model = load_model("LR-Lemm-TFIDF.sav")
    vectorizer = load_vectorizer("TFIDF.sav")
    reality = predict_reality(text, model, vectorizer)
    return reality
    # 1 is_true

In [72]:
main(tester)

negative


# Mapping Content to Subject

## <span style ="color:#f2a">Data not tagged in a practical manner, this Section is Useless Ignore</span>

In [21]:
news['subject'].unique()

array(['politicsNews', 'worldnews', 'News', 'politics', 'Government News',
       'left-news', 'US_News', 'Middle-east'], dtype=object)

In [26]:
newser = news.drop(['title', 'text', 'date', 'is_true','lemmed','stemmed'], axis = 1)
newser.head()

Unnamed: 0,subject,combined
0,politicsNews,"As U.S. budget fight looms, Republicans flip t..."
1,politicsNews,U.S. military to accept transgender recruits o...
2,politicsNews,Senior U.S. Republican senator: 'Let Mr. Muell...
3,politicsNews,FBI Russia probe helped by Australian diplomat...
4,politicsNews,Trump wants Postal Service to charge 'much mor...


In [27]:
encoder = ce.OneHotEncoder(cols=['subject'], use_cat_names=True)
newser = encoder.fit_transform(newser)

In [28]:
newser.head()

Unnamed: 0,subject_politicsNews,subject_worldnews,subject_News,subject_politics,subject_Government News,subject_left-news,subject_US_News,subject_Middle-east,combined
0,1,0,0,0,0,0,0,0,"As U.S. budget fight looms, Republicans flip t..."
1,1,0,0,0,0,0,0,0,U.S. military to accept transgender recruits o...
2,1,0,0,0,0,0,0,0,Senior U.S. Republican senator: 'Let Mr. Muell...
3,1,0,0,0,0,0,0,0,FBI Russia probe helped by Australian diplomat...
4,1,0,0,0,0,0,0,0,Trump wants Postal Service to charge 'much mor...


In [None]:
subjects = ['subject_politicsNews', 'subject_worldnews','subject_News','subject_politics','subject_Government News', 'subject_left-news', 'subject_Middle-east 	']

# Some Data stuff

In [7]:
# Chart - 2 WordCloud Plot Visualization Code For Most Used Words in Spam Messages
# Create a String to Store All The Words
comment_words = ''

import matplotlib.pyplot as plt
Fake = false['text']
# Remove The Stopwords
doggy = stopwords
stopwords = (doggy)
# Iterate Through The Column
for val in false.text:

    # Typecaste Each Val to String
    val = str(val)

    # Split The Value
    tokens = val.split()

    # Converts Each Token into lowercase
    for i in range(len(tokens)):
        tokens[i] = tokens[i].lower()

    comment_words += " ".join(tokens)+" "

# Set Parameters
wordcloud = WordCloud(width = 1000, height = 500,
                background_color ='white',
                stopwords = stopwords,
                min_font_size = 10,
                max_words = 1000,
                colormap = 'gist_heat_r').generate(comment_words)

# Set Labels
plt.figure(figsize = (6,6), facecolor = None)
plt.title('Most Used Words In our Spam Messages', fontsize = 15, pad=20)
plt.imshow(wordcloud)
plt.axis("off")
plt.tight_layout(pad = 0)

# Display Chart
plt.show()

KeyboardInterrupt: 

# idk

In [14]:
done = False
while not done:
    message = input("Do you have news")
    if message == "no" or "N" or "N":
        print("okay")
        done = True
    else:
        news_to_check  = input("Please submit a headline")

        if main(news_to_check) == 1:
            print("this headline seems pretty legit")
        else:
            print("seems pretty sus")

Do you have news N
