# Testing the model on NEURAL FAKE NEWS

In [2]:
from keras_preprocessing.text import tokenizer_from_json
import json

In [3]:
with open('./tokenizer.json') as f:
    data = json.load(f)
    tokenizer = tokenizer_from_json(data)

In [4]:
import pickle
import pandas as pd
import numpy as np
#This class allows to vectorize a text corpus, by turning each text into either a sequence of integers (each integer being the index of a token in a dictionary) 
#or into a vector where the coefficient for each token 
from tensorflow.keras.preprocessing.text import Tokenizer

#Converts a text to a sequence of indexes in a fixed-size hashing space.
from tensorflow.keras.preprocessing.text import hashing_trick

#Converts a text to a sequence of words (or tokens).
from tensorflow.keras.preprocessing.text import text_to_word_sequence

from tensorflow.keras.preprocessing import sequence

from tensorflow.keras.preprocessing.sequence import pad_sequences

#BaseEstimator and ClassifierMixin classes from Scikit-learn, allowing it to behave like a Scikit-learn estimator.
from sklearn.base import BaseEstimator, ClassifierMixin

#Accuracy metrics for model
from sklearn.metrics import accuracy_score

from tensorflow.keras.models import Sequential, load_model

#lets us create embedding of words that represent the meaning of the words in relation to other words.
from tensorflow.keras.layers import Embedding, LSTM, Dense, Conv1D, MaxPooling1D, Dropout

from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

from tensorflow.keras.layers import Dense


# Importing Data and Model Features

In [5]:
tokenizer

<keras_preprocessing.text.Tokenizer at 0x1069fbb20>

In [6]:
#Loading LSTM model 
from tensorflow import keras
model = keras.models.load_model('./Keras_Model/LSTM_93%ACC.h5')

In [10]:
model2 = keras.models.load_model('./Keras_Model/LSTM_95%recall.h5')

ValueError: ('Unrecognized keyword arguments:', dict_keys(['ragged']))

In [7]:
print(model.summary())

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1 (Embedding)      (None, 320, 16)           9600      
_________________________________________________________________
conv1d_4 (Conv1D)            (None, 320, 32)           2592      
_________________________________________________________________
max_pooling1d_4 (MaxPooling1 (None, 160, 32)           0         
_________________________________________________________________
conv1d_5 (Conv1D)            (None, 160, 64)           10304     
_________________________________________________________________
max_pooling1d_5 (MaxPooling1 (None, 80, 64)            0         
_________________________________________________________________
conv1d_6 (Conv1D)            (None, 80, 128)           41088     
_________________________________________________________________
max_pooling1d_6 (MaxPooling1 (None, 40, 128)          

### Preparing Neural Fake Articles (GROVER Model)

In [8]:
fake_putin = pd.read_csv('./Grover-Fakes/fake_Grove_articles_Putin2.csv')

In [9]:
fake_putin = fake_putin['text'].reset_index().drop('index', axis = 1)

In [10]:
fake_putin

Unnamed: 0,text
0,Vladimir Putin’s regime has created a complex ...
1,Vladimir Putin spent part of Friday listening ...
2,Military strategist Peter W. Singer has had hi...
3,Do the Russians look forward to continued frie...


In [11]:
fake_putin2 = pd.read_csv('./Grover-Fakes/fake_Grove_articles_Putin1.csv')
fake_putin2 = fake_putin2['text'].reset_index().drop('index', axis = 1)

In [12]:
fake_putin2

Unnamed: 0,text
0,Faced with mounting pressure from the U.S. adm...
1,"Vladimir Putin, who has ruled Russia since 200..."
2,Russia’s Vladimir Putin is accused of conspiri...


In [13]:
fake_putin = pd.concat([fake_putin, fake_putin2]).reset_index()

In [14]:
fake_putin = fake_putin.drop('index', axis = 1)

In [15]:
fake_putin

Unnamed: 0,text
0,Vladimir Putin’s regime has created a complex ...
1,Vladimir Putin spent part of Friday listening ...
2,Military strategist Peter W. Singer has had hi...
3,Do the Russians look forward to continued frie...
4,Faced with mounting pressure from the U.S. adm...
5,"Vladimir Putin, who has ruled Russia since 200..."
6,Russia’s Vladimir Putin is accused of conspiri...


In [16]:
fake_trump = pd.read_csv('./Grover-Fakes/fake_Grove_articles_trump1.csv')

In [17]:
fake_trump2 = pd.read_csv('./Grover-Fakes/fake_Grove_articles_trump2.csv')

In [18]:
fake_trump = fake_trump['text'].reset_index().drop('index', axis = 1)

In [19]:
fake_trump2 = fake_trump2['text'].reset_index().drop('index', axis = 1)

In [20]:
fake_trump = pd.concat([fake_trump, fake_trump2]).reset_index()

In [21]:
fake_trump.drop('index', axis = 1)

Unnamed: 0,text
0,The White House is actively searching for a re...
1,"Back in the 1920s, Mr. Trump was just a teenag..."
2,President Donald Trump is upset with the media...
3,"Army Chief of Staff Gen. Mark Milley, who also..."
4,"At a dinner on Saturday night, President Trump..."
5,President Donald Trump made clear in testimony...
6,"On April 22, 2017, Donald Trump met with South..."
7,I have signed the Abolish Minimum Wage Act. — ...


In [22]:
fake_biden = pd.read_csv('./Grover-Fakes/fake_Grove_articles_biden2.csv')

fake_bernie = pd.read_csv('./Grover-Fakes/fake_Grove_articles_bernie1.csv')

fake_biden = fake_biden['text'].reset_index().drop('index', axis = 1)

fake_bernie= fake_bernie['text'].reset_index().drop('index', axis = 1)

fake_biden = pd.concat([fake_biden, fake_bernie]).reset_index()

fake_biden.drop('index', axis = 1)

Unnamed: 0,text
0,The subject of sex is far more complicated tha...
1,Joe Biden kissed Barack Obama on the cheek as ...
2,It is ironic that during a moment when politic...
3,Asked this week why he didn’t describe himself...
4,"Vice President Biden, in a new poll, scored th..."
5,It’s always tough to get a former vice preside...
6,When President Trump signed an executive order...
7,Six years after announcing their “gladness” th...
8,"It’s the Cold War horror, frozen in history, t..."
9,"Former vice president Joe Biden, who is mullin..."


In [23]:
fakes = pd.concat([fake_putin, fake_trump])

In [24]:
fakes = pd.concat([fakes, fake_biden])

In [25]:
fakes

Unnamed: 0,text,index
0,Vladimir Putin’s regime has created a complex ...,
1,Vladimir Putin spent part of Friday listening ...,
2,Military strategist Peter W. Singer has had hi...,
3,Do the Russians look forward to continued frie...,
4,Faced with mounting pressure from the U.S. adm...,
5,"Vladimir Putin, who has ruled Russia since 200...",
6,Russia’s Vladimir Putin is accused of conspiri...,
0,The White House is actively searching for a re...,0.0
1,"Back in the 1920s, Mr. Trump was just a teenag...",1.0
2,President Donald Trump is upset with the media...,2.0


In [90]:
fakes = fakes.drop('index', axis =1).reset_index()

In [91]:
fakes

Unnamed: 0,index,text,label
0,0,Vladimir Putin’s regime has created a complex ...,1.0
1,1,Vladimir Putin spent part of Friday listening ...,1.0
2,2,Military strategist Peter W. Singer has had hi...,1.0
3,3,Do the Russians look forward to continued frie...,1.0
4,4,Faced with mounting pressure from the U.S. adm...,1.0
5,5,"Vladimir Putin, who has ruled Russia since 200...",1.0
6,6,Russia’s Vladimir Putin is accused of conspiri...,1.0
7,7,The White House is actively searching for a re...,1.0
8,8,"Back in the 1920s, Mr. Trump was just a teenag...",1.0
9,9,President Donald Trump is upset with the media...,1.0


In [92]:
fakes.drop('index', axis = 1)

Unnamed: 0,text,label
0,Vladimir Putin’s regime has created a complex ...,1.0
1,Vladimir Putin spent part of Friday listening ...,1.0
2,Military strategist Peter W. Singer has had hi...,1.0
3,Do the Russians look forward to continued frie...,1.0
4,Faced with mounting pressure from the U.S. adm...,1.0
5,"Vladimir Putin, who has ruled Russia since 200...",1.0
6,Russia’s Vladimir Putin is accused of conspiri...,1.0
7,The White House is actively searching for a re...,1.0
8,"Back in the 1920s, Mr. Trump was just a teenag...",1.0
9,President Donald Trump is upset with the media...,1.0


In [93]:
fakes['label'] = 1

In [95]:
fakes = fakes.drop('index', axis = 1)

In [97]:
fakes.to_csv('grover_fakes.csv')

# GPT-2 Fakes Lemmatization


In [14]:
from spacy.lang.en.stop_words import STOP_WORDS
import spacy
import regex as re

In [15]:
nlp = spacy.load('en_core_web_md')

In [16]:
def text_preprocessor(text):
    text = re.sub('<[^>]*>', '', text) # Effectively removes HTML markup tags
    emoticons = re.findall('(?::|;|=)(?:-)?(?:\)|\(|D|P)', text)
    text = re.sub('[\W]+', ' ', text.lower()) + ' '.join(emoticons).replace('-', '')
    
    doc = nlp(text)
    #Lemmatization, which is the process of reducing a word to its lemma or dictionary form. 
    #For example, the word run is the lemma for the words runs, ran, and running.
    text = ' '.join([token.lemma_ for token in doc if token.text not in STOP_WORDS])
    return text

In [26]:
gpt_fakes = pd.read_csv('./GPT2-Model-Fakes/gpt2_gentext_20210512_133139.txt', sep = '*',names = ['text'])


In [27]:
gpt_fakes.index

Index(['President Putin’s children, also known as GRU soldiers, allegedly trained the militants to kill US and Westerners, the BBC reports.A police officer looks at damage following a double car bomb attack in the Al-Zahraa neighborhood of the Homs, Syria, on February 21, 2016. The ISIS militant group ISIS has claimed responsibility for the attack that left at least one person dead and three more wounded. The officer was visiting the scene of another deadly attack in the city, which left at least one person dead and three more wounded.Syrians wait near the Turkish border during clashes between ISIS and Kurdish armed groups in Kobani, Syria, on Thursday, February 23. The photo was taken in Sanliurfa, Turkey. ISIS militants disguised as Kurdish security forces infiltrated Kobani on Thursday and killed "many civilians," said a spokesman for the Kurds in Kobani. Syrians wait near the Turkish border during clashes between ISIS and Kurdish armed groups in Kobani, Syria, on Thursday, February

In [28]:
gpt = gpt_fakes.reset_index()

In [29]:
gpt

Unnamed: 0,index,text
0,"President Putin’s children, also known as GRU ...",
1,"President Putin, the KGB officer who had infil...",
2,President Putin has said the United States sho...,"President Putin’s ""fighters"" in Syria. The New..."
3,President Putin and the KGB. There is an impli...,
4,President Putin. the old guard at the Oval Off...,
5,President Putin said the United States should ...,President Putin and the KGB. I’m going to cont...
6,"President Putin said at the meeting, attended ...",
7,"President Putin. Putin, who is widely despised...",
8,President Putin said: “If it weren’t for the K...,
9,President Putin will meet with US President Do...,


In [30]:
gpt.text= gpt['index']

In [31]:
gpt = gpt['text']

In [38]:
gpt = pd.DataFrame(gpt.apply(text_preprocessor))

In [56]:
gpt['label'] = 'FAKE'

NameError: name 'gpt' is not defined

## Lematization of Data

In [22]:
keep_n = 320

In [None]:
fakes['text'] = fakes.text.apply(text_preprocessor)

In [None]:
fakes

In [45]:
import pandas as pd
news = 'Bethlehem, West Bank, Gaza and Jerusalem (CNN)At least 10 Palestinians were killed Friday by Israeli forces during violent clashes in towns and cities across the West Bank, the Gaza-based Palestinian Health Ministry said in a statement, as sirens continued to blare all day across southern Israel, indicating another day of ceaseless rocket fire from Gaza. The violence in the occupied West Bank marks the largest number of people killed in a single day there for years.The Palestinian Red Crescent said Palestinians had been targeted with live ammunition, as well as rubber bullets and teargas. Israel has continued this week a bombing operation the government says is targeting Palestinian militants in Gaza, which has killed at least 126 people, including at least 31 children and 20 women, the Palestinian Health Ministry reported Friday. At least 950 people, including at least 254 children, have sustained injuries as a result of Israeli airstrikes, the Health Ministry added. According to the Palestinian Health Ministry, more than 500 Palestinians have been injured since clashes erupted earlier this week. The majority of those injured on Friday were "shot by live ammunition," the Health Ministry added.'

cnn = pd.DataFrame()
cnn['text'] = [news]


In [46]:
cnn

Unnamed: 0,text
0,"Bethlehem, West Bank, Gaza and Jerusalem (CNN)..."


In [49]:
onion = 'CHICAGO—Lauding the 22-year-old for her commitment to serving her community, sources confirmed Friday that local Taco Bell employee Nicole Cortez has had a far greater positive impact on people’s lives than any firefighter ever could. “Sure, there are plenty of firefighters who have rushed into a burning building to save a child, but that service pales in comparison to that of the woman whose Baja Blasts and Crunchwrap Supremes has brought smiles to thousands,” said Gail J. McGovern, president of the American Red Cross, who issued a statement announcing that Cortez would be honored in a ceremony at the organization’s headquarters in Washington D.C. in recognition of her outstanding tenacity, strength, and courage, all of which outshone the average employee of a local fire department. “Over the span of her 3-year career, Nicole has never failed to stuff generous handfuls of hot sauce packets into the bottom of a customer’s bag. That’s far more consequential than a firefighter preventing a home from going up in flames. She works night and day, ready to answer the call in the event a car enters the 24-hour drive-through window. It’s often a thankless job, but she’s a true hero.” At press time, community members were grieving after reports Cortez had perished due to smoke inhalation from a burning burrito.'

onion_df = pd.DataFrame()
onion_df['text'] = [onion]

In [50]:
onion_df['text'] = onion_df.text.apply(text_preprocessor)

In [53]:
onion = tokenizer.texts_to_sequences(onion_df.text)

In [54]:
padded_onion = pad_sequences(onion, maxlen = keep_n, padding='post')

In [47]:
cnn = tokenizer.texts_to_sequences(cnn)

In [48]:
cnn

[[]]

In [49]:
padded_cnn = pad_sequences(cnn, maxlen = keep_n, padding = 'post')

In [50]:
padded_cnn[0]

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

## Tokenizing Fake Articles

In [None]:
#We apply our tokenizer to the new articles to get their representation using our models set of tokens
text_sequences = tokenizer.texts_to_sequences(fakes.text)

In [40]:
text_sequences2 = tokenizer.texts_to_sequences(gpt.text)

In [None]:
text_sequences

In [28]:
from src.config import n_tokens, keep_n, embedding_dim

In [None]:
padded_sequences = pad_sequences(text_sequences, maxlen = keep_n, padding = 'post')

In [44]:
padded_sequences2 = pad_sequences(text_sequences2, maxlen = keep_n, padding = 'post')

In [None]:
padded_sequences[0:2]

# Making Predictions

In [None]:
model.predict_classes(padded_sequences)

In [None]:
model2.predict_classes(padded_sequences)

In [45]:
pred_gpt2 = model.predict_classes(padded_sequences2)



In [47]:
model2.predict_classes(padded_sequences2)

array([[1],
       [1],
       [1],
       [0],
       [1],
       [0],
       [1],
       [0],
       [1],
       [0],
       [0],
       [1],
       [0]], dtype=int32)

In [57]:
padded_sequences2.shape

(13, 320)

In [None]:
one = [pred_gpt2 == 1]

In [None]:
one

In [29]:
cnn_pred = model.predict_classes(padded_cnn)

In [67]:
cnn2 = model2.predict_classes(padded_cnn)

In [20]:
cnn2

NameError: name 'cnn2' is not defined

In [29]:
model.predict_classes(padded_cnn)

array([[0]], dtype=int32)

In [71]:
print(model2.predict_classes(padded_onion))

[[1]]


In [61]:
model2.predict_proba(padded_onion)



array([[0.9481555]], dtype=float32)