In [None]:
import tensorflow as tf

# Fail fast when the runtime has no GPU attached: the name TensorFlow
# reports must be exactly the first GPU device.
device_name = tf.test.gpu_device_name()
if device_name == '/device:GPU:0':
    print('Found GPU at: {}'.format(device_name))
else:
    raise SystemError('GPU device not found')

Found GPU at: /device:GPU:0


In [None]:
# Mount Google Drive at /content/drive; the data files read and written by
# the cells below all live under '/content/drive/My Drive/data/'.
from google.colab import drive
drive.mount("/content/drive")

Mounted at /content/drive


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

import nltk
import re
from nltk.stem import PorterStemmer, WordNetLemmatizer
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize, sent_tokenize

from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression, SGDClassifier
from sklearn.naive_bayes import GaussianNB, MultinomialNB, BernoulliNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import LinearSVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer

import tensorflow as tf
from tensorflow.keras.preprocessing import text, sequence
from tensorflow.keras.preprocessing.text import one_hot
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Dense, LSTM, Dropout
from tensorflow.keras.callbacks import Callback, ReduceLROnPlateau, EarlyStopping
from tensorflow.keras.callbacks import History

from wordcloud import WordCloud, STOPWORDS

In [None]:
# Load the two single-class CSV files (fake articles and true articles).
fake_csv_path = '/content/drive/My Drive/data/Fake.csv'
true_csv_path = '/content/drive/My Drive/data/True.csv'

fake = pd.read_csv(fake_csv_path, sep=',')
true = pd.read_csv(true_csv_path, sep=',')

In [None]:
# Load the mixed fake/real news CSV and preview its first rows.
fake_and_true = pd.read_csv(
    '/content/drive/My Drive/data/fake_or_real_news.csv',
    sep=',',
)
fake_and_true.head()

Unnamed: 0.1,Unnamed: 0,title,text,label
0,8476,You Can Smell Hillary’s Fear,"Daniel Greenfield, a Shillman Journalism Fello...",FAKE
1,10294,Watch The Exact Moment Paul Ryan Committed Pol...,Google Pinterest Digg Linkedin Reddit Stumbleu...,FAKE
2,3608,Kerry to go to Paris in gesture of sympathy,U.S. Secretary of State John F. Kerry said Mon...,REAL
3,10142,Bernie supporters on Twitter erupt in anger ag...,"— Kaydee King (@KaydeeKing) November 9, 2016 T...",FAKE
4,875,The Battle of New York: Why This Primary Matters,It's primary day in New York and front-runners...,REAL


In [None]:
# Fold the headline into the article body so there is a single text field.
headline_and_body = fake_and_true["title"] + " " + fake_and_true["text"]
fake_and_true["text"] = headline_and_body

# Recode the string labels as integers: FAKE -> 0, REAL -> 1.
for label_name, label_code in (('FAKE', 0), ('REAL', 1)):
    fake_and_true.loc[fake_and_true["label"] == label_name, "label"] = label_code

# The title now lives inside "text"; 'Unnamed: 0' is dropped along with it.
fake_and_true = fake_and_true.drop(columns=['title', 'Unnamed: 0'])

In [None]:
# Tag each single-class frame with its integer label (0 = fake, 1 = true).
fake['label'] = 0
true['label'] = 1

# Stack the true rows followed by the fake rows. DataFrame.append was removed
# in pandas 2.0; pd.concat is the supported equivalent and, like append,
# keeps each frame's original index (a later concat resets it).
dataset = pd.concat([true, fake])

In [None]:
# Preview the fake_or_real_news frame (text and label columns).
fake_and_true.head()

Unnamed: 0,text,label
0,You Can Smell Hillary’s Fear Daniel Greenfield...,0
1,Watch The Exact Moment Paul Ryan Committed Pol...,0
2,Kerry to go to Paris in gesture of sympathy U....,1
3,Bernie supporters on Twitter erupt in anger ag...,0
4,The Battle of New York: Why This Primary Matte...,1


In [None]:
# Preview the stacked True.csv/Fake.csv frame before its columns are trimmed.
dataset.head()

Unnamed: 0,title,text,subject,date,label
0,"As U.S. budget fight looms, Republicans flip t...",WASHINGTON (Reuters) - The head of a conservat...,politicsNews,"December 31, 2017",1
1,U.S. military to accept transgender recruits o...,WASHINGTON (Reuters) - Transgender people will...,politicsNews,"December 29, 2017",1
2,Senior U.S. Republican senator: 'Let Mr. Muell...,WASHINGTON (Reuters) - The special counsel inv...,politicsNews,"December 31, 2017",1
3,FBI Russia probe helped by Australian diplomat...,WASHINGTON (Reuters) - Trump campaign adviser ...,politicsNews,"December 30, 2017",1
4,Trump wants Postal Service to charge 'much mor...,SEATTLE/WASHINGTON (Reuters) - President Donal...,politicsNews,"December 29, 2017",1


In [None]:
# Fold the headline into the body, then drop the columns that the
# fake_or_real_news frame does not have, so both frames share text/label.
dataset["text"] = dataset["title"] + " " + dataset["text"]
dataset = dataset.drop(columns=['title', 'subject', 'date'])

# Put the fake_or_real_news rows first, followed by the True/Fake rows, and
# renumber the index from 0.
dataset = pd.concat([fake_and_true, dataset], ignore_index=True)

# `info` is a method: without the call parentheses the cell only shows the
# bound-method repr instead of the column/dtype/non-null summary.
dataset.info()

<bound method DataFrame.info of                                                     text label
0      You Can Smell Hillary’s Fear Daniel Greenfield...     0
1      Watch The Exact Moment Paul Ryan Committed Pol...     0
2      Kerry to go to Paris in gesture of sympathy U....     1
3      Bernie supporters on Twitter erupt in anger ag...     0
4      The Battle of New York: Why This Primary Matte...     1
...                                                  ...   ...
51228  McPain: John McCain Furious That Iran Treated ...     0
51229  JUSTICE? Yahoo Settles E-mail Privacy Class-ac...     0
51230  Sunnistan: US and Allied ‘Safe Zone’ Plan to T...     0
51231  How to Blow $700 Million: Al Jazeera America F...     0
51232  10 U.S. Navy Sailors Held by Iranian Military ...     0

[51233 rows x 2 columns]>

In [None]:
# Preview the combined frame (fake_or_real_news rows come first).
dataset.head()

Unnamed: 0,text,label
0,You Can Smell Hillary’s Fear Daniel Greenfield...,0
1,Watch The Exact Moment Paul Ryan Committed Pol...,0
2,Kerry to go to Paris in gesture of sympathy U....,1
3,Bernie supporters on Twitter erupt in anger ag...,0
4,The Battle of New York: Why This Primary Matte...,1


In [None]:
# Cast the label column to integers. Part of it was recoded from strings via
# .loc assignment, so the column presumably is not integer-typed yet.
dataset = dataset.astype({'label': 'int'})

In [None]:
# Number of rows in the combined dataset.
len(dataset)

51233

### Clean data: keep letters only, lowercase, remove stopwords, stem

In [None]:
# Requires the NLTK stopwords corpus; run nltk.download('stopwords') once if
# it is not installed yet.
porter_stemmer = PorterStemmer()

# Build the stopword set and compile the regex once. The original rebuilt the
# set for every single token, which dominated the runtime of this cell.
english_stopwords = set(stopwords.words('english'))
non_letter_pattern = re.compile('[^a-zA-Z]')

# Per article: replace non-letters with spaces, lowercase, split on
# whitespace, drop stopwords, stem each remaining token, re-join with spaces.
# The loop variable is deliberately not called `text`, which would shadow the
# `text` module imported from tensorflow.keras.preprocessing.
stemmed_text = []
for raw_text in dataset['text']:
    tokens = non_letter_pattern.sub(' ', raw_text).lower().split()
    stemmed_tokens = [
        porter_stemmer.stem(token)
        for token in tokens
        if token not in english_stopwords
    ]
    stemmed_text.append(" ".join(stemmed_tokens))

In [None]:
# Wrap the list of stemmed strings in a Series (default integer index).
cleaned_text = pd.Series(stemmed_text)

In [None]:
# Save the cleaned text on its own, without writing the index column.
cleaned_text.to_csv('/content/drive/My Drive/data/news_df.csv', index = False)

In [None]:
# Add the cleaned text as a new column; pandas aligns the Series to the
# frame by index label when assigning.
dataset['cleaned_text'] = cleaned_text

In [None]:
# Save the combined frame (including cleaned_text), without the index column.
dataset.to_csv('/content/drive/My Drive/data/data.csv', index = False)

In [None]:
# Load the second dataset from Drive.
data = pd.read_csv(
    '/content/drive/My Drive/data/dataset_sec.csv',
    sep=',',
)

In [None]:
# Remove the columns that are not used further down.
data = data.drop(columns=['publication', 'Unnamed: 0'])

In [None]:
# Build a single "text" field from headline + body, then discard the two
# source columns.
data = (
    data
    .assign(text=data["title"] + " " + data["content"])
    .drop(columns=['title', 'content'])
)

In [None]:
# Recode the string labels as integers: fake -> 0, real -> 1.
for label_name, label_code in (('fake', 0), ('real', 1)):
    data.loc[data["label"] == label_name, "label"] = label_code

In [None]:
# Count the real articles. The original referenced an undefined name
# (`datat`) and compared against the string 'real', but the labels have
# already been recoded (fake -> 0, real -> 1), so compare against 1.
len(data[data['label'] == 1])

15712

In [None]:
# Preview the second dataset after trimming and label recoding.
data.head()

Unnamed: 0,label,text
0,0,Muslims BUSTED: They Stole Millions In Gov’t B...
1,0,Re: Why Did Attorney General Loretta Lynch Ple...
2,0,BREAKING: Weiner Cooperating With FBI On Hilla...
3,0,PIN DROP SPEECH BY FATHER OF DAUGHTER Kidnappe...
4,0,FANTASTIC! TRUMP'S 7 POINT PLAN To Reform Heal...


In [None]:
# Reload the saved copy of the second dataset and keep the first 50 rows of
# each class as small samples.
# NOTE(review): data_small_sec.csv is written by a later cell, so on a fresh
# run that cell must have been executed at least once before this one.
data2 = pd.read_csv('/content/drive/My Drive/data/data_small_sec.csv', delimiter=',')

# Slice the frame that was just loaded. The original sliced `data`, which left
# `data2` unused and raised NameError when `data` was not defined in the kernel.
data2_fake = data2[data2['label']==0][:50]
data2_true = data2[data2['label']==1][:50]

NameError: ignored

In [None]:
# Preview the 50-row sample of label-1 articles.
data2_true.head()

Unnamed: 0,text,label
12273,Muslims BUSTED: They Stole Millions In Gov’t B...,1
12274,Re: Why Did Attorney General Loretta Lynch Ple...,1
12275,BREAKING: Weiner Cooperating With FBI On Hilla...,1
12276,PIN DROP SPEECH BY FATHER OF DAUGHTER Kidnappe...,1
12277,FANTASTIC! TRUMP'S 7 POINT PLAN To Reform Heal...,1


In [None]:
# Save the processed second dataset, without the index column.
# NOTE(review): this writes the whole `data` frame even though the file name
# says "small", and an earlier cell reads this same file — confirm the
# intended cell order.
data.to_csv('/content/drive/My Drive/data/data_small_sec.csv', index = False)

In [None]:
# Save the two 50-row samples (label 0 and label 1), without the index column.
data2_fake.to_csv('/content/drive/My Drive/data/data2_fake.csv', index = False)
data2_true.to_csv('/content/drive/My Drive/data/data2_true.csv', index = False)

In [None]:
# Load dataset.csv (this rebinds `data`) and keep the first 50 rows of each
# class as small samples.
data = pd.read_csv('/content/drive/My Drive/data/dataset.csv', sep=',')

is_label_0 = data['label'] == 0
is_label_1 = data['label'] == 1
data1_fake = data.loc[is_label_0].head(50)
data1_true = data.loc[is_label_1].head(50)

In [None]:
# Save the two 50-row samples taken from dataset.csv, without the index column.
data1_fake.to_csv('/content/drive/My Drive/data/data1_fake.csv', index = False)
data1_true.to_csv('/content/drive/My Drive/data/data1_true.csv', index = False)