## Dependancies

In [1]:
import pandas as pd
import datetime
import warnings
warnings.filterwarnings("ignore")

## Import CSV's
- Fake CSV
- True CSV

In [2]:
fake_news = pd.read_csv('../data/clean-datasets/fake_news.csv')

In [3]:
fake_news.head()

Unnamed: 0,title,text,subject,date,year
0,JUST IN: CROOKED DOJ OFFICIAL Didn’t Reveal Th...,We ve been covering the curious case of DOJ of...,politics,2018-02-14,2018
1,POLITICS FIRST: LEFTIST PARENTS Of Former Comb...,The heartbreaking story of two parents who put...,politics,2018-02-15,2018
2,TWO HIGH SCHOOL JROTC Members Tell INCREDIBLE ...,ABC News When two Marjory Stoneman Douglas Hi...,politics,2018-02-15,2018
3,MEDIA PROMOTES Fake News That Shooter Connecte...,The mainstream media is guilty of fake news wh...,politics,2018-02-15,2018
4,"WHAT’S WRONG WITH PELOSI? Nancy Stutters, Stam...",What s wrong with Pelosi? Nancy Pelosi is supp...,politics,2018-02-15,2018


In [4]:
true_news = pd.read_csv('../data/kaggle/True.csv')

In [5]:
true_news.head()

Unnamed: 0,title,text,subject,date
0,"As U.S. budget fight looms, Republicans flip t...",WASHINGTON (Reuters) - The head of a conservat...,politicsNews,"December 31, 2017"
1,U.S. military to accept transgender recruits o...,WASHINGTON (Reuters) - Transgender people will...,politicsNews,"December 29, 2017"
2,Senior U.S. Republican senator: 'Let Mr. Muell...,WASHINGTON (Reuters) - The special counsel inv...,politicsNews,"December 31, 2017"
3,FBI Russia probe helped by Australian diplomat...,WASHINGTON (Reuters) - Trump campaign adviser ...,politicsNews,"December 30, 2017"
4,Trump wants Postal Service to charge 'much mor...,SEATTLE/WASHINGTON (Reuters) - President Donal...,politicsNews,"December 29, 2017"


## Fake News - Data Exploration

In [6]:
fake_news.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 23471 entries, 0 to 23470
Data columns (total 5 columns):
 #   Column   Non-Null Count  Dtype 
---  ------   --------------  ----- 
 0   title    23471 non-null  object
 1   text     23471 non-null  object
 2   subject  23471 non-null  object
 3   date     23471 non-null  object
 4   year     23471 non-null  int64 
dtypes: int64(1), object(4)
memory usage: 917.0+ KB


In [7]:
fake_news.shape

(23471, 5)

In [8]:
fake_news.isnull().sum()

title      0
text       0
subject    0
date       0
year       0
dtype: int64

In [9]:
# If null values were found in dataset - dropna would of been used 
# fake_news.dropna(inplace=True)

In [10]:
# Sort by date column
fake_news = fake_news.sort_values(by="date")

In [11]:
fake_news

Unnamed: 0,title,text,subject,date,year
14341,HILLARY RODHAM NIXON: A CANDIDATE WITH MORE BA...,The irony here isn t lost on us. Hillary is be...,left-news,2015-03-31,2015
14342,WATCH DIRTY HARRY REID ON HIS LIE ABOUT ROMNEY...,"In case you missed it Sen. Harry Reid (R-NV), ...",left-news,2015-03-31,2015
14343,HILLARY RODHAM NIXON: A CANDIDATE WITH MORE BA...,The irony here isn t lost on us. Hillary is be...,politics,2015-03-31,2015
14344,FLASHBACK: KING OBAMA COMMUTES SENTENCES OF 22...,Just making room for Hillary President Obama t...,politics,2015-03-31,2015
14345,APPLE’S CEO SAYS RELIGIOUS FREEDOM LAWS ARE ‘D...,The gay mafia has a new corporate Don. This i...,politics,2015-03-31,2015
...,...,...,...,...,...
30,JUST IN: BADASS GENERAL JOHN KELLY Shoved Chin...,Just one more reminder of why President Trump ...,politics,2018-02-18,2018
31,IT BEGINS…RINO MEGA-DONOR Threatens Jeb Bush: ...,A longtime Republican Party donor based in Flo...,politics,2018-02-18,2018
34,WATCH: SNOWFLAKES ASKED Communist Party Platfo...,Ami Horowitz is fantastic! Check out this man ...,politics,2018-02-19,2018
33,MSNBC ANCHOR Flabbergasted at What Texas Teach...,If we protect every other government building ...,politics,2018-02-19,2018


In [12]:
# Drop specific string from date row
fake_news_df = fake_news[fake_news['date'].str.contains("https")==False]

In [13]:
# Check dataframes
fake_news_df

Unnamed: 0,title,text,subject,date,year
14341,HILLARY RODHAM NIXON: A CANDIDATE WITH MORE BA...,The irony here isn t lost on us. Hillary is be...,left-news,2015-03-31,2015
14342,WATCH DIRTY HARRY REID ON HIS LIE ABOUT ROMNEY...,"In case you missed it Sen. Harry Reid (R-NV), ...",left-news,2015-03-31,2015
14343,HILLARY RODHAM NIXON: A CANDIDATE WITH MORE BA...,The irony here isn t lost on us. Hillary is be...,politics,2015-03-31,2015
14344,FLASHBACK: KING OBAMA COMMUTES SENTENCES OF 22...,Just making room for Hillary President Obama t...,politics,2015-03-31,2015
14345,APPLE’S CEO SAYS RELIGIOUS FREEDOM LAWS ARE ‘D...,The gay mafia has a new corporate Don. This i...,politics,2015-03-31,2015
...,...,...,...,...,...
30,JUST IN: BADASS GENERAL JOHN KELLY Shoved Chin...,Just one more reminder of why President Trump ...,politics,2018-02-18,2018
31,IT BEGINS…RINO MEGA-DONOR Threatens Jeb Bush: ...,A longtime Republican Party donor based in Flo...,politics,2018-02-18,2018
34,WATCH: SNOWFLAKES ASKED Communist Party Platfo...,Ami Horowitz is fantastic! Check out this man ...,politics,2018-02-19,2018
33,MSNBC ANCHOR Flabbergasted at What Texas Teach...,If we protect every other government building ...,politics,2018-02-19,2018


In [14]:
# Drop remaining strings from date column
fake_news_df = fake_news_df[fake_news_df['date'].str.contains("Never")==False]

In [15]:
fake_news_df.shape

(23471, 5)

In [16]:
# Format date column
fake_news_df["date"]=pd.to_datetime(fake_news_df["date"])

In [17]:
fake_news_df.dtypes

title              object
text               object
subject            object
date       datetime64[ns]
year                int64
dtype: object

In [18]:
# Preview date change
fake_news_df.head(5)

Unnamed: 0,title,text,subject,date,year
14341,HILLARY RODHAM NIXON: A CANDIDATE WITH MORE BA...,The irony here isn t lost on us. Hillary is be...,left-news,2015-03-31,2015
14342,WATCH DIRTY HARRY REID ON HIS LIE ABOUT ROMNEY...,"In case you missed it Sen. Harry Reid (R-NV), ...",left-news,2015-03-31,2015
14343,HILLARY RODHAM NIXON: A CANDIDATE WITH MORE BA...,The irony here isn t lost on us. Hillary is be...,politics,2015-03-31,2015
14344,FLASHBACK: KING OBAMA COMMUTES SENTENCES OF 22...,Just making room for Hillary President Obama t...,politics,2015-03-31,2015
14345,APPLE’S CEO SAYS RELIGIOUS FREEDOM LAWS ARE ‘D...,The gay mafia has a new corporate Don. This i...,politics,2015-03-31,2015


In [19]:
# Split date column into year
fake_news_df['year'] = fake_news_df['date'].dt.year

In [20]:
fake_news_df.head(5)

Unnamed: 0,title,text,subject,date,year
14341,HILLARY RODHAM NIXON: A CANDIDATE WITH MORE BA...,The irony here isn t lost on us. Hillary is be...,left-news,2015-03-31,2015
14342,WATCH DIRTY HARRY REID ON HIS LIE ABOUT ROMNEY...,"In case you missed it Sen. Harry Reid (R-NV), ...",left-news,2015-03-31,2015
14343,HILLARY RODHAM NIXON: A CANDIDATE WITH MORE BA...,The irony here isn t lost on us. Hillary is be...,politics,2015-03-31,2015
14344,FLASHBACK: KING OBAMA COMMUTES SENTENCES OF 22...,Just making room for Hillary President Obama t...,politics,2015-03-31,2015
14345,APPLE’S CEO SAYS RELIGIOUS FREEDOM LAWS ARE ‘D...,The gay mafia has a new corporate Don. This i...,politics,2015-03-31,2015


In [21]:
# Save clean dataset - commented out
#fake_news_df.to_csv('data/clean-datasets/fake_news.csv', index=False)

## True News - Data Exploration

In [22]:
true_news.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 21417 entries, 0 to 21416
Data columns (total 4 columns):
 #   Column   Non-Null Count  Dtype 
---  ------   --------------  ----- 
 0   title    21417 non-null  object
 1   text     21417 non-null  object
 2   subject  21417 non-null  object
 3   date     21417 non-null  object
dtypes: object(4)
memory usage: 669.4+ KB


In [23]:
true_news.shape

(21417, 4)

In [24]:
true_news.isnull().sum()

title      0
text       0
subject    0
date       0
dtype: int64

In [25]:
# If null values were found in dataset - dropna would of been used 
# true_news.dropna(inplace=True)

In [26]:
# Sort by date column
true_news = true_news.sort_values(by="date")

In [27]:
true_news

Unnamed: 0,title,text,subject,date
10099,Trump sued in Kentucky by three who say he inc...,"LOUISVILLE, Ky. (Reuters) - Three people who c...",politicsNews,"April 1, 2016"
10019,"In record year for political ads, media buyers...",(Reuters) - Media buyers expect record politic...,politicsNews,"April 1, 2016"
10020,"Obama, Republicans urge Trump to soften tone",WASHINGTON (Reuters) - Donald Trump is facing ...,politicsNews,"April 1, 2016"
10092,State Department says halts review of Clinton ...,WASHINGTON (Reuters) - The U.S. State Departme...,politicsNews,"April 1, 2016"
10094,Obama: Trump doesn't know much about foreign p...,WASHINGTON (Reuters) - U.S. President Barack O...,politicsNews,"April 1, 2016"
...,...,...,...,...
20543,Togo must introduce two-term limit swiftly to ...,DAKAR (Reuters) - Togo must go the way of othe...,worldnews,"September 9, 2017"
20542,Turkey cautions citizens about travel to 'anti...,ANKARA (Reuters) - Turkey cautioned its citize...,worldnews,"September 9, 2017"
20540,German minister urges EU to standardize asylum...,BERLIN (Reuters) - Social benefits for asylum ...,worldnews,"September 9, 2017"
20555,Red Cross says staff member killed in South Su...,JUBA (Reuters) - The International Committee o...,worldnews,"September 9, 2017"


In [28]:
# Format date column
true_news["date"]=pd.to_datetime(true_news["date"])

In [29]:
# Preview date change
true_news.head(5)

Unnamed: 0,title,text,subject,date
10099,Trump sued in Kentucky by three who say he inc...,"LOUISVILLE, Ky. (Reuters) - Three people who c...",politicsNews,2016-04-01
10019,"In record year for political ads, media buyers...",(Reuters) - Media buyers expect record politic...,politicsNews,2016-04-01
10020,"Obama, Republicans urge Trump to soften tone",WASHINGTON (Reuters) - Donald Trump is facing ...,politicsNews,2016-04-01
10092,State Department says halts review of Clinton ...,WASHINGTON (Reuters) - The U.S. State Departme...,politicsNews,2016-04-01
10094,Obama: Trump doesn't know much about foreign p...,WASHINGTON (Reuters) - U.S. President Barack O...,politicsNews,2016-04-01


In [30]:
true_news.dtypes

title              object
text               object
subject            object
date       datetime64[ns]
dtype: object

In [31]:
# Split date column into year
true_news['year'] = true_news['date'].dt.year

In [32]:
true_news.head(5)

Unnamed: 0,title,text,subject,date,year
10099,Trump sued in Kentucky by three who say he inc...,"LOUISVILLE, Ky. (Reuters) - Three people who c...",politicsNews,2016-04-01,2016
10019,"In record year for political ads, media buyers...",(Reuters) - Media buyers expect record politic...,politicsNews,2016-04-01,2016
10020,"Obama, Republicans urge Trump to soften tone",WASHINGTON (Reuters) - Donald Trump is facing ...,politicsNews,2016-04-01,2016
10092,State Department says halts review of Clinton ...,WASHINGTON (Reuters) - The U.S. State Departme...,politicsNews,2016-04-01,2016
10094,Obama: Trump doesn't know much about foreign p...,WASHINGTON (Reuters) - U.S. President Barack O...,politicsNews,2016-04-01,2016


In [33]:
# Save clean dataset - commented out
#true_news.to_csv('data/clean-datasets/true_news.csv', index=False)