# Fake News Project
The goal of this project is to create a fake news prediction system. Fake news is a major problem that can have serious negative effects on how people understand the world around them. You will work with a dataset containing real and fake news in order to train a simple and a more advanced classifier to solve this problem. This project covers the full Data Science pipeline, from data processing, to modelling, to visualization and interpretation.

In [23]:
import pandas as pd
import re
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import PorterStemmer
from functools import reduce


from cleantext import clean

def clean_text(text):
  clean_text = re.sub(r'(Jan(uary)?|Feb(ruary)?|Mar(ch)?|Apr(il)?|May|Jun(e)?|Jul(y)?|Aug(ust)?|Sep(tember)?|Oct(ober)?|Nov(ember)?|Dec(ember)?)\s+\d{1,2},\s+\d{4}', '<DATE>', text)
  clean_text = clean(clean_text,
    lower=True,
    no_urls=True, replace_with_url="<URL>",
    no_emails=True, replace_with_email="<EMAIL>",
    no_numbers=True, replace_with_number="<NUM>",
    no_line_breaks=True 
  )

  return text


def process_text(text):
  stop_words = set(stopwords.words('english'))
  word_tokens = word_tokenize(text)
  
  filtered = []
  for w in word_tokens:
    if w not in stop_words:
      filtered.append(w)

  ps = PorterStemmer()
  for w in filtered:
    w = ps.stem(w)
  print(filtered)




In [24]:
df = pd.read_csv('news_sample.csv')
clean_df = df.copy()
clean_df['content'] = clean_df['content'].apply(clean_text)
clean_df['content'] = clean_df['content'].apply(process_text)
clean_df.to_csv('clean_news_sample.csv')

['Sometimes', 'power', 'Christmas', 'make', 'wild', 'wonderful', 'things', '.', 'You', 'need', 'believe', 'Holy', 'Trinity', 'believe', 'positive', 'power', 'good', 'others', '.', 'The', 'simple', 'act', 'giving', 'without', 'receiving', 'lost', 'many', 'us', 'days', ',', 'worries', 'money', 'success', 'hold', 'us', 'back', 'giving', 'others', 'need', '.', 'One', 'congregation', 'Ohio', 'moved', 'action', 'power', 'sermon', 'given', 'church', 'Christmas', 'Eve', '.', 'The', 'pastor', 'Grand', 'Lake', 'United', 'Methodist', 'Church', 'Celina', ',', 'Ohio', 'gave', 'emotional', 'sermon', 'importance', 'understanding', 'message', 'Jesus', '.', 'For', 'many', 'religious', 'people', 'message', 'Jesus', 'help', 'others', ',', 'make', 'sure', 'people', 'suffering', 'get', 'help', 'need', 'enjoy', 'life', 'little', 'bit', '.', 'The', 'sermon', 'really', 'generosity', 'look', 'like', 'lives', '.', 'Jesus', 'lived', 'long', 'time', 'ago', 'acted', 'generously', 'fashion', 'time', '–', 'would', '