# 👔 **Fake News Detection**

---

In [None]:
# Notebook environment setup: install pinned releases of spaCy, BeautifulSoup
# and TextBlob, download the small English spaCy model, and install texthero
# (no version pin is given for texthero).
!pip install spacy==2.2.3 
!python -m spacy download en_core_web_sm
!pip install beautifulsoup4==4.9.1
!pip install textblob==0.15.3
!pip install texthero

In [None]:
import re
import timeit
from dataclasses import dataclass, field
from typing import Dict, List, Optional, Tuple

import gensim
import nltk
import numpy as np
import pandas as pd
import tensorflow as tf
import texthero as hero
from sklearn.metrics import classification_report, accuracy_score
from sklearn.model_selection import train_test_split
from tensorflow.keras.layers import Dense, Embedding, LSTM, Conv1D, MaxPool1D
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer

In [None]:
# Load the news dataset from a relative input path. Later cells read its
# ``description`` (text) and ``news_is`` (label) columns.
data = pd.read_csv('../input/fake-news-data/ml_learning_data.csv')

In [None]:
# Display the frame as the cell output.
data

In [None]:
# Preview the first five rows, shaded with the 'GnBu' colour map.
data.iloc[:5].style.background_gradient(cmap='GnBu')

## Data Transformation Functions

---

In [None]:
def clean_strings(dataframe: pd.DataFrame) -> pd.Series:
    """
    Clean the ``description`` column of a news dataframe with texthero.

    :param dataframe: frame that contains a ``description`` text column
    :returns: the ``description`` column after ``hero.clean`` has been applied
    """
    # Read from the argument, not from a module-level frame, so the helper
    # cleans whichever dataframe the caller hands in.
    return hero.clean(dataframe['description'])

In [None]:
def create_word2vec_model(x_list: List[List[str]], DIM=100) -> "gensim.models.Word2Vec":
    """
    Train a gensim Word2Vec model on a tokenised corpus and report its vocabulary size.

    :param x_list: corpus as one list of word tokens per document
    :param DIM: dimensionality of the learned word vectors
    :returns: the trained ``gensim.models.Word2Vec`` model
    """
    # gensim 4 renamed the ``size`` argument to ``vector_size`` and replaced
    # ``wv.vocab`` with ``wv.key_to_index``; use whichever spelling the
    # installed release understands.
    is_gensim4 = int(gensim.__version__.split(".")[0]) >= 4
    dim_kwarg = "vector_size" if is_gensim4 else "size"

    w2v_model = gensim.models.Word2Vec(
        sentences=x_list,
        window=10, min_count=1,
        **{dim_kwarg: DIM})

    vocabulary = w2v_model.wv.key_to_index if is_gensim4 else w2v_model.wv.vocab
    print(f"""
    Total Vocabularies created:
    {len(vocabulary)}""")

    return w2v_model

In [None]:
def tokenize_description(x: List[List[str]]) -> Tuple[List[List[int]], Tokenizer]:
    """
    Fit a Keras ``Tokenizer`` on the texts and encode them as integer sequences.

    :param x: texts to index; the notebook passes one list of word tokens
        per document
    :returns: ``(sequences, tokenizer)`` - the output of
        ``texts_to_sequences`` for ``x`` and the fitted tokenizer itself
    """
    tokenizer = Tokenizer()
    tokenizer.fit_on_texts(x)
    sequences = tokenizer.texts_to_sequences(x)

    return sequences, tokenizer

In [None]:
def padding_sequence(x, maxlen=1000):
    """
    Bring every sequence in ``x`` to a common length.

    :param x: integer-encoded sequences to pad or truncate
    :param maxlen: target length handed to ``pad_sequences``
    :returns: the result of ``pad_sequences(x, maxlen=maxlen)``
    """
    return pad_sequences(x, maxlen=maxlen)

## Classification Model

---

In [None]:
@dataclass
class NewsModel:
    """
    LSTM text classifier on top of frozen, pre-computed word embeddings.

    Call ``get_weight_matrix`` first and ``create_model`` second; the
    compiled network is then available as ``model``.

    :param DIM: width of the word vectors / embedding output
    :param maxlen: length of the padded input sequences
    """

    # Configuration. Annotated so ``@dataclass`` turns them into real
    # constructor arguments, e.g. ``NewsModel(DIM=300, maxlen=500)``.
    DIM: int = 100
    maxlen: int = 1000

    # State filled in by the methods below. Excluded from ``__init__``,
    # ``repr`` and ``==`` because arrays and models are bulky and an
    # element-wise array comparison does not reduce to a single boolean.
    vocab_size: Optional[int] = field(
        default=None, init=False, repr=False, compare=False)
    vocab: Optional[Dict[str, int]] = field(
        default=None, init=False, repr=False, compare=False)
    weight_matrix: Optional[np.ndarray] = field(
        default=None, init=False, repr=False, compare=False)
    model: Optional["Sequential"] = field(
        default=None, init=False, repr=False, compare=False)

    def get_weight_matrix(self, word_vec_model, tk_model):
        """
        Build the embedding weight matrix from a trained word-vector model.

        Row ``i`` receives the vector of the word whose index is ``i``.
        Rows that get no vector (index 0, and any word the word-vector
        model does not know) stay all-zero.

        :param word_vec_model: object exposing vectors through
            ``.wv[word]`` (the notebook passes the gensim Word2Vec model)
        :param tk_model: mapping of word -> integer index; despite the
            name, the notebook passes ``Tokenizer.word_index`` here
        """
        # create vocabulary from token
        self.vocab = tk_model
        # One extra row: Keras' ``word_index`` starts counting at 1, so
        # the largest index equals ``len(vocab)``.
        self.vocab_size = len(self.vocab) + 1

        weight_mat = np.zeros((self.vocab_size, self.DIM))
        word_vectors = word_vec_model.wv

        for word, idx in self.vocab.items():
            # Skip out-of-vocabulary words instead of failing with a
            # KeyError; their rows keep the zero initialisation.
            if word in word_vectors:
                weight_mat[idx] = word_vectors[word]

        self.weight_matrix = weight_mat
        print("Weigh Matrix Created")

    def create_model(self):
        """
        Build and compile the network and store it in ``self.model``.

        Layers: a non-trainable ``Embedding`` initialised from
        ``weight_matrix`` -> ``LSTM(128)`` -> ``Dense(1, sigmoid)``,
        compiled with the Adam optimizer, binary cross-entropy loss and
        accuracy as metric.

        :raises RuntimeError: if ``get_weight_matrix`` has not been run
        """
        if self.weight_matrix is None:
            raise RuntimeError(
                "call get_weight_matrix() before create_model()")

        # create sequential model
        model = Sequential()

        # add embedding weights (frozen: the vectors are not fine-tuned)
        model.add(Embedding(
            self.vocab_size,
            output_dim=self.DIM,
            weights=[self.weight_matrix],
            input_length=self.maxlen, trainable=False))

        model.add(LSTM(units=128))
        # single sigmoid unit -> probability for the binary label
        model.add(Dense(1, activation='sigmoid'))
        model.compile(optimizer='adam', loss='binary_crossentropy',
            metrics=['acc'])

        self.model = model

## Process Data

---

In [None]:
# separate our class variable: the labels come from the ``news_is`` column
y = data['news_is'].values

In [None]:
# clean x_features: run the text through ``clean_strings`` (texthero cleaning)
data_clean_description = clean_strings(data)
X = data_clean_description

In [None]:
# split x features: whitespace-split every cleaned text into a list of tokens
X = [text.split() for text in X.tolist()]

In [None]:
# create word2vec model: train word vectors on the tokenised texts
w2v_model = create_word2vec_model(X)

In [None]:
# Tokenize features: from here on X holds integer sequences and tk_model is
# the fitted Keras tokenizer
X, tk_model = tokenize_description(X)

In [None]:
# padding sequence: pad/truncate every sequence to the helper's default
# maxlen (1000)
X = padding_sequence(X)

## Create NewsModel Object

---

In [None]:
# bloomberg news model object (default DIM and maxlen)
bloomberg = NewsModel()

In [None]:
# Build the embedding weights from the word2vec vectors; ``word_index`` is
# the tokenizer's word -> integer index mapping.
bloomberg.get_weight_matrix(
    w2v_model,
    tk_model.word_index)

In [None]:
# Assemble and compile the network; it is stored on ``bloomberg.model``.
bloomberg.create_model()

## Model Summary

---

In [None]:
# Print the layer-by-layer summary of the compiled model.
bloomberg.model.summary()

## Train Test Split

---

In [None]:
# Hold out a test set. ``random_state`` fixes the shuffle so the split is
# reproducible; no ``test_size`` is given, so scikit-learn's default applies.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=25)

In [None]:
# Train for 3 epochs; 30% of the training data is set aside for validation.
bloomberg.model.fit(
    X_train, y_train, validation_split=0.3,
    epochs=3)

## Save Model

---

In [None]:
# Persist the trained model to a ``.h5`` file under ./tf_saved/model/.
bloomberg.model.save('./tf_saved/model/bloomberg_news_model.h5')

## Load Model

---

In [None]:
# Reload the model from the file written by the save cell; the cells below
# use this reloaded copy rather than ``bloomberg.model``.
bloomberg_model = tf.keras.models.load_model('./tf_saved/model/bloomberg_news_model.h5')

In [None]:
# Print the summary of the reloaded model.
bloomberg_model.summary()

## Prediction Score

---

In [None]:
# Turn the model outputs into 0/1 labels by thresholding at 0.5.
y_pred = (bloomberg_model.predict(X_test) >= 0.5).astype(int)

In [None]:
# Accuracy of the thresholded predictions on the held-out test set.
accuracy_score(y_test, y_pred)