---
    Gabriel Graells Solé - gabriel.graells01@estudiant.upf.edu
---
# Fake News Detector

In [None]:
import string
from collections import defaultdict

import nltk
import numpy as np
import pandas as pd
import tensorflow as tf
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.layers.experimental import preprocessing

In [None]:
# Load the raw Politifact export; the first CSV column becomes the row index.
FILENAME = "PolitifactDataset.csv"
data = pd.read_csv(f"data/{FILENAME}", index_col = 0)

# Preview the first rows.
data.head()

First we have to transform the **Tags** column so it can be used by a one-hot-encoding preprocessing layer. To do so we first retrieve all unique tags. Since many tags are formed by two words, we concatenate both words into a single one.

In [None]:
def tags_preprocessing(tags):
    """Collapse a comma-separated tag string into space-separated one-word tags.

    The piece after the last comma is discarded. Every remaining piece is
    stripped of surrounding whitespace and has its inner spaces removed, so
    "Small Business" becomes "SmallBusiness".
    """
    pieces = tags.split(",")
    pieces.pop()  # drop whatever follows the last comma
    return " ".join(piece.strip().replace(" ", "") for piece in pieces)

# Normalise the tag string of every row in place.
data["Tags"] = data["Tags"].apply(lambda x: tags_preprocessing(x))

Rename the **Rating** column to **Target** and simplify it to true or false, mapping each rating to a numerical value:

| Category  | Label |
|-------|---|
| True | 0 |
| False | 1 |

In [None]:
# Ratings that count as a true statement (label 0) and as a false one (label 1).
true = ['true','mostly-true','half-true']
false = ['false','barely-true','pants-fire']

def target_preprocessing(target):
    """Map a rating string to its binary label.

    Returns 0 when the rating is listed in `true`, 1 when it is listed in
    `false`, and None for any other value.
    """
    if target in true:
        return 0
    if target in false:
        return 1
    return None

# Rename the "Rating" column to "Target", then replace each rating with its binary label.
data = data.rename(columns = {"Rating": "Target"})
data["Target"] = data["Target"].apply(lambda x: target_preprocessing(x))

**Author** column will be one-hot-encoded. Since it contains authors with name and surname we will concatenate them to form one single term.

In [None]:
def author_preprocessing(author):
    """Return *author* with every space character removed ("Russ Feingold" -> "RussFeingold")."""
    return "".join(author.split(" "))

# Collapse every author name into a single token.
data["Author"] = data["Author"].apply(lambda x: author_preprocessing(x))

Saving the final preprocessed data.

In [None]:
# Preview the frame after the Tags, Target and Author preprocessing steps.
data.head()

Unnamed: 0,Title,Tags,Author,Target
0,"Says Ron Johnson referred to ""The Lego Movie"" ...",Corporations SmallBusiness Wisconsin RussFeingold,RussFeingold,0
1,"""Forty percent of the Fortune 500 were started...",Immigration National Economy SteveCase,SteveCase,0
2,"""United States of America is twenty-sixth in s...",NewJersey Education JimWhelan,JimWhelan,0
3,"Says Virginia Gov. Ralph Northam said, ‘You wi...",Fakenews FacebookFact-checks Guns Facebookposts,Facebookposts,1
4,Figures for September 2014’s job growth in Wis...,Jobs Wisconsin ScottWalker,ScottWalker,0


In [None]:
# Write the preprocessed frame to disk without the index column.
data.to_csv("data/data.csv", index = False)

## From Pandas dataframe to Tensorflow dataset

In [None]:
# Read the preprocessed dataset back from data/data.csv.
data = pd.read_csv("data/data.csv")

In [None]:
def df_to_dataset(dataframe, shuffle=True, batch_size=32):
    """Convert a DataFrame with a 'Target' column into a batched tf.data.Dataset.

    The input frame is copied, so the caller's frame keeps its 'Target' column.
    Each dataset element is a (features, label) pair where features is a dict
    keyed by column name.

    Args:
        dataframe: source frame; must contain a 'Target' column.
        shuffle: when True, shuffle with a buffer as large as the frame.
        batch_size: number of rows per batch.
    """
    features = dataframe.copy()
    targets = features.pop('Target')
    dataset = tf.data.Dataset.from_tensor_slices((dict(features), targets))
    if not shuffle:
        return dataset.batch(batch_size)
    return dataset.shuffle(buffer_size=len(features)).batch(batch_size)

In [None]:
#Split
# Hold out 20% of the rows for testing, then carve the validation set out of
# the remaining training rows. Splitting `data` a second time (as before)
# would put test rows back into train/val and leak them into training.
train, test = train_test_split(data, test_size=0.2)
train, val = train_test_split(train, test_size=0.2)

#Create datasets for testing
batch_size = 4

# Only the training set is shuffled; validation and test keep their row order.
train_ds = df_to_dataset(train, batch_size=batch_size)
val_ds = df_to_dataset(val, shuffle=False, batch_size=batch_size)
test_ds = df_to_dataset(test, shuffle=False, batch_size=batch_size)

In [None]:
############################################################
# Scratch cell: pull one batch out of the training dataset to experiment with.
[(train_features, label_batch)] = train_ds.take(1)
############################################################

# Preprocessing

https://www.tensorflow.org/guide/keras/preprocessing_layers

In [None]:
######################################################
# Scratch cell: one-hot encode the 'Author' column of the sample batch and
# display the shape of the result.
# NOTE(review): get_one_hot_author is defined in a later cell; run that cell first.
type_col = train_features['Author']
layer = get_one_hot_author(train_ds)
(layer(type_col).shape)
######################################################

## Author Preprocessing - One hot Encoding

In [None]:
def get_one_hot_author(dataset):
    """Build a callable that encodes 'Author' strings with a category encoding.

    A StringLookup is adapted on the 'Author' feature of *dataset* to map each
    string to an integer index, and a CategoryEncoding sized to that
    vocabulary is adapted on the resulting indices.

    Returns:
        A function taking a batch of author strings and returning the encoder
        output for their looked-up indices.
    """
    lookup = preprocessing.StringLookup()
    authors = dataset.map(lambda x, y: x['Author'])
    lookup.adapt(authors)

    one_hot = preprocessing.CategoryEncoding(max_tokens=lookup.vocab_size())
    one_hot.adapt(authors.map(lookup))

    def encode(feature):
        return one_hot(lookup(feature))

    return encode

## Tags Preprocessing - One hot Encoding

In [None]:
def get_one_hot_tags(dataset):
    """Return a binary-mode TextVectorization layer adapted on the 'Tags' feature of *dataset*."""
    vectorizer = preprocessing.TextVectorization(output_mode = "binary")
    tags_only = dataset.map(lambda x, y: x["Tags"])
    vectorizer.adapt(tags_only)
    return vectorizer

## Title Preprocessing - One hot Encoding

In [None]:
def get_one_hot_title(dataset):
    """Return a binary-mode TextVectorization layer adapted on the 'Title' feature of *dataset*."""
    vectorizer = preprocessing.TextVectorization(output_mode = "binary")
    titles_only = dataset.map(lambda x, y: x["Title"])
    vectorizer.adapt(titles_only)
    return vectorizer

---
## Explicit Feature Extraction

*"The textual information of fake news can reveal important signals for their credibility inference. Besides some shared words used in both true and false articles (or creators/subjects), a set of frequently used words can also be extracted from the article contents, creator profiles and subject descriptions of each category respectively."* [1]

[1] **FAKEDETECTOR: Effective Fake News Detection with Deep Diffusive Neural Network**

We will split the dataset into two, True and False. For each category we will retrieve the most common terms in `Title`, `Author` and `Tags`.

In [None]:
#Split
# Target 0 marks true statements and Target 1 false ones (see target_preprocessing).
data_true = data[data["Target"] == 0]
data_false = data[data["Target"] == 1]    

---
#### Title

The TextVectorization layer standardizes text by lowercasing it and removing punctuation. We apply a similar preprocessing step here, adding stopword removal and stemming, to extract common terms. Then we compute **TF-IDF**, treating all True news and all False news as two different documents.

In [None]:
nltk.download("stopwords")

# Shared stemmer and English stopword set used by getTerms.
stemming = PorterStemmer()
STOPWORDS = set(stopwords.words("english"))

def getTerms(terms):
    """Normalise a text into a space-joined string of stemmed terms.

    The text is lowercased, stripped of ASCII punctuation and split on
    whitespace; stopwords are dropped and the remaining words are stemmed.
    """
    no_punctuation = str.maketrans("", "", string.punctuation)
    words = terms.lower().translate(no_punctuation).split()
    return ' '.join(stemming.stem(word) for word in words if word not in STOPWORDS)

[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/gabrielgraells/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [None]:
def get_common_words_title(data_true, data_false, VEC_DIM = 40, THRESHOLD = 0.5):
    """Select the title terms that are most characteristic of each class.

    Every title is preprocessed with getTerms, all True titles and all False
    titles are merged into two documents, and both are scored with TF-IDF.
    The two rankings are then walked in lockstep (highest score first); a
    term is kept for a class when (own - other) / (own + other) of its two
    scores exceeds THRESHOLD.

    Args:
        data_true: frame of True rows; its "Title" column is read.
        data_false: frame of False rows; its "Title" column is read.
        VEC_DIM: maximum number of terms kept per class.
        THRESHOLD: minimum normalised score difference for a term to be kept.

    Returns:
        A tuple (most_common_true_terms, most_common_false_terms,
        list_common_terms): the two TF-IDF Series sorted in descending order,
        and the selected True terms followed by the selected False terms
        (at most VEC_DIM each, fewer when not enough terms pass the threshold).
    """
    # Preprocess every title, then merge each class into a single document.
    true_titles = ' '.join(data_true["Title"].apply(getTerms).values)
    false_titles = ' '.join(data_false["Title"].apply(getTerms).values)

    vectorizer = TfidfVectorizer()
    tf_idf = vectorizer.fit_transform([true_titles, false_titles])

    # get_feature_names() was removed in scikit-learn 1.2 in favour of
    # get_feature_names_out(); use whichever the installed version offers.
    if hasattr(vectorizer, "get_feature_names_out"):
        feature_names = vectorizer.get_feature_names_out()
    else:
        feature_names = vectorizer.get_feature_names()

    df_tfidfvect = pd.DataFrame(data = tf_idf.toarray(), index = ['True','False'], columns = feature_names).T

    most_common_true_terms  = df_tfidfvect["True"].sort_values(ascending = False)
    most_common_false_terms = df_tfidfvect["False"].sort_values(ascending = False)

    true_terms = []
    false_terms = []

    for true_term, false_term in zip(most_common_true_terms.keys(), most_common_false_terms.keys()):
        # Both lists are capped at VEC_DIM, so once both are full nothing more
        # can be added (the previous `>` comparison could never be true).
        if len(true_terms) >= VEC_DIM and len(false_terms) >= VEC_DIM:
            break

        # Scores of the current True-ranking term in both classes.
        true_true_val = most_common_true_terms[true_term]
        true_false_val = most_common_false_terms[true_term]

        # Scores of the current False-ranking term in both classes.
        false_true_val = most_common_true_terms[false_term]
        false_false_val = most_common_false_terms[false_term]

        true_diff_percentage = (true_true_val-true_false_val)/(true_true_val+true_false_val)
        false_diff_percentage = (false_false_val-false_true_val)/(false_true_val+false_false_val)

        if true_diff_percentage > THRESHOLD and len(true_terms) < VEC_DIM:
            true_terms.append(true_term)

        if false_diff_percentage > THRESHOLD and len(false_terms) < VEC_DIM:
            false_terms.append(false_term)

    return most_common_true_terms, most_common_false_terms, true_terms + false_terms

In [None]:
# Number of characteristic title terms kept per class.
TITLE_VEC_DIM = 40
most_common_true_terms, most_common_false_terms, list_common_terms = get_common_words_title(data_true, data_false, VEC_DIM = TITLE_VEC_DIM, THRESHOLD = 0.5)

In [None]:
# Print the first 10 terms of each ranking side by side, with both class scores.
count = 0
for true_k, false_k in zip(most_common_true_terms.keys(), most_common_false_terms.keys()):
    print(f"{true_k} - True: {most_common_true_terms[true_k]} -- False: {most_common_false_terms[true_k]}")
    print(f"{false_k} - True: {most_common_true_terms[false_k]} -- False: {most_common_false_terms[false_k]}")
    print("---")
    count += 1
    if count == 10:
        break

say - True: 0.4518524440263808 -- False: 0.6408749539617608
say - True: 0.4518524440263808 -- False: 0.6408749539617608
---
state - True: 0.2811871545095405 -- False: 0.186750745437761
state - True: 0.2811871545095405 -- False: 0.186750745437761
---
percent - True: 0.26498615823551014 -- False: 0.11390930884918986
obama - True: 0.11185345372755202 -- False: 0.1670813960918857
---
year - True: 0.2481193675940539 -- False: 0.16038084851252157
year - True: 0.2481193675940539 -- False: 0.16038084851252157
---
tax - True: 0.1948558181999815 -- False: 0.15800323485532786
tax - True: 0.1948558181999815 -- False: 0.15800323485532786
---
million - True: 0.13737557114554505 -- False: 0.08862014540449306
presid - True: 0.10830255043461386 -- False: 0.15432874102148303
---
peopl - True: 0.13049569601547736 -- False: 0.12558123043904992
trump - True: 0.06702329965420777 -- False: 0.12601352383126696
---
job - True: 0.12272809506217514 -- False: 0.08343262469788859
peopl - True: 0.13049569601547736 

---
#### Tags

Similarly to `Title`, we compute the **TF-IDF** of the tags for each dataset. Then we evaluate the percentage difference between the TF-IDF scores of the most common tags in each dataset. If the difference is above the threshold, the tag is stored for that particular dataset (target).

**A list of size `2 * VEC_DIM` is returned. The first `VEC_DIM` terms are True tags and the last `VEC_DIM` terms are False tags (the list is shorter when fewer tags pass the threshold).**

In [None]:
def get_most_common_tags(data_true, data_false, VEC_DIM = 20, THRESHOLD = 0.5):
    """Select the tags that are most characteristic of each class.

    All True tag strings and all False tag strings are merged into two
    documents and scored with TF-IDF. The two rankings are then walked in
    lockstep (highest score first); a tag is kept for a class when
    (own - other) / (own + other) of its two scores exceeds THRESHOLD.

    Args:
        data_true: frame of True rows; its "Tags" column is read.
        data_false: frame of False rows; its "Tags" column is read.
        VEC_DIM: maximum number of tags kept per class.
        THRESHOLD: minimum normalised score difference for a tag to be kept.

    Returns:
        A tuple (most_common_true_tags, most_common_false_tags,
        list_common_tags): the two TF-IDF Series sorted in descending order,
        and the selected True tags followed by the selected False tags
        (at most VEC_DIM each, fewer when not enough tags pass the threshold).
    """
    true_tags = ' '.join(data_true["Tags"].values)
    false_tags = ' '.join(data_false["Tags"].values)

    vectorizer = TfidfVectorizer()
    tf_idf = vectorizer.fit_transform([true_tags, false_tags])

    # get_feature_names() was removed in scikit-learn 1.2 in favour of
    # get_feature_names_out(); use whichever the installed version offers.
    if hasattr(vectorizer, "get_feature_names_out"):
        feature_names = vectorizer.get_feature_names_out()
    else:
        feature_names = vectorizer.get_feature_names()

    df_tfidfvect = pd.DataFrame(data = tf_idf.toarray(), index = ['True','False'], columns = feature_names).T

    most_common_true_tags  = df_tfidfvect["True"].sort_values(ascending = False)
    most_common_false_tags = df_tfidfvect["False"].sort_values(ascending = False)

    kept_true = []
    kept_false = []

    for true_tag, false_tag in zip(most_common_true_tags.keys(), most_common_false_tags.keys()):
        # Both lists are capped at VEC_DIM, so once both are full nothing more
        # can be added (the previous `>` comparison could never be true).
        if len(kept_true) >= VEC_DIM and len(kept_false) >= VEC_DIM:
            break

        # Scores of the current True-ranking tag in both classes.
        true_true_val = most_common_true_tags[true_tag]
        true_false_val = most_common_false_tags[true_tag]

        # Scores of the current False-ranking tag in both classes.
        false_true_val = most_common_true_tags[false_tag]
        false_false_val = most_common_false_tags[false_tag]

        true_diff_percentage = (true_true_val-true_false_val)/(true_true_val+true_false_val)
        false_diff_percentage = (false_false_val-false_true_val)/(false_true_val+false_false_val)

        if true_diff_percentage > THRESHOLD and len(kept_true) < VEC_DIM:
            kept_true.append(true_tag)

        if false_diff_percentage > THRESHOLD and len(kept_false) < VEC_DIM:
            kept_false.append(false_tag)

    return most_common_true_tags, most_common_false_tags, kept_true + kept_false

In [None]:
# Number of characteristic tags kept per class.
TAG_VEC_DIM = 20
most_common_true_tags, most_common_false_tags, list_common_tags = get_most_common_tags(data_true, data_false, VEC_DIM = TAG_VEC_DIM)

In [None]:
# Print the first 10 tags of each ranking side by side, with both class scores.
count = 0
for true_k, false_k in zip(most_common_true_tags.keys(), most_common_false_tags.keys()):
    print(f"{true_k} - True: {most_common_true_tags[true_k]} -- False: {most_common_false_tags[true_k]}")
    print(f"{false_k} - True: {most_common_true_tags[false_k]} -- False: {most_common_false_tags[false_k]}")
    print("---")
    count += 1
    if count == 10:
        break

national - True: 0.6400546663625211 -- False: 0.5502402311455473
national - True: 0.6400546663625211 -- False: 0.5502402311455473
---
economy - True: 0.2501944312831676 -- False: 0.13152719867366897
checks - True: 0.03034119185917143 -- False: 0.31175207090585333
---
texas - True: 0.20660906837435783 -- False: 0.1428320157497529
facebookfact - True: 0.03034119185917143 -- False: 0.31175207090585333
---
healthcare - True: 0.20636826526436441 -- False: 0.20761731360884933
healthcare - True: 0.20636826526436441 -- False: 0.20761731360884933
---
florida - True: 0.20612746215437097 -- False: 0.1532672315122919
punditfact - True: 0.09078277246752087 -- False: 0.17913787059025324
---
taxes - True: 0.19480971598468005 -- False: 0.14718002231747748
facebookposts - True: 0.029618782529191156 -- False: 0.17783346861993587
---
wisconsin - True: 0.18951204756482473 -- False: 0.17631166632123227
wisconsin - True: 0.18951204756482473 -- False: 0.17631166632123227
---
education - True: 0.1632645085755

---
#### Author

An analogous procedure is applied to `Author`.

In [None]:
def get_most_common_authors(data_true, data_false, VEC_DIM = 10, THRESHOLD = 0.5):
    """Select the authors that are most characteristic of each class.

    All True author strings and all False author strings are merged into two
    documents and scored with TF-IDF. The two rankings are then walked in
    lockstep (highest score first); an author is kept for a class when
    (own - other) / (own + other) of its two scores exceeds THRESHOLD.

    Args:
        data_true: frame of True rows; its "Author" column is read.
        data_false: frame of False rows; its "Author" column is read.
        VEC_DIM: maximum number of authors kept per class.
        THRESHOLD: minimum normalised score difference for an author to be kept.

    Returns:
        A tuple (most_common_true_authors, most_common_false_authors,
        list_common_authors): the two TF-IDF Series sorted in descending
        order, and the selected True authors followed by the selected False
        authors (at most VEC_DIM each, fewer when not enough pass the threshold).
    """
    true_authors = ' '.join(data_true["Author"].values)
    false_authors = ' '.join(data_false["Author"].values)

    vectorizer = TfidfVectorizer()
    tf_idf = vectorizer.fit_transform([true_authors, false_authors])

    # get_feature_names() was removed in scikit-learn 1.2 in favour of
    # get_feature_names_out(); use whichever the installed version offers.
    if hasattr(vectorizer, "get_feature_names_out"):
        feature_names = vectorizer.get_feature_names_out()
    else:
        feature_names = vectorizer.get_feature_names()

    df_tfidfvect = pd.DataFrame(data = tf_idf.toarray(), index = ['True','False'], columns = feature_names).T

    most_common_true_authors  = df_tfidfvect["True"].sort_values(ascending = False)
    most_common_false_authors = df_tfidfvect["False"].sort_values(ascending = False)

    kept_true = []
    kept_false = []

    for true_author, false_author in zip(most_common_true_authors.keys(), most_common_false_authors.keys()):
        # Both lists are capped at VEC_DIM, so once both are full nothing more
        # can be added (the previous `>` comparison could never be true).
        if len(kept_true) >= VEC_DIM and len(kept_false) >= VEC_DIM:
            break

        # Scores of the current True-ranking author in both classes.
        true_true_val = most_common_true_authors[true_author]
        true_false_val = most_common_false_authors[true_author]

        # Scores of the current False-ranking author in both classes.
        false_true_val = most_common_true_authors[false_author]
        false_false_val = most_common_false_authors[false_author]

        true_diff_percentage = (true_true_val-true_false_val)/(true_true_val+true_false_val)
        false_diff_percentage = (false_false_val-false_true_val)/(false_true_val+false_false_val)

        if true_diff_percentage > THRESHOLD and len(kept_true) < VEC_DIM:
            kept_true.append(true_author)

        if false_diff_percentage > THRESHOLD and len(kept_false) < VEC_DIM:
            kept_false.append(false_author)

    return most_common_true_authors, most_common_false_authors, kept_true + kept_false

In [None]:
# Number of characteristic authors kept per class.
AUTHOR_VEC_DIM = 10
most_common_true_authors, most_common_false_authors, list_common_authors = get_most_common_authors(data_true, data_false, VEC_DIM = AUTHOR_VEC_DIM)

In [None]:
# Print the first 10 authors of each ranking side by side, with both class scores.
count = 0
for true_k, false_k in zip(most_common_true_authors.keys(), most_common_false_authors.keys()):
    print(f"{true_k} - True: {most_common_true_authors[true_k]} -- False: {most_common_false_authors[true_k]}")
    print(f"{false_k} - True: {most_common_true_authors[false_k]} -- False: {most_common_false_authors[false_k]}")
    print("---")
    count += 1
    if count == 10:
        break

barackobama - True: 0.6325224174156848 -- False: 0.11007405476307154
facebookposts - True: 0.1839249582556247 -- False: 0.6167162794259762
---
donaldtrump - True: 0.35289666787258067 -- False: 0.4787467450311673
donaldtrump - True: 0.35289666787258067 -- False: 0.4787467450311673
---
hillaryclinton - True: 0.29607432304563974 -- False: 0.05729882302735231
bloggers - True: 0.07177559346560963 -- False: 0.40410748871922153
---
berniesanders - True: 0.1869156079833584 -- False: 0.03317300280530923
viralimage - True: 0.04785039564373975 -- False: 0.37168841779585116
---
facebookposts - True: 0.1839249582556247 -- False: 0.6167162794259762
chainemail - True: 0.02841117241347048 -- False: 0.11158191852694922
---
mittromney - True: 0.1644857350253554 -- False: 0.06031455055510769
barackobama - True: 0.6325224174156848 -- False: 0.11007405476307154
---
scottwalker - True: 0.15850443556988794 -- False: 0.068607801256435
tedcruz - True: 0.07177559346560963 -- False: 0.06936173313837385
---
ricks

---
# Model

## Explicit Feature Extraction

In [None]:
class Explicit_Feature(keras.layers.Layer):
    """Binary indicator layer over a fixed list of characteristic tokens.

    For every input string, position k of its output row is set to 1 when the
    k-th entry of list_common_elements occurs among the string's
    whitespace-separated tokens. Rows are 2 * dimension wide.

    Args:
        list_common_elements: tokens to look for; a token's list position is
            its column in the output.
        dimension: half the width of the output rows.

    NOTE: call() reads tensor values with .numpy(), so it needs eager
    execution, and build() sizes the output from the first batch it sees.
    NOTE(review): tokens are compared verbatim (case-sensitive), while the
    term lists built with TfidfVectorizer in this notebook are lowercase;
    confirm the inputs are normalised the same way.
    """
    def __init__(self, list_common_elements, dimension,**kwargs):
        super(Explicit_Feature, self).__init__(**kwargs)
        self.list_common_elements = list_common_elements
        self.dimension = dimension

    def build(self, input_shape):
        # One row per element of the batch; input_shape[0] is the batch size.
        self.explicit_features = tf.Variable(initial_value=tf.zeros(shape = (input_shape[0], 2*self.dimension)), trainable=False)

    def call(self, inputs):
        width = 2*self.dimension

        # First position of every token, mirroring list.index().
        positions = {}
        for position, token in enumerate(self.list_common_elements):
            positions.setdefault(token, position)

        # Clear the flags left by the previous call; without this, ones set
        # for earlier batches leak into every later result.
        self.explicit_features.assign(tf.zeros(shape = self.explicit_features.shape))

        for i, input_ in enumerate(inputs):
            tokens = input_.numpy().decode("utf-8").split()
            hits = [positions[token] for token in tokens if token in positions]
            if hits:
                row = np.zeros(width, dtype=np.float32)
                row[hits] = 1
                self.explicit_features[i].assign(row)

        return self.explicit_features

In [None]:
####################################################################################################
# TITLE
# Scratch cell: run the explicit-feature layer on the 'Title' column of the sample batch.
title_col = train_features['Title']
explicit = Explicit_Feature(list_common_elements = list_common_terms,dimension = TITLE_VEC_DIM)
tags_explicit_test = explicit(title_col)
tags_explicit_test
####################################################################################################

<tf.Variable 'explicit__feature/Variable:0' shape=(4, 80) dtype=float32, numpy=
array([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.

In [None]:
####################################################################################################
# TAGS
# Scratch cell: run the explicit-feature layer on a hand-written tag string.
# tags_col is read from the sample batch but not used below.
tags_col = train_features['Tags']
test_tag = tf.convert_to_tensor(["barackobama michigan"], dtype="string")
explicit = Explicit_Feature(list_common_elements = list_common_tags,dimension = TAG_VEC_DIM)
tags_explicit_test = explicit(test_tag)
tags_explicit_test
####################################################################################################

<tf.Variable 'explicit__feature_1/Variable:0' shape=(1, 40) dtype=float32, numpy=
array([[1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 1.]], dtype=float32)>

In [None]:
####################################################################################################
# AUTHORS
# Scratch cell: run the explicit-feature layer on the 'Author' column of the sample batch.
# test_col is built but not used; the layer is called on author_col.
author_col = train_features['Author']
test_col = tf.convert_to_tensor(["barackobama kellyanneconway"], dtype="string")
explicit = Explicit_Feature(list_common_elements = list_common_authors, dimension = AUTHOR_VEC_DIM)
author_implicit_test = explicit(author_col)
author_implicit_test
####################################################################################################

<tf.Variable 'explicit__feature_2/Variable:0' shape=(4, 20) dtype=float32, numpy=
array([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0.]], dtype=float32)>

---

## Latent Feature Extraction

*Besides those explicitly visible words about the news article content, creator profile and subject description, there also exist some hidden signals about articles, creators and subjects, e.g., news article content information inconsistency and profile/description latent patterns, which can be effectively detected from the latent features as introduced in [33]. Based on such an intuition, in this paper, we propose to further extract a set of latent features for news articles, creators and subjects based on the deep recurrent neural network model.*[1]

[1] **FAKEDETECTOR: Effective Fake News Detection with Deep Diffusive Neural Network**

*The latent feature extraction is based on RNN model (with the basic neuron cells), which has 3 layers (1 input layer, 1 hidden layer, and 1 fusion layer)*

* **Input Layer**: Vectorized Features
* **Hidden Layer**: GRU - Gated Recurrent Unit, [GRU](https://keras.io/api/layers/recurrent_layers/gru/)
* **Fusion Layer**: Sigmoid of weighted sum, [Dense Layer](https://www.tensorflow.org/api_docs/python/tf/keras/layers/Dense)

**About GRU tensor shape**

The input should be 3D: [batch, timestep, features]. An online [answer](https://stats.stackexchange.com/questions/264546/difference-between-samples-time-steps-and-features-in-neural-network) states the following:
* **Samples(batch)** - This is the len(dataX), or the amount of data points you have.

* **Time steps** - This is equivalent to the amount of time steps you run your recurrent neural network. If you want your network to have memory of 60 characters, this number should be 60.

* **Features** - this is the amount of features in every time step. If you are processing pictures, this is the amount of pixels. In this case you seem to have 1 feature per time step.

In [None]:
class Latent_Feature(keras.layers.Layer):
    """Latent feature extractor: a 32-unit GRU followed by a 64-unit sigmoid Dense layer."""
    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def build(self, input_shape):
        # Sub-layers are created when the layer is first built.
        self.GRU = layers.GRU(32)
        self.dense = layers.Dense(64, activation = "sigmoid")

    def call(self, inputs):
        hidden_state = self.GRU(inputs)
        fused = self.dense(hidden_state)
        return fused

## Hybrid Feature Extraction Unit - HFLU

In [None]:
class Hybrid_Feature_Extraction(keras.layers.Layer):
    """Hybrid feature unit: explicit token flags concatenated with latent GRU features.

    Args:
        vectorizer: callable that turns a batch of strings into a 2-D
            (batch, features) tensor, e.g. an adapted TextVectorization layer.
        list_common_elements: token list handed to Explicit_Feature.
        dimension: per-class vector size handed to Explicit_Feature.
    """
    def __init__(self,vectorizer, list_common_elements, dimension, **kwargs):
        super(Hybrid_Feature_Extraction, self).__init__(**kwargs)
        self.list_common_elements = list_common_elements
        self.dimension = dimension
        self.vectorizer = vectorizer

    def build(self, input_shape):
        self.explicit_feature = Explicit_Feature(list_common_elements = self.list_common_elements, dimension = self.dimension)
        self.latent_feature = Latent_Feature()

    def call(self, inputs):
        explicit = self.explicit_feature(inputs)
        vectorized_inputs = self.vectorizer(inputs)
        # Insert a single time step: (batch, features) -> (batch, 1, features).
        # -1 lets TensorFlow infer the batch size instead of relying on the
        # static shape, which is None when the batch dimension is unknown.
        vectorized_inputs = tf.reshape(vectorized_inputs, [-1, 1, vectorized_inputs.shape[1]])
        latent = self.latent_feature(vectorized_inputs)
        return layers.Concatenate()([explicit,latent])
       

In [None]:
####################################################################################################
# Scratch cell: run the hybrid feature unit on the 'Title' column of the sample batch.
type_col = train_features['Title']
vectorizer_layer = get_one_hot_title(train_ds)
HFLU = Hybrid_Feature_Extraction(vectorizer_layer, list_common_elements = list_common_terms, dimension = TITLE_VEC_DIM)
y = HFLU(type_col)
####################################################################################################

---
## Gated Diffusive Unit - GDU

*"Besides the HFLU feature learning unit model, FAKEDETECTOR also uses a gated diffusive unit (GDU) model for effective relationship modeling among news articles, creators and subjects."*

<center>
    <img src="https://drive.google.com/uc?id=1DlamGKtKVp9d9iM7JqVcNff-hzfyRG3v" alt="gdu" width="400"/>
</center>


GDU can be broken down into different gates:
* **Forget Gate**
<center>
    <img src="https://drive.google.com/uc?id=1BScZ2Z3GQFCW4OgCCas-mOFdxOtFguYg" alt="gdu" width="300"/>
</center>

* **Adjust Gate**
<center>
    <img src="https://drive.google.com/uc?id=1nxxbQQtoEBX_SxFRXAqw9vkivZP9Zu93" alt="gdu" width="300"/>
</center>

* **Final Output**
<center>
    <img src="https://drive.google.com/uc?id=1k1KsvR9nBt3s0xhDOYJbxtfX08oD4qum" alt="gdu" width="300"/>
</center>

    * **Selection Gates**
    
$$ r_{i} = \sigma (W_{r}[x_{i}^T,z_{i}^T,t_{i}^T]^T)$$
<center>
    <img src="https://drive.google.com/uc?id=1DUtX46xqATRJzHruR2Y2abOyllWK4Cw2" alt="gdu" width="300"/>
</center>

Where:

$x_i$ HFLU output for `Title`.

$z_i$ input from other GDU from `Tags`.

$t_i$ input from other GDU from `Author`.


**!!! Size of output vector for HFLU should be equal to size of the output of GDU!!!!**

### Forget Gate Title

In [None]:
"""
It is assumed that inputs has as first element x vector, second z vector and third t vector.
inputs[0] -> x
inputs[1] -> z
inputs[2] -> t
"""
class Forget_Gate_Title(keras.layers.Layer):
    """Forget gate of the title GDU.

    A sigmoid Dense layer is applied to the stacked inputs, its output is
    flattened to length input_shape[1], and the result is multiplied
    element-wise with inputs[1] (the z vector).
    """
    def __init__(self):
        super().__init__()

    def build(self, input_shape):
        self.shape = input_shape
        # Sized so that the Dense output flattens to input_shape[1] values.
        units = int(input_shape[1]/input_shape[0])
        self.dense = layers.Dense(units = units, activation = "sigmoid")

    def call(self, inputs):
        gate = tf.reshape(self.dense(inputs), (self.shape[1]))
        return layers.Multiply()([gate, inputs[1]])

### Adjust Gate Title

In [None]:
class Adjust_Gate_Title(keras.layers.Layer):
    """Adjust gate of the title GDU.

    A sigmoid Dense layer is applied to the stacked inputs, its output is
    flattened to length input_shape[1], and the result is multiplied
    element-wise with inputs[2] (the t vector).
    """
    def __init__(self):
        super().__init__()

    def build(self, input_shape):
        self.shape = input_shape
        # Sized so that the Dense output flattens to input_shape[1] values.
        units = int(input_shape[1]/input_shape[0])
        self.dense = layers.Dense(units = units, activation = "sigmoid")

    def call(self, inputs):
        gate = tf.reshape(self.dense(inputs), (self.shape[1]))
        return layers.Multiply()([gate, inputs[2]])

### GDU Title

In [None]:
class Gated_Diffusive_Unit_Title(keras.layers.Layer):
    """Gated diffusive unit for titles.

    The stacked input holds x (inputs[0]), z (inputs[1]) and t (inputs[2]).
    With z~ the forget-gate output, t~ the adjust-gate output and g, r the two
    selection gates, the result is

        g*r*tanh(W[x, z~, t~]) + (1-g)*r*tanh(W[x, z, t~])
        + g*(1-r)*tanh(W[x, z~, t]) + (1-g)*(1-r)*tanh(W[x, z, t])

    flattened to length input_shape[1]. One tanh Dense layer (W) is shared by
    all four terms.
    """
    def __init__(self):
        super().__init__()

    def build(self, input_shape):
        self.shape = input_shape
        self.units = int(input_shape[1]/input_shape[0])
        self.forget_gate = Forget_Gate_Title()
        self.adjust_gate = Adjust_Gate_Title()
        self.selection_gate_r = layers.Dense(units = self.units, activation = "sigmoid")
        self.selection_gate_g = layers.Dense(units = self.units, activation = "sigmoid")
        self.dense_tanh = layers.Dense(units = self.units, activation = "tanh")
        self.ones = tf.Variable(initial_value=tf.ones(shape = (self.units)), trainable=False)

    def call(self, inputs):
        def fused_tanh(first, second, third):
            # Re-stack three vectors into the input layout, then apply the shared tanh layer.
            stacked = layers.Concatenate()([first, second, third])
            stacked = tf.reshape(stacked, (self.shape[0], self.shape[1]))
            return self.dense_tanh(stacked)

        g = self.selection_gate_g(inputs)
        r = self.selection_gate_r(inputs)

        z_forgotten = self.forget_gate(inputs)
        t_adjusted = self.adjust_gate(inputs)

        tanh_plain = self.dense_tanh(inputs)
        tanh_z = fused_tanh(inputs[0], z_forgotten, inputs[2])
        tanh_t = fused_tanh(inputs[0], inputs[1], t_adjusted)
        tanh_z_t = fused_tanh(inputs[0], z_forgotten, t_adjusted)

        not_g = self.ones - g
        not_r = self.ones - r

        mixed = (
            g * r * tanh_z_t
            + not_g * r * tanh_t
            + g * not_r * tanh_z
            + not_g * not_r * tanh_plain
        )
        return tf.reshape(mixed, (self.shape[1]))

In [None]:
########################################################################
# Scratch cell: feed all-ones x, z and t vectors through the title GDU.
x = tf.ones((2,3))
z = tf.ones((2,3))
t = tf.ones((2,3))
# Concatenate the three (2, 3) tensors, then lay them out as a (3, 6) input.
inputs = layers.Concatenate()([x,z,t])
inputs = tf.reshape(inputs,(3,6))
gated_diffusive_unit_Title = Gated_Diffusive_Unit_Title()
gated_diffusive_unit_Title(inputs)

########################################################################

<tf.Tensor: shape=(6,), dtype=float32, numpy=
array([-0.4780119 ,  0.2760865 , -0.44729024,  0.11744387, -0.4620368 ,
        0.16545723], dtype=float32)>

### Forget & Adjust Gate Author

In [None]:
class Forget_Adjust_Gate(keras.layers.Layer):
    """Gate used as both the forget and the adjust gate of the author GDU.

    A sigmoid Dense layer is applied to the stacked inputs, its output is
    flattened to length input_shape[1], and the result is multiplied
    element-wise with inputs[1].
    """
    def __init__(self):
        super().__init__()

    def build(self, input_shape):
        self.shape = input_shape
        # Sized so that the Dense output flattens to input_shape[1] values.
        units = int(input_shape[1]/input_shape[0])
        self.dense = layers.Dense(units = units, activation = "sigmoid")

    def call(self, inputs):
        gate = tf.reshape(self.dense(inputs), (self.shape[1]))
        return layers.Multiply()([gate, inputs[1]])

### GDU Author

In [None]:
"""
inputs:
    inputs[0] -> raw data
    inputs[1] -> GDU Title output
"""
class Gated_Diffusive_Unit_Author(keras.layers.Layer):
    """Gated Diffusive Unit (GDU) for the Author branch.

    ``call`` takes a single 2-D tensor.  Row 0 is used as the raw data and
    row 1 is the vector the forget/adjust gates rescale (described in the
    notebook as the Title GDU output).  The layer returns

        g*r*tanh(W[z, t]) + (1-g)*r*tanh(W[x, t])
            + g*(1-r)*tanh(W[x, z]) + (1-g)*(1-r)*tanh(W[inputs])

    where ``z``/``t`` are the forget/adjust gate outputs, ``x`` is
    ``inputs[0]``, ``g``/``r`` are sigmoid selection gates and ``W`` is one
    shared tanh ``Dense`` layer.  The result has the shape produced by that
    Dense layer, i.e. ``(input_shape[0], units)``.
    """

    def __init__(self):
        super(Gated_Diffusive_Unit_Author, self).__init__()

    def build(self, input_shape):
        """Create the gates and the shared tanh projection."""
        self.units = int(input_shape[1] / input_shape[0])
        self.shape = input_shape
        self.forget_gate = Forget_Adjust_Gate()
        self.adjust_gate = Forget_Adjust_Gate()
        self.selection_gate_r = layers.Dense(units=self.units, activation="sigmoid")
        self.selection_gate_g = layers.Dense(units=self.units, activation="sigmoid")
        self.dense_tanh = layers.Dense(units=self.units, activation="tanh")
        # Constant vector of ones used to form the complementary gates (1 - g), (1 - r).
        self.ones = tf.Variable(initial_value=tf.ones(shape=(self.units)), trainable=False)

    def _tanh_of_pair(self, first, second):
        """Concatenate two vectors, restore the input layout and apply the shared tanh layer."""
        merged = layers.Concatenate()([first, second])
        merged = tf.reshape(merged, (self.shape[0], self.shape[1]))
        return self.dense_tanh(merged)

    def call(self, inputs):
        """Combine the four gated candidate states into the unit's output."""
        # NOTE(review): the layer named selection_gate_r feeds g and vice versa.
        # Both are identically configured Dense layers, so this is kept as in
        # the original; confirm which naming is intended.
        g = self.selection_gate_r(inputs)
        r = self.selection_gate_g(inputs)

        forget_output = self.forget_gate(inputs)  # z
        adjust_output = self.adjust_gate(inputs)  # t

        tanh_input = self.dense_tanh(inputs)
        tanh_z = self._tanh_of_pair(inputs[0], forget_output)
        tanh_t = self._tanh_of_pair(inputs[0], adjust_output)
        tanh_z_t = self._tanh_of_pair(forget_output, adjust_output)

        ones_g = tf.math.subtract(self.ones, g)
        ones_r = tf.math.subtract(self.ones, r)

        output_1 = tf.math.multiply(tf.math.multiply(g, r), tanh_z_t)
        output_2 = tf.math.multiply(tf.math.multiply(ones_g, r), tanh_t)
        output_3 = tf.math.multiply(tf.math.multiply(g, ones_r), tanh_z)
        output_4 = tf.math.multiply(tf.math.multiply(ones_g, ones_r), tanh_input)

        # The four gated terms are summed (as the Title GDU does); multiplying
        # them, as this block previously did, collapses the output towards zero.
        output = tf.math.add(output_1, output_2)
        output = tf.math.add(output, output_3)
        output = tf.math.add(output, output_4)

        return output

In [None]:
########################################################################
# Smoke test for the Author GDU: two all-ones (2, 3) tensors are joined by
# Concatenate and passed straight to the layer.
x, z = (tf.ones((2, 3)) for _ in range(2))
inputs = layers.Concatenate()([x, z])
gated_diffusive_unit_Author = Gated_Diffusive_Unit_Author()
# Last expression of the cell so the notebook displays the layer output.
gated_diffusive_unit_Author(inputs)
########################################################################

<tf.Tensor: shape=(2, 3), dtype=float32, numpy=
array([[3.3488273e-04, 4.7071858e-06, 2.0523685e-04],
       [1.6064513e-04, 2.1934145e-06, 4.3841763e-05]], dtype=float32)>

In [None]:
"""
inputs:
    inputs[0] -> Raw tensor from dataset 
    inputs[1] -> GDU output Title 
    inputs[2] -> GDU output Title (if exist!) 
"""

class Main_Layer_Author(keras.layers.Layer):
    """Author branch: hybrid feature extraction followed by the Author GDU.

    Args:
        vectorizer: forwarded unchanged to ``Hybrid_Feature_Extraction``.
        list_common_authors: forwarded unchanged to ``Hybrid_Feature_Extraction``.
        dimension: forwarded unchanged to ``Hybrid_Feature_Extraction``.
        name: Keras layer name.

    ``call`` receives a sequence: ``inputs[0]`` is the raw tensor from the
    dataset and ``inputs[1]`` is the output of the Title GDU.
    """

    def __init__(self, vectorizer, list_common_authors, dimension, name = "Main_Layer_Author"):
        # Forward the name so the layer is actually registered under it.
        super(Main_Layer_Author, self).__init__(name = name)
        self.vectorizer = vectorizer
        self.list_common_authors = list_common_authors
        self.dimension = dimension

    def build(self, input_shape):
        # Use the instance attribute; a bare `vectorizer` would silently depend
        # on a notebook global of the same name.
        self.HFLU = Hybrid_Feature_Extraction(self.vectorizer, self.list_common_authors, self.dimension)
        # NOTE(review): the original referenced `Gated_Diffusive_Unit`, which is
        # not defined in the visible notebook; the Author-specific unit is
        # assumed to be the intended class - confirm.
        self.GDU = Gated_Diffusive_Unit_Author()

    def call(self, inputs):
        x = self.HFLU(inputs[0])
        # NOTE(review): the original index was left blank (`inputs[]`).
        # inputs[1] (Title GDU output) is assumed; an optional inputs[2] is not
        # handled here - confirm the intended wiring and tensor shapes.
        x = layers.Concatenate()([x, inputs[1]])
        return self.GDU(x)

---
## Deep Diffusive Network Model
<center>
    <img src="https://drive.google.com/uc?id=182pvb9CBDtbXDeb_qd2E28idqJ2TNEiC" width="400">
</center>

All of the output vectors from the GDUs, $h_{u,j}$, $h_{n,i}$ and $h_{s,l}$, are fed into a `softmax` layer.

<center>
    <img src="img/softmax.png" width="200">
</center>

Where:

* $u$: `Author`
* $n$: `Title`
* $s$: `Tags`

The **loss function** used is the `cross-entropy` of each individual prediction, i.e.:
<center>
    <img src="img/losstitle.png" width="250">
</center>

<center>
    <img src="img/losstag.png" width="250">
</center>

<center>
    <img src="img/lossauthor.png" width="250">
</center>

Finally all the individual losses are combined to form the main **objective function** represented as follows:

<center>
    <img src="img/loss.png" width="350">
</center>

In [None]:
class Deep_Diffusive_Net(keras.Model):
    """Skeleton of the Deep Diffusive Network model.

    The constructor instantiates one HFLU/GDU pair per node of the Title,
    Tags and Author branches plus the softmax heads.  The forward pass has
    not been written yet, so ``call`` raises ``NotImplementedError``.

    Args:
        name: Keras model name.
        **kwargs: extra keyword arguments forwarded to ``keras.Model``.
    """

    def __init__(self, name ="DeepDiffusiveNet", **kwargs):
        # **kwargs must be part of the signature: it is forwarded below and
        # was previously an undefined name.
        super(Deep_Diffusive_Net, self).__init__(name = name, **kwargs)

        # TODO: every Hybrid_Feature_Extraction() below still needs its
        # constructor arguments.
        # NOTE(review): `Gated_Diffusive_Unit` is not defined in the visible
        # part of the notebook (only the _Title / _Author variants are) and the
        # `selfmax_*` attribute names look like typos of `softmax_*`; both are
        # left unchanged here - confirm before wiring up call().

        # Title branch
        self.HFLU_title_1 = Hybrid_Feature_Extraction()  # TODO: arguments
        self.GDU_title_1 = Gated_Diffusive_Unit()
        self.HFLU_title_2 = Hybrid_Feature_Extraction()  # TODO: arguments
        self.GDU_title_2 = Gated_Diffusive_Unit()
        self.selfmax_title_2 = layers.Softmax()
        self.HFLU_title_3 = Hybrid_Feature_Extraction()  # TODO: arguments
        self.GDU_title_3 = Gated_Diffusive_Unit()
        self.HFLU_title_4 = Hybrid_Feature_Extraction()  # TODO: arguments
        self.GDU_title_4 = Gated_Diffusive_Unit()
        self.softmax_title = layers.Softmax()

        # Tags branch
        self.HFLU_tags_1 = Hybrid_Feature_Extraction()  # TODO: arguments
        self.GDU_tags_1 = Gated_Diffusive_Unit()
        self.HFLU_tags_2 = Hybrid_Feature_Extraction()  # TODO: arguments
        self.GDU_tags_2 = Gated_Diffusive_Unit()
        self.HFLU_tags_3= Hybrid_Feature_Extraction()  # TODO: arguments
        self.GDU_tags_3 = Gated_Diffusive_Unit()
        self.selfmax_tags = layers.Softmax()

        # Author branch
        self.HFLU_author_1 = Hybrid_Feature_Extraction()  # TODO: arguments
        self.GDU_author_1 = Gated_Diffusive_Unit()
        self.HFLU_author_2 = Hybrid_Feature_Extraction()  # TODO: arguments
        self.GDU_author_2 = Gated_Diffusive_Unit()
        self.HFLU_author_3 = Hybrid_Feature_Extraction()  # TODO: arguments
        self.GDU_author_3 = Gated_Diffusive_Unit()
        self.selfmax_author = layers.Softmax()

    def call(self, inputs):
        # The original body returned an undefined name; fail with an explicit
        # message until the forward pass is implemented.
        raise NotImplementedError("Deep_Diffusive_Net.call is not implemented yet")

## Preprocessing Layers

In [None]:
# Batch the three splits; only the training split keeps df_to_dataset's
# default shuffle setting, validation and test are explicitly unshuffled.
batch_size = 256
train_ds = df_to_dataset(train, batch_size=batch_size)
val_ds, test_ds = (
    df_to_dataset(split, shuffle=False, batch_size=batch_size)
    for split in (val, test)
)

In [None]:
# Symbolic string inputs, one per text column.
tags_col = keras.Input(shape=(1,), name="Tags", dtype="string")
author_col = keras.Input(shape=(1,), name="Author", dtype="string")
title_col = keras.Input(shape=(1,), name="Title", dtype="string")
all_inputs = [tags_col, author_col, title_col]

# Preprocessing layers, each obtained from the training dataset.
tags_one_hot_layer = get_one_hot_tags(train_ds)
authors_one_hot_layer = get_one_hot_author(train_ds)
title_one_hot_layer = get_one_hot_title(train_ds)

# Encode every input with its matching preprocessing layer.
encoded_tags = tags_one_hot_layer(tags_col)
encoded_authors = authors_one_hot_layer(author_col)
encoded_title = title_one_hot_layer(title_col)
encoded_features = [encoded_tags, encoded_authors, encoded_title]