# Persian Fake News Detection with Deep Learning
### Author and Developer: 👩🏻‍💻 Shahrzad Bahmanyar
#### 📅 February 2024

## Import Libraries

In [None]:
import tensorflow as tf
from tensorflow.keras.layers import Input, Dense, Dropout, BatchNormalization, Concatenate
from tensorflow.keras.losses import SparseCategoricalCrossentropy
from tensorflow.keras.callbacks import ReduceLROnPlateau
from transformers import TFBertModel
import transformers
from PersianFakeNews import PersianFakeNewsUtility
from PersianFakeNewsConfig import PersianFakeNewsDetectionConfig
from keras.utils import plot_model

#### Experiment Settings

In [None]:
# Experiment configuration shared by every cell below.
Setting = {
    # Passed to the project utility as part of this dict; not read directly
    # by the visible cells.
    "Title": "Persian Fake News Detection",

    # The following values are interpolated into the model name and into the
    # output paths under "results/" and "models/".
    "TaskName": "PersianFakeNewsDetection",
    "Model": "Transformer",
    "Number": 8,

    # Also selects the embedding-model configuration object.
    "EmbeddingModel": "ParsBert",

    # NOTE(review): no visible cell reads this flag — confirm whether it is
    # consumed inside the project utility or is simply unused.
    "EmbeddingModel_Freeze": False,

    # Training length and batch size (the batch size is forwarded to the
    # dataset loader).
    "Epochs": 500,
    "BatchSize": 8,

    # Output-layer names / loss keys, in this order: three auxiliary tasks
    # followed by the main fake-news label.
    "Labels": [
        "offensive",
        "sentiment",
        "topic_assignment",
        "FAKE_label",
    ],
}

#### Utility and Configuration Setup

In [None]:
# Build the project helper from the full settings dict and the embedding-model
# configuration from the chosen embedding name.
utility = PersianFakeNewsUtility(Setting)
modelConfig = PersianFakeNewsDetectionConfig(Setting["EmbeddingModel"])

# Short aliases for the two settings reused throughout the notebook.
labels, batch_size = Setting["Labels"], Setting["BatchSize"]

#### System Summary

In [None]:
# Ask the project utility for its system summary. What exactly is reported is
# defined inside PersianFakeNewsUtility, which is not visible in this notebook.
utility.get_summary_system()

#### Load Datasets

In [None]:
# Load the "Train" and "Dev" splits (in that order) with the same label list
# and batch size; the dev split is later used as validation data.
train_dataset, dev_dataset = (
    utility.get_dataset(split_name, labels, batch_size)
    for split_name in ("Train", "Dev")
)

#### Load Embedding Model

In [None]:
# Both model inputs have a fixed length of 280 positions.
sequence_length = 280

# NOTE(review): the encoder is loaded with trainable=False even though
# Setting["EmbeddingModel_Freeze"] is False — confirm which one is intended.
bert_model = TFBertModel.from_pretrained(modelConfig.ModelName, trainable=False)

# Symbolic int32 inputs: token ids and the matching attention mask.
input_ids = Input(shape=(sequence_length,), dtype=tf.int32, name='input_ids')
attention_masks = Input(shape=(sequence_length,), dtype=tf.int32, name='attention_masks')

# Element 1 of the encoder output is the feature vector shared by every head
# (presumably BERT's pooled output — confirm against the transformers docs).
encoder_outputs = bert_model(input_ids, attention_mask=attention_masks)
embedding_layer = encoder_outputs[1]

## Model Architecture
### Auxiliary heads (offensive-language, sentiment, and topic classification) to improve the performance of the main model (fake news detection)

In [None]:
def _hidden_stack(features):
    """Run *features* through two Dense -> BatchNorm -> ReLU -> Dropout(0.2) stages.

    The first stage has 280 units and the second 64. Fresh layers are created
    on every call, so each caller gets its own independent weights.
    """
    hidden = features
    for units in (280, 64):
        hidden = Dense(units)(hidden)
        hidden = BatchNormalization()(hidden)
        hidden = tf.keras.activations.relu(hidden)
        hidden = Dropout(0.2)(hidden)
    return hidden


# Each auxiliary task gets its own hidden stack on top of the shared embedding
# plus a softmax output named after the corresponding entry in `labels`.
# x1 / x2 / x3 (the 64-unit hidden tensors) are kept because the fake-news
# head concatenates them later.

# Offensive-language head: 2 classes.
x1 = _hidden_stack(embedding_layer)
out_offensive = Dense(2, activation='softmax', name=labels[0])(x1)

# Sentiment head: 3 classes.
x2 = _hidden_stack(embedding_layer)
out_sentiment = Dense(3, activation='softmax', name=labels[1])(x2)

# Topic head: 10 classes.
x3 = _hidden_stack(embedding_layer)
out_topic = Dense(10, activation='softmax', name=labels[2])(x3)

### Fake news Detection

In [None]:
# Main-task branch computed directly from the shared embedding.
fake_features = Dense(280)(embedding_layer)
fake_features = BatchNormalization()(fake_features)
fake_features = tf.keras.activations.relu(fake_features)
fake_features = Dropout(0.2)(fake_features)

# Fuse the three auxiliary hidden representations (64 units each) with the
# main branch (280 units): 3 * 64 + 280 = 472 features.
merged = Concatenate()([x1, x2, x3, fake_features])

# First fusion stage keeps the concatenated width and applies no dropout.
merged = Dense(472)(merged)
merged = BatchNormalization()(merged)
merged = tf.keras.activations.relu(merged)

# Second fusion stage narrows to 64 units before the classifier.
merged = Dense(64)(merged)
merged = BatchNormalization()(merged)
merged = tf.keras.activations.relu(merged)
x = Dropout(0.2)(merged)

# Two-class softmax output, named after the fourth entry in `labels`.
out_fake = Dense(2, activation='softmax', name=labels[3])(x)

In [None]:
# Run identifier assembled from the experiment settings; used as the Keras
# model name.
_model_name = f"{Setting['Number']}_{Setting['TaskName']}_{Setting['Model']}_{Setting['EmbeddingModel']}_Epochs{Setting['Epochs']}_Batchs{Setting['BatchSize']}"

# Inputs and outputs in the order the model exposes them: the three auxiliary
# heads first, the fake-news head last.
model_inputs = [input_ids, attention_masks]
task_outputs = [out_offensive, out_sentiment, out_topic, out_fake]
model = tf.keras.models.Model(inputs=model_inputs, outputs=task_outputs, name=_model_name)

# One sparse categorical cross-entropy instance per output head, keyed by the
# head's layer name (the first four entries of `labels`).
losses = {
    head_name: SparseCategoricalCrossentropy()
    for head_name, _ in zip(labels, task_outputs)
}

#### Summary

In [None]:
# Multiply the learning rate by 0.2 after 5 epochs without improvement in
# val_loss. The floor has to sit below the optimizer's starting rate: the
# 'adam' string shortcut uses Keras' default learning rate of 0.001, so the
# previous min_lr=0.001 prevented the callback from ever lowering the rate.
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=5, min_lr=1e-6)

# Every output head is trained with its entry in `losses` and tracked with
# accuracy.
model.compile(optimizer='adam', loss=losses, metrics=['accuracy'])
model.summary()

In [None]:
# Destination of the architecture diagram (PNG under "results/").
model_plot_path = f"results/{Setting['Number']}_{Setting['TaskName']}_{Setting['Model']}_{Setting['EmbeddingModel']}_Epochs{Setting['Epochs']}_Batchs{Setting['BatchSize']}.png"

# Rendering options: top-to-bottom layout showing tensor shapes and layer
# names, nested models left collapsed, 300 dpi output.
plot_options = {
    "show_shapes": True,
    "show_layer_names": True,
    "rankdir": "TB",
    "expand_nested": False,
    "dpi": 300,
}
plot_model(model, to_file=model_plot_path, **plot_options)

#### Training

In [None]:
# Train on the training split with the dev split as validation data; the
# returned history object is kept for plotting further down.
history1 = model.fit(
    train_dataset,
    epochs=Setting["Epochs"],
    validation_data=dev_dataset,
    callbacks=[reduce_lr],
)

# Save the trained model as an HDF5 file under "models/", named after the
# experiment settings, and report where it was written.
model_path = f"models/{Setting['Number']}_{Setting['TaskName']}_{Setting['Model']}_{Setting['EmbeddingModel']}_Epochs{Setting['Epochs']}_Batchs{Setting['BatchSize']}.h5"
model.save(model_path)
print(f"✅ Saved model to: {model_path}")

#### Load from file

In [None]:
# model = tf.keras.models.load_model(model_path, custom_objects={"TFBertModel": transformers.TFBertModel})

# Results and Visualization

#### Plot Training History: accuracy and loss (training and validation data)

In [None]:
# Plot the training run recorded in history1 (the object returned by
# model.fit). Which curves are drawn is decided inside PersianFakeNewsUtility,
# which is not visible in this notebook.
utility.plot_history(history1)

#### Results and Plots for the Test Dataset (show and save)

In [None]:
# Hand the trained model to the project utility to generate predictions.
# NOTE(review): the section heading says this covers the test dataset, but the
# data source is chosen inside PersianFakeNewsUtility — confirm there.
utility.generate_predictions(model)