<a href="https://colab.research.google.com/github/Matheshmj/Fake_review_classification_and_topic_modelling/blob/main/DL_MODELS.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Preprocessing**

In [None]:
###Preprocessing
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Load data
# The fitted encoder is kept (instead of being discarded) so the
# label -> integer mapping can be checked through `label_encoder.classes_`;
# LabelEncoder numbers the classes in sorted order.
data = pd.read_csv('/content/Downloads_cleaned_review_data.csv')
label_encoder = LabelEncoder()
data['label_encoded'] = label_encoder.fit_transform(data['label'])

# Split data
# Missing reviews are replaced by an empty string before the cast: astype(str)
# on a NaN would otherwise produce the literal text "nan", which the tokenizer
# would then learn as an ordinary word.
X = data['joined_text'].fillna('').astype(str)
y = data['label_encoded']
# stratify=y keeps the class proportions the same in the train and test parts.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Tokenization and Padding
vocab_size = 10000   # passed to Tokenizer(num_words=...) and to the Embedding layers
max_length = 100     # every sequence is padded/truncated to this many tokens
embedding_dim = 100  # width of the embedding vectors used by the models

# The vocabulary is fitted on the training texts only; words unseen at fit
# time are mapped to the "<OOV>" token when the test texts are converted.
tokenizer = Tokenizer(num_words=vocab_size, oov_token="<OOV>")
tokenizer.fit_on_texts(X_train)

X_train_seq = tokenizer.texts_to_sequences(X_train)
X_test_seq = tokenizer.texts_to_sequences(X_test)

# Padding and truncation both happen at the end of each sequence.
# NOTE(review): the models in this notebook do not mask the padding, so the
# recurrent layers also step over trailing zeros -- consider padding='pre'.
X_train_pad = pad_sequences(X_train_seq, maxlen=max_length, padding='post', truncating='post')
X_test_pad = pad_sequences(X_test_seq, maxlen=max_length, padding='post', truncating='post')


### **RNN Model**

In [None]:
# RNN Model
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Dense, SimpleRNN, SpatialDropout1D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.metrics import classification_report

# Build RNN Model
# Embedding -> spatial dropout -> one SimpleRNN layer that returns only its
# final state -> small dense head ending in a single sigmoid unit.
# `input_length` is not passed to Embedding: it is deprecated in Keras 3, and
# the sequence length is taken from the padded input when the model is fitted.
model = Sequential([
    Embedding(vocab_size, embedding_dim),
    SpatialDropout1D(0.2),
    SimpleRNN(64, return_sequences=False),
    Dense(32, activation='relu'),
    Dense(1, activation='sigmoid')  # Binary classification
])

# Compile the Model
model.compile(optimizer=Adam(learning_rate=0.001),
              loss='binary_crossentropy',
              metrics=['accuracy'])

# Train the Model
# Stop once val_loss has not improved for 3 epochs and roll the weights back
# to the best epoch seen.
early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

# validation_split holds out 20% of the training data for the val_* metrics.
history = model.fit(X_train_pad, y_train,
                    epochs=10,
                    batch_size=64,
                    validation_split=0.2,
                    callbacks=[early_stopping])

# Evaluate the Model
# The single-unit output layer yields one probability per sample as a column;
# flatten it so the report receives a 1-D label vector, then threshold at 0.5.
y_prob = model.predict(X_test_pad).ravel()
y_pred = (y_prob > 0.5).astype("int32")
print("Classification Report for RNN:")
print(classification_report(y_test, y_pred, target_names=['CG', 'OR']))


Epoch 1/10




405/405 ━━━━━━━━━━━━━━━━━━━━ 21s 46ms/step - accuracy: 0.5244 - loss: 0.6895 - val_accuracy: 0.6044 - val_loss: 0.6604
Epoch 2/10
405/405 ━━━━━━━━━━━━━━━━━━━━ 22s 51ms/step - accuracy: 0.6134 - loss: 0.6486 - val_accuracy: 0.6363 - val_loss: 0.6331
Epoch 3/10
405/405 ━━━━━━━━━━━━━━━━━━━━ 40s 48ms/step - accuracy: 0.7338 - loss: 0.5464 - val_accuracy: 0.7046 - val_loss: 0.5816
Epoch 4/10
405/405 ━━━━━━━━━━━━━━━━━━━━ 21s 50ms/step - accuracy: 0.7777 - loss: 0.4903 - val_accuracy: 0.7151 - val_loss: 0.5700
Epoch 5/10
405/405 ━━━━━━━━━━━━━━━━━━━━ 19s 47ms/step - accuracy: 0.7720 - loss: 0.4929 - val_accuracy: 0.7879 - val_loss: 0.4744
Epoch 6/10
405/405 ━━━━━━━━━━━━━━━━━━━━ 22s 51ms/step - accuracy: 0.8450 - loss: 0.3915 - val_accuracy: 0.7861 - val_loss: 0.5030
Epoch 7/10
405/405

### **LSTM Model**

In [None]:
# LSTM Model
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Dense, LSTM, SpatialDropout1D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.metrics import classification_report

# Build LSTM Model
# Embedding -> spatial dropout -> one LSTM layer that returns only its final
# state -> small dense head ending in a single sigmoid unit.
# `input_length` is not passed to Embedding: it is deprecated in Keras 3, and
# the sequence length is taken from the padded input when the model is fitted.
model = Sequential([
    Embedding(vocab_size, embedding_dim),
    SpatialDropout1D(0.2),
    LSTM(64, return_sequences=False),
    Dense(32, activation='relu'),
    Dense(1, activation='sigmoid')  # Binary classification
])

# Compile the Model
model.compile(optimizer=Adam(learning_rate=0.001),
              loss='binary_crossentropy',
              metrics=['accuracy'])

# Train the Model
# Stop once val_loss has not improved for 3 epochs and roll the weights back
# to the best epoch seen.
early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

# validation_split holds out 20% of the training data for the val_* metrics.
history = model.fit(X_train_pad, y_train,
                    epochs=10,
                    batch_size=64,
                    validation_split=0.2,
                    callbacks=[early_stopping])

# Evaluate the Model
# The single-unit output layer yields one probability per sample as a column;
# flatten it so the report receives a 1-D label vector, then threshold at 0.5.
y_prob = model.predict(X_test_pad).ravel()
y_pred = (y_prob > 0.5).astype("int32")
print("Classification Report for LSTM:")
print(classification_report(y_test, y_pred, target_names=['CG', 'OR']))


Epoch 1/10




405/405 ━━━━━━━━━━━━━━━━━━━━ 54s 127ms/step - accuracy: 0.5344 - loss: 0.6762 - val_accuracy: 0.7296 - val_loss: 0.5682
Epoch 2/10
405/405 ━━━━━━━━━━━━━━━━━━━━ 50s 124ms/step - accuracy: 0.7963 - loss: 0.4640 - val_accuracy: 0.8688 - val_loss: 0.3127
Epoch 3/10
405/405 ━━━━━━━━━━━━━━━━━━━━ 83s 127ms/step - accuracy: 0.8963 - loss: 0.2514 - val_accuracy: 0.8943 - val_loss: 0.2578
Epoch 4/10
405/405 ━━━━━━━━━━━━━━━━━━━━ 82s 128ms/step - accuracy: 0.9341 - loss: 0.1696 - val_accuracy: 0.8958 - val_loss: 0.2485
Epoch 5/10
405/405 ━━━━━━━━━━━━━━━━━━━━ 52s 128ms/step - accuracy: 0.9544 - loss: 0.1205 - val_accuracy: 0.8941 - val_loss: 0.2562
Epoch 6/10
405/405 ━━━━━━━━━━━━━━━━━━━━ 82s 128ms/step - accuracy: 0.9613 - loss: 0.1015 - val_accuracy: 0.8943 - val_loss: 0.2991
Epoch 7/10
405/40

### **BiLSTM Model**

In [None]:
# BiLSTM Model
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Dense, LSTM, Bidirectional, SpatialDropout1D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.metrics import classification_report

# Build BiLSTM Model
# Embedding -> spatial dropout -> one LSTM wrapped in Bidirectional (the
# wrapped LSTM returns only its final state) -> small dense head ending in a
# single sigmoid unit.
# `input_length` is not passed to Embedding: it is deprecated in Keras 3, and
# the sequence length is taken from the padded input when the model is fitted.
model = Sequential([
    Embedding(vocab_size, embedding_dim),
    SpatialDropout1D(0.2),
    Bidirectional(LSTM(64, return_sequences=False)),
    Dense(32, activation='relu'),
    Dense(1, activation='sigmoid')  # Binary classification
])

# Compile the Model
model.compile(optimizer=Adam(learning_rate=0.001),
              loss='binary_crossentropy',
              metrics=['accuracy'])

# Train the Model
# Stop once val_loss has not improved for 3 epochs and roll the weights back
# to the best epoch seen.
early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

# validation_split holds out 20% of the training data for the val_* metrics.
history = model.fit(X_train_pad, y_train,
                    epochs=10,
                    batch_size=64,
                    validation_split=0.2,
                    callbacks=[early_stopping])

# Evaluate the Model
# The single-unit output layer yields one probability per sample as a column;
# flatten it so the report receives a 1-D label vector, then threshold at 0.5.
y_prob = model.predict(X_test_pad).ravel()
y_pred = (y_prob > 0.5).astype("int32")
print("Classification Report for BiLSTM:")
print(classification_report(y_test, y_pred, target_names=['CG', 'OR']))


Epoch 1/10




405/405 ━━━━━━━━━━━━━━━━━━━━ 95s 223ms/step - accuracy: 0.7706 - loss: 0.4316 - val_accuracy: 0.9079 - val_loss: 0.2197
Epoch 2/10
405/405 ━━━━━━━━━━━━━━━━━━━━ 92s 227ms/step - accuracy: 0.9327 - loss: 0.1650 - val_accuracy: 0.9122 - val_loss: 0.2091
Epoch 3/10
405/405 ━━━━━━━━━━━━━━━━━━━━ 141s 226ms/step - accuracy: 0.9579 - loss: 0.1075 - val_accuracy: 0.9068 - val_loss: 0.2212
Epoch 4/10
405/405 ━━━━━━━━━━━━━━━━━━━━ 142s 225ms/step - accuracy: 0.9706 - loss: 0.0754 - val_accuracy: 0.9068 - val_loss: 0.2759
Epoch 5/10
405/405 ━━━━━━━━━━━━━━━━━━━━ 141s 223ms/step - accuracy: 0.9786 - loss: 0.0590 - val_accuracy: 0.9054 - val_loss: 0.2750
253/253 ━━━━━━━━━━━━━━━━━━━━ 7s 25ms/step
Classification Report for BiLSTM:
              precision    recall  f1-score   support

          CG      