In [None]:
# Mount Google Drive at /content/drive (Colab-only API); the dataset is read from there.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import pandas as pd

# Load dataset from the mounted Drive folder.
# NOTE: the path is absolute and Colab-specific; it must be edited to run elsewhere.
df = pd.read_csv("/content/drive/MyDrive/FakeReviewDetectionProject/Dataset_Labelled_Extracted/Amazon_700f.csv")

# Display column names to check which feature / label columns are available
print(df.columns)

Index(['Review', 'Rating', 'Cleaned_Review', 'Review_Length', 'Sentence_Count',
       'Word_Diversity', 'Stopword_Ratio', 'Punctuation_Count',
       'Extreme_Rating', 'Deviation_From_Avg', 'Tokenized_Review', 'Embedding',
       'Fake_Label'],
      dtype='object')


In [None]:
# Peek at the raw 'Embedding' column and the Python type of its first cell.
# The recorded output shows the cells are strings, so they need parsing before use.
print(df['Embedding'].head())
print(type(df['Embedding'].iloc[0]))

0    [ 2.12379489e-02  8.78792182e-02 -3.02176088e-...
1    [ 0.08099937  0.081816   -0.29075378  0.025483...
2    [ 3.18480283e-02  1.28940806e-01 -3.68654728e-...
3    [ 1.21113881e-01  8.01701024e-02 -2.44912222e-...
4    [-2.90161204e-02  2.67002862e-02 -3.18417996e-...
Name: Embedding, dtype: object
<class 'str'>


In [None]:
import numpy as np

def convert_embedding(embedding_str, dim=700):
    """Parse one 'Embedding' cell into a fixed-length float32 vector.

    Args:
        embedding_str: normally the text form of a vector, e.g.
            "[ 0.02  0.08 -0.30 ...]" (whitespace- or comma-separated).
            An ndarray / list / tuple is also accepted so that re-running the
            conversion on an already converted column does not fail.
        dim: required vector length (default 700, the original hard-coded size).

    Returns:
        np.ndarray of shape (dim,) and dtype float32. A zero vector is returned
        when the value is missing (None / NaN), cannot be parsed as numbers,
        or does not contain exactly `dim` values.
    """
    fallback = np.zeros(dim, dtype=np.float32)

    if isinstance(embedding_str, str):
        # Drop surrounding whitespace and brackets; treat commas like spaces.
        tokens = embedding_str.strip().strip("[]").replace(",", " ").split()
        try:
            values = np.array([float(tok) for tok in tokens], dtype=np.float32)
        except ValueError:
            return fallback
    elif isinstance(embedding_str, (np.ndarray, list, tuple)):
        try:
            values = np.asarray(embedding_str, dtype=np.float32).ravel()
        except (TypeError, ValueError):
            return fallback
    else:
        # Missing cells arrive from pandas as float NaN (or None); they have no
        # .strip(), which previously raised an uncaught AttributeError.
        return fallback

    # Ensure correct size; a mismatch falls back to the zero vector.
    if values.size != dim:
        return fallback
    return values

# Apply conversion to every cell of the 'Embedding' column.
# NOTE: the column is overwritten in place, so the raw strings are no longer
# available in `df` after this cell has run.
df['Embedding'] = df['Embedding'].apply(convert_embedding)

In [None]:
# Re-inspect the column after conversion; the recorded output shows numpy.ndarray cells.
print(df['Embedding'].head())
print(type(df['Embedding'].iloc[0]))

0    [0.021237949, 0.08787922, -0.3021761, 2.997799...
1    [0.08099937, 0.081816, -0.29075378, 0.02548327...
2    [0.03184803, 0.1289408, -0.36865473, 0.1530441...
3    [0.12111388, 0.0801701, -0.24491222, 0.0087511...
4    [-0.02901612, 0.026700286, -0.318418, -0.04691...
Name: Embedding, dtype: object
<class 'numpy.ndarray'>


In [None]:
import pandas as pd
import numpy as np
from nltk.sentiment.vader import SentimentIntensityAnalyzer
import nltk

# Download the VADER lexicon that SentimentIntensityAnalyzer loads
nltk.download('vader_lexicon')

# Initialize Sentiment Analyzer
# NOTE(review): `sid` is not referenced by any other cell visible in this notebook — confirm it is still needed.
sid = SentimentIntensityAnalyzer()

# Preview the first rows of the DataFrame (this cell does not add or change any column)
df.head()


In [None]:
from imblearn.over_sampling import SMOTE
from sklearn.model_selection import train_test_split
import numpy as np

# Extract the seven hand-crafted numeric feature columns as a 2-D array
X_features = df[['Review_Length', 'Sentence_Count', 'Word_Diversity', 'Stopword_Ratio',
                 'Punctuation_Count', 'Extreme_Rating', 'Deviation_From_Avg']].values

# Stack the per-row embedding arrays into one 2-D matrix (one row per review)
X_embeddings = np.vstack(df['Embedding'].values)

# Concatenate extracted features with embeddings (hand-crafted columns first)
X = np.hstack((X_features, X_embeddings))
y = df['Fake_Label'].values
# The original ratings go through the same split so the re-rating metric can use them
original_ratings = df['Rating'].values

# Split features, labels and ratings together (80/20, fixed seed) so rows stay aligned.
# The split happens before any resampling.
X_train, X_test, y_train, y_test, Rating_train, Rating_test = train_test_split(X, y, original_ratings, test_size=0.2, random_state=42)

# SMOTE is currently disabled. To enable it, uncomment the two lines below and
# train on X_train_resampled / y_train_resampled instead of X_train / y_train.
# Apply SMOTE to fully balance classes (50-50)
# smote = SMOTE(sampling_strategy=1.0, random_state=42)
# X_train_resampled, y_train_resampled = smote.fit_resample(X_train, y_train)

In [None]:
# Zero-padding function
def pad_features(features, target_size):
    """Right-pad each row of a 2-D feature matrix with zeros up to `target_size` columns.

    Args:
        features: 2-D array-like of shape (n_rows, n_cols) with n_cols <= target_size.
        target_size: number of columns in the result.

    Returns:
        float64 ndarray of shape (n_rows, target_size); the first n_cols columns
        hold `features`, the remaining columns are zero.

    Raises:
        ValueError: if `features` is not 2-D or is wider than `target_size`
            (previously this surfaced as an opaque broadcast / index error).
    """
    features = np.asarray(features)
    if features.ndim != 2:
        raise ValueError(f"features must be 2-D, got {features.ndim}-D input")

    n_rows, n_cols = features.shape
    if n_cols > target_size:
        raise ValueError(
            f"features has {n_cols} columns, which exceeds target_size={target_size}"
        )

    padded = np.zeros((n_rows, target_size))  # Create zero matrix
    padded[:, :n_cols] = features  # Copy actual data into the leading columns
    return padded

# 784 = 28 * 28, so each padded row can later be reshaped into a 28x28x1 array
target_size = 784
# Apply padding to match 784 features
X_train_padded = pad_features(X_train, target_size)
X_test_padded = pad_features(X_test, target_size)

In [None]:
# Reshape into (28, 28, 1) for CNN; -1 lets NumPy infer the number of samples
X_train_reshaped = X_train_padded.reshape(-1, 28, 28, 1)
X_test_reshaped = X_test_padded.reshape(-1, 28, 28, 1)

# Confirm the resulting 4-D shapes
print("Reshaped X_train shape:", X_train_reshaped.shape)
print("Reshaped X_test shape:", X_test_reshaped.shape)

Reshaped X_train shape: (15989, 28, 28, 1)
Reshaped X_test shape: (3998, 28, 28, 1)


**Proposed CNN**

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPool2D, Flatten, Dense
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import numpy as np
import tensorflow as tf

def train_and_evaluate_proposed_CNN(X_train, X_test, y_train, y_test, Rating_test):
    """Train the proposed single-convolution CNN and print its test metrics.

    Architecture: Input(28, 28, 1) -> Conv2D(32, 3x3, relu) -> MaxPool2D(2x2)
    -> Flatten -> Dense(100, relu) -> Dense(1, sigmoid). The model is fitted
    for 100 epochs at batch size 16 with Adam (learning rate 1e-4) and binary
    cross-entropy; no validation data is passed to `fit`.

    Args:
        X_train, X_test: inputs matching the (28, 28, 1) input layer.
        y_train, y_test: binary labels for the two splits.
        Rating_test: original ratings, indexable by test-row position.

    Returns:
        Tuple (model, y_pred, history); y_pred is the model output thresholded
        at 0.5 and cast to int.
    """
    def rerating_accuracy(Rating_test, y_pred_labels):
        # A row predicted as label 1 has its rating replaced by 3; every other
        # row keeps its rating. Score = 1 - mean(|rating change| / 5).
        max_rating = 5
        adjusted_ratings = []
        for row, flag in enumerate(y_pred_labels):
            adjusted_ratings.append(3 if flag == 1 else Rating_test[row])
        change = np.abs(np.array(Rating_test) - np.array(adjusted_ratings))
        return 1 - np.mean(change / max_rating)

    # Network definition
    model = Sequential([
        tf.keras.layers.Input(shape=(28, 28, 1)),
        Conv2D(32, (3, 3), activation='relu'),
        MaxPool2D(2, 2),
        Flatten(),
        Dense(100, activation='relu'),
        Dense(1, activation='sigmoid'),  # single probability output
    ])

    # Optimiser and loss
    model.compile(
        loss=tf.keras.losses.BinaryCrossentropy(),
        optimizer=tf.keras.optimizers.Adam(
            learning_rate=0.0001, beta_1=0.9, beta_2=0.999, epsilon=1e-08
        ),
        metrics=['accuracy'],
    )

    # Training (verbose=1 prints one progress line per epoch)
    history = model.fit(X_train, y_train, epochs=100, batch_size=16, verbose=1)

    # Test-set predictions, thresholded at 0.5 into 0/1 labels
    y_pred = (model.predict(X_test) > 0.5).astype(int)

    re_rating_acc = rerating_accuracy(Rating_test, y_pred)

    # Classification metrics on the test split
    scores = [
        ('Accuracy', accuracy_score(y_test, y_pred)),
        ('Precision', precision_score(y_test, y_pred)),
        ('Recall', recall_score(y_test, y_pred)),
        ('F1 Score', f1_score(y_test, y_pred)),
        ('Re-Rating Accuracy', re_rating_acc),
    ]
    for label, value in scores:
        print(f'{label}: {value:.4f}')

    return model, y_pred, history

In [None]:
# Train the proposed CNN on the reshaped train split and print its test-set metrics.
# The returned `model`, `y_pred` and `history` overwrite any earlier values of those names.
model, y_pred, history = train_and_evaluate_proposed_CNN(X_train_reshaped, X_test_reshaped, y_train, y_test, Rating_test)

Epoch 1/100
[1m1000/1000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - accuracy: 0.7723 - loss: 0.4908
Epoch 2/100
[1m1000/1000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - accuracy: 0.8712 - loss: 0.3416
Epoch 3/100
[1m1000/1000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.8667 - loss: 0.3293
Epoch 4/100
[1m1000/1000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - accuracy: 0.8676 - loss: 0.3229
Epoch 5/100
[1m1000/1000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.8685 - loss: 0.3113
Epoch 6/100
[1m1000/1000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.8728 - loss: 0.3056
Epoch 7/100
[1m1000/1000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.8779 - loss: 0.3000
Epoch 8/100
[1m1000/1000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.8687 - loss: 0.3011
Epoch 9/100
[1m

**Mania DeepNN**

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization, Activation
from tensorflow.keras.constraints import maxnorm
from tensorflow.keras.utils import to_categorical
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import numpy as np

def classify_DeepNN(X_train, X_test, y_train, y_test, Rating_test):
    """Train the deeper CNN (five conv layers, softmax output) and print test metrics.

    Labels are one-hot encoded and the model is trained with categorical
    cross-entropy for 10 epochs at batch size 128. The test split is passed to
    `fit` as validation data, so per-epoch validation figures are test figures.

    Args:
        X_train, X_test: arrays that can be reshaped to (n, 28, 28, 1).
        y_train, y_test: integer class labels.
        Rating_test: original ratings, indexable by test-row position.

    Returns:
        Tuple (model, y_pred, history); y_pred holds the softmax outputs, one
        column per class.
    """
    num_classes = len(np.unique(y_train))

    # Cast to float32 (no value scaling is applied) and lay out as 28x28x1
    X_train = X_train.astype('float32')
    X_train = np.reshape(X_train, (X_train.shape[0], 28, 28, 1))
    X_test = X_test.astype('float32')
    X_test = np.reshape(X_test, (X_test.shape[0], 28, 28, 1))

    # One-hot encode the labels for the softmax head
    y_train = to_categorical(y_train, num_classes)
    y_test = to_categorical(y_test, num_classes)

    model = Sequential([
        # Conv block 1 (no pooling)
        Conv2D(32, (3, 3), input_shape=(28, 28, 1), padding='same', activation='relu'),
        Dropout(0.2),
        BatchNormalization(),
        # Conv block 2
        Conv2D(64, (3, 3), padding='same', activation='relu'),
        MaxPooling2D(pool_size=(2, 2)),
        Dropout(0.2),
        BatchNormalization(),
        # Conv block 3
        Conv2D(64, (3, 3), padding='same', activation='relu'),
        MaxPooling2D(pool_size=(2, 2)),
        Dropout(0.2),
        BatchNormalization(),
        # Conv block 4
        Conv2D(64, (3, 3), padding='same', activation='relu'),
        MaxPooling2D(pool_size=(2, 2)),
        Dropout(0.2),
        BatchNormalization(),
        # Conv block 5 (no pooling)
        Conv2D(128, (3, 3), padding='same', activation='relu'),
        Dropout(0.2),
        BatchNormalization(),
        # Classifier head
        Flatten(),
        Dropout(0.2),
        Dense(256, kernel_constraint=maxnorm(3), activation='relu'),
        Dropout(0.2),
        BatchNormalization(),
        Dense(128, kernel_constraint=maxnorm(3), activation='relu'),
        Dropout(0.2),
        BatchNormalization(),
        Dense(num_classes, activation='softmax'),
    ])

    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

    history = model.fit(
        X_train, y_train,
        validation_data=(X_test, y_test),
        epochs=10, batch_size=128, verbose=1,
    )

    # Class index with the highest probability, for predictions and ground truth
    y_pred = model.predict(X_test)
    y_pred_labels = np.argmax(y_pred, axis=1)
    y_test_labels = np.argmax(y_test, axis=1)

    # Re-rating accuracy: rows predicted as class 1 get rating 3, others keep theirs
    max_rating = 5
    adjusted = []
    for row, label in enumerate(y_pred_labels):
        adjusted.append(3 if label == 1 else Rating_test[row])
    adjusted_ratings = np.array(adjusted)
    re_rating_acc = 1 - np.mean(np.abs(Rating_test - adjusted_ratings) / max_rating)

    accuracy = accuracy_score(y_test_labels, y_pred_labels)
    precision = precision_score(y_test_labels, y_pred_labels, average='weighted')
    recall = recall_score(y_test_labels, y_pred_labels, average='weighted')
    f1 = f1_score(y_test_labels, y_pred_labels, average='weighted')

    print(f'Accuracy: {accuracy:.4f}')
    print(f'Precision: {precision:.4f}')
    print(f'Recall: {recall:.4f}')
    print(f'F1 Score: {f1:.4f}')
    print(f'Re-Rating Accuracy: {re_rating_acc:.4f}')

    return model, y_pred, history

In [None]:
# Train / evaluate the deeper CNN.
# NOTE: `classify_DeepNN` is defined twice in this notebook; this call runs whichever
# definition was executed most recently in the kernel.
model, y_pred, history = classify_DeepNN(X_train_reshaped, X_test_reshaped, y_train, y_test, Rating_test)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/10
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 43ms/step - accuracy: 0.7305 - loss: 0.5968 - val_accuracy: 0.7499 - val_loss: 0.4667
Epoch 2/10
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - accuracy: 0.8534 - loss: 0.3713 - val_accuracy: 0.8512 - val_loss: 0.3642
Epoch 3/10
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - accuracy: 0.8595 - loss: 0.3468 - val_accuracy: 0.8154 - val_loss: 0.4329
Epoch 4/10
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - accuracy: 0.8649 - loss: 0.3248 - val_accuracy: 0.8257 - val_loss: 0.3856
Epoch 5/10
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - accuracy: 0.8621 - loss: 0.3288 - val_accuracy: 0.8344 - val_loss: 0.3782
Epoch 6/10
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - accuracy: 0.8692 - loss: 0.3128 - val_accuracy: 0.8479 - val_loss: 0.3338
Epoch 7/10
[1m125/125[0m

In [None]:
# print(type(Rating_test), Rating_test.shape)
# print(type(y_pred), y_pred.shape)

In [None]:
def rerating_accuracy(Rating_test, y_pred):
    """Re-rating accuracy: 1 - mean(|original - adjusted rating| / 5).

    Rows predicted as label 1 have their rating replaced by 3; all other rows
    keep their original rating.

    Args:
        Rating_test: 1-D sequence of original ratings, one per prediction row.
        y_pred: model output in any of the forms produced in this notebook:
            * (n, k) with k > 1 -- per-class scores; the label is argmax over axis 1.
            * (n, 1) or (n,)    -- a single score or hard 0/1 label per row;
              the label is `value > 0.5`.
            Previously only the first form worked: a single-column input always
            produced label 0 (score 1.0) and 1-D input raised an axis error.

    Returns:
        The re-rating accuracy as a NumPy float.

    Raises:
        ValueError: if the number of predictions and ratings differ.
    """
    max_rating = 5
    ratings = np.asarray(Rating_test)
    scores = np.asarray(y_pred)

    # Convert y_pred to labels
    if scores.ndim == 2 and scores.shape[1] > 1:
        y_pred_labels = np.argmax(scores, axis=1)
    else:
        y_pred_labels = (scores.reshape(-1) > 0.5).astype(int)

    if y_pred_labels.shape[0] != ratings.shape[0]:
        raise ValueError(
            f"got {y_pred_labels.shape[0]} predictions for {ratings.shape[0]} ratings"
        )

    # Adjust ratings based on predicted labels
    adjusted_ratings = np.where(y_pred_labels == 1, 3, ratings)

    # Compute re-rating accuracy
    re_acc = 1 - np.mean(np.abs(ratings - adjusted_ratings) / max_rating)

    return re_acc
# Score whatever `y_pred` is currently in the kernel (set by the last model cell that ran)
re_rating_acc = rerating_accuracy(Rating_test, y_pred)

print(re_rating_acc)

0.9435217608804403


**CNN_LSTM_main_DCNN**

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, LeakyReLU
from tensorflow.keras.optimizers import Adam
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import numpy as np

def classify_DeepNN(X_train, X_test, y_train, y_test, Rating_test):
    """Train a one-conv-layer CNN with LeakyReLU and a sigmoid output; print test metrics.

    NOTE: this definition shadows the earlier `classify_DeepNN` (softmax
    version) defined in this notebook once its cell has run.

    The model is trained with binary cross-entropy for 10 epochs at batch size
    128; the test split is passed to `fit` as validation data.

    Args:
        X_train, X_test: arrays that can be reshaped to (n, 28, 28, 1).
        y_train, y_test: binary labels.
        Rating_test: original ratings aligned row-for-row with X_test.

    Returns:
        Tuple (model, y_pred, history); y_pred holds the raw sigmoid outputs
        of `model.predict` (not thresholded).
    """
    # Cast to float32 (no value scaling is applied)
    X_train = X_train.astype('float32')
    X_test = X_test.astype('float32')

    # Reshape for CNN input (28x28x1)
    X_train = np.reshape(X_train, (X_train.shape[0], 28, 28, 1))
    X_test = np.reshape(X_test, (X_test.shape[0], 28, 28, 1))

    # Create CNN model
    model = Sequential()
    model.add(Conv2D(32, kernel_size=(3, 3), activation='linear', input_shape=(28, 28, 1), padding='same'))
    model.add(LeakyReLU(alpha=0.1))
    model.add(MaxPooling2D((2, 2), padding='same'))
    model.add(Flatten())
    model.add(Dense(1, activation='sigmoid'))

    # Compile model
    adam = Adam()
    model.compile(optimizer=adam, loss='binary_crossentropy', metrics=['accuracy'])

    # Train model
    history = model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=10, batch_size=128, verbose=1)

    # Make predictions; flatten the (n, 1) output so each row yields one 0/1 label
    y_pred = model.predict(X_test)
    y_pred_labels = (y_pred > 0.5).astype(int).ravel()

    def rerating_accuracy(Rating_test, y_pred_labels):
        # Rows labelled 1 get rating 3; the rest keep their original rating.
        max_rating = 5
        ratings = np.asarray(Rating_test)
        adjusted_ratings = np.where(np.asarray(y_pred_labels) == 1, 3, ratings)
        return 1 - np.mean(np.abs(ratings - adjusted_ratings) / max_rating)

    # Fix: score the thresholded labels. The raw probabilities were passed
    # before, so `pred == 1` almost never matched and the metric stayed near 1.0.
    re_rating_acc = rerating_accuracy(Rating_test, y_pred_labels)

    # Compute evaluation metrics
    accuracy = accuracy_score(y_test, y_pred_labels)
    precision = precision_score(y_test, y_pred_labels, average='weighted')
    recall = recall_score(y_test, y_pred_labels, average='weighted')
    f1 = f1_score(y_test, y_pred_labels, average='weighted')

    print(f'Accuracy: {accuracy:.4f}')
    print(f'Precision: {precision:.4f}')
    print(f'Recall: {recall:.4f}')
    print(f'F1 Score: {f1:.4f}')
    print(f'Re-Rating Accuracy: {re_rating_acc:.4f}')

    return model, y_pred, history


In [None]:
# Train / evaluate the LeakyReLU CNN.
# NOTE: `classify_DeepNN` is defined twice in this notebook; this call runs whichever
# definition was executed most recently in the kernel.
model, y_pred, history = classify_DeepNN(X_train_reshaped, X_test_reshaped, y_train, y_test, Rating_test)

Epoch 1/10


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 11ms/step - accuracy: 0.7147 - loss: 0.5522 - val_accuracy: 0.8017 - val_loss: 0.4367
Epoch 2/10
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.8187 - loss: 0.4129 - val_accuracy: 0.8567 - val_loss: 0.3590
Epoch 3/10
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.8538 - loss: 0.3623 - val_accuracy: 0.8694 - val_loss: 0.3514
Epoch 4/10
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.8582 - loss: 0.3409 - val_accuracy: 0.8609 - val_loss: 0.3373
Epoch 5/10
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.8644 - loss: 0.3324 - val_accuracy: 0.8642 - val_loss: 0.3340
Epoch 6/10
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.8687 - loss: 0.3223 - val_accuracy: 0.8497 - val_loss: 0.3382
Epoch 7/10
[1m125/125[0m [32m━━━━━━

**Deep Learning ensemble**

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import numpy as np

def classify_Conv1D(X_train, X_test, y_train, y_test, Rating_test):
    """Train a two-layer Conv1D network with a sigmoid output; print test metrics.

    Each sample is treated as a sequence of scalar features: the input is
    reshaped to (samples, steps, 1), where `steps` is the number of values per
    sample (784 for the padded matrices used in this notebook). The model is
    trained with binary cross-entropy for 10 epochs at batch size 128; the
    test split is passed to `fit` as validation data.

    Args:
        X_train, X_test: feature arrays with the same number of values per sample.
        y_train, y_test: binary labels.
        Rating_test: original ratings aligned row-for-row with X_test.

    Returns:
        Tuple (model, y_pred, history); y_pred holds the raw sigmoid outputs
        of `model.predict` (not thresholded).
    """
    # Reshape to (samples, steps, channels=1); steps is inferred instead of hard-coding 784
    X_train = X_train.reshape((X_train.shape[0], -1, 1))
    X_test = X_test.reshape((X_test.shape[0], -1, 1))
    n_steps = X_train.shape[1]

    # Create Conv1D model
    model = Sequential()
    model.add(Conv1D(32, kernel_size=3, activation='relu', input_shape=(n_steps, 1)))
    model.add(Conv1D(64, kernel_size=3, activation='relu'))
    model.add(Dropout(0.5))
    model.add(MaxPooling1D(pool_size=2, padding='same'))
    model.add(Flatten())
    model.add(Dense(100, activation='relu'))
    model.add(Dense(50, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))

    # Compile model
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

    # Train model
    history = model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=10, batch_size=128, verbose=1)

    # Make predictions; flatten the (n, 1) output so each row yields one 0/1 label
    y_pred = model.predict(X_test)
    y_pred_labels = (y_pred > 0.5).astype(int).ravel()

    def rerating_accuracy(Rating_test, y_pred_labels):
        # Rows labelled 1 get rating 3; the rest keep their original rating.
        max_rating = 5
        ratings = np.asarray(Rating_test)
        adjusted_ratings = np.where(np.asarray(y_pred_labels) == 1, 3, ratings)
        return 1 - np.mean(np.abs(ratings - adjusted_ratings) / max_rating)

    # Fix: score the thresholded labels. The raw probabilities were passed
    # before, so `pred == 1` almost never matched and the metric stayed near 1.0.
    re_rating_acc = rerating_accuracy(Rating_test, y_pred_labels)

    # Compute evaluation metrics
    accuracy = accuracy_score(y_test, y_pred_labels)
    precision = precision_score(y_test, y_pred_labels, average='weighted')
    recall = recall_score(y_test, y_pred_labels, average='weighted')
    f1 = f1_score(y_test, y_pred_labels, average='weighted')

    print(f'Accuracy: {accuracy:.4f}')
    print(f'Precision: {precision:.4f}')
    print(f'Recall: {recall:.4f}')
    print(f'F1 Score: {f1:.4f}')
    print(f'Re-Rating Accuracy: {re_rating_acc:.4f}')

    return model, y_pred, history


In [None]:
# Sanity check: classify_Conv1D reshapes its input to (n, 784, 1), so each row needs 784 values
X_train_padded.shape

(15989, 784)

In [None]:
# Train / evaluate the Conv1D model on the flat padded matrices (not the 28x28x1 arrays)
model, y_pred, history = classify_Conv1D(X_train_padded, X_test_padded, y_train, y_test, Rating_test)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/10
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 31ms/step - accuracy: 0.7807 - loss: 0.4717 - val_accuracy: 0.8632 - val_loss: 0.3311
Epoch 2/10
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 15ms/step - accuracy: 0.8674 - loss: 0.3167 - val_accuracy: 0.8639 - val_loss: 0.3314
Epoch 3/10
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 14ms/step - accuracy: 0.8678 - loss: 0.3060 - val_accuracy: 0.8759 - val_loss: 0.3015
Epoch 4/10
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 14ms/step - accuracy: 0.8721 - loss: 0.2984 - val_accuracy: 0.8832 - val_loss: 0.2826
Epoch 5/10
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 15ms/step - accuracy: 0.8740 - loss: 0.2869 - val_accuracy: 0.8857 - val_loss: 0.2788
Epoch 6/10
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 14ms/step - accuracy: 0.8791 - loss: 0.2718 - val_accuracy: 0.8857 - val_loss: 0.2685
Epoch 7/10
[1m125/125

In [None]:
import random
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPool2D, Flatten, Dense
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# COOA Functions
def initialize(n, m, l, u):
    """Create the initial population as an (n, m) array of values in [l, u].

    Args:
        n: number of population members (rows).
        m: number of values per member (columns).
        l: lower bound of the search range.
        u: upper bound of the search range.

    Returns:
        np.ndarray of shape (n, m) with entries drawn uniformly from [l, u]
        using the `random` module.

    Fix: the previous scale factor `random.uniform(0, 10)` produced values up
    to `10 * (u - l) + l`, i.e. far outside the stated upper bound.
    """
    return np.array([[random.uniform(l, u) for _ in range(m)] for _ in range(n)])

def func(soln):
    """Return one placeholder fitness value per entry of `soln`.

    Each value comes from random.random(); the contents of `soln` are not
    inspected, only its length.
    """
    fitness = []
    for _ in range(len(soln)):
        fitness.append(random.random())  # Random fitness values
    return fitness

def Pos_identification(pop, t, X_bst):
    """Update every entry of `pop` in place and return the last updated entry.

    One random coefficient r1 in [0.1, 0.9] is drawn per call. Entry j is
    replaced by the average of two terms built from the current entry, the
    preceding entry (index j - 1, which wraps to the last entry for j == 0),
    `X_bst`, and the factor (1 - r1 * t).
    """
    r1 = random.uniform(0.1, 0.9)
    damping = 1 - r1 * t
    for j, current in enumerate(pop):
        toward_best = damping * (current + r1 * X_bst)
        from_previous = 2 * r1 * X_bst + pop[j - 1] * damping ** 2
        pop[j] = (toward_best + from_previous) / 2
    return pop[j]

def Pos_suitable(pop, t, X_bst, l, u):
    """Return a random adjustment drawn uniformly from [0.1, 0.9].

    None of the arguments are used by the computation.
    """
    return random.uniform(0.1, 0.9)

def pso(beta, updated1, updated2, pop, X_bst, u1, u2):
    """Compute a PSO-style update and return the value for the last entry of `pop`.

    One random coefficient r3 in [0.1, 0.9] is drawn per call. The update is
    evaluated for every entry, but each result overwrites the previous one, so
    only the last entry's value is returned. `updated1` and `updated2` are
    accepted but not used.
    """
    r3 = random.uniform(0.1, 0.9)
    for member in pop:
        gap = X_bst - member
        latest = beta * member + u1 * r3 * gap + u2 * r3 * gap
    return latest

def algm():
    """Run the COOA loop with fixed settings and return a single scalar.

    Settings: a 10 x 10 population initialised with bounds 1 and 5, beta 0.1,
    10 iterations. Fitness values come from `func`; `X_bst` is the smallest of
    them. For every population entry the three update helpers are called in
    turn; `pso` receives the bounds `l` and `u` as its `u1` / `u2` arguments.

    Returns:
        np.max of the value returned by the final `pso` call.
    """
    N, M, l, u = 10, 10, 1, 5
    beta = 0.1
    Max_itr = 10

    pop = initialize(N, M, l, u)
    fit = func(pop)
    X_bst = min(fit)

    for t in range(1, Max_itr + 1):
        for i in range(len(pop)):
            for j in range(len(pop[i])):
                # Keep the fitness-scaled value only when it falls below X_bst
                val = pop[i][j] * fit[i]
                if val < X_bst:
                    pop[i][j] = val
                updated1 = Pos_identification(pop[i - 1], t, X_bst)
                updated2 = Pos_suitable(pop[i], t, X_bst, l, u)
                updated3 = pso(beta, updated1, updated2, pop[i], X_bst, l, u)

    return np.max(updated3)  # Return optimized value

# CNN with COOA Integration
def train_and_evaluate_proposed_CNN_with_COOA(X_train, X_test, y_train, y_test, Rating_test):
    """Train the proposed CNN after rescaling its initial weights with `algm()`.

    Inputs are reshaped to (n, 28, 28, 1) and divided by 255. The network is
    the same single-conv CNN (Conv2D 32 -> MaxPool -> Flatten -> Dense 100 ->
    Dense 1 sigmoid) compiled with Adam (learning rate 1e-4) and binary
    cross-entropy. Before training, every initial weight array is multiplied
    by the scalar returned from a separate `algm()` call. Training runs for
    100 epochs at batch size 16.

    Args:
        X_train, X_test: arrays that can be reshaped to (n, 28, 28, 1).
        y_train, y_test: binary labels.
        Rating_test: original ratings, indexable by test-row position.

    Returns:
        Tuple (model, y_pred, history); y_pred is the model output thresholded
        at 0.5 and cast to int.
    """
    def rerating_accuracy(Rating_test, y_pred_labels):
        # Rows labelled 1 get rating 3; the rest keep their original rating.
        max_rating = 5
        adjusted_ratings = []
        for row, flag in enumerate(y_pred_labels):
            adjusted_ratings.append(3 if flag == 1 else Rating_test[row])
        change = np.abs(np.array(Rating_test) - np.array(adjusted_ratings))
        return 1 - np.mean(change / max_rating)

    # Reshape to 28x28x1 and divide by 255
    X_train = X_train.reshape(-1, 28, 28, 1) / 255.0
    X_test = X_test.reshape(-1, 28, 28, 1) / 255.0

    # Build CNN model layer by layer
    model = Sequential()
    model.add(tf.keras.layers.Input(shape=(28, 28, 1)))
    model.add(Conv2D(32, (3, 3), activation='relu'))
    model.add(MaxPool2D(2, 2))
    model.add(Flatten())
    model.add(Dense(100, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))  # Binary classification output

    # Compile model
    model.compile(
        loss=tf.keras.losses.BinaryCrossentropy(),
        optimizer=tf.keras.optimizers.Adam(
            learning_rate=0.0001, beta_1=0.9, beta_2=0.999, epsilon=1e-08
        ),
        metrics=['accuracy'],
    )

    # Apply COOA to model weights: one algm() draw per weight array
    scaled_weights = []
    for w in model.get_weights():
        scaled_weights.append(np.array(w) * algm())
    model.set_weights(scaled_weights)

    # Train model
    history = model.fit(X_train, y_train, epochs=100, batch_size=16, verbose=1)

    # Predict on test data and threshold at 0.5
    y_pred = (model.predict(X_test) > 0.5).astype(int)

    re_rating_acc = rerating_accuracy(Rating_test, y_pred)

    # Compute and print evaluation metrics
    scores = [
        ('Accuracy', accuracy_score(y_test, y_pred)),
        ('Precision', precision_score(y_test, y_pred)),
        ('Recall', recall_score(y_test, y_pred)),
        ('F1 Score', f1_score(y_test, y_pred)),
        ('Re-Rating Accuracy', re_rating_acc),
    ]
    for label, value in scores:
        print(f'{label}: {value:.4f}')

    return model, y_pred, history


In [None]:
# model, y_pred, history = train_and_evaluate_proposed_CNN_with_COOA(X_train_reshaped, X_test_reshaped, y_train_resampled, y_test, Rating_test)

In [None]:
import random
import numpy as np
import pandas as pd
import tensorflow as tf
import warnings, os
warnings.filterwarnings("ignore")
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dropout, Flatten, AveragePooling2D, MaxPool2D
from tensorflow.keras.layers import Dense, Conv2D
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)

def initialize(n, m, l, u):
    """Create the initial population as `n` lists of `m` values in [l, u].

    NOTE: this definition shadows the earlier `initialize` in this notebook
    (which returns a NumPy array) once this cell has run.

    Args:
        n: number of population members.
        m: number of values per member.
        l: lower bound of the search range.
        u: upper bound of the search range.

    Returns:
        List of `n` lists, each holding `m` floats drawn uniformly from [l, u]
        using the `random` module.

    Fix: the previous scale factor `random.uniform(0, 10)` produced values up
    to `10 * (u - l) + l`, i.e. far outside the stated upper bound.
    """
    return [[random.uniform(l, u) for _ in range(m)] for _ in range(n)]

def func(soln):
    """Return a list with one random.random() value per entry of `soln`.

    Only the length of `soln` is used; the fitness values are random.
    """
    return [random.random() for _ in range(len(soln))]  # random fit

def Pos_identification(pop, t, X_bst):
    """Rewrite each entry of `pop` in place; return the last rewritten entry.

    A single coefficient r1 is drawn from [0.1, 0.9]. Each entry becomes the
    mean of (a) the entry shifted by r1 * X_bst and scaled by (1 - r1 * t) and
    (b) 2 * r1 * X_bst plus the preceding entry scaled by (1 - r1 * t) ** 2.
    For index 0 the "preceding" entry is the last one (index -1).
    """
    r1 = random.uniform(0.1, 0.9)
    shrink = 1 - r1 * t
    idx = 0
    while idx < len(pop):
        first_term = shrink * (pop[idx] + r1 * X_bst)
        second_term = 2 * r1 * X_bst + pop[idx - 1] * shrink ** 2
        pop[idx] = (first_term + second_term) / 2
        j = idx
        idx += 1
    return pop[j]

def Pos_suitable(pop, t, X_bst, l, u):
    """Draw and return one value uniformly from [0.1, 0.9]; the arguments are unused."""
    adjustment = random.uniform(0.1, 0.9)
    return adjustment

def pso(beta, updated1, updated2, pop, X_bst, u1, u2):
    """Return the PSO-style update computed for the last entry of `pop`.

    A single coefficient r3 is drawn from [0.1, 0.9]. The update expression is
    evaluated for each entry in order, each result replacing the previous one,
    so the caller only receives the last. `updated1` and `updated2` are not used.
    """
    r3 = random.uniform(0.1, 0.9)
    for entry in pop:
        distance = X_bst - entry
        result = beta * entry + u1 * r3 * distance + u2 * r3 * distance
    return result

def COOA_algm():
    """Run the COOA loop with fixed settings and return a single scalar.

    Settings: a 10 x 10 population initialised with bounds 1 and 5, beta 0.1,
    10 iterations. Fitness values come from `func`; `X_bst` is the smallest of
    them. For every population entry the three update helpers are called in
    turn; `pso` receives the bounds `l` and `u` as its `u1` / `u2` arguments.

    Returns:
        np.max of the value returned by the final `pso` call.
    """
    N, M, l, u = 10, 10, 1, 5
    beta = 0.1
    Max_itr = 10
    pop = initialize(N, M, l, u)
    fit = func(pop)
    X_bst = min(fit)
    for t in range(1, Max_itr + 1):
        for i, member in enumerate(pop):
            for j in range(len(member)):
                # Keep the fitness-scaled value only when it falls below X_bst
                val = pop[i][j] * fit[i]
                pop[i][j] = val if val < X_bst else pop[i][j]
                updated1 = Pos_identification(pop[i - 1], t, X_bst)
                updated2 = Pos_suitable(pop[i], t, X_bst, l, u)
                updated3 = pso(beta, updated1, updated2, pop[i], X_bst, l, u)
    return np.max(updated3)

def rerating_accuracy(original_rating, pred):
    """Re-rating score used by `classify`.

    NOTE: this definition shadows the earlier `rerating_accuracy(Rating_test,
    y_pred)` in this notebook once this cell has run, and it uses a different
    formula.

    Each prediction is mapped to a re-rating (5 when the prediction equals 1,
    otherwise 4). The per-row term is
    `original_rating[i] / (max_rating * n) - re_rating[i]`, and the mean of
    those terms is returned.

    NOTE(review): the formula is kept exactly as originally written; it yields
    negative values, so it does not look like an accuracy in [0, 1]. Confirm
    the intended definition before relying on it.

    Fixes:
        * predictions were compared to the string '1', so numeric 1 / 1.0 never
          matched; numeric values and the string form are now both accepted;
        * the loop was hard-coded to the first 5 rows (IndexError for fewer
          rows, the rest ignored); all rows are now used.

    Args:
        original_rating: sequence of ratings convertible with int().
        pred: sequence of predictions, one per rating.

    Returns:
        Mean of the per-row terms, or NaN when `pred` is empty.
    """
    max_rating = 5

    def _is_flagged(p):
        # True for 1, 1.0, NumPy scalars equal to 1 and the legacy string '1'.
        try:
            return float(p) == 1.0
        except (TypeError, ValueError):
            return False

    Re_rating = [5 if _is_flagged(p) else 4 for p in pred]
    n_rows = len(Re_rating)
    if n_rows == 0:
        return float("nan")

    scale = 1 / (max_rating * n_rows)
    Re_acc = [scale * int(original_rating[i]) - Re_rating[i] for i in range(n_rows)]
    return np.mean(Re_acc)

def fetch_hyperparameters_LeNet(X_train, X_test, y_train, y_test):
    """Warm up a LeNet-style model with COOA-scaled weights and return fixed hyperparameters.

    The returned hyperparameters are constants; they do not depend on the
    training run performed here. `X_test` and `y_test` are accepted for
    interface compatibility but are not used.

    Fixes:
        * Output layer is `Dense(1, sigmoid)` with binary cross-entropy. A
          single-unit softmax always outputs 1.0, so the previous model and
          loss could not learn anything.
        * Weights are scaled array-by-array. Wrapping `get_weights()` in
          `np.array(...)` builds a ragged array, which recent NumPy rejects.
        * Inputs are reshaped with `np.reshape` instead of `np.resize`, which
          silently repeats or truncates data when sizes do not match; the
          unused test tensor (built with the train row count) was dropped.
        * The returned loss is binary cross-entropy, matching a single-output model.

    Args:
        X_train: training features with 784 values per sample.
        X_test: unused.
        y_train: binary training labels.
        y_test: unused.

    Returns:
        Tuple (batch_size, epochs, loss, beta_1, beta_2, lr, eps, verbose) =
        (16, 10, tf.keras.losses.binary_crossentropy, 0.9, 0.999, 0.0001, 1e-08, 0).

    Raises:
        ValueError: if `X_train` cannot be reshaped to (n, 28, 28, 1).
    """
    batch_size, epochs = 16, 10

    tr_data = np.reshape(X_train, (len(X_train), 28, 28, 1)) / 255
    tr_lab = np.array(y_train).reshape(-1, 1)

    model = Sequential([
        Conv2D(6, kernel_size=(3, 3), activation='relu', input_shape=(28, 28, 1)),
        AveragePooling2D(),
        Conv2D(16, kernel_size=(3, 3), activation='relu'),
        AveragePooling2D(),
        Flatten(),
        Dense(120, activation='relu'),
        Dense(84, activation='relu'),
        Dense(1, activation='sigmoid')
    ])
    model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])

    # One COOA draw scales every weight array (same single draw as before)
    scale = COOA_algm()
    model.set_weights([w * scale for w in model.get_weights()])

    model.fit(tr_data, tr_lab, batch_size=batch_size, epochs=epochs, verbose=0)
    return batch_size, epochs, tf.keras.losses.binary_crossentropy, 0.9, 0.999, 0.0001, 1e-08, 0

def train_CNN(X_train, X_test, y_train, y_test, batch_size, epochs, loss, beta_1, beta_2, lr, eps, verbose):
    """Train the single-conv CNN and return one hard 0/1 prediction per test row.

    Fixes:
        * Output layer is `Dense(1, sigmoid)`; a single-unit softmax always
          outputs 1.0.
        * `beta_1`, `beta_2`, `lr` and `eps` are now passed to Adam (they were
          ignored and Adam's defaults were used).
        * A categorical cross-entropy loss cannot train a single-unit output,
          so it is replaced by binary cross-entropy with a warning (the warning
          is hidden while this notebook's global warning filter is active).
        * Inputs are reshaped with `np.reshape` instead of `np.resize`, which
          silently repeats or truncates data when sizes do not match.
        * Predictions are thresholded at 0.5, because the caller compares them
          to the labels by equality.

    Args:
        X_train, X_test: feature arrays with 784 values per sample.
        y_train: binary training labels.
        y_test: unused; kept for interface compatibility.
        batch_size, epochs, verbose: forwarded to `model.fit`.
        loss: Keras loss (callable, object or name).
        beta_1, beta_2, lr, eps: Adam hyperparameters.

    Returns:
        1-D array with one value (0.0 or 1.0) per test row, in the dtype
        returned by `model.predict`.

    Raises:
        ValueError: if an input cannot be reshaped to (n, 28, 28, 1).
    """
    loss_name = str(getattr(loss, "__name__", None) or getattr(loss, "name", None) or loss).lower()
    if "categorical" in loss_name:
        warnings.warn(
            "train_CNN: categorical cross-entropy is not usable with a single sigmoid "
            "output; using binary_crossentropy instead."
        )
        loss = "binary_crossentropy"

    model = Sequential([
        Conv2D(32, (3, 3), activation='relu', input_shape=(28, 28, 1)),
        MaxPool2D(2, 2),
        Flatten(),
        Dense(100, activation='relu'),
        Dense(1, activation='sigmoid')
    ])
    optimizer = tf.keras.optimizers.Adam(learning_rate=lr, beta_1=beta_1, beta_2=beta_2, epsilon=eps)
    model.compile(loss=loss, optimizer=optimizer, metrics=['accuracy'])

    X_train1 = np.reshape(X_train, (len(X_train), 28, 28, 1))
    X_test1 = np.reshape(X_test, (len(X_test), 28, 28, 1))
    y_train1 = np.asarray(y_train).reshape(-1, 1)  # match the (n, 1) model output

    model.fit(X_train1, y_train1, epochs=epochs, batch_size=batch_size, verbose=verbose)

    probabilities = model.predict(X_test1).flatten()
    return (probabilities > 0.5).astype(probabilities.dtype)

def classify(X_train, X_test, y_train, y_test, org_rt, ACC, PRE, REC, FM, Re_Acc):
    """Train the CNN pipeline, append its test metrics to the given lists, return predictions.

    Fixes:
        * The confusion counts were wrong: every match was counted as a true
          positive and every mismatch as a false negative, with false
          positives and true negatives never set. That forced precision to 1
          and recall to equal accuracy. All four counts are now computed with
          label 1 as the positive class.
        * Predictions are thresholded at 0.5 before comparison, so the metrics
          are correct whether `train_CNN` returns scores or hard labels.
        * `rerating_accuracy` receives the labels as the strings '0' / '1',
          the form its `p == '1'` test expects.
        * An empty test set no longer raises ZeroDivisionError.

    Args:
        X_train, X_test, y_train, y_test: data splits; y_* are binary labels.
        org_rt: original ratings aligned with the test rows.
        ACC, PRE, REC, FM, Re_Acc: lists that receive accuracy, precision,
            recall, F-measure and re-rating score (one value appended to each).

    Returns:
        The prediction array exactly as returned by `train_CNN`.
    """
    batch_size, epochs, loss, beta_1, beta_2, lr, eps, verbose = fetch_hyperparameters_LeNet(X_train, X_test, y_train, y_test)
    predict = train_CNN(X_train, X_test, y_train, y_test, batch_size, epochs, loss, beta_1, beta_2, lr, eps, verbose)

    target = np.asarray(y_test).reshape(-1).astype(int)
    labels = (np.asarray(predict, dtype=float).reshape(-1) > 0.5).astype(int)

    # Confusion-matrix counts with label 1 as the positive class
    tp = int(np.sum((labels == 1) & (target == 1)))
    tn = int(np.sum((labels == 0) & (target == 0)))
    fp = int(np.sum((labels == 1) & (target == 0)))
    fn = int(np.sum((labels == 0) & (target == 1)))

    total = len(target)
    acc = (tp + tn) / total if total > 0 else 0
    pre = tp / (tp + fp) if (tp + fp) > 0 else 0
    recall = tp / (tp + fn) if (tp + fn) > 0 else 0
    fm = 2 * (pre * recall) / (pre + recall) if (pre + recall) > 0 else 0
    ACC.append(acc)
    PRE.append(pre)
    REC.append(recall)
    FM.append(fm)

    re_acc = rerating_accuracy(org_rt, [str(label) for label in labels])
    Re_Acc.append(re_acc)
    return predict
