## Dataset Splitting

In [None]:
from sklearn.model_selection import train_test_split

# Source table for modelling; `combined_df1_tfidf` is defined outside this section.
df = combined_df1_tfidf

# Separate the target column from the feature matrix (every other column).
y = df['label_Fake']
X = df.drop(columns=['label_Fake'])

# Hold out 30% of the rows for testing. The split is stratified on the target
# so both partitions keep the same class balance, and seeded so it is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
    stratify=y,
)

# Report the shape of each partition as a quick sanity check.
for _name, _part in (
    ("X_train", X_train),
    ("X_test", X_test),
    ("y_train", y_train),
    ("y_test", y_test),
):
    print(f"{_name} shape: {_part.shape}")


X_train shape: (14716, 1013)
X_test shape: (6308, 1013)
y_train shape: (14716,)
y_test shape: (6308,)


## Neural Networks

### Train and Evaluate the Initial NN Model

In [None]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

# Seed Python, NumPy and TensorFlow so weight initialisation and batch
# shuffling are repeatable on a fresh kernel (same seed as the data split).
tf.keras.utils.set_random_seed(42)

# NOTE(review): X_train_resampled / y_train_resampled are not created in this
# section -- presumably they come from an earlier resampling step; confirm they
# were derived from the current X_train / y_train split.
n_features = X_train_resampled.shape[1]

# Baseline feed-forward network: two ReLU hidden layers followed by a single
# sigmoid unit for binary classification.
# An explicit Input object replaces the `input_dim=` layer argument, which
# newer Keras releases warn about when used on Sequential models.
model = Sequential([
    tf.keras.Input(shape=(n_features,)),
    Dense(128, activation='relu'),   # first hidden layer
    Dense(64, activation='relu'),    # second hidden layer
    Dense(1, activation='sigmoid'),  # output layer
])

# Compile the model
# - Binary crossentropy is used for binary classification
# - 'adam' selects the Adam optimizer with its default learning rate
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train the model
# NOTE(review): the held-out test set doubles as validation data here. It is
# only used to monitor per-epoch metrics (no callback acts on it), but any
# tuning decision taken from those metrics is informed by test data.
model.fit(
    X_train_resampled,
    y_train_resampled,
    epochs=10,
    batch_size=32,
    validation_data=(X_test, y_test),
)

# Make predictions on the test set
y_pred_prob = model.predict(X_test)
# Threshold the sigmoid scores at 0.5 and flatten to a 1-D vector of 0/1 labels.
y_pred = (y_pred_prob > 0.5).astype("int32").ravel()

# Evaluate the Neural Network model on the test set
# NOTE(review): with average='weighted', recall is by definition equal to
# accuracy, so the "Recall" line repeats the accuracy value. Kept as-is here;
# consider average='binary' if per-class figures are wanted.
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='weighted')
recall = recall_score(y_test, y_pred, average='weighted')
f1 = f1_score(y_test, y_pred, average='weighted')

print(f"Neural Network Model - Accuracy: {accuracy:.4f}")
print(f"Neural Network Model - Precision: {precision:.4f}")
print(f"Neural Network Model - Recall: {recall:.4f}")
print(f"Neural Network Model - F1-Score: {f1:.4f}")

# Confusion matrix (rows: true class, columns: predicted class)
conf_matrix = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:")
print(conf_matrix)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Neural Network Model - Accuracy: 0.8570
Neural Network Model - Precision: 0.8572
Neural Network Model - Recall: 0.8570
Neural Network Model - F1-Score: 0.8571
Confusion Matrix:
[[3339  460]
 [ 442 2067]]


### Tune and Evaluate the NN Model

In [None]:
from tensorflow.keras.layers import Dropout

# Seed Python, NumPy and TensorFlow so weight initialisation, dropout masks and
# batch shuffling are repeatable on a fresh kernel (same seed as the data split).
tf.keras.utils.set_random_seed(42)

# NOTE(review): X_train_resampled / y_train_resampled are not created in this
# section -- presumably they come from an earlier resampling step; confirm they
# were derived from the current X_train / y_train split.
n_features = X_train_resampled.shape[1]

# Same architecture as the initial model, with Dropout after each hidden layer
# to prevent overfitting.
# An explicit Input object replaces the `input_dim=` layer argument, which
# newer Keras releases warn about when used on Sequential models.
model = Sequential([
    tf.keras.Input(shape=(n_features,)),
    Dense(128, activation='relu'),   # first hidden layer
    Dropout(0.3),
    Dense(64, activation='relu'),    # second hidden layer
    Dropout(0.3),
    Dense(1, activation='sigmoid'),  # output layer
])

# Compile the model with Adam optimizer and a lower learning rate
model.compile(
    loss='binary_crossentropy',
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.0005),
    metrics=['accuracy'],
)

# Train the model
# NOTE(review): the held-out test set doubles as validation data here. It is
# only used to monitor per-epoch metrics (no callback acts on it), but any
# tuning decision taken from those metrics is informed by test data.
model.fit(
    X_train_resampled,
    y_train_resampled,
    epochs=20,
    batch_size=64,
    validation_data=(X_test, y_test),
)

# Make predictions on the test set
y_pred_prob = model.predict(X_test)
# Threshold the sigmoid scores at 0.5 and flatten to a 1-D vector of 0/1 labels.
y_pred = (y_pred_prob > 0.5).astype("int32").ravel()

# Evaluate the tuned Neural Network model
# NOTE(review): with average='weighted', recall is by definition equal to
# accuracy, so the "Recall" line repeats the accuracy value. Kept as-is here;
# consider average='binary' if per-class figures are wanted.
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='weighted')
recall = recall_score(y_test, y_pred, average='weighted')
f1 = f1_score(y_test, y_pred, average='weighted')

print(f"Tuned Neural Network Model - Accuracy: {accuracy:.4f}")
print(f"Tuned Neural Network Model - Precision: {precision:.4f}")
print(f"Tuned Neural Network Model - Recall: {recall:.4f}")
print(f"Tuned Neural Network Model - F1-Score: {f1:.4f}")

# Confusion matrix (rows: true class, columns: predicted class)
conf_matrix = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:")
print(conf_matrix)


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Tuned Neural Network Model - Accuracy: 0.8573
Tuned Neural Network Model - Precision: 0.8571
Tuned Neural Network Model - Recall: 0.8573
Tuned Neural Network Model - F1-Score: 0.8572
Confusion Matrix:
[[3365  434]
 [ 466 2043]]


### Evaluate the Fairness of the NN Model

In [None]:
import numpy as np
from sklearn.metrics import precision_score, recall_score, confusion_matrix

# Group-fairness report for the tuned model, comparing the two groups defined
# by the sensitive attribute. Every difference below is reported as
# Group 1 minus Group 0 so that signs are comparable across metrics.

# Sensitive attribute: the 'source_LLM Generated' column of the test features
# (assumed to hold 0s and 1s). Ensure this is the correct column name.
sensitive_attr = X_test['source_LLM Generated']

# NOTE: `y_pred` is not computed here. This cell relies on the value left in
# the kernel by the tuned-model evaluation cell, whose rows are in the same
# order as X_test / y_test. Re-run that cell first on a fresh kernel.

# Split predictions and true labels based on the sensitive attribute
group_0 = sensitive_attr == 0
group_1 = sensitive_attr == 1

y_true_0 = y_test[group_0]
y_pred_0 = y_pred[group_0]

y_true_1 = y_test[group_1]
y_pred_1 = y_pred[group_1]

# 1. Demographic Parity
# Proportion of positive predictions for both groups
demographic_parity_0 = np.mean(y_pred_0)
demographic_parity_1 = np.mean(y_pred_1)
demographic_parity_diff = demographic_parity_1 - demographic_parity_0

print(f"Demographic Parity - Group 0: {demographic_parity_0:.4f}, Group 1: {demographic_parity_1:.4f}")
print(f"Difference in Demographic Parity: {demographic_parity_diff:.4f}")

# 2. Equal Opportunity (True Positive Rate comparison)
# TPR = true positives / actual positives, per group
true_positives_0 = np.sum((y_true_0 == 1) & (y_pred_0 == 1))
true_positives_1 = np.sum((y_true_1 == 1) & (y_pred_1 == 1))

tpr_0 = true_positives_0 / np.sum(y_true_0 == 1)
tpr_1 = true_positives_1 / np.sum(y_true_1 == 1)
equal_opportunity_diff = tpr_1 - tpr_0

print(f"Equal Opportunity - TPR for Group 0: {tpr_0:.4f}, Group 1: {tpr_1:.4f}")
print(f"Difference in Equal Opportunity (TPR): {equal_opportunity_diff:.4f}")

# 3. Equalized Odds (TPR and FPR comparison)
# FPR = false positives / actual negatives, per group
false_positives_0 = np.sum((y_true_0 == 0) & (y_pred_0 == 1))
false_positives_1 = np.sum((y_true_1 == 0) & (y_pred_1 == 1))

fpr_0 = false_positives_0 / np.sum(y_true_0 == 0)
fpr_1 = false_positives_1 / np.sum(y_true_1 == 0)
equalized_odds_tpr_diff = tpr_1 - tpr_0
# Group 1 minus Group 0, matching the direction of every other difference
# (this was previously computed the other way round, flipping its sign).
equalized_odds_fpr_diff = fpr_1 - fpr_0

print(f"Equalized Odds - TPR for Group 0: {tpr_0:.4f}, Group 1: {tpr_1:.4f}")
print(f"Equalized Odds - FPR for Group 0: {fpr_0:.4f}, Group 1: {fpr_1:.4f}")
print(f"Difference in TPR (Equalized Odds): {equalized_odds_tpr_diff:.4f}")
print(f"Difference in FPR (Equalized Odds): {equalized_odds_fpr_diff:.4f}")

# 4. Predictive Parity (Precision comparison)
# Default binary averaging: precision of the positive class within each group.
precision_0 = precision_score(y_true_0, y_pred_0)
precision_1 = precision_score(y_true_1, y_pred_1)
predictive_parity_diff = precision_1 - precision_0

print(f"Predictive Parity - Precision for Group 0: {precision_0:.4f}, Group 1: {precision_1:.4f}")
print(f"Difference in Predictive Parity (Precision): {predictive_parity_diff:.4f}")

# Summary of Differences (all Group 1 - Group 0):
overall_summary = {
    "Demographic Parity Difference": demographic_parity_diff,
    "Equal Opportunity (TPR) Difference": equal_opportunity_diff,
    "Equalized Odds TPR Difference": equalized_odds_tpr_diff,
    "Equalized Odds FPR Difference": equalized_odds_fpr_diff,
    "Predictive Parity Difference": predictive_parity_diff
}

print("\n=== Overall Fairness Summary ===")
for metric, diff in overall_summary.items():
    print(f"{metric}: {diff:.4f}")

Demographic Parity - Group 0: 0.3182, Group 1: 0.5044
Difference in Demographic Parity: 0.1862
Equal Opportunity - TPR for Group 0: 0.6818, Group 1: 0.9421
Difference in Equal Opportunity (TPR): 0.2602
Equalized Odds - TPR for Group 0: 0.6818, Group 1: 0.9421
Equalized Odds - FPR for Group 0: 0.1426, Group 1: 0.0561
Difference in TPR (Equalized Odds): 0.2602
Difference in FPR (Equalized Odds): 0.0865
Predictive Parity - Precision for Group 0: 0.6977, Group 1: 0.9450
Difference in Predictive Parity (Precision): 0.2473

=== Overall Fairness Summary ===
Demographic Parity Difference: 0.1862
Equal Opportunity (TPR) Difference: 0.2602
Equalized Odds TPR Difference: 0.2602
Equalized Odds FPR Difference: 0.0865
Predictive Parity Difference: 0.2473
