In [4]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Load dataset with extracted features
dataset_path = "C:/Users/rajar/OneDrive/Desktop/New folder/Fake-News/combined_fake_news_with_text_image_embeddings.csv"
df = pd.read_csv(dataset_path)

# Define feature columns
image_feature_cols = [col for col in df.columns if col.startswith("img_feat_")]
text_feature_cols = [col for col in df.columns if col.startswith("text_feat_")]

# Ensure all features are numerical
df[image_feature_cols] = df[image_feature_cols].apply(pd.to_numeric, errors="coerce")
df[text_feature_cols] = df[text_feature_cols].apply(pd.to_numeric, errors="coerce")

# Convert labels to numeric (ensuring correct data types)
df["CLIP_Label"] = pd.to_numeric(df["CLIP_Label"], errors="coerce").fillna(0).astype(np.float32)
df["Rating_Binary"] = pd.to_numeric(df["Rating_Binary"], errors="coerce").fillna(0).astype(np.float32)

# Prepare inputs (features) and targets (labels)
X = df[image_feature_cols + text_feature_cols].values
y_clip = df["CLIP_Label"].values.astype(np.float32)  # Ensure correct type
y_rating = df["Rating_Binary"].values.astype(np.float32)

# Train-test split
X_train, X_test, y_clip_train, y_clip_test, y_rating_train, y_rating_test = train_test_split(
    X, y_clip, y_rating, test_size=0.2, random_state=42
)

# Convert to PyTorch tensors
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_clip_train_tensor = torch.tensor(y_clip_train, dtype=torch.float32).unsqueeze(1)
y_clip_test_tensor = torch.tensor(y_clip_test, dtype=torch.float32).unsqueeze(1)
y_rating_train_tensor = torch.tensor(y_rating_train, dtype=torch.float32).unsqueeze(1)
y_rating_test_tensor = torch.tensor(y_rating_test, dtype=torch.float32).unsqueeze(1)

# Define Multi-Task Neural Network
class MultiTaskModel(nn.Module):
    def __init__(self, input_size):
        super(MultiTaskModel, self).__init__()
        self.shared_fc1 = nn.Linear(input_size, 512)
        self.shared_fc2 = nn.Linear(512, 256)
        self.relu = nn.ReLU()

        # Task 1: CLIP Label (Image-Text Relationship)
        self.fc_clip = nn.Linear(256, 1)
        self.sigmoid_clip = nn.Sigmoid()

        # Task 2: Rating Binary (Truthfulness)
        self.fc_rating = nn.Linear(256, 1)
        self.sigmoid_rating = nn.Sigmoid()

    def forward(self, x):
        x = self.relu(self.shared_fc1(x))
        x = self.relu(self.shared_fc2(x))
        
        # Separate outputs
        clip_output = self.sigmoid_clip(self.fc_clip(x))
        rating_output = self.sigmoid_rating(self.fc_rating(x))

        return clip_output, rating_output

# Initialize model
input_size = X_train.shape[1]
model = MultiTaskModel(input_size)

# Define Loss and Optimizer
criterion = nn.BCELoss()  # Binary Cross Entropy for both tasks
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training Loop
num_epochs = 20
batch_size = 32

for epoch in range(num_epochs):
    model.train()
    optimizer.zero_grad()
    
    clip_output, rating_output = model(X_train_tensor)
    
    loss_clip = criterion(clip_output, y_clip_train_tensor)
    loss_rating = criterion(rating_output, y_rating_train_tensor)
    
    loss = loss_clip + loss_rating  # Multi-task loss (sum of both)
    loss.backward()
    optimizer.step()
    
    if (epoch + 1) % 5 == 0:
        print(f"Epoch [{epoch+1}/{num_epochs}], CLIP Loss: {loss_clip.item():.4f}, Rating Loss: {loss_rating.item():.4f}")

# Evaluation
model.eval()
with torch.no_grad():
    y_clip_pred, y_rating_pred = model(X_test_tensor)
    y_clip_pred = (y_clip_pred.numpy() > 0.5).astype(int).flatten()
    y_rating_pred = (y_rating_pred.numpy() > 0.5).astype(int).flatten()

# Compute overall accuracy
accuracy_clip = accuracy_score(y_clip_test, y_clip_pred)
accuracy_rating = accuracy_score(y_rating_test, y_rating_pred)

# Compute class-wise accuracy
conf_matrix_clip = confusion_matrix(y_clip_test, y_clip_pred)
conf_matrix_rating = confusion_matrix(y_rating_test, y_rating_pred)

class_accuracy_clip = conf_matrix_clip.diagonal() / conf_matrix_clip.sum(axis=1)
class_accuracy_rating = conf_matrix_rating.diagonal() / conf_matrix_rating.sum(axis=1)

# Print results
print(f"CLIP Label Accuracy: {accuracy_clip:.4f}")
print(f"Rating Label Accuracy: {accuracy_rating:.4f}")

print("\nCLIP Label Classification Report:\n", classification_report(y_clip_test, y_clip_pred))
print("\nRating Label Classification Report:\n", classification_report(y_rating_test, y_rating_pred))

print("\nClass-wise Accuracy for CLIP Label:")
for i, acc in enumerate(class_accuracy_clip):
    print(f"Class {i}: {acc:.4f}")

print("\nClass-wise Accuracy for Rating Label:")
for i, acc in enumerate(class_accuracy_rating):
    print(f"Class {i}: {acc:.4f}")


Epoch [5/20], CLIP Loss: 0.0799, Rating Loss: 0.0481
Epoch [10/20], CLIP Loss: 0.0000, Rating Loss: 0.0000
Epoch [15/20], CLIP Loss: 0.0000, Rating Loss: 0.0000
Epoch [20/20], CLIP Loss: 0.0000, Rating Loss: 0.0000
CLIP Label Accuracy: 1.0000
Rating Label Accuracy: 1.0000

CLIP Label Classification Report:
               precision    recall  f1-score   support

         0.0       1.00      1.00      1.00        51

    accuracy                           1.00        51
   macro avg       1.00      1.00      1.00        51
weighted avg       1.00      1.00      1.00        51


Rating Label Classification Report:
               precision    recall  f1-score   support

         0.0       1.00      1.00      1.00        51

    accuracy                           1.00        51
   macro avg       1.00      1.00      1.00        51
weighted avg       1.00      1.00      1.00        51


Class-wise Accuracy for CLIP Label:
Class 0: 1.0000

Class-wise Accuracy for Rating Label:
Class 0: 1.0000

