In [None]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Load dataset with extracted features
dataset_path = "politifact_features.csv"
df = pd.read_csv(dataset_path)

# Define feature columns: image features first, then text features, so the
# model input is the concatenation [img_feat_* | text_feat_*].
image_feature_cols = [col for col in df.columns if col.startswith("img_feat_")]
text_feature_cols = [col for col in df.columns if col.startswith("text_feat_")]
feature_cols = image_feature_cols + text_feature_cols
if not feature_cols:
    # Fail early with a readable message instead of an opaque shape error
    # from the scaler further down.
    raise ValueError(
        f"No 'img_feat_*' or 'text_feat_*' columns found in {dataset_path!r}"
    )

# Ensure all features are numerical. Cells that cannot be parsed become NaN
# (errors="coerce"); they are imputed after scaling, see below.
df[image_feature_cols] = df[image_feature_cols].apply(pd.to_numeric, errors="coerce")
df[text_feature_cols] = df[text_feature_cols].apply(pd.to_numeric, errors="coerce")

# Convert labels to binary (1 = Related, 0 = Unrelated)
df["Label"] = df["CLIP_Label"].apply(lambda x: 1 if x == "Related" else 0)

# Prepare inputs (features) and targets (labels)
X = df[feature_cols].values
y = df["Label"].values

# Train-test split. This happens BEFORE scaling so that the scaler statistics
# are computed from the training rows only (no test-set leakage). The split
# depends only on row positions and random_state, so the same rows end up in
# each partition as when splitting already-scaled data.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize features using training statistics only.
# StandardScaler disregards NaN when fitting and keeps NaN when transforming,
# so replacing the remaining NaN with 0 after scaling amounts to imputing the
# training mean of that feature. Without this step a single unparseable cell
# would turn the whole loss into NaN.
scaler = StandardScaler()
X_train = np.nan_to_num(scaler.fit_transform(X_train), nan=0.0)
X_test = np.nan_to_num(scaler.transform(X_test), nan=0.0)
# Keep `X` as the scaled full feature matrix for any code that still reads it.
X = np.nan_to_num(scaler.transform(X), nan=0.0)

# Convert to PyTorch tensors. Targets get a trailing dimension so their shape
# (N, 1) matches the model output expected by the BCE loss.
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32).unsqueeze(1)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32).unsqueeze(1)

# Early-fusion classifier: one MLP applied to a single fused feature vector
class EarlyFusionClassifier(nn.Module):
    """Three-layer feed-forward binary classifier.

    The network maps a feature vector of width ``input_size`` through two
    ReLU-activated hidden layers (512 and 256 units) to a single sigmoid
    output, i.e. a value in [0, 1] per input row.

    Args:
        input_size: Number of features in each input row.
    """

    def __init__(self, input_size):
        super().__init__()
        # Layers are registered in the order fc1, fc2, fc3 so parameter
        # ordering and state_dict keys stay stable.
        self.fc1 = nn.Linear(input_size, 512)
        self.fc2 = nn.Linear(512, 256)
        self.fc3 = nn.Linear(256, 1)
        self.relu = nn.ReLU()
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return the sigmoid output of shape ``(..., 1)`` for input ``x``."""
        hidden = self.fc1(x)
        hidden = self.relu(hidden)
        hidden = self.fc2(hidden)
        hidden = self.relu(hidden)
        logit = self.fc3(hidden)
        return self.sigmoid(logit)

# Initialize model; its input width is the number of feature columns
input_size = X_train.shape[1]
model = EarlyFusionClassifier(input_size)

# Define Loss and Optimizer
criterion = nn.BCELoss()  # Binary Cross Entropy Loss (model already applies sigmoid)
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training Loop
num_epochs = 20
batch_size = 32
num_samples = X_train_tensor.shape[0]

for epoch in range(num_epochs):
    model.train()

    # Visit the training rows in a fresh random order every epoch and update
    # the weights once per mini-batch of `batch_size` rows. (Previously
    # `batch_size` was declared but unused, so each epoch was a single
    # full-batch step and the whole run made only `num_epochs` updates.)
    permutation = torch.randperm(num_samples)
    epoch_loss = 0.0

    for start in range(0, num_samples, batch_size):
        batch_indices = permutation[start:start + batch_size]
        batch_X = X_train_tensor[batch_indices]
        batch_y = y_train_tensor[batch_indices]

        optimizer.zero_grad()
        outputs = model(batch_X)
        loss = criterion(outputs, batch_y)
        loss.backward()
        optimizer.step()

        # Weight by batch length so the last, possibly shorter, batch does
        # not distort the epoch average.
        epoch_loss += loss.item() * batch_X.shape[0]

    if (epoch + 1) % 5 == 0:
        mean_loss = epoch_loss / max(num_samples, 1)
        print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {mean_loss:.4f}")

# Evaluation: score the held-out rows without tracking gradients
model.eval()
with torch.no_grad():
    test_probabilities = model(X_test_tensor)

# Threshold the sigmoid outputs at 0.5 and flatten to a 1-D array of 0/1 labels
y_pred = (test_probabilities.numpy() > 0.5).astype(int).flatten()

# Compute metrics against the ground-truth test labels
conf_matrix = confusion_matrix(y_test, y_pred)
report = classification_report(y_test, y_pred)
accuracy = accuracy_score(y_test, y_pred)

# Report results
print(f"Accuracy: {accuracy:.4f}")
print("Classification Report:\n", report)
print("Confusion Matrix:\n", conf_matrix)


FileNotFoundError: [Errno 2] No such file or directory: 'politifact_features.csv'

: 