In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score
from torch.utils.data import DataLoader, TensorDataset

# Inputs expected from earlier cells:
#   X_embeddings -- precomputed SciBERT embeddings, one row per sample
#                   (expected shape (num_samples, 768))
#   y            -- binary class labels (0 or 1), one per sample

# Tensor versions of the inputs. The labels get a trailing axis of size 1 so
# their shape lines up with the classifier's single output unit.
X_embeddings_tensor = torch.tensor(X_embeddings, dtype=torch.float32)
y_tensor = torch.tensor(y, dtype=torch.float32)[:, None]

# Hold out 20% of the samples for testing; the fixed seed keeps the split stable.
X_train, X_test, y_train, y_test = train_test_split(
    X_embeddings_tensor,
    y_tensor,
    test_size=0.2,
    random_state=42,
)

# Pair features with labels, then serve them in batches of 32.
train_data, test_data = TensorDataset(X_train, y_train), TensorDataset(X_test, y_test)

# Only the training batches are reshuffled on every pass.
train_loader = DataLoader(train_data, shuffle=True, batch_size=32)
test_loader = DataLoader(test_data, batch_size=32)

# Define the ANN architecture
class ANNClassifier(nn.Module):
    """Feed-forward binary classifier: input_size -> 128 -> 64 -> 1.

    Both hidden layers are followed by ReLU; the single output unit is passed
    through a sigmoid, so ``forward`` returns values between 0 and 1.

    Args:
        input_size: Number of features in each input row.
    """

    def __init__(self, input_size):
        super().__init__()
        # Sub-modules are registered in the order they are applied.
        self.fc1 = nn.Linear(in_features=input_size, out_features=128)
        self.relu = nn.ReLU()  # reused after both hidden layers
        self.fc2 = nn.Linear(in_features=128, out_features=64)
        self.fc3 = nn.Linear(in_features=64, out_features=1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return the sigmoid-activated output for a batch ``x``.

        Args:
            x: Tensor whose last dimension equals ``input_size``.

        Returns:
            Tensor with the same leading dimensions as ``x`` and a last
            dimension of 1.
        """
        hidden = self.relu(self.fc1(x))
        hidden = self.relu(self.fc2(hidden))
        return self.sigmoid(self.fc3(hidden))

# Initialize the ANN model
# Seed PyTorch's global RNG so weight initialisation and DataLoader shuffling
# are repeatable, in line with the fixed random_state used for the data split.
torch.manual_seed(42)

# NOTE(review): `device` is not defined anywhere in this cell -- presumably an
# earlier cell sets it (TODO confirm). Fall back to auto-detection so this cell
# does not die with NameError on a fresh kernel; an existing value is kept.
try:
    device
except NameError:
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Width of one embedding row (768 for SciBERT according to the data notes)
input_size = X_embeddings.shape[1]
model = ANNClassifier(input_size).to(device)

# Loss function and optimizer
# BCELoss expects probabilities, which the model's final sigmoid provides.
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=1e-4)

# Training loop
epochs = 5
for epoch in range(epochs):
    model.train()
    total_loss = 0.0   # sum of per-sample losses seen this epoch
    samples_seen = 0   # number of samples those losses cover
    for X_batch, y_batch in train_loader:
        X_batch, y_batch = X_batch.to(device), y_batch.to(device)

        # Zero the gradients left over from the previous step
        optimizer.zero_grad()

        # Forward pass
        outputs = model(X_batch)

        # Compute loss
        loss = criterion(outputs, y_batch)

        # Backward pass and optimize
        loss.backward()
        optimizer.step()

        # The criterion yields a per-batch mean, so weight it by the batch size
        # before accumulating. Averaging the batch means directly would
        # over-weight a shorter final batch.
        n_in_batch = X_batch.size(0)
        total_loss += loss.item() * n_in_batch
        samples_seen += n_in_batch

    # max(..., 1) avoids ZeroDivisionError if the loader yielded no batches
    print(f'Epoch {epoch + 1}/{epochs}, Loss: {total_loss / max(samples_seen, 1):.4f}')

# Evaluation on the held-out test batches
model.eval()  # switch the module to evaluation mode
y_preds, y_true = [], []

with torch.no_grad():  # gradients are not needed while scoring
    for batch_features, batch_labels in test_loader:
        probs = model(batch_features.to(device))
        # Outputs above 0.5 count as class 1, everything else as class 0
        hard_labels = (probs > 0.5).float()
        y_preds.extend(hard_labels.cpu().numpy())
        y_true.extend(batch_labels.cpu().numpy())

# Overall accuracy first, then the per-class report
accuracy = accuracy_score(y_true, y_preds)
print(f'Test Accuracy: {accuracy * 100:.2f}%')
print(classification_report(y_true, y_preds, target_names=['Class 0', 'Class 1']))

# Add the predictions to the original dataframe.
# The whole dataset is scored in one forward pass, so run it under no_grad:
# otherwise autograd keeps every intermediate activation alive for a backward
# pass that never happens.
model.eval()
with torch.no_grad():
    all_probs = model(torch.tensor(X_embeddings, dtype=torch.float32).to(device))

# The model emits one column per sample; flatten it to a 1-D vector and store
# 0/1 integers, the same encoding the TensorFlow cell writes to this column.
df2['ANN_predictions'] = (all_probs > 0.5).long().reshape(-1).cpu().numpy()


# TensorFlow / Keras version of the same ANN classifier (next cell)

In [None]:
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score
import numpy as np

# Inputs expected from earlier cells:
#   X_embeddings -- SciBERT embeddings, NumPy array of shape (num_samples, 768)
#   y            -- binary labels (0 or 1), NumPy array of shape (num_samples,)

# Split 80/20 with a fixed seed, then convert each of the four pieces to a
# float32 TensorFlow tensor in a single pass.
X_train, X_test, y_train, y_test = (
    tf.convert_to_tensor(part, dtype=tf.float32)
    for part in train_test_split(X_embeddings, y, test_size=0.2, random_state=42)
)

# Define the ANN architecture
def create_ann(input_dim):
    """Build the (uncompiled) Keras ANN: input_dim -> 128 -> 64 -> 1.

    The two hidden Dense layers use ReLU; the single output unit uses a
    sigmoid, so the model is suited to binary classification.

    Args:
        input_dim: Number of features per sample.

    Returns:
        A ``tf.keras.Sequential`` model. Compiling it is left to the caller.
    """
    model = tf.keras.Sequential([
        # Declare the input shape with an explicit Input object instead of the
        # legacy `input_dim=` layer argument, which newer Keras releases warn
        # about. The Dense stack itself is unchanged.
        tf.keras.Input(shape=(input_dim,)),
        tf.keras.layers.Dense(128, activation='relu'),    # input_dim -> 128
        tf.keras.layers.Dense(64, activation='relu'),     # 128 -> 64
        tf.keras.layers.Dense(1, activation='sigmoid'),   # 64 -> 1
    ])
    return model

# Training hyperparameters
epochs = 5
batch_size = 32

# Build the network for the embedding width (second axis of X_embeddings)
input_dim = X_embeddings.shape[1]
model = create_ann(input_dim)

# Adam (learning rate 1e-4) minimising binary cross-entropy; accuracy is tracked as a metric
model.compile(
    loss='binary_crossentropy',
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4),
    metrics=['accuracy'],
)

# Fit on the training split; validation_split=0.1 sets aside a tenth of it
# for the validation metrics recorded in `history`.
history = model.fit(
    X_train,
    y_train,
    validation_split=0.1,
    batch_size=batch_size,
    epochs=epochs,
)

# Loss and accuracy on the held-out test set, as computed by Keras
test_loss, test_accuracy = model.evaluate(X_test, y_test)
print(f'Test Accuracy: {test_accuracy * 100:.2f}%')

# Hard 0/1 test predictions: sigmoid outputs above 0.5 become 1
y_preds = np.greater(model.predict(X_test), 0.5).astype(int)

# Per-class report for the test set
print(classification_report(y_test, y_preds, target_names=['Class 0', 'Class 1']))

# Same thresholding applied to every sample; stored in df2['ANN_predictions']
df2['ANN_predictions'] = np.greater(model.predict(X_embeddings), 0.5).astype(int)
    

In [None]:
import matplotlib.pyplot as plt

# One figure per metric, each overlaying the training and validation curves
# stored in `history.history`.
for train_key, val_key, figure_title, y_label in (
    ('accuracy', 'val_accuracy', 'Model accuracy', 'Accuracy'),
    ('loss', 'val_loss', 'Model loss', 'Loss'),
):
    plt.plot(history.history[train_key])
    plt.plot(history.history[val_key])
    plt.title(figure_title)
    plt.ylabel(y_label)
    plt.xlabel('Epoch')
    plt.legend(['Train', 'Validation'], loc='upper left')
    plt.show()
