# In [None]:  (Jupyter notebook cell marker)
# Import necessary libraries
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset
from transformers import RobertaTokenizer, RobertaModel, AdamW
import nlpaug.augmenter.word as naw
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import numpy as np  # Add this import statement to include NumPy


def load_data(file_path):
    """Load texts and multi-label targets from a CSV file.

    The CSV must contain a ``text`` column plus at least one more column.
    Every column other than ``text`` is treated as a label column.

    Args:
        file_path: Path (or any source accepted by ``pandas.read_csv``) of
            the CSV file to read.

    Returns:
        ``(texts, labels)`` where ``texts`` is the list of values from the
        ``text`` column and ``labels`` is a list with one list of label
        values per row. On any failure the error is printed and
        ``(None, None)`` is returned instead of raising.
    """
    try:
        data = pd.read_csv(file_path)

        # Check if the required columns exist
        if 'text' not in data.columns or len(data.columns) < 2:
            raise ValueError("CSV file does not contain 'text' and label columns.")

        texts = data['text'].tolist()
        # Pick the label columns by name, not by position: slicing with
        # iloc[:, 1:] would leak the text column into the labels whenever
        # 'text' is not the first column of the file.
        labels = data.drop(columns=['text']).values.tolist()

        return texts, labels
    except Exception as e:
        # Deliberate best-effort: report the problem and let the caller
        # decide what to do with the (None, None) result.
        print(f"Error loading data from {file_path}: {str(e)}")
        return None, None

# Load the H1 and H2 CSV files, then report how many texts and label rows
# each one produced (texts first, labels second).
file_path_h1, file_path_h2 = "model1.csv", "model2.csv"
texts_h1, labels_h1 = load_data(file_path_h1)
texts_h2, labels_h2 = load_data(file_path_h2)
for _loaded in (texts_h1, texts_h2, labels_h1, labels_h2):
    print(len(_loaded))

def split_data(texts, labels, train_ratio, val_ratio, test_ratio):
    """Split texts and labels sequentially into train/validation/test parts.

    The data is cut in its existing order (no shuffling): the first
    ``train_ratio`` share becomes the training set, the next ``val_ratio``
    share the validation set, and everything that remains the test set.
    The test set therefore absorbs any samples lost to rounding down.

    Args:
        texts: Sequence of samples; its length determines the split sizes.
        labels: Sequence of labels, sliced at the same positions as ``texts``.
        train_ratio: Fraction of samples used for training.
        val_ratio: Fraction of samples used for validation.
        test_ratio: Fraction of samples used for testing. Only used to
            validate that the three ratios sum to 1.

    Returns:
        ``(train_texts, val_texts, test_texts,
        train_labels, val_labels, test_labels)``.

    Raises:
        AssertionError: If the ratios do not add up to 1.0.
    """
    # Compare with a tolerance: binary floating point makes sums such as
    # 0.6 + 0.3 + 0.1 evaluate to 0.9999999999999999, which an exact
    # equality check would wrongly reject.
    ratio_sum = train_ratio + val_ratio + test_ratio
    assert abs(ratio_sum - 1.0) <= 1e-9, "Ratios must add up to 1.0"

    total_samples = len(texts)
    train_end = int(total_samples * train_ratio)
    val_end = train_end + int(total_samples * val_ratio)

    train_texts = texts[:train_end]
    val_texts = texts[train_end:val_end]
    test_texts = texts[val_end:]

    train_labels = labels[:train_end]
    val_labels = labels[train_end:val_end]
    test_labels = labels[val_end:]

    return train_texts, val_texts, test_texts, train_labels, val_labels, test_labels

class TextDataset(Dataset):
    """Dataset pairing each text with its H1 and H2 multi-label targets.

    Each item is tokenized on access and returned as a dict with the keys
    ``'ids'``, ``'mask'``, ``'labels_h1'`` and ``'labels_h2'``.
    """

    def __init__(self, texts, labels_h1, labels_h2, tokenizer, max_length=128):
        """Store the samples and tokenization settings.

        Args:
            texts: Indexable collection of input texts.
            labels_h1: Per-sample H1 label vectors, indexed like ``texts``.
            labels_h2: Per-sample H2 label vectors, indexed like ``texts``.
            tokenizer: Callable invoked as ``tokenizer(text, ...)`` that
                returns a mapping with ``'input_ids'`` and
                ``'attention_mask'`` tensors.
            max_length: Length every sequence is padded/truncated to.
        """
        self.texts = texts
        self.labels_h1 = labels_h1
        self.labels_h2 = labels_h2
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        """Return the number of texts in the dataset."""
        return len(self.texts)

    def __getitem__(self, idx):
        """Tokenize sample ``idx`` and return it with both label vectors."""
        text = self.texts[idx]
        labels_h1 = self.labels_h1[idx]
        labels_h2 = self.labels_h2[idx]

        # Tokenize the text
        encoding = self.tokenizer(
            text,
            add_special_tokens=True,
            truncation=True,
            padding='max_length',
            max_length=self.max_length,
            return_tensors='pt'
        )
        # Drop only the leading batch dimension. A bare squeeze() would also
        # collapse the sequence dimension when max_length == 1.
        # Assumes the tokenizer output has a leading dimension of size 1.
        input_ids = encoding['input_ids'].squeeze(0)
        attention_mask = encoding['attention_mask'].squeeze(0)

        return {
            'ids': input_ids,
            'mask': attention_mask,
            # torch.tensor always converts the given values; the legacy
            # torch.FloatTensor(n) constructor instead allocates an
            # uninitialised tensor of size n when handed a plain int.
            'labels_h1': torch.tensor(labels_h1, dtype=torch.float32),
            'labels_h2': torch.tensor(labels_h2, dtype=torch.float32)
        }

# Shared RoBERTa tokenizer used by every dataset created later on
tokenizer = RobertaTokenizer.from_pretrained('roberta-base')

# Cut H1 and H2 into 70% training, 10% validation and 20% test data
(train_texts_h1, val_texts_h1, test_texts_h1,
 train_labels_h1, val_labels_h1, test_labels_h1) = split_data(
    texts_h1, labels_h1, train_ratio=0.7, val_ratio=0.1, test_ratio=0.2)
(train_texts_h2, val_texts_h2, test_texts_h2,
 train_labels_h2, val_labels_h2, test_labels_h2) = split_data(
    texts_h2, labels_h2, train_ratio=0.7, val_ratio=0.1, test_ratio=0.2)
print(f"Length of train_texts_h1 before augmentation: {len(train_texts_h1)}")
print(f"Length of train_texts_h2 before augmentation: {len(train_texts_h2)}")

# H1 and H2 must have matching split sizes because later datasets index
# both label sets with the same sample index
assert len(train_texts_h1) == len(train_texts_h2), "Training data sizes do not match"
assert len(val_texts_h1) == len(val_texts_h2), "Validation data sizes do not match"
assert len(test_texts_h1) == len(test_texts_h2), "Test data sizes do not match"

# Report the text split sizes per hierarchy level
print("H1 Data Split Sizes:")
print("Training data size for H1:", len(train_texts_h1))
print("Validation data size for H1:", len(val_texts_h1))
print("Test data size for H1:", len(test_texts_h1))

print()
print("H2 Data Split Sizes:")
print("Training data size for H2:", len(train_texts_h2))
print("Validation data size for H2:", len(val_texts_h2))
print("Test data size for H2:", len(test_texts_h2))

# Report the label split sizes per hierarchy level
print("Training data size for labels of H1:", len(train_labels_h1))
print("Validating data size for labels of H1:", len(val_labels_h1))
print("Testing data size for labels of H1:", len(test_labels_h1))

print("Training data size for labels of H2:", len(train_labels_h2))
print("Validating data size for labels of H2:", len(val_labels_h2))
print("Testing data size for labels of H2:", len(test_labels_h2))


# Define a function for data augmentation
def augment_data(texts, labels, num_augmentations=3, augmenter=None):
    """Create augmented variants of each text, repeating its label.

    Args:
        texts: Iterable of input texts.
        labels: Iterable of labels, paired with ``texts`` position by
            position (pairing stops at the shorter of the two).
        num_augmentations: Number of augmented variants generated per text.
        augmenter: Optional object exposing ``augment(text)``. When omitted,
            a WordNet-based ``naw.SynonymAug`` is created for this call.

    Returns:
        ``(augmented_texts, augmented_labels)``: two flat lists holding
        ``num_augmentations`` entries per input pair. The original samples
        are not included.
    """
    if augmenter is None:
        augmenter = naw.SynonymAug(aug_src='wordnet')

    augmented_texts = []
    augmented_labels = []
    for text, label in zip(texts, labels):
        for _ in range(num_augmentations):
            sample = augmenter.augment(text)
            # Depending on the installed nlpaug release, augment() returns
            # either a string or a list of strings. Unwrap the list form so
            # the output stays a flat list of texts, falling back to the
            # original text if nothing was produced.
            if isinstance(sample, (list, tuple)):
                sample = sample[0] if sample else text
            augmented_texts.append(sample)
            augmented_labels.append(label)  # Use the same label for augmented samples
    return augmented_texts, augmented_labels

# Generate augmented copies of the H1 and H2 training samples
num_augmentations = 3
augmented_train_texts_h1, augmented_train_labels_h1 = augment_data(
    train_texts_h1, train_labels_h1, num_augmentations=num_augmentations)
augmented_train_texts_h2, augmented_train_labels_h2 = augment_data(
    train_texts_h2, train_labels_h2, num_augmentations=num_augmentations)
print(f"Length of train_texts_h1 after augmentation: {len(augmented_train_texts_h1)}")
print(f"Length of train_texts_h2 after augmentation: {len(augmented_train_texts_h2)}")

# Training set = original samples followed by their augmented variants
train_texts_h1 = [*train_texts_h1, *augmented_train_texts_h1]
train_labels_h1 = [*train_labels_h1, *augmented_train_labels_h1]

train_texts_h2 = [*train_texts_h2, *augmented_train_texts_h2]
train_labels_h2 = [*train_labels_h2, *augmented_train_labels_h2]

print(f"Length of train_texts_h1 combined: {len(train_texts_h1)}")
print(f"Length of train_texts_h2 combined: {len(train_texts_h2)}")

print(f"Length of train_labels_h1 combined: {len(train_labels_h1)}")
print(f"Length of train_labels_h2 combined: {len(train_labels_h2)}")

# Training datasets: each uses its own texts but carries both label sets
train_dataset_h1 = TextDataset(
    texts=train_texts_h1, labels_h1=train_labels_h1, labels_h2=train_labels_h2, tokenizer=tokenizer)
train_dataset_h2 = TextDataset(
    texts=train_texts_h2, labels_h1=train_labels_h1, labels_h2=train_labels_h2, tokenizer=tokenizer)

# Validation and test datasets, built the same way from the held-out splits
val_dataset_h1 = TextDataset(
    texts=val_texts_h1, labels_h1=val_labels_h1, labels_h2=val_labels_h2, tokenizer=tokenizer)
test_dataset_h1 = TextDataset(
    texts=test_texts_h1, labels_h1=test_labels_h1, labels_h2=test_labels_h2, tokenizer=tokenizer)

val_dataset_h2 = TextDataset(
    texts=val_texts_h2, labels_h1=val_labels_h1, labels_h2=val_labels_h2, tokenizer=tokenizer)
test_dataset_h2 = TextDataset(
    texts=test_texts_h2, labels_h1=test_labels_h1, labels_h2=test_labels_h2, tokenizer=tokenizer)

# Dataloaders: only the training loaders shuffle their samples
train_dataloader_h1 = DataLoader(dataset=train_dataset_h1, batch_size=32, shuffle=True)
train_dataloader_h2 = DataLoader(dataset=train_dataset_h2, batch_size=32, shuffle=True)

val_dataloader_h1 = DataLoader(dataset=val_dataset_h1, batch_size=32, shuffle=False)
val_dataloader_h2 = DataLoader(dataset=val_dataset_h2, batch_size=32, shuffle=False)

test_dataloader_h1 = DataLoader(dataset=test_dataset_h1, batch_size=32, shuffle=False)
test_dataloader_h2 = DataLoader(dataset=test_dataset_h2, batch_size=32, shuffle=False)

# Number of batches per loader, one value per line
print(
    len(train_dataloader_h1),
    len(train_dataloader_h2),
    len(val_dataloader_h1),
    len(val_dataloader_h2),
    len(test_dataloader_h1),
    len(test_dataloader_h2),
    sep="\n",
)

# Output sizes of the two classification heads
num_labels_h1 = 8  # Replace with the actual number of labels for H1
num_labels_h2 = 32  # Replace with the actual number of labels for H2

# Define the multi-label classifier model
class MultiLabelClassifier(nn.Module):
    """RoBERTa encoder followed by two linear multi-label heads (H1 and H2)."""

    def __init__(self, num_labels_h1, num_labels_h2):
        """Build the encoder, the dropout layer and both classification heads.

        Args:
            num_labels_h1: Output size of the H1 head.
            num_labels_h2: Output size of the H2 head.
        """
        super().__init__()
        self.roberta = RobertaModel.from_pretrained('roberta-base')
        self.dropout = nn.Dropout(0.1)
        self.classifier_h1 = nn.Linear(self.roberta.config.hidden_size, num_labels_h1)
        self.classifier_h2 = nn.Linear(self.roberta.config.hidden_size, num_labels_h2)

    def forward(self, input_ids, attention_mask):
        """Return ``(logits_h1, logits_h2)`` for a batch of token ids.

        Args:
            input_ids: Token-id tensor passed to the encoder.
            attention_mask: Mask passed to the encoder, non-zero for real
                tokens. May be ``None``, in which case all positions count.

        Returns:
            Tuple of raw (pre-sigmoid) logits for the H1 and H2 heads.
        """
        outputs = self.roberta(input_ids=input_ids, attention_mask=attention_mask)
        sequence_output = outputs.last_hidden_state

        if attention_mask is None:
            pooled_output = sequence_output.mean(dim=1)
        else:
            # Average over real tokens only. A plain mean over the sequence
            # axis would mix the padding positions into the sentence
            # representation, making it depend on the amount of padding.
            mask = attention_mask.unsqueeze(-1).to(sequence_output.dtype)
            token_counts = mask.sum(dim=1).clamp(min=1.0)  # avoid 0-division
            pooled_output = (sequence_output * mask).sum(dim=1) / token_counts

        # Apply the dropout layer built in __init__ (it was never used before).
        pooled_output = self.dropout(pooled_output)

        logits_h1 = self.classifier_h1(pooled_output)
        logits_h2 = self.classifier_h2(pooled_output)
        return logits_h1, logits_h2

# Define the number of epochs
num_epochs = 1

# Define the batch size
# NOTE(review): not referenced anywhere in the visible code; the DataLoaders
# are created with a literal batch_size=32.
batch_size = 32

# Create the model and move it to the GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model_h1 = MultiLabelClassifier(num_labels_h1, num_labels_h2).to(device)
model_h2 = MultiLabelClassifier(num_labels_h1, num_labels_h2).to(device)

# Define optimizers and loss functions for H1 and H2 with weights.
# torch.optim.AdamW replaces the deprecated transformers.AdamW. eps and
# weight_decay are passed explicitly to keep the defaults of the
# transformers implementation (1e-6 and 0.0), which differ from PyTorch's.
optimizer_h1 = torch.optim.AdamW(model_h1.parameters(), lr=1e-5, eps=1e-6, weight_decay=0.0)
loss_weight_h1 = 0.25  # Weight for H1
loss_fn_h1 = nn.BCEWithLogitsLoss()

optimizer_h2 = torch.optim.AdamW(model_h2.parameters(), lr=1e-5, eps=1e-6, weight_decay=0.0)
loss_weight_h2 = 0.75  # Weight for H2
loss_fn_h2 = nn.BCEWithLogitsLoss()

# Define a validation function
def validate_model(model, val_dataloader, loss_fn, device, weight_h1=None, weight_h2=None):
    """Compute the average weighted H1/H2 loss over a validation set.

    The model is switched to eval mode and left in that mode on return.

    Args:
        model: Module called as ``model(input_ids, attention_mask)`` and
            returning ``(logits_h1, logits_h2)``.
        val_dataloader: Iterable of batches, each a dict with the keys
            ``'ids'``, ``'mask'``, ``'labels_h1'`` and ``'labels_h2'``.
        loss_fn: Loss applied to both heads.
        device: Device the batch tensors are moved to.
        weight_h1: Weight of the H1 loss. Defaults to the module-level
            ``loss_weight_h1``.
        weight_h2: Weight of the H2 loss. Defaults to the module-level
            ``loss_weight_h2``.

    Returns:
        Mean per-batch weighted loss as a float, or ``nan`` when the
        dataloader yields no batches.
    """
    if weight_h1 is None:
        weight_h1 = loss_weight_h1
    if weight_h2 is None:
        weight_h2 = loss_weight_h2

    model.eval()
    total_loss = 0
    num_batches = 0
    with torch.no_grad():
        for batch in val_dataloader:
            input_ids = batch['ids'].to(device)
            attention_mask = batch['mask'].to(device)
            labels_h1 = batch['labels_h1'].to(device)
            labels_h2 = batch['labels_h2'].to(device)

            logits_h1, logits_h2 = model(input_ids, attention_mask)
            loss_h1 = loss_fn(logits_h1, labels_h1)
            loss_h2 = loss_fn(logits_h2, labels_h2)
            loss = (weight_h1 * loss_h1) + (weight_h2 * loss_h2)  # Weighted combination

            total_loss += loss.item()
            num_batches += 1

    # An empty validation split (easy to hit with a small dataset) has no
    # meaningful average; report nan instead of dividing by zero.
    if num_batches == 0:
        return float("nan")
    return total_loss / num_batches

# Training loop for H1 and H2 with weights
def train_model(model, dataloader, optimizer, loss_fn, device, num_epochs, val_dataloader):
    """Train ``model`` and print training/validation loss after each epoch.

    Both heads are optimised jointly: the loss is the combination of the H1
    and H2 losses weighted by the module-level ``loss_weight_h1`` and
    ``loss_weight_h2``.

    Args:
        model: Module called as ``model(input_ids, attention_mask)`` and
            returning ``(logits_h1, logits_h2)``.
        dataloader: Training batches, each a dict with the keys ``'ids'``,
            ``'mask'``, ``'labels_h1'`` and ``'labels_h2'``.
        optimizer: Optimizer updating the model parameters.
        loss_fn: Loss applied to both heads.
        device: Device the batch tensors are moved to.
        num_epochs: Number of passes over ``dataloader``.
        val_dataloader: Batches passed to ``validate_model`` after each epoch.
    """
    model.train()
    for epoch in range(num_epochs):
        # validate_model() switches the model to eval mode, so training mode
        # has to be re-enabled at the start of every epoch. Otherwise all
        # epochs after the first would run with dropout disabled.
        model.train()
        total_loss = 0
        for batch in dataloader:
            input_ids = batch['ids'].to(device)
            attention_mask = batch['mask'].to(device)
            labels_h1 = batch['labels_h1'].to(device)
            labels_h2 = batch['labels_h2'].to(device)

            optimizer.zero_grad()
            logits_h1, logits_h2 = model(input_ids, attention_mask)
            loss_h1 = loss_fn(logits_h1, labels_h1)
            loss_h2 = loss_fn(logits_h2, labels_h2)
            loss = (loss_weight_h1 * loss_h1) + (loss_weight_h2 * loss_h2)  # Weighted combination

            loss.backward()
            optimizer.step()
            total_loss += loss.item()

        # Guard against an empty training loader instead of dividing by zero.
        num_batches = len(dataloader)
        avg_train_loss = total_loss / num_batches if num_batches else float("nan")

        # Calculate the validation loss for H1 and H2
        val_loss = validate_model(model, val_dataloader, loss_fn, device)

        print(f"Epoch {epoch + 1}/{num_epochs}, Training Loss: {avg_train_loss:.4f}, Validation Loss: {val_loss:.4f}")

# Train each model on its own training loader and validate it on its own
# validation loader (H1 first, then H2)
train_model(
    model=model_h1,
    dataloader=train_dataloader_h1,
    optimizer=optimizer_h1,
    loss_fn=loss_fn_h1,
    device=device,
    num_epochs=num_epochs,
    val_dataloader=val_dataloader_h1,
)
train_model(
    model=model_h2,
    dataloader=train_dataloader_h2,
    optimizer=optimizer_h2,
    loss_fn=loss_fn_h2,
    device=device,
    num_epochs=num_epochs,
    val_dataloader=val_dataloader_h2,
)

# Evaluation loop for H1
def evaluate_h1(model_h1, test_dataloader_h1, test_labels_h1, device):
    """Score the H1 head on the test set and print the metrics.

    Sigmoid probabilities of the first model output are thresholded at 0.5
    and compared against ``test_labels_h1`` with scikit-learn's accuracy,
    precision, recall and F1 functions (the last three weighted-averaged).

    Args:
        model_h1: Model returning ``(logits_h1, logits_h2)``.
        test_dataloader_h1: Batches with ``'ids'`` and ``'mask'`` entries.
        test_labels_h1: True labels, in the same order as the dataloader.
        device: Device the batch tensors are moved to.
    """
    model_h1.eval()

    batch_probabilities = []
    with torch.no_grad():
        for batch in test_dataloader_h1:
            token_ids = batch['ids'].to(device)
            token_mask = batch['mask'].to(device)
            head_logits, _ = model_h1(token_ids, token_mask)
            batch_probabilities.append(torch.sigmoid(head_logits).cpu().numpy())

    probabilities = np.vstack(batch_probabilities)

    # Probabilities strictly above the cut-off count as a positive label
    cutoff = 0.5
    predictions = (probabilities > cutoff).astype(int)

    accuracy = accuracy_score(test_labels_h1, predictions)
    precision, recall, f1 = (
        metric(test_labels_h1, predictions, average='weighted')
        for metric in (precision_score, recall_score, f1_score)
    )

    print("\nEvaluation Metrics for H1:")
    for metric_name, value in (
        ("Accuracy", accuracy),
        ("Precision", precision),
        ("Recall", recall),
        ("F1 Score", f1),
    ):
        print(f"{metric_name}: {value:.4f}")

# Evaluation loop for H2
def evaluate_h2(model_h2, test_dataloader_h2, test_labels_h2, device):
    """Score the H2 head on the test set and print the metrics.

    Sigmoid probabilities of the second model output are thresholded at 0.5
    and compared against ``test_labels_h2`` with scikit-learn's accuracy,
    precision, recall and F1 functions (the last three weighted-averaged).

    Args:
        model_h2: Model returning ``(logits_h1, logits_h2)``.
        test_dataloader_h2: Batches with ``'ids'`` and ``'mask'`` entries.
        test_labels_h2: True labels, in the same order as the dataloader.
        device: Device the batch tensors are moved to.
    """
    model_h2.eval()

    collected = []
    with torch.no_grad():
        for batch in test_dataloader_h2:
            ids_on_device = batch['ids'].to(device)
            mask_on_device = batch['mask'].to(device)
            # Only the second output (the H2 logits) is evaluated here
            h2_logits = model_h2(ids_on_device, mask_on_device)[1]
            collected.append(torch.sigmoid(h2_logits).cpu().numpy())

    stacked = np.vstack(collected)
    expected = test_labels_h2

    # Binarise: a probability strictly above 0.5 is a predicted label
    predicted = (stacked > 0.5).astype(int)

    weighted = {'average': 'weighted'}
    acc = accuracy_score(expected, predicted)
    prec = precision_score(expected, predicted, **weighted)
    rec = recall_score(expected, predicted, **weighted)
    f1 = f1_score(expected, predicted, **weighted)

    report = [
        "\nEvaluation Metrics for H2:",
        f"Accuracy: {acc:.4f}",
        f"Precision: {prec:.4f}",
        f"Recall: {rec:.4f}",
        f"F1 Score: {f1:.4f}",
    ]
    for line in report:
        print(line)

# Report test-set metrics for each model: H1 first, then H2
evaluate_h1(
    model_h1=model_h1,
    test_dataloader_h1=test_dataloader_h1,
    test_labels_h1=test_labels_h1,
    device=device,
)
evaluate_h2(
    model_h2=model_h2,
    test_dataloader_h2=test_dataloader_h2,
    test_labels_h2=test_labels_h2,
    device=device,
)
