In [1]:
import torch
# Ask PyTorch to release the cached GPU memory it is not currently using;
# mainly useful when this cell is re-run in a kernel that already trained a model.
torch.cuda.empty_cache()



import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset, random_split
from transformers import RobertaTokenizer, RobertaModel, AdamW, get_linear_schedule_with_warmup
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from tqdm import tqdm

# Load and preprocess your data for H1, H2, and H3
# Label columns are selected purely by position, so these slices encode the
# expected CSV layout (the comments state the assumption made by this notebook).
H1_LABEL_COLUMNS = slice(1, 9)       # Assuming columns 1 to 8 are H1 labels
H2_LABEL_COLUMNS = slice(9, 41)      # Assuming columns 9 to 40 are H2 labels
H3_LABEL_COLUMNS = slice(41, None)   # Assuming columns 41 onwards are H3 labels

# One CSV per hierarchy level.
model1_data = pd.read_csv("model1.csv")
model2_data = pd.read_csv("model2.csv")
model3_data = pd.read_csv("model3.csv")

# Raw input strings, one per row.
texts_h1 = model1_data['text'].tolist()
texts_h2 = model2_data['text'].tolist()
texts_h3 = model3_data['text'].tolist()

# One list of label values per row, taken from that level's band of columns.
labels_h1 = model1_data.iloc[:, H1_LABEL_COLUMNS].values.tolist()
labels_h2 = model2_data.iloc[:, H2_LABEL_COLUMNS].values.tolist()
labels_h3 = model3_data.iloc[:, H3_LABEL_COLUMNS].values.tolist()

# Split the data for each hierarchy (adjust the ratios as needed)
def split_data(texts, labels, train_ratio, val_ratio, test_ratio):
    """Cut parallel ``texts`` / ``labels`` sequences into contiguous train, val and test parts.

    The split is sequential (no shuffling): the first ``int(n * train_ratio)``
    items form the training part, the next ``int(n * val_ratio)`` items the
    validation part, and everything that remains forms the test part.

    Args:
        texts: Sequence of samples (any sliceable sequence).
        labels: Sequence of label rows, parallel to ``texts``.
        train_ratio: Fraction of the samples assigned to the training part.
        val_ratio: Fraction of the samples assigned to the validation part.
        test_ratio: Accepted for interface compatibility only. The test part
            is always the remainder after train and val, so no sample is lost
            to the rounding of the other two sizes.

    Returns:
        Tuple ``(train_texts, val_texts, test_texts,
        train_labels, val_labels, test_labels)``.

    Raises:
        ValueError: If ``texts`` and ``labels`` differ in length; slicing them
            with the same boundaries would otherwise silently pair texts with
            the wrong labels.
    """
    total_samples = len(texts)
    if len(labels) != total_samples:
        raise ValueError(
            f"texts and labels must have the same length, got {total_samples} and {len(labels)}"
        )

    # Boundaries are truncated towards zero; the rounding remainder lands in test.
    train_end = int(total_samples * train_ratio)
    val_end = train_end + int(total_samples * val_ratio)

    return (
        texts[:train_end],
        texts[train_end:val_end],
        texts[val_end:],
        labels[:train_end],
        labels[train_end:val_end],
        labels[val_end:],
    )

# Split the data for each hierarchy
SPLIT_RATIOS = (0.7, 0.1, 0.2)  # train, val, test — shared by all three hierarchies

(train_texts_h1, val_texts_h1, test_texts_h1,
 train_labels_h1, val_labels_h1, test_labels_h1) = split_data(texts_h1, labels_h1, *SPLIT_RATIOS)

(train_texts_h2, val_texts_h2, test_texts_h2,
 train_labels_h2, val_labels_h2, test_labels_h2) = split_data(texts_h2, labels_h2, *SPLIT_RATIOS)

(train_texts_h3, val_texts_h3, test_texts_h3,
 train_labels_h3, val_labels_h3, test_labels_h3) = split_data(texts_h3, labels_h3, *SPLIT_RATIOS)

# Tokenizer
# Loaded from the same 'roberta-base' name that the classifier's encoder uses.
tokenizer = RobertaTokenizer.from_pretrained('roberta-base')

# Define the TextDataset class
class TextDataset(Dataset):
    """Map-style dataset yielding a tokenized text plus its H1, H2 and H3 label rows.

    Texts and the three label collections are paired by positional index:
    item ``i`` is built from ``texts[i]``, ``labels_h1[i]``, ``labels_h2[i]``
    and ``labels_h3[i]``. Tokenization happens lazily, on every item access.
    """

    def __init__(self, texts, labels_h1, labels_h2, labels_h3, tokenizer, max_length=512):
        """Store the parallel inputs; nothing is tokenized here.

        Args:
            texts: Indexable collection of input strings.
            labels_h1, labels_h2, labels_h3: Indexable collections of label
                rows, one row per text, for each hierarchy level.
            tokenizer: Object exposing ``encode_plus`` (a RoBERTa tokenizer in
                this notebook).
            max_length: Length every encoded sequence is padded/truncated to.
        """
        self.tokenizer, self.max_length = tokenizer, max_length
        self.texts = texts
        self.labels_h1, self.labels_h2, self.labels_h3 = labels_h1, labels_h2, labels_h3

    def __len__(self):
        """Number of items, defined by the number of texts."""
        return len(self.texts)

    def __getitem__(self, idx):
        """Tokenize text ``idx`` and return it with its label rows as tensors.

        Returns:
            Dict with ``'ids'`` and ``'mask'`` (long tensors) and
            ``'labels_h1'``, ``'labels_h2'``, ``'labels_h3'`` (float tensors).
        """
        encoded = self.tokenizer.encode_plus(
            self.texts[idx],
            None,  # no second sequence
            add_special_tokens=True,
            max_length=self.max_length,
            padding='max_length',
            return_token_type_ids=True,
            truncation=True,
        )

        sample = {
            'ids': torch.tensor(encoded['input_ids'], dtype=torch.long),
            'mask': torch.tensor(encoded['attention_mask'], dtype=torch.long),
        }
        # Labels are stored as floats (the notebook trains with BCEWithLogitsLoss).
        label_sources = (
            ('labels_h1', self.labels_h1),
            ('labels_h2', self.labels_h2),
            ('labels_h3', self.labels_h3),
        )
        for key, rows in label_sources:
            sample[key] = torch.tensor(rows[idx], dtype=torch.float)
        return sample

# Create datasets and dataloaders for H1, H2, and H3
SPLIT_SEED = 42  # fixed seed so the random train/val/test partition is reproducible
BATCH_SIZE = 8

# split_data() slices each list sequentially, so joining its three parts back
# together restores the full list in its original row order.
all_texts_h1 = train_texts_h1 + val_texts_h1 + test_texts_h1
all_texts_h2 = train_texts_h2 + val_texts_h2 + test_texts_h2
all_texts_h3 = train_texts_h3 + val_texts_h3 + test_texts_h3
all_labels_h1 = train_labels_h1 + val_labels_h1 + test_labels_h1
all_labels_h2 = train_labels_h2 + val_labels_h2 + test_labels_h2
all_labels_h3 = train_labels_h3 + val_labels_h3 + test_labels_h3

# Every dataset item pairs text i with the H1, H2 and H3 label rows found at
# the same index i, although the three label sets come from different CSV files.
# Fail early if the sources cannot line up row for row.
# NOTE(review): equal length does not prove the rows describe the same samples —
# confirm that the three CSVs share the same row order.
_row_counts = {
    len(all_texts_h1), len(all_texts_h2), len(all_texts_h3),
    len(all_labels_h1), len(all_labels_h2), len(all_labels_h3),
}
if len(_row_counts) != 1:
    raise ValueError(
        f"H1/H2/H3 texts and labels must all have the same number of rows, got sizes {sorted(_row_counts)}"
    )

dataset_h1 = TextDataset(all_texts_h1, all_labels_h1, all_labels_h2, all_labels_h3, tokenizer)
dataset_h2 = TextDataset(all_texts_h2, all_labels_h1, all_labels_h2, all_labels_h3, tokenizer)
dataset_h3 = TextDataset(all_texts_h3, all_labels_h1, all_labels_h2, all_labels_h3, tokenizer)

# Split the datasets into train, val, and test for H1, H2, and H3
train_size_h1 = len(train_texts_h1)
val_size_h1 = len(val_texts_h1)
train_size_h2 = len(train_texts_h2)
val_size_h2 = len(val_texts_h2)
train_size_h3 = len(train_texts_h3)
val_size_h3 = len(val_texts_h3)


def _seeded_split(dataset, lengths):
    """Randomly partition ``dataset`` into parts of ``lengths`` using SPLIT_SEED.

    A fresh generator with the same seed is used for every call, so datasets
    of equal size are partitioned by the same index permutation. Because one
    model is trained on all three hierarchies, this keeps an index that is
    held out for validation in one hierarchy out of the training part of the
    others, and makes the partition identical from run to run.
    """
    return random_split(dataset, lengths, generator=torch.Generator().manual_seed(SPLIT_SEED))


train_dataset_h1, val_dataset_h1, test_dataset_h1 = _seeded_split(dataset_h1, [train_size_h1, val_size_h1, len(test_texts_h1)])
train_dataset_h2, val_dataset_h2, test_dataset_h2 = _seeded_split(dataset_h2, [train_size_h2, val_size_h2, len(test_texts_h2)])
train_dataset_h3, val_dataset_h3, test_dataset_h3 = _seeded_split(dataset_h3, [train_size_h3, val_size_h3, len(test_texts_h3)])

# Create dataloaders for H1, H2, and H3
# Training batches are reshuffled every epoch; validation order is kept fixed.
# The test parts are split off above but no DataLoader is built for them here.
train_dataloader_h1 = DataLoader(train_dataset_h1, batch_size=BATCH_SIZE, shuffle=True)
val_dataloader_h1 = DataLoader(val_dataset_h1, batch_size=BATCH_SIZE, shuffle=False)

train_dataloader_h2 = DataLoader(train_dataset_h2, batch_size=BATCH_SIZE, shuffle=True)
val_dataloader_h2 = DataLoader(val_dataset_h2, batch_size=BATCH_SIZE, shuffle=False)

train_dataloader_h3 = DataLoader(train_dataset_h3, batch_size=BATCH_SIZE, shuffle=True)
val_dataloader_h3 = DataLoader(val_dataset_h3, batch_size=BATCH_SIZE, shuffle=False)

# Define the model architecture for H1, H2, and H3
class MultiLabelClassifier(nn.Module):
    """Shared RoBERTa encoder feeding three independent linear heads (H1, H2, H3).

    Each head maps the encoder's hidden state at sequence position 0 to one
    logit per label of its hierarchy level. No activation is applied; the
    outputs are raw logits.
    """

    def __init__(self, num_labels_h1, num_labels_h2, num_labels_h3):
        """Load the pretrained encoder and create one linear head per level.

        Args:
            num_labels_h1: Output width of the H1 head.
            num_labels_h2: Output width of the H2 head.
            num_labels_h3: Output width of the H3 head.
        """
        super().__init__()
        self.roberta = RobertaModel.from_pretrained('roberta-base')
        self.dropout = nn.Dropout(0.1)
        hidden_size = self.roberta.config.hidden_size
        self.fc_h1 = nn.Linear(hidden_size, num_labels_h1)
        self.fc_h2 = nn.Linear(hidden_size, num_labels_h2)
        self.fc_h3 = nn.Linear(hidden_size, num_labels_h3)

    def forward(self, input_ids, attention_mask):
        """Encode the batch and return ``(logits_h1, logits_h2, logits_h3)``."""
        encoder_out = self.roberta(input_ids, attention_mask)
        # Pooling strategy: hidden state of the first token of every sequence.
        first_token_state = encoder_out.last_hidden_state[:, 0, :]
        # Dropout is applied once per head (H1, then H2, then H3), so in
        # training mode each head receives its own dropout mask.
        heads = (self.fc_h1, self.fc_h2, self.fc_h3)
        return tuple(head(self.dropout(first_token_state)) for head in heads)

# Initialize and move the model to the appropriate device (CPU/GPU)
# Head widths are read from the first training label row of each hierarchy.
model = MultiLabelClassifier(
    num_labels_h1=len(train_labels_h1[0]),
    num_labels_h2=len(train_labels_h2[0]),
    num_labels_h3=len(train_labels_h3[0]),
)
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
model.to(device)

# Define loss function and optimizer
# BCEWithLogitsLoss takes the raw logits returned by the model; one instance is
# shared by all three heads.
criterion = nn.BCEWithLogitsLoss()
# NOTE(review): this `AdamW` is the one imported from transformers at the top of
# the notebook; upstream has deprecated it in favour of torch.optim.AdamW —
# confirm against the installed transformers version before upgrading.
optimizer = AdamW(model.parameters(), lr=1e-5)  # You can adjust the learning rate as needed

# Training loop for H1, H2, and H3
def train(model, dataloader, optimizer, criterion, device):
    """Run one optimisation pass over ``dataloader`` and return the mean batch loss.

    Every batch contributes the sum of the H1, H2 and H3 losses, so all three
    heads are updated on every step regardless of which dataloader is passed.

    Args:
        model: Callable module returning ``(logits_h1, logits_h2, logits_h3)``.
        dataloader: Sized iterable of batches with keys ``'ids'``, ``'mask'``,
            ``'labels_h1'``, ``'labels_h2'`` and ``'labels_h3'``.
        optimizer: Optimizer stepping the model's parameters.
        criterion: Loss applied to each ``(logits, labels)`` pair.
        device: Device the batch tensors are moved to.

    Returns:
        Sum of the per-batch losses divided by ``len(dataloader)``.
    """
    model.train()
    running_loss = 0.0
    batch_keys = ('ids', 'mask', 'labels_h1', 'labels_h2', 'labels_h3')

    for batch in tqdm(dataloader, total=len(dataloader), desc="Training"):
        on_device = {key: batch[key].to(device) for key in batch_keys}

        optimizer.zero_grad()

        out_h1, out_h2, out_h3 = model(on_device['ids'], on_device['mask'])
        # Unweighted sum: each hierarchy level contributes equally.
        batch_loss = (
            criterion(out_h1, on_device['labels_h1'])
            + criterion(out_h2, on_device['labels_h2'])
            + criterion(out_h3, on_device['labels_h3'])
        )
        running_loss += batch_loss.item()

        batch_loss.backward()
        optimizer.step()

    return running_loss / len(dataloader)

# Validation loop for H1, H2, and H3
def evaluate(model, dataloader, criterion, device):
    """Score ``model`` on ``dataloader`` without gradient tracking.

    Args:
        model: Callable module returning ``(logits_h1, logits_h2, logits_h3)``.
        dataloader: Sized iterable of batches with keys ``'ids'``, ``'mask'``,
            ``'labels_h1'``, ``'labels_h2'`` and ``'labels_h3'``.
        criterion: Loss applied to each ``(logits, labels)`` pair.
        device: Device the batch tensors are moved to.

    Returns:
        A 7-tuple ``(mean_loss, preds_h1, preds_h2, preds_h3, labels_h1,
        labels_h2, labels_h3)``. ``mean_loss`` is the summed three-head loss
        averaged over batches. Each ``preds_*`` is a list holding one NumPy
        array of sigmoid probabilities per sample, and each ``labels_*`` is a
        list holding one NumPy array of targets per sample.
    """
    model.eval()
    running_loss = 0.0
    collected_preds = ([], [], [])    # one list per head: H1, H2, H3
    collected_labels = ([], [], [])

    with torch.no_grad():
        for batch in tqdm(dataloader, total=len(dataloader), desc="Validation"):
            input_ids = batch['ids'].to(device)
            attention_mask = batch['mask'].to(device)
            targets = (
                batch['labels_h1'].to(device),
                batch['labels_h2'].to(device),
                batch['labels_h3'].to(device),
            )

            logits_h1, logits_h2, logits_h3 = model(input_ids, attention_mask)
            logits = (logits_h1, logits_h2, logits_h3)

            # Same unweighted three-head sum that the training loop optimises.
            batch_loss = (
                criterion(logits[0], targets[0])
                + criterion(logits[1], targets[1])
                + criterion(logits[2], targets[2])
            )
            running_loss += batch_loss.item()

            # extend() over a 2-D array appends one row (one sample) at a time.
            for bucket, head_logits in zip(collected_preds, logits):
                bucket.extend(torch.sigmoid(head_logits).cpu().numpy())
            for bucket, head_targets in zip(collected_labels, targets):
                bucket.extend(head_targets.cpu().numpy())

    return (running_loss / len(dataloader), *collected_preds, *collected_labels)

# Training and evaluation for H1, H2, and H3
num_epochs = 5  # You can adjust the number of epochs as needed

# evaluate() returns (loss, preds_h1, preds_h2, preds_h3, labels_h1, labels_h2, labels_h3);
# for each hierarchy only its own prediction/label lists are kept.
for epoch in range(num_epochs):
    # H1 pass. Its validation numbers are taken before the H2 and H3 passes of
    # the same epoch update the shared model again.
    train_loss_h1 = train(model, train_dataloader_h1, optimizer, criterion, device)
    eval_h1 = evaluate(model, val_dataloader_h1, criterion, device)
    val_loss_h1, val_preds_h1, val_labels_h1 = eval_h1[0], eval_h1[1], eval_h1[4]

    # H2 pass.
    train_loss_h2 = train(model, train_dataloader_h2, optimizer, criterion, device)
    eval_h2 = evaluate(model, val_dataloader_h2, criterion, device)
    val_loss_h2, val_preds_h2, val_labels_h2 = eval_h2[0], eval_h2[2], eval_h2[5]

    # H3 pass.
    train_loss_h3 = train(model, train_dataloader_h3, optimizer, criterion, device)
    eval_h3 = evaluate(model, val_dataloader_h3, criterion, device)
    val_loss_h3, val_preds_h3, val_labels_h3 = eval_h3[0], eval_h3[3], eval_h3[6]

    



Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
    Found GPU1 Tesla K40c which is of cuda capability 3.5.
    PyTorch no longer supports this GPU because it is too old.
    The minimum cuda capability supported by this library is 3.7.
    
    Found GPU2 Tesla K40c which is of cuda capability 3.5.
    PyTorch no longer supports this GPU because it is too old.
    The minimum cuda capability supported by this library is 3.7.
    
Training: 100%|██████████| 308/308 [03:50<00:00,  1.34it/s]
Validation: 100%|██████████| 44/44 [00:12<00:00,  3.59it/s]
Training: 100%|██████████| 308/308 [03:50<00:00,  1.34it/s]
Validation: 100%|██████████| 44/44 [00:12<00:00,  3.61it/s]
Training: 100%|██████████| 308/308 [03:50<00:00,  1.34it/s]
Validation: 100%|██████

In [3]:
import numpy as np

from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, average_precision_score



def _threshold_metrics(labels, scores, threshold):
    """Binarise ``scores`` at ``threshold`` and score them against ``labels``.

    Returns ``(binary_preds, accuracy, precision, recall, f1, avg_precision)``.
    Precision, recall, F1 and average precision are micro-averaged; average
    precision is computed from the raw scores, not the thresholded ones.
    """
    binary_preds = (np.array(scores) > threshold).astype(int)
    return (
        binary_preds,
        accuracy_score(labels, binary_preds),
        precision_score(labels, binary_preds, average='micro'),
        recall_score(labels, binary_preds, average='micro'),
        f1_score(labels, binary_preds, average='micro'),
        average_precision_score(labels, scores, average='micro'),
    )


# The values reported below are those left behind by the last iteration of the
# epoch loop (`epoch`, the losses, and the validation predictions/labels).

# Calculate and print metrics for H1
threshold_h1 = 0.5  # You can adjust this threshold as needed
(val_preds_h1_binary, acc_h1, precision_h1,
 recall_h1, f1_h1, avg_precision_h1) = _threshold_metrics(val_labels_h1, val_preds_h1, threshold_h1)

print(f"Epoch {epoch+1}/{num_epochs} - H1: Train Loss: {train_loss_h1:.4f}, Val Loss: {val_loss_h1:.4f}")
print(f"Accuracy H1: {acc_h1:.4f}, Precision H1: {precision_h1:.4f}, Recall H1: {recall_h1:.4f}, F1 H1: {f1_h1:.4f}, Avg Precision H1: {avg_precision_h1:.4f}")

# Calculate and print metrics for H2
threshold_h2 = 0.5  # You can adjust this threshold as needed
(val_preds_h2_binary, acc_h2, precision_h2,
 recall_h2, f1_h2, avg_precision_h2) = _threshold_metrics(val_labels_h2, val_preds_h2, threshold_h2)

print(f"Epoch {epoch+1}/{num_epochs} - H2: Train Loss: {train_loss_h2:.4f}, Val Loss: {val_loss_h2:.4f}")
print(f"Accuracy H2: {acc_h2:.4f}, Precision H2: {precision_h2:.4f}, Recall H2: {recall_h2:.4f}, F1 H2: {f1_h2:.4f}, Avg Precision H2: {avg_precision_h2:.4f}")

# Calculate and print metrics for H3
threshold_h3 = 0.5  # You can adjust this threshold as needed
(val_preds_h3_binary, acc_h3, precision_h3,
 recall_h3, f1_h3, avg_precision_h3) = _threshold_metrics(val_labels_h3, val_preds_h3, threshold_h3)

print(f"Epoch {epoch+1}/{num_epochs} - H3: Train Loss: {train_loss_h3:.4f}, Val Loss: {val_loss_h3:.4f}")
print(f"Accuracy H3: {acc_h3:.4f}, Precision H3: {precision_h3:.4f}, Recall H3: {recall_h3:.4f}, F1 H3: {f1_h3:.4f}, Avg Precision H3: {avg_precision_h3:.4f}")


Epoch 5/5 - H1: Train Loss: 0.2019, Val Loss: 0.2230
Accuracy H1: 0.8348, Precision H1: 0.9617, Recall H1: 0.9697, F1 H1: 0.9657, Avg Precision H1: 0.9899
Epoch 5/5 - H2: Train Loss: 0.1843, Val Loss: 0.2155
Accuracy H2: 0.6040, Precision H2: 0.8949, Recall H2: 0.7608, F1 H2: 0.8224, Avg Precision H2: 0.8821
Epoch 5/5 - H3: Train Loss: 0.1711, Val Loss: 0.1989
Accuracy H3: 0.2365, Precision H3: 0.7796, Recall H3: 0.5074, F1 H3: 0.6147, Avg Precision H3: 0.6754


In [None]:

#rough
# Calculate and print metrics for H1, H2, and H3
# You can use functions like accuracy_score, precision_score, recall_score, and f1_score from sklearn.metrics
#
# The statements are kept at cell level (not indented) so the cell can run on
# its own; an indented body without an enclosing block is an IndentationError.
# Each label row has several columns (multilabel), so precision/recall/F1 need
# an explicit `average`; 'micro' is used, matching the metrics cell above.
rough_threshold = 0.5

# Threshold each set of sigmoid scores once and reuse the result for all metrics.
rough_binary_h1 = (np.array(val_preds_h1) > rough_threshold).astype(int)
rough_binary_h2 = (np.array(val_preds_h2) > rough_threshold).astype(int)
rough_binary_h3 = (np.array(val_preds_h3) > rough_threshold).astype(int)

acc_h1 = accuracy_score(val_labels_h1, rough_binary_h1)
precision_h1 = precision_score(val_labels_h1, rough_binary_h1, average='micro')
recall_h1 = recall_score(val_labels_h1, rough_binary_h1, average='micro')
f1_h1 = f1_score(val_labels_h1, rough_binary_h1, average='micro')

acc_h2 = accuracy_score(val_labels_h2, rough_binary_h2)
precision_h2 = precision_score(val_labels_h2, rough_binary_h2, average='micro')
recall_h2 = recall_score(val_labels_h2, rough_binary_h2, average='micro')
f1_h2 = f1_score(val_labels_h2, rough_binary_h2, average='micro')

acc_h3 = accuracy_score(val_labels_h3, rough_binary_h3)
precision_h3 = precision_score(val_labels_h3, rough_binary_h3, average='micro')
recall_h3 = recall_score(val_labels_h3, rough_binary_h3, average='micro')
f1_h3 = f1_score(val_labels_h3, rough_binary_h3, average='micro')

print(f"Epoch {epoch+1}/{num_epochs} - H1: Train Loss: {train_loss_h1:.4f}, Val Loss: {val_loss_h1:.4f}, Acc: {acc_h1:.4f}, Precision: {precision_h1:.4f}, Recall: {recall_h1:.4f}, F1: {f1_h1:.4f}")
print(f"Epoch {epoch+1}/{num_epochs} - H2: Train Loss: {train_loss_h2:.4f}, Val Loss: {val_loss_h2:.4f}, Acc: {acc_h2:.4f}, Precision: {precision_h2:.4f}, Recall: {recall_h2:.4f}, F1: {f1_h2:.4f}")
print(f"Epoch {epoch+1}/{num_epochs} - H3: Train Loss: {train_loss_h3:.4f}, Val Loss: {val_loss_h3:.4f}, Acc: {acc_h3:.4f}, Precision: {precision_h3:.4f}, Recall: {recall_h3:.4f}, F1: {f1_h3:.4f}")
# You can also add a test loop to evaluate the model on the test set if needed
