In [8]:
import torch
import torch.nn as nn
import os
from model import RobertaGAT
from dataset import CustomDataset
from torch_geometric.loader import DataLoader

os.environ['CUDA_LAUNCH_BLOCKING'] = "1"

In [9]:
# 加载数据集
from datasets import load_dataset, DatasetDict

dataset_train = load_dataset('csv', data_files='train.csv', encoding='utf-8')
dataset_test = load_dataset('csv', data_files='test.csv', encoding='utf-8')
dataset_valid = load_dataset('csv', data_files='valid.csv', encoding='utf-8')
dataset = DatasetDict({'train': dataset_train, 'test': dataset_test, 'validation': dataset_valid})
# dataset = DatasetDict({'test': dataset_test})

In [10]:
# 分词
from transformers import RobertaTokenizer

tokenizer = RobertaTokenizer.from_pretrained("roberta-base")
def encode_batch(batch):
    return tokenizer(batch['text'], padding='max_length', truncation=True, max_length=48, return_tensors="pt")

dataset = {split: dataset[split].map(encode_batch, batched=True) for split in dataset.keys()}
dataset['test']['train'] = dataset['test']['train'].remove_columns('text')
dataset['train']['train'] = dataset['train']['train'].remove_columns('text')
dataset['validation']['train'] = dataset['validation']['train'].remove_columns('text')

Map:   0%|          | 0/10323 [00:00<?, ? examples/s]

Map:   0%|          | 0/9775 [00:00<?, ? examples/s]

In [11]:
from utils import get_sentence_rel

# 获取边关系
train_sen_rel = get_sentence_rel(path='train.csv')
test_sen_rel = get_sentence_rel(path='test.csv')
valid_sen_rel = get_sentence_rel(path='valid.csv')

In [12]:
from utils import spilt_node

train_data = spilt_node(dataset['train']['train'])
test_data = spilt_node(dataset['test']['train'])

train_dataset = CustomDataset(train_data, train_sen_rel, 20, 8)
test_dataset = CustomDataset(test_data, test_sen_rel, 20, 8)

train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=16, shuffle=True)

In [13]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = RobertaGAT("roberta-base", num_classes=4)
model.roberta_first_half.to('cuda:0')
model.roberta_second_half.to('cuda:1')

weights = torch.tensor([0.1915, 0.2766, 0.3404, 0.1915], dtype=torch.float32)
weights = weights.to('cuda:1')  

# optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-6, weight_decay=1e-4)

criterion = nn.CrossEntropyLoss(ignore_index=4, weight=weights)
num_epochs = 50

# writer = SummaryWriter('log/robert-gat')

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [14]:
from sklearn.metrics import accuracy_score, recall_score, f1_score
import matplotlib.pyplot as plt

for epoch in range(num_epochs):
    model.train()
    total_loss = 0
    
    for batch in train_loader:
        input_ids = batch[0]['x']
        attention_mask = batch[0]['mask']
        labels = batch[0]['y']
        edge_index = batch[0]['edge_index']
        num_nodes_graph = 0
        
        for i in range(1, len(batch)):
            edge_index_tmp = (batch[i]['edge_index'] + num_nodes_graph)
            edge_index = torch.cat((edge_index, edge_index_tmp), dim=1)
            num_nodes_graph += batch[i]['x'].size(0)
            input_ids = torch.cat((input_ids, batch[i]['x']), dim=0)
            attention_mask  = torch.cat((attention_mask, batch[i]['mask']), dim=0)
            labels = torch.cat((labels, batch[i]['y']), dim=0)
            
        input_ids = input_ids.to('cuda:0')
        attention_mask = attention_mask.to('cuda:0')
        edge_index = edge_index.to('cuda:1')
        labels = labels.to('cuda:1')
        
        optimizer.zero_grad()
        
        output, weight1 = model(input_ids, attention_mask, edge_index)
        loss = criterion(output.to("cuda:1"), labels)
        
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
        
        torch.cuda.empty_cache()

    avg_loss = total_loss / len(train_loader)

    model.eval()  # 测试
    total_test_loss = 0
    all_labels = []
    all_preds = []
    with torch.no_grad():  
        for batch in test_loader:
            input_ids = batch[0]['x']
            attention_mask = batch[0]['mask']
            labels = batch[0]['y']
            edge_index = batch[0]['edge_index']
            num_nodes_graph = 0
            
            for i in range(1, len(batch)):
                edge_index_tmp = (batch[i]['edge_index'] + num_nodes_graph)
                edge_index = torch.cat((edge_index, edge_index_tmp), dim=1)
                num_nodes_graph += batch[i]['x'].size(0)
                input_ids = torch.cat((input_ids, batch[i]['x']), dim=0)
                attention_mask  = torch.cat((attention_mask, batch[i]['mask']), dim=0)
                labels = torch.cat((labels, batch[i]['y']), dim=0)
                
            input_ids = input_ids.to('cuda:0')
            attention_mask = attention_mask.to('cuda:0')
            edge_index = edge_index.to('cuda:1')
            labels = labels.to('cuda:1')
            
            output, weight1 = model(input_ids, attention_mask, edge_index)
            loss = criterion(output.to("cuda:1"), labels)
            total_test_loss += loss.item()
            
            preds = output.argmax(dim=1)

            all_labels.extend(labels.cpu().numpy())
            all_preds.extend(preds.cpu().numpy())
            torch.cuda.empty_cache()
            
    avg_test_loss = total_test_loss / len(test_loader)

    accuracy = accuracy_score(all_labels, all_preds)
    recall = recall_score(all_labels, all_preds, average='macro')
    f1 = f1_score(all_labels, all_preds, average='macro')
   
    print(f"Epoch {epoch+1}/{num_epochs}, Training Loss: {avg_loss}, Testing Loss: {avg_test_loss}, Accuracy: {accuracy}, Recall: {recall}, F1 Score: {f1}")
    
    
plt.tight_layout()
plt.savefig('training_performance.png')
plt.show()

Epoch 1/50, Training Loss: 1.2982218308108193, Testing Loss: 0.7862927552900816, Accuracy: 0.6123563545340767, Recall: 0.4793921299517835, F1 Score: 0.4446100302774011
Epoch 2/50, Training Loss: 0.594910149882128, Testing Loss: 0.4986977553681323, Accuracy: 0.7438844009557402, Recall: 0.624815977882143, F1 Score: 0.6009878004332174
Epoch 3/50, Training Loss: 0.4875512620904943, Testing Loss: 0.4514888893616827, Accuracy: 0.7678916827852998, Recall: 0.6486419418917118, F1 Score: 0.623994635486641
Epoch 4/50, Training Loss: 0.45367323631768697, Testing Loss: 0.4379161808051561, Accuracy: 0.7771077483217659, Recall: 0.659326099345553, F1 Score: 0.6324788588075163
Epoch 5/50, Training Loss: 0.435842141017809, Testing Loss: 0.41798944771289825, Accuracy: 0.7825691204915235, Recall: 0.660477230263532, F1 Score: 0.6357808888802786
Epoch 6/50, Training Loss: 0.4128171567405973, Testing Loss: 0.40924267235555145, Accuracy: 0.7876891569006713, Recall: 0.667738961559088, F1 Score: 0.6424909699198

<Figure size 640x480 with 0 Axes>