## Data augmentation with Llama 3

In [None]:
!pip install --upgrade accelerate transformers

In [None]:
!pip install flash-attn

In [None]:
!pip install transformers accelerate bitsandbytes

Collecting accelerate
  Downloading accelerate-0.32.1-py3-none-any.whl (314 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m314.1/314.1 kB[0m [31m5.7 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting bitsandbytes
  Downloading bitsandbytes-0.43.1-py3-none-manylinux_2_24_x86_64.whl (119.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m119.8/119.8 MB[0m [31m14.3 MB/s[0m eta [36m0:00:00[0m
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch>=1.10.0->accelerate)
  Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch>=1.10.0->accelerate)
  Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)
Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch>=1.10.0->accelerate)
  Using cached nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (14.1 MB)
Collecting nvidia-cudnn-cu12==8.9.2.26 (from torch>=1.10.0->acceler

In [None]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, GenerationConfig

# 8-bit quantization is disabled in this cell; the model is loaded in bfloat16 instead.
#quantization_config = BitsAndBytesConfig(load_in_8bit=True)
# Initialize the tokenizer and model from Hugging Face
model_id = "meta-llama/Meta-Llama-3-8B"  # checkpoint used for the augmentation
# Alternative checkpoint tried during experimentation:
# google/gemma-2b
tokenizer = AutoTokenizer.from_pretrained(model_id)
# Reuse the end-of-sequence token as the padding token so that the tokenizer call with
# `padding=True` in generate_similar_contexts has a pad token to work with
# (presumably the checkpoint's tokenizer ships without one — confirm).
tokenizer.pad_token = tokenizer.eos_token
#tokenizer.add_special_tokens({'pad_token': '[PAD]'})

model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16,
    device_map="cuda:0",  # place the whole model on the first GPU
    low_cpu_mem_usage=True,
    # NOTE(review): max_length looks like a generation setting rather than a loading
    # option; generate_similar_contexts passes its own max_length=256 — confirm this
    # value is still needed here.
    max_length=512,
    #quantization_config=quantization_config,
    #attn_implementation="flash_attention_2"
)
#model.generation_config.pad_token_id = tokenizer.pad_token_id

def generate_similar_contexts(text):
    """Sample four continuations of ``text`` and return the first one.

    The prompt is truncated to 64 tokens and each sampled sequence is limited to
    256 tokens in total (prompt included). The remaining three candidates are
    printed for inspection only.

    Relies on the module-level ``tokenizer`` and ``model``.

    Args:
        text: Prompt string to continue.

    Returns:
        The first sampled sequence decoded to a string (prompt followed by its
        continuation), with special tokens removed.
    """
    # Put the inputs on whatever device the model lives on instead of assuming "cuda".
    input_ids = tokenizer(
        text,
        padding=True,
        truncation=True,
        max_length=64,
        return_tensors="pt",
    ).to(model.device)

    #outputs = model.generate(**input_ids, max_new_tokens=100, do_sample=True, min_length=512, num_beams=6, num_beam_groups=3, diversity_penalty=0.3, num_return_sequences=4, repetition_penalty=1.4)
    outputs = model.generate(
        **input_ids,
        do_sample=True,
        max_length=256,
        top_k=75,
        top_p=0.55,
        temperature=0.9,
        num_return_sequences=4,
        # Sequences that finish early are padded; state the pad id explicitly.
        pad_token_id=tokenizer.pad_token_id,
    )

    # The pad token is the EOS token, so without skip_special_tokens the decoded
    # text would carry begin/end-of-text markers into the augmented dataset.
    candidates = tokenizer.batch_decode(outputs, skip_special_tokens=True)
    for candidate in candidates[1:]:
        print(candidate)
    return candidates[0]

# Read the content from the file
input_file_path = "R8_input.txt"
output_file_path = "R8.txt"

# Load every input line up front
with open(input_file_path, 'r') as file:
    lines = file.readlines()

# Output records: each original line followed by one generated line
new_lines = []

for line in lines:
    line = line.strip()  # Remove leading/trailing whitespace
    if not line:  # Skip blank lines
        continue

    # Keep the original document
    new_lines.append(line)

    # Generate a similar context for it
    similar_contexts = generate_similar_contexts(line)

    # The output file holds one record per line, so line breaks inside the
    # generated text must be flattened or they would split it into several records.
    new_lines.append(' '.join(similar_contexts.splitlines()))

# Write the new lines to the output file
with open(output_file_path, 'w') as file:
    for new_line in new_lines:
        file.write(new_line + "\n")

print(f"New data written to {output_file_path}")


### Working version

In [None]:
import torch
import gc
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, GenerationConfig, pipeline


# Load the model weights in 8-bit via bitsandbytes.
quantization_config = BitsAndBytesConfig(load_in_8bit=True)

# Initialize the tokenizer and model from Hugging Face
model_id = "meta-llama/Meta-Llama-3-8B"
# Alternative checkpoints tried during experimentation:
# google/gemma-2b
# meta-llama/Llama-2-13b-chat

tokenizer = AutoTokenizer.from_pretrained(model_id)
# Reuse the end-of-sequence token as the padding token.
tokenizer.pad_token = tokenizer.eos_token
#tokenizer.add_special_tokens({'pad_token': '[PAD]'})

model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16,
    device_map="auto",
    low_cpu_mem_usage=True,
    # NOTE(review): max_length looks like a generation setting rather than a loading
    # option; the pipeline below sets max_new_tokens=50 — confirm this is still needed.
    max_length=256,
    quantization_config=quantization_config,
    #attn_implementation="flash_attention_2"
)
#model.generation_config.pad_token_id = tokenizer.pad_token_id
# Text-generation pipeline used by generate_similar_contexts: up to 50 new tokens per
# sample, and only the continuation is returned (return_full_text=False).
# NOTE(review): this assignment rebinds the name `pipeline`, shadowing the factory
# imported from transformers; after this cell runs, `pipeline(...)` calls this
# text-generation object, not the factory.
# NOTE(review): torch_dtype here is float16 while the model above was loaded with
# bfloat16 — confirm which dtype is intended.
pipeline = pipeline("text-generation",
                  model=model,
                  tokenizer=tokenizer,
                  torch_dtype=torch.float16,
                  max_new_tokens=50,
                  return_full_text=False
                                )


def generate_similar_contexts(input):
    """Sample four continuations of ``input`` with the text-generation pipeline.

    Relies on the module-level ``pipeline`` object.

    Args:
        input: Prompt string to continue.

    Returns:
        A list with the ``generated_text`` of each of the four sampled sequences.
    """
    #outputs = model.generate(**input_ids, max_new_tokens=100, do_sample=True, min_length=512, num_beams=6, num_beam_groups=3, diversity_penalty=0.3, num_return_sequences=4, repetition_penalty=1.4)
    outputs = pipeline(
        input,
        # Returning several sequences requires sampling (or beam search). Request it
        # explicitly instead of depending on the checkpoint's default generation
        # settings, so the alternative model ids work as well.
        do_sample=True,
        repetition_penalty=1.8,
        #max_length=512,
        top_k=15,
        top_p=0.75,
        temperature=0.8,
        num_return_sequences=4,
    )
    return [output['generated_text'] for output in outputs]

# Read the content from the file
input_file_path = "R8_input.txt"
output_file_path = "R8_output.txt"

# Load every input line up front
with open(input_file_path, 'r') as file:
    lines = file.readlines()

# Only non-blank lines are augmented; count them so progress is reported against
# the real size of the input instead of a hard-coded number.
total = sum(1 for raw_line in lines if raw_line.strip())

# Augmented records (kept in memory as well as streamed to disk)
new_lines = []
cnt = 0

# The run takes many hours, so every record is written and flushed as soon as it
# is produced; an interruption then loses at most the record in progress.
with open(output_file_path, 'w') as file:
    for line in lines:
        line = line.strip()
        if not line:
            continue

        similar_contexts = generate_similar_contexts(line)
        cnt += 1

        if cnt % 100 == 0:
            # Release Python garbage and cached CUDA blocks periodically
            gc.collect()
            torch.cuda.empty_cache()
            print(f"{cnt} of {total}")
            # L4 21min cnt=100, 39min cnt=200, 56min cnt=300
            # A100  21min cnt=100

        # One record per line: the original text followed by its continuations.
        # Line breaks inside a continuation become spaces, and the pieces are
        # joined with a space, so words on either side of a boundary are not fused.
        pieces = [line]
        for s in similar_contexts:
            pieces.append(' '.join(s.splitlines()))
        line = ' '.join(pieces)

        new_lines.append(line)
        file.write(line + "\n")
        file.flush()


print(f"New data written to {output_file_path}")


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset


100  of 7674
200  of 7674
300  of 7674
400  of 7674
500  of 7674
600  of 7674
700  of 7674
800  of 7674
900  of 7674
1000  of 7674
1100  of 7674
1200  of 7674
1300  of 7674
1400  of 7674
1500  of 7674
1600  of 7674
1700  of 7674
1800  of 7674
1900  of 7674
2000  of 7674
2100  of 7674
2200  of 7674
2300  of 7674
2400  of 7674
2500  of 7674
2600  of 7674
2700  of 7674
2800  of 7674
2900  of 7674
3000  of 7674
3100  of 7674
3200  of 7674
3300  of 7674
3400  of 7674
3500  of 7674
3600  of 7674
3700  of 7674
3800  of 7674
3900  of 7674
4000  of 7674
4100  of 7674
4200  of 7674
4300  of 7674
4400  of 7674
4500  of 7674
4600  of 7674
4700  of 7674
4800  of 7674
4900  of 7674
5000  of 7674
5100  of 7674
5200  of 7674
5300  of 7674
5400  of 7674
5500  of 7674
5600  of 7674
5700  of 7674
5800  of 7674
5900  of 7674
6000  of 7674
6100  of 7674
6200  of 7674
6300  of 7674
6400  of 7674
6500  of 7674
6600  of 7674
6700  of 7674
6800  of 7674
6900  of 7674
7000  of 7674
7100  of 7674
7200  of 7674
7

In [None]:
import re

def clean_text(text):
    """Delete every character that is neither an ASCII letter nor whitespace.

    Removed characters are dropped outright (they are not replaced by a space),
    so whitespace, including line breaks, is preserved exactly as it was.
    """
    unwanted = re.compile(r'[^a-zA-Z\s]')
    return unwanted.sub('', text)

def read_file(file_path):
    """Return the full contents of ``file_path``, decoded as UTF-8."""
    with open(file_path, mode='r', encoding='utf-8') as source:
        contents = source.read()
    return contents

def write_file(file_path, text):
    """Overwrite ``file_path`` with ``text``, encoded as UTF-8."""
    with open(file_path, mode='w', encoding='utf-8') as sink:
        sink.write(text)

def main(input_file_path, output_file_path):
    """Clean the text stored in ``input_file_path`` and save it to ``output_file_path``.

    The input is read in full, passed through ``clean_text`` and the result is
    written out, replacing any existing output file.
    """
    write_file(output_file_path, clean_text(read_file(input_file_path)))

# Paths for the cleaning step: the input is the file written by the augmentation
# cell and the output is the file read by the stop-word removal cell.
input_file_path = 'R8_output.txt'  # Replace with your input file path
output_file_path = 'R8.txt'  # Replace with your output file path
# main() is invoked from the following cell.
#main(input_file_path, output_file_path)


In [None]:
main(input_file_path, output_file_path)
print("New data written to:",output_file_path)

New data written to: R8.txt


In [None]:
!pip install pickle
!pip install networkx
!pip install scipy

In [None]:
import numpy as np
import pickle as pkl
import networkx as nx
import scipy.sparse as sp
#from scipy.sparse.linalg.eigen.arpack import eigsh
from scipy.sparse.linalg import eigsh
import sys
import re


def loadWord2Vec(filename):
    """Read word vectors from a space-separated text file.

    Each usable line has the form ``token v1 v2 ...``. Lines with fewer than two
    values after the token (for example a ``count dim`` header or a blank line)
    are skipped.

    Args:
        filename: Path of the embedding file.

    Returns:
        A tuple ``(vocab, embd, word_vector_map)``: the tokens in file order, the
        matching list of float vectors, and a dict mapping each token to its
        vector (the same list object that is stored in ``embd``).

    Raises:
        ValueError: If a vector component cannot be parsed as a float.
    """
    vocab = []
    embd = []
    word_vector_map = {}
    # The context manager closes the file even when a malformed value raises, and
    # iterating the handle avoids loading the whole embedding file into memory.
    with open(filename, 'r') as file:
        for line in file:
            row = line.strip().split(' ')
            if len(row) > 2:
                word = row[0]
                vector = [float(value) for value in row[1:]]
                vocab.append(word)
                embd.append(vector)
                word_vector_map[word] = vector
    print('Loaded Word Vectors!')
    return vocab, embd, word_vector_map

def clean_str(string):
    """
    Tokenization/string cleaning for all datasets except for SST.
    Original taken from https://github.com/yoonkim/CNN_sentence/blob/master/process_data.py

    Characters outside ``A-Za-z0-9(),!?'`` and the backtick become spaces, English
    clitics ('s, 've, n't, 're, 'd, 'll) and the punctuation marks ``, ! ( ) ?`` are
    split off as separate tokens, whitespace runs are collapsed, and the result
    is stripped and lower-cased.

    Parentheses and question marks are emitted with a leading backslash
    (``\\(``, ``\\)``, ``\\?``), exactly as the original replacement strings did.
    NOTE(review): the backslashes look unintended, but they are kept so that
    existing outputs do not change — confirm before altering.
    """
    # Applied in order. The last three punctuation replacements are raw strings:
    # they hold the same characters as before without relying on invalid escape
    # sequences in ordinary string literals.
    substitutions = (
        (r"[^A-Za-z0-9(),!?\'\`]", " "),
        (r"\'s", " 's"),
        (r"\'ve", " 've"),
        (r"n\'t", " n't"),
        (r"\'re", " 're"),
        (r"\'d", " 'd"),
        (r"\'ll", " 'll"),
        (r",", " , "),
        (r"!", " ! "),
        (r"\(", r" \( "),
        (r"\)", r" \) "),
        (r"\?", r" \? "),
        (r"\s{2,}", " "),
    )
    for pattern, replacement in substitutions:
        string = re.sub(pattern, replacement, string)
    return string.strip().lower()

In [None]:
from nltk.corpus import stopwords
import nltk
from nltk.wsd import lesk
from nltk.corpus import wordnet as wn
#from utils import clean_str, loadWord2Vec
import sys

# if len(sys.argv) != 2:
# 	sys.exit("Use: python remove_words.py <dataset>")

# Dataset names this preprocessing step accepts; `dataset` selects the one to process.
datasets = ['20ng', 'R8', 'R52', 'ohsumed', 'mr']
dataset = 'R8'

# Abort when the selected name is not one of the accepted datasets.
if dataset not in datasets:
	sys.exit("wrong dataset name")

# Download the NLTK stop-word corpus and keep the English entries as a set.
nltk.download('stopwords')
stop_words = set(stopwords.words('english'))
print(stop_words)

# Read Word Vectors
# word_vector_file = 'data/glove.6B/glove.6B.200d.txt'
# vocab, embd, word_vector_map = loadWord2Vec(word_vector_file)
# word_embeddings_dim = len(embd[0])
# dataset = '20ng'

# Read the corpus, one document per line. The file is opened in binary mode and
# each stripped line is decoded as latin1.
doc_content_list = []
# The context manager closes the file even if reading or decoding fails.
with open('R8.txt', 'rb') as f:
# with open('data/wiki_long_abstracts_en_text.txt', 'r') as f:
    for line in f:
        doc_content_list.append(line.strip().decode('latin1'))


# Tokenise every document once; the tokens are needed both for the frequency
# count and for the filtering pass below.
tokenized_docs = [clean_str(doc_content).split() for doc_content in doc_content_list]

word_freq = {}  # to remove rare words

for words in tokenized_docs:
    for word in words:
        word_freq[word] = word_freq.get(word, 0) + 1

clean_docs = []
for words in tokenized_docs:
    if dataset == 'mr':
        # The 'mr' dataset keeps every token.
        doc_words = list(words)
    else:
        # Drop stop words and words seen fewer than 5 times in the whole corpus.
        doc_words = [
            word for word in words
            if word not in stop_words and word_freq[word] >= 5
        ]

    doc_str = ' '.join(doc_words).strip()
    #if doc_str == '':
        #doc_str = temp
    clean_docs.append(doc_str)

# One cleaned document per line (no trailing newline after the last one).
clean_corpus_str = '\n'.join(clean_docs)

# The context manager closes (and flushes) the file even if the write fails.
with open('R8.clean.txt', 'w') as f:
#with open('data/wiki_long_abstracts_en_text.clean.txt', 'w') as f:
    f.write(clean_corpus_str)

#dataset = '20ng'
# Report the minimum, maximum and average number of tokens per cleaned document.
with open('R8.clean.txt', 'r') as f:
#with open('data/wiki_long_abstracts_en_text.txt', 'r') as f:
    lines = f.readlines()

doc_lengths = [len(line.strip().split()) for line in lines]

# Derive the statistics from the data itself: no arbitrary starting value for the
# minimum, and an empty file reports zeros instead of dividing by zero.
min_len = min(doc_lengths, default=0)
max_len = max(doc_lengths, default=0)
aver_len = 1.0 * sum(doc_lengths) / len(doc_lengths) if doc_lengths else 0.0

print('min_len : ' + str(min_len))
print('max_len : ' + str(max_len))
print('average_len : ' + str(aver_len))

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


{'theirs', "doesn't", 'before', 'any', "wouldn't", 'the', "shouldn't", 'do', "that'll", 'ain', "mustn't", "it's", 'those', "aren't", "hasn't", 'don', 'up', 'now', "you've", 'such', 'had', 'our', 'own', 'all', 'being', "don't", "didn't", "shan't", 'out', 'until', 'o', 'were', 'has', "weren't", 'their', 'same', 'she', 'its', 'himself', 'them', 'again', 'should', 'mightn', 'each', 'itself', 'hers', 'then', 'hadn', 'my', 'll', 'me', 'myself', 'between', 'for', 'just', "mightn't", 'did', 'your', 'that', 'as', 'was', 'whom', 'once', 'haven', 'down', 'is', 'when', 'an', 'about', 'won', 'in', "won't", 'i', 'why', 'off', 'doesn', 're', 'here', 'hasn', 'of', 'not', 'yourself', 'than', 'over', 'these', 's', 'which', 'at', 'from', 'having', 'very', 'him', 'can', 'be', 'mustn', 'into', 'few', "couldn't", 'below', 'nor', 'we', 'and', 'his', 'if', "you're", 'are', 'by', 'didn', 'both', 'on', "should've", 'after', 'needn', 'it', 'there', "you'd", 'to', 'too', 'more', 'what', 'her', 'wasn', 'yours', 'h

## Set up the environment

In [None]:
from huggingface_hub import notebook_login

# The access token is entered in the login widget at run time. Never paste a token
# into the notebook: anything stored here is shared with the file. A token that was
# ever saved in a notebook must be treated as compromised and revoked.
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
%cd /content/drive/MyDrive

/content/drive/MyDrive


In [None]:
import torch

import sys
print(sys.version)
print(sys.executable)

print(torch.__version__)
print(torch.version.cuda)

3.10.12 (main, Mar 22 2024, 16:50:05) [GCC 11.4.0]
/usr/bin/python3
2.3.0+cu121
12.1


In [None]:
!pip install transformers
!pip install nltk
!pip install pytorch-ignite

Collecting pytorch-ignite
  Downloading pytorch_ignite-0.5.0.post2-py3-none-any.whl (296 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m296.7/296.7 kB[0m [31m7.9 MB/s[0m eta [36m0:00:00[0m
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch<3,>=1.3->pytorch-ignite)
  Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch<3,>=1.3->pytorch-ignite)
  Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)
Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch<3,>=1.3->pytorch-ignite)
  Using cached nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (14.1 MB)
Collecting nvidia-cudnn-cu12==8.9.2.26 (from torch<3,>=1.3->pytorch-ignite)
  Using cached nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl (731.7 MB)
Collecting nvidia-cublas-cu12==12.1.3.1 (from torch<3,>=1.3->pytorch-ignite)
  Using cached nvidia_cublas_cu12-12.1

In [None]:
!pip install  dgl -f https://data.dgl.ai/wheels/torch-2.3/cu121/repo.html

Looking in links: https://data.dgl.ai/wheels/torch-2.3/cu121/repo.html
Collecting dgl
  Downloading https://data.dgl.ai/wheels/torch-2.3/cu121/dgl-2.3.0%2Bcu121-cp310-cp310-manylinux1_x86_64.whl (309.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m309.3/309.3 MB[0m [31m4.0 MB/s[0m eta [36m0:00:00[0m
Collecting torchdata>=0.5.0 (from dgl)
  Downloading torchdata-0.7.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (4.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m4.7/4.7 MB[0m [31m55.2 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: torchdata, dgl
Successfully installed dgl-2.3.0+cu121 torchdata-0.7.1


In [None]:
import torch
import gc

# Collect garbage
gc.collect()

# Empty CUDA cache
torch.cuda.empty_cache()

# Print CUDA memory summary
print(torch.cuda.memory_summary(device=None, abbreviated=False))


|                  PyTorch CUDA memory summary, device ID 0                 |
|---------------------------------------------------------------------------|
|            CUDA OOMs: 0            |        cudaMalloc retries: 0         |
|        Metric         | Cur Usage  | Peak Usage | Tot Alloc  | Tot Freed  |
|---------------------------------------------------------------------------|
| Allocated memory      |   8680 MiB |  10221 MiB |   8032 GiB |   8023 GiB |
|       from large pool |   8668 MiB |  10200 MiB |   6622 GiB |   6613 GiB |
|       from small pool |     12 MiB |    141 MiB |   1410 GiB |   1410 GiB |
|---------------------------------------------------------------------------|
| Active memory         |   8680 MiB |  10221 MiB |   8032 GiB |   8023 GiB |
|       from large pool |   8668 MiB |  10200 MiB |   6622 GiB |   6613 GiB |
|       from small pool |     12 MiB |    141 MiB |   1410 GiB |   1410 GiB |
|---------------------------------------------------------------

## With Llama data augmentation

### Test

In [None]:
%cd /content/drive/MyDrive/GAT_test

/content/drive/MyDrive/GAT_test


In [None]:
!python build_graph.py R8

Traceback (most recent call last):
  File "/content/drive/MyDrive/GAT_test/build_graph.py", line 36, in <module>
    if temp[1].find('test') != -1:
IndexError: list index out of range


In [None]:
#!python train_bert_gcn.py --dataset R8 -m 0.7
!python train_bert_gcn.py --dataset R8 -m 0.7 --nb_epochs 1

In [None]:
#!python finetune_bert.py --dataset R8 --checkpoint_dir /checkpoint/roberta-base_gcn_R8/checkpoint.pth --batch_size 64
!python finetune_bert.py --dataset R8 --checkpoint_dir /checkpoint/roberta-base_gcn_R8/checkpoint.pth --batch_size 64 --nb_epochs 1

### GCN

In [None]:
%cd /content/drive/MyDrive/GAT_copy

/content/drive/MyDrive/GAT_copy


In [None]:
!python build_graph.py R8

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221,

In [None]:
!python finetune_bert.py --dataset R8 --checkpoint_dir /checkpoint/roberta-base_gcn_R8/checkpoint.pth --batch_size 64
#!python finetune_bert.py --dataset R8 --checkpoint_dir /checkpoint/roberta-base_gcn_R8/checkpoint.pth --batch_size 64 --nb_epochs 1

DGL backend not selected or invalid.  Assuming PyTorch for now.
Setting the default backend to "pytorch". You can change it in the ~/.dgl/config.json file or export the DGLBACKEND environment variable.  Valid options are: pytorch, mxnet, tensorflow (all lowercase)
arguments:
Namespace(max_length=128, batch_size=64, nb_epochs=60, bert_lr=0.0001, dataset='R8', bert_init='roberta-base', checkpoint_dir='/checkpoint/roberta-base_gcn_R8/checkpoint.pth')
checkpoints will be saved in /checkpoint/roberta-base_gcn_R8/checkpoint.pth
(4937, 300) (4937, 8) (2189, 300) (2189, 8) (17457, 300) (17457, 8)
19646
tokenizer_config.json: 100% 25.0/25.0 [00:00<00:00, 178kB/s]
config.json: 100% 481/481 [00:00<00:00, 3.59MB/s]
vocab.json: 100% 899k/899k [00:00<00:00, 1.04MB/s]
merges.txt: 100% 456k/456k [00:00<00:00, 1.08MB/s]
tokenizer.json: 100% 1.36M/1.36M [00:00<00:00, 6.21MB/s]
model.safetensors: 100% 499M/499M [00:01<00:00, 371MB/s]
Some weights of RobertaModel were not initialized from the model checkp

In [None]:
!python train_bert_gcn.py --dataset R8 -m 0.5
#!python train_bert_gcn.py --dataset R8 -m 0.7 --nb_epochs

arguments:
Namespace(max_length=128, batch_size=64, m=0.5, nb_epochs=50, bert_init='roberta-base', pretrained_bert_ckpt=None, dataset='R8', checkpoint_dir=None, gcn_model='gcn', gcn_layers=2, n_hidden=200, heads=8, dropout=0.5, gcn_lr=0.001, bert_lr=1e-05)
checkpoints will be saved in ./checkpoint/roberta-base_gcn_R8
(4937, 300) (4937, 8) (2189, 300) (2189, 8) (17457, 300) (17457, 8)
19646
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
graph information:
Graph(num_nodes=19646, num_edges=5646196,
      ndata_schemes={'input_ids': Scheme(shape=(128,), dtype=torch.int64), 'attention_mask': Scheme(shape=(128,), dtype=torch.int64), 'label': Scheme(shape=(), dtype=torch.int64), 'train': Scheme(shape=(), dtype=torch.float32), 'val': Scheme(shape=(), dty

### DotGatConv

In [None]:
%cd /content/drive/MyDrive/DotGatConv_copy

/content/drive/MyDrive/DotGatConv_copy


In [None]:
!python build_graph.py R8

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221,

In [None]:
#!python finetune_bert.py --dataset R8 --checkpoint_dir /checkpoint/roberta-base_gcn_R8/checkpoint.pth --batch_size 64 --nb_epochs 1
!python finetune_bert.py --dataset R8 --checkpoint_dir /checkpoint/roberta-base_gcn_R8/checkpoint.pth --batch_size 64

arguments:
Namespace(max_length=128, batch_size=64, nb_epochs=60, bert_lr=0.0001, dataset='R8', bert_init='roberta-base', checkpoint_dir='/checkpoint/roberta-base_gcn_R8/checkpoint.pth')
checkpoints will be saved in /checkpoint/roberta-base_gcn_R8/checkpoint.pth
(4937, 300) (4937, 8) (2189, 300) (2189, 8) (17457, 300) (17457, 8)
19646
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch: 1  Train acc: 0.5185 loss: 1.3539  Val acc: 0.5109 loss: 1.3550  Test acc: 0.4947 loss: 1.3511
New checkpoint
Epoch: 2  Train acc: 0.5185 loss: 1.3416  Val acc: 0.5109 loss: 1.3440  Test acc: 0.4947 loss: 1.3465
Epoch: 3  Train acc: 0.5185 loss: 1.3484  Val acc: 0.5109 loss: 1.3461  Test acc: 0.4947 loss: 1.3495
Epoch: 4  Train acc: 0.5185 loss: 1.3446  Val acc: 

In [None]:
#!python train_bert_gcn.py --dataset R8 -m 0.7 --nb_epochs 1 --gcn_model gat
!python train_bert_gcn.py --dataset R8 -m 0.5 --gcn_model gat

arguments:
Namespace(max_length=128, batch_size=64, m=0.5, nb_epochs=50, bert_init='roberta-base', pretrained_bert_ckpt=None, dataset='R8', checkpoint_dir=None, gcn_model='gat', gcn_layers=2, n_hidden=200, heads=8, dropout=0.5, gcn_lr=0.001, bert_lr=1e-05)
checkpoints will be saved in ./checkpoint/roberta-base_gat_R8
(4937, 300) (4937, 8) (2189, 300) (2189, 8) (17457, 300) (17457, 8)
19646
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
graph information:
Graph(num_nodes=19646, num_edges=5646196,
      ndata_schemes={'input_ids': Scheme(shape=(128,), dtype=torch.int64), 'attention_mask': Scheme(shape=(128,), dtype=torch.int64), 'label': Scheme(shape=(), dtype=torch.int64), 'train': Scheme(shape=(), dtype=torch.float32), 'val': Scheme(shape=(), dty

### GAT

In [None]:
%cd /content/drive/MyDrive/GAT_copy

/content/drive/MyDrive/GAT_copy


In [None]:
!python build_graph.py R8

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221,

In [None]:
#!python finetune_bert.py --dataset R8 --checkpoint_dir /checkpoint/roberta-base_gcn_R8/checkpoint.pth --batch_size 64 --nb_epochs 1
!python finetune_bert.py --dataset R8 --checkpoint_dir /checkpoint/roberta-base_gcn_R8/checkpoint.pth --batch_size 64

arguments:
Namespace(max_length=128, batch_size=64, nb_epochs=60, bert_lr=0.0001, dataset='R8', bert_init='roberta-base', checkpoint_dir='/checkpoint/roberta-base_gcn_R8/checkpoint.pth')
checkpoints will be saved in /checkpoint/roberta-base_gcn_R8/checkpoint.pth
(4937, 300) (4937, 8) (2189, 300) (2189, 8) (17457, 300) (17457, 8)
19646
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch: 1  Train acc: 0.5161 loss: 1.3569  Val acc: 0.5328 loss: 1.3610  Test acc: 0.4947 loss: 1.3509
New checkpoint
Epoch: 2  Train acc: 0.5161 loss: 1.3427  Val acc: 0.5328 loss: 1.3388  Test acc: 0.4947 loss: 1.3376
Epoch: 3  Train acc: 0.5161 loss: 1.3457  Val acc: 0.5328 loss: 1.3493  Test acc: 0.4947 loss: 1.3382
Epoch: 4  Train acc: 0.5161 loss: 1.3414  Val acc: 

In [None]:
#!python train_bert_gcn.py --dataset R8 -m 0.7 --nb_epochs 1 --gcn_model gat
!python train_bert_gcn.py --dataset R8 -m 0.5 --gcn_model gat

arguments:
Namespace(max_length=128, batch_size=64, m=0.5, nb_epochs=50, bert_init='roberta-base', pretrained_bert_ckpt=None, dataset='R8', checkpoint_dir=None, gcn_model='gat', gcn_layers=2, n_hidden=200, heads=8, dropout=0.5, gcn_lr=0.001, bert_lr=1e-05)
checkpoints will be saved in ./checkpoint/roberta-base_gat_R8
(4937, 300) (4937, 8) (2189, 300) (2189, 8) (17457, 300) (17457, 8)
19646
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
graph information:
Graph(num_nodes=19646, num_edges=5646196,
      ndata_schemes={'input_ids': Scheme(shape=(128,), dtype=torch.int64), 'attention_mask': Scheme(shape=(128,), dtype=torch.int64), 'label': Scheme(shape=(), dtype=torch.int64), 'train': Scheme(shape=(), dtype=torch.float32), 'val': Scheme(shape=(), dty

### GCN lr 1e-5

In [None]:
%cd /content/drive/MyDrive/GAT_test2

/content/drive/MyDrive/GAT_test2


In [None]:
!python build_graph.py R8

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221,

In [None]:
#!python finetune_bert.py --dataset R8 --checkpoint_dir /checkpoint/roberta-base_gcn_R8/checkpoint.pth --batch_size 64
!python finetune_bert.py --dataset R8 --checkpoint_dir /checkpoint/roberta-base_gcn_R8/checkpoint.pth --batch_size 128 --nb_epochs 200 --bert_lr 1e-5

arguments:
Namespace(max_length=128, batch_size=128, nb_epochs=200, bert_lr=1e-05, dataset='R8', bert_init='roberta-base', checkpoint_dir='/checkpoint/roberta-base_gcn_R8/checkpoint.pth')
checkpoints will be saved in /checkpoint/roberta-base_gcn_R8/checkpoint.pth
(4937, 300) (4937, 8) (2189, 300) (2189, 8) (17457, 300) (17457, 8)
19646
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch: 1  Train acc: 0.5175 loss: 1.3467  Val acc: 0.8201 loss: 0.1163  Test acc: 0.7947 loss: 0.1477
New checkpoint
Epoch: 2  Train acc: 0.5175 loss: 1.3514  Val acc: 0.8201 loss: 0.1222  Test acc: 0.7947 loss: 0.1464
Epoch: 3  Train acc: 0.5175 loss: 1.3400  Val acc: 0.8201 loss: 0.1140  Test acc: 0.7947 loss: 0.1394
Epoch: 4  Train acc: 0.5175 loss: 1.3394  Val acc:

In [None]:
#!python train_bert_gcn.py --dataset R8 -m 0.7
#!python train_bert_gcn.py --dataset R8 -m 0.7 --nb_epochs 60 --bert_lr 1e-4 --gcn_lr 5e-3
!python train_bert_gcn.py --dataset R8 -m 0.7 --nb_epochs 60 --bert_lr 1e-5 --gcn_lr 1e-3

arguments:
Namespace(max_length=128, batch_size=64, m=0.7, nb_epochs=60, bert_init='roberta-base', pretrained_bert_ckpt=None, dataset='R8', checkpoint_dir=None, gcn_model='gcn', gcn_layers=2, n_hidden=200, heads=8, dropout=0.5, gcn_lr=0.005, bert_lr=1e-05)
checkpoints will be saved in ./checkpoint/roberta-base_gcn_R8
(4937, 300) (4937, 8) (2189, 300) (2189, 8) (17457, 300) (17457, 8)
19646
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
graph information:
Graph(num_nodes=19646, num_edges=5646196,
      ndata_schemes={'input_ids': Scheme(shape=(128,), dtype=torch.int64), 'attention_mask': Scheme(shape=(128,), dtype=torch.int64), 'label': Scheme(shape=(), dtype=torch.int64), 'train': Scheme(shape=(), dtype=torch.float32), 'val': Scheme(shape=(), dty

### GCN M = 0.5

In [None]:
# Change the working directory to the project copy so the scripts below can be called by
# relative name. NOTE(review): hard-coded absolute path (presumably the Colab Google Drive
# mount) -- it must exist before this cell is run.
%cd /content/drive/MyDrive/GAT_copy

In [None]:
# Run the project script build_graph.py with the positional argument R8 (the dataset name
# passed to the later training cells). The recorded output is a long printed list of integers.
!python build_graph.py R8

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221,

In [None]:
# Fine-tune roberta-base on R8 with batch size 64; epochs and learning rate are not passed,
# and the recorded Namespace shows nb_epochs=60 and bert_lr=0.0001 for this run.
# NOTE(review): the recorded log shows train/val/test accuracy unchanged over the first epochs
# (0.5173 / 0.5219 / 0.4947); verify the model is actually learning with these settings.
!python finetune_bert.py --dataset R8 --checkpoint_dir /checkpoint/roberta-base_gcn_R8/checkpoint.pth --batch_size 64

arguments:
Namespace(max_length=128, batch_size=64, nb_epochs=60, bert_lr=0.0001, dataset='R8', bert_init='roberta-base', checkpoint_dir='/checkpoint/roberta-base_gcn_R8/checkpoint.pth')
checkpoints will be saved in /checkpoint/roberta-base_gcn_R8/checkpoint.pth
(4937, 300) (4937, 8) (2189, 300) (2189, 8) (17457, 300) (17457, 8)
19646
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch: 1  Train acc: 0.5173 loss: 1.3485  Val acc: 0.5219 loss: 1.3210  Test acc: 0.4947 loss: 1.3530
New checkpoint
Epoch: 2  Train acc: 0.5173 loss: 1.3422  Val acc: 0.5219 loss: 1.3174  Test acc: 0.4947 loss: 1.3344
Epoch: 3  Train acc: 0.5173 loss: 1.3417  Val acc: 0.5219 loss: 1.3171  Test acc: 0.4947 loss: 1.3364
Epoch: 4  Train acc: 0.5173 loss: 1.3491  Val acc: 

In [None]:
# Run train_bert_gcn.py on R8 with -m 0.5; no other options are passed, and the recorded
# Namespace shows nb_epochs=50, gcn_lr=0.001 and bert_lr=1e-05 for this run.
# Previous setting kept for reference (-m 0.7):
#!python train_bert_gcn.py --dataset R8 -m 0.7
!python train_bert_gcn.py --dataset R8 -m 0.5

arguments:
Namespace(max_length=128, batch_size=64, m=0.5, nb_epochs=50, bert_init='roberta-base', pretrained_bert_ckpt=None, dataset='R8', checkpoint_dir=None, gcn_model='gcn', gcn_layers=2, n_hidden=200, heads=8, dropout=0.5, gcn_lr=0.001, bert_lr=1e-05)
checkpoints will be saved in ./checkpoint/roberta-base_gcn_R8
(4937, 300) (4937, 8) (2189, 300) (2189, 8) (17457, 300) (17457, 8)
19646
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
graph information:
Graph(num_nodes=19646, num_edges=5646196,
      ndata_schemes={'input_ids': Scheme(shape=(128,), dtype=torch.int64), 'attention_mask': Scheme(shape=(128,), dtype=torch.int64), 'label': Scheme(shape=(), dtype=torch.int64), 'train': Scheme(shape=(), dtype=torch.float32), 'val': Scheme(shape=(), dty

### GCN with more epochs

In [None]:
# Change the working directory to the project copy so the scripts below can be called by
# relative name. NOTE(review): hard-coded absolute path (presumably the Colab Google Drive
# mount) -- it must exist before this cell is run.
%cd /content/drive/MyDrive/GAT_copy

/content/drive/MyDrive/GAT_copy


In [None]:
# Run the project script build_graph.py with the positional argument R8 (the dataset name
# passed to the later training cells). The recorded output is a long printed list of integers.
!python build_graph.py R8

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221,

In [None]:
# Fine-tune roberta-base on R8 with batch size 64; epochs and learning rate are not passed,
# and the recorded Namespace shows nb_epochs=60 and bert_lr=0.0001 for this run.
# NOTE(review): --checkpoint_dir is an absolute path under /checkpoint, while the
# train_bert_gcn.py run in this section reports pretrained_bert_ckpt=None -- confirm whether
# the checkpoint written here is meant to be reused.
!python finetune_bert.py --dataset R8 --checkpoint_dir /checkpoint/roberta-base_gcn_R8/checkpoint.pth --batch_size 64
# Single-epoch variant of the same command, currently disabled:
#!python finetune_bert.py --dataset R8 --checkpoint_dir /checkpoint/roberta-base_gcn_R8/checkpoint.pth --batch_size 64 --nb_epochs 1

DGL backend not selected or invalid.  Assuming PyTorch for now.
Setting the default backend to "pytorch". You can change it in the ~/.dgl/config.json file or export the DGLBACKEND environment variable.  Valid options are: pytorch, mxnet, tensorflow (all lowercase)
arguments:
Namespace(max_length=128, batch_size=64, nb_epochs=60, bert_lr=0.0001, dataset='R8', bert_init='roberta-base', checkpoint_dir='/checkpoint/roberta-base_gcn_R8/checkpoint.pth')
checkpoints will be saved in /checkpoint/roberta-base_gcn_R8/checkpoint.pth
(4937, 300) (4937, 8) (2189, 300) (2189, 8) (17457, 300) (17457, 8)
19646
tokenizer_config.json: 100% 25.0/25.0 [00:00<00:00, 168kB/s]
config.json: 100% 481/481 [00:00<00:00, 4.50MB/s]
vocab.json: 100% 899k/899k [00:00<00:00, 4.18MB/s]
merges.txt: 100% 456k/456k [00:00<00:00, 1.06MB/s]
tokenizer.json: 100% 1.36M/1.36M [00:00<00:00, 6.05MB/s]
model.safetensors: 100% 499M/499M [00:01<00:00, 481MB/s]
Some weights of RobertaModel were not initialized from the model checkp

In [None]:
# Run train_bert_gcn.py on R8 with -m 0.5 for 150 epochs; the recorded Namespace shows
# nb_epochs=150 with gcn_lr=0.001 and bert_lr=1e-05 for this run.
# Previous invocation without the epoch override, kept for reference:
#!python train_bert_gcn.py --dataset R8 -m 0.5
!python train_bert_gcn.py --dataset R8 -m 0.5 --nb_epochs 150

arguments:
Namespace(max_length=128, batch_size=64, m=0.5, nb_epochs=150, bert_init='roberta-base', pretrained_bert_ckpt=None, dataset='R8', checkpoint_dir=None, gcn_model='gcn', gcn_layers=2, n_hidden=200, heads=8, dropout=0.5, gcn_lr=0.001, bert_lr=1e-05)
checkpoints will be saved in ./checkpoint/roberta-base_gcn_R8
(4937, 300) (4937, 8) (2189, 300) (2189, 8) (17457, 300) (17457, 8)
19646
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
graph information:
Graph(num_nodes=19646, num_edges=5646196,
      ndata_schemes={'input_ids': Scheme(shape=(128,), dtype=torch.int64), 'attention_mask': Scheme(shape=(128,), dtype=torch.int64), 'label': Scheme(shape=(), dtype=torch.int64), 'train': Scheme(shape=(), dtype=torch.float32), 'val': Scheme(shape=(), dt