In [33]:
from datasets import load_dataset
from torch.utils.data import DataLoader
from datasets import Dataset , load_dataset, DatasetDict
import pandas as pd

def decision2label(decision):
    if  "grant" in decision:
        return 1
    elif "deny" in decision:
        return 0
    else:
        print(f"error occured with decision: {decision} ",)
        exit("Invalid decision")



import evaluate


def test_metrics(model, dataloader):
    acc = evaluate.load("accuracy")
    preci = evaluate.load("precision")
    recall = evaluate.load("recall")

    model.eval()
    for batch in dataloader:
        batch = {k: v.to(device) for k, v in batch.items()}
        with torch.no_grad():
            outputs = model(**batch)

        logits = outputs.logits
        predictions = torch.argmax(logits, dim=-1)
        acc.add_batch(predictions=predictions, references=batch["labels"])
        preci.add_batch(predictions=predictions, references=batch["labels"])
        recall.add_batch(predictions=predictions, references=batch["labels"])

    return {'accuracy': acc.compute()['accuracy'],
            'precision': preci.compute()['precision'], 
            'recall': recall.compute()['recall']}

TESTSET = "../dataset/testset.csv"

testset = pd.read_csv(TESTSET, index_col=0)

testset['labels'] = testset['completion'].apply(decision2label)

train = testset.loc[testset['data_type'] == 'train']
test = testset.loc[testset['data_type'] == 'test']

support_train = train.loc[train['brief_type'] == "support"]
support_test = test.loc[test['brief_type'] == "support"]

oppo_train = train.loc[train['brief_type'] == "opposition"]
oppo_test = test.loc[test['brief_type'] == "opposition"]


from transformers import AutoTokenizer





# can change the argument

dataset_train = Dataset.from_pandas(support_train, preserve_index=False)
dataset_test = Dataset.from_pandas(support_test, preserve_index=False)

dataset = DatasetDict()


dataset['train'] = dataset_train
dataset['test'] = dataset_test



tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")

def tokenize_function(briefs):
    return tokenizer(briefs["prompt"], padding="max_length", truncation=True)


tokenized_datasets = dataset.map(tokenize_function, batched=True)

# small_train_dataset = tokenized_datasets["train"].shuffle(seed=42).select(range(200))
# small_eval_dataset = tokenized_datasets["test"].shuffle(seed=42).select(range(200))

tokenized_datasets = tokenized_datasets.remove_columns(["completion","prompt","brief_type","data_type", "file_path", "file_name"])
tokenized_datasets.set_format("torch")


train_dataloader = DataLoader(tokenized_datasets["train"], shuffle=True, batch_size=16, )
eval_dataloader = DataLoader(tokenized_datasets["test"], batch_size=16, ) 


print(dataset)
testset





from transformers import AutoModelForSequenceClassification






from transformers import TrainingArguments

training_args = TrainingArguments(output_dir="test_trainer")



import numpy as np
import evaluate

metric = evaluate.load("accuracy")


def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    return metric.compute(predictions=predictions, references=labels)



from transformers import AutoModelForSequenceClassification

model = AutoModelForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=2)

from torch.optim import AdamW

lr = 1e-5 # 5e-5
optimizer = AdamW(model.parameters(), lr=lr)



from transformers import get_scheduler

num_epochs = 30
num_training_steps = num_epochs * len(train_dataloader)
lr_scheduler = get_scheduler(
    name="linear", optimizer=optimizer, num_warmup_steps=0, num_training_steps=num_training_steps
)


import torch
import wandb

device = torch.device("cuda:1") if torch.cuda.is_available() else torch.device("cpu")
model.to(device)

wandb.init(
    # set the wandb project where this run will be logged
    project="LLM_TOTURIAL",  
    name= f"support-bert-base-uncased",#f"Opposition-mistral-7B-v0.1-1-Tokensize:{max_input_size}",
    # track hyperparameters and run metadata
    config={
    "optimizer": "AdamW",
    "lr": lr,

    "dataset": "single-supports",
    "epochs": num_epochs,
    }
)



Map:   0%|          | 0/321 [00:00<?, ? examples/s]

Map:   0%|          | 0/309 [00:00<?, ? examples/s]

DatasetDict({
    train: Dataset({
        features: ['prompt', 'completion', 'brief_type', 'data_type', 'file_path', 'file_name', 'labels'],
        num_rows: 321
    })
    test: Dataset({
        features: ['prompt', 'completion', 'brief_type', 'data_type', 'file_path', 'file_name', 'labels'],
        num_rows: 309
    })
})


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.




VBox(children=(Label(value='0.005 MB of 0.005 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011113192790394856, max=1.0…

In [34]:
from tqdm.auto import tqdm

progress_bar = tqdm(range(num_training_steps))

model.train()
for epoch in range(num_epochs):
    avg_loss = 0
    acc = evaluate.load("accuracy")
    for batch in train_dataloader:
        batch = {k: v.to(device) for k, v in batch.items()}
        outputs = model(**batch)

        logits = outputs.logits
        #print(logits)

        
        loss = outputs.loss
        loss.backward()


        avg_loss += loss.item()
        
        optimizer.step()
        lr_scheduler.step()
        optimizer.zero_grad()
        progress_bar.update(1)
        
        predictions = torch.argmax(logits, dim=-1)
        acc.add_batch(predictions=predictions, references=batch["labels"])
    
    print(f"Average loss: {avg_loss / len(train_dataloader)}")

    accuracy_per_epoch = acc.compute()
    
    metrics = test_metrics(model, eval_dataloader)


    wandb.log({"loss_per_epoch": avg_loss , 
               "accuracy_per_epoch": accuracy_per_epoch,
               "test_accuracy" :metrics["accuracy"],
                "test_recall": metrics["recall"],
                "test_precision": metrics["precision"],
               })

wandb.finish()

  0%|          | 0/630 [00:00<?, ?it/s]

Average loss: 0.7015666337240309
Average loss: 0.6854392403648013
Average loss: 0.6884384864852542
Average loss: 0.6615286327543712
Average loss: 0.620851335071382
Average loss: 0.5442800507659004
Average loss: 0.43261669434252237
Average loss: 0.29032649667490096
Average loss: 0.18471101174751917
Average loss: 0.1161613031512215
Average loss: 0.08038923268516858
Average loss: 0.060831251953329356
Average loss: 0.048320677486203965
Average loss: 0.04068700383816447
Average loss: 0.03554647486834299
Average loss: 0.031703087456879164
Average loss: 0.028618769099315006
Average loss: 0.026416997247863384
Average loss: 0.02460611132638795
Average loss: 0.023164382780946437
Average loss: 0.0219123920957957
Average loss: 0.020978232712617943
Average loss: 0.02011626089612643
Average loss: 0.01913219965284779
Average loss: 0.018650019142244543
Average loss: 0.01689320880298813
Average loss: 0.014122304107461656
Average loss: 0.013629738064039321
Average loss: 0.013479770937313637
Average loss



VBox(children=(Label(value='0.005 MB of 0.005 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
loss_per_epoch,████▇▆▅▄▃▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
test_accuracy,▁▂▁▆▆▇▇▆▇▇▇▇▇▇▇▇▇▇▇██████▇████
test_precision,▁▁▁▄▄▆▅▅▆▆▇▆▆▆▆▆▇▇▆█▇██▇█▇████
test_recall,█▆█▄▅▁▂▄▁▃▂▂▂▂▁▁▂▁▂▂▂▂▁▂▁▁▂▂▂▂

0,1
loss_per_epoch,0.28565
test_accuracy,0.57929
test_precision,0.55752
test_recall,0.44056


In [30]:
print(test_metrics(model, train_dataloader))
print(test_metrics(model, eval_dataloader))


{'accuracy': 1.0, 'precision': 1.0, 'recall': 1.0}
{'accuracy': 0.5145631067961165, 'precision': 0.47904191616766467, 'recall': 0.5594405594405595}


In [9]:
train.loc[train['labels'] == 0].shape[0]

290

In [26]:
model.eval()
for batch in train_dataloader:
    batch = {k: v.to(device) for k, v in batch.items()}
    outputs = model(**batch)
    # print("input")
    # print(batch)

    logits = outputs.logits
    print(logits)
    print((logits > 0.5) , batch["labels"])
    

tensor([[ 1.8990, -2.2946],
        [-2.4458,  2.5934],
        [-2.4032,  2.5748],
        [-2.2463,  2.3520],
        [-2.4631,  2.5935],
        [ 2.3928, -2.6554],
        [-2.3922,  2.5310],
        [ 2.1212, -2.4095],
        [ 2.4318, -2.7314],
        [-2.4586,  2.5845],
        [-2.4614,  2.4970],
        [ 2.3138, -2.4795],
        [ 2.1510, -2.3522],
        [ 2.3866, -2.5772],
        [-2.4397,  2.5121],
        [ 2.3665, -2.7674]], device='cuda:1', grad_fn=<AddmmBackward0>)
tensor([[ True, False],
        [False,  True],
        [False,  True],
        [False,  True],
        [False,  True],
        [ True, False],
        [False,  True],
        [ True, False],
        [ True, False],
        [False,  True],
        [False,  True],
        [ True, False],
        [ True, False],
        [ True, False],
        [False,  True],
        [ True, False]], device='cuda:1') tensor([0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0], device='cuda:1')
tensor([[-2.3037,  2.4711],
    

In [10]:
len(train_dataloader)

289