In [3]:


import torch  
from datasets import load_dataset, Dataset 
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments, get_scheduler, RobertaForSequenceClassification
from peft import LoraConfig, get_peft_model, AutoPeftModelForSequenceClassification  
from torch.utils.data import DataLoader, Dataset
import numpy as np  
import pandas as pd
import evaluate




In [2]:
datasets = load_dataset("liar",trust_remote_code=True)

print(datasets)

DatasetDict({
    train: Dataset({
        features: ['id', 'label', 'statement', 'subject', 'speaker', 'job_title', 'state_info', 'party_affiliation', 'barely_true_counts', 'false_counts', 'half_true_counts', 'mostly_true_counts', 'pants_on_fire_counts', 'context'],
        num_rows: 10269
    })
    test: Dataset({
        features: ['id', 'label', 'statement', 'subject', 'speaker', 'job_title', 'state_info', 'party_affiliation', 'barely_true_counts', 'false_counts', 'half_true_counts', 'mostly_true_counts', 'pants_on_fire_counts', 'context'],
        num_rows: 1283
    })
    validation: Dataset({
        features: ['id', 'label', 'statement', 'subject', 'speaker', 'job_title', 'state_info', 'party_affiliation', 'barely_true_counts', 'false_counts', 'half_true_counts', 'mostly_true_counts', 'pants_on_fire_counts', 'context'],
        num_rows: 1284
    })
})


In [3]:
dff=pd.DataFrame(datasets["train"].shuffle(seed=42).select(range(5)))
dff


Unnamed: 0,id,label,statement,subject,speaker,job_title,state_info,party_affiliation,barely_true_counts,false_counts,half_true_counts,mostly_true_counts,pants_on_fire_counts,context
0,3358.json,1,Thirty-five states have accepted high-speed in...,"stimulus,transportation",raymond-lahood,"Secretary, U.S. Department of Transportation",Illinois,republican,0.0,1.0,1.0,1.0,0.0,a speech to the American Association of State ...
1,8048.json,2,"Since I was elected, crime rates have been at ...",crime,bill-foster,"Mayor, St. Petersburg",Florida,republican,1.0,0.0,2.0,2.0,0.0,a campaign brochure
2,8280.json,5,Warren Buffett recently said Scrap Obamacare a...,"health-care,pundits",blog-posting,,,none,7.0,19.0,3.0,5.0,44.0,posts on the Internet
3,481.json,2,"Eliminating earmarks ""would make barely a drop...",federal-budget,bob-barr,"Runs a consulting firm, Liberty Strategies LLC",Georgia,libertarian,0.0,0.0,0.0,1.0,0.0,a news conference announcing his Libertarian c...
4,12651.json,4,"Rather than work to secure the border, (Marco ...",immigration,carlos-beruff,Developer,Florida,republican,3.0,1.0,0.0,0.0,0.0,a statement to press


In [4]:
columns_to_remove = ['id', 'subject', 'speaker', 'job_title', 'state_info', 'party_affiliation', 
                     'barely_true_counts', 'false_counts', 'half_true_counts', 
                     'mostly_true_counts', 'pants_on_fire_counts', 'context']

dataset = datasets.remove_columns(columns_to_remove)

In [5]:
model_ck="roberta-base"

tokenizer = AutoTokenizer.from_pretrained(model_ck)


class CustomRobertaModel(RobertaForSequenceClassification):
    def __init__(self, config):
        super().__init__(config)
        self.dropout = torch.nn.Dropout(0.3)  # Increased dropout rate

model = CustomRobertaModel.from_pretrained(model_ck, num_labels=6)



Some weights of CustomRobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [6]:
def tokenize_fun(example):
    return tokenizer(example["statement"],padding="max_length",truncation=True)

tokenized_dataset = dataset.map(tokenize_fun,batched=True)



In [7]:
class Liardataset(Dataset):
    def __init__(self,tokenized_data):
        self.data = tokenized_data

    def __len__(self):
        return len(self.data["input_ids"])
    
    def __getitem__(self,idx):
        return {
            "input_ids" : torch.tensor(self.data["input_ids"][idx],dtype = torch.long),
            "attention_mask": torch.tensor(self.data["attention_mask"][idx], dtype=torch.long),
            "labels": torch.tensor(self.data["label"][idx], dtype=torch.long),
        }


In [8]:
batch_size = 16

train_dataset = Liardataset(tokenized_dataset["train"])
val_dataset = Liardataset(tokenized_dataset["validation"])
test_dataset = Liardataset(tokenized_dataset["test"])

train_dataloader = DataLoader(train_dataset,batch_size=batch_size,shuffle=True)
val_dataloader = DataLoader(val_dataset,batch_size=batch_size,shuffle=False)
test_dataloader = DataLoader(test_dataset,batch_size=batch_size,shuffle=False)

batch = next(iter(train_dataloader))
print(batch)


{'input_ids': tensor([[    0,  1620,  3428,  ...,     1,     1,     1],
        [    0,  4030,    90,  ...,     1,     1,     1],
        [    0, 11321,  1699,  ...,     1,     1,     1],
        ...,
        [    0,  4148,    36,  ...,     1,     1,     1],
        [    0,   104,  4113,  ...,     1,     1,     1],
        [    0,   133,  6866,  ...,     1,     1,     1]]), 'attention_mask': tensor([[1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0, 0, 0],
        ...,
        [1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0, 0, 0]]), 'labels': tensor([5, 3, 4, 3, 1, 0, 5, 4, 1, 0, 3, 0, 3, 0, 5, 0])}


In [9]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
metric = evaluate.load("accuracy")
model.eval()
with torch.no_grad():
    for batch in val_dataloader:
        batch = {key:vals.to(device) for key,vals in batch.items()}
        outputs = model(input_ids = batch["input_ids"], attention_mask = batch["attention_mask"])
        logits = outputs.logits
        predictions = torch.argmax(logits,dim=-1)

        metric.add_batch(predictions=predictions, references=batch["labels"])
accuracy = metric.compute()
print(f"Pretrained Model Accuracy: {accuracy['accuracy']:.4f}")

Pretrained Model Accuracy: 0.1316


In [10]:
lora_config = LoraConfig(
    r=32,
    lora_alpha=64,
    lora_dropout=0.1,
    target_modules = ["query", "key", "value"],
    bias="none",
    task_type="SEQ_CLS"
)
if hasattr(model,"lora_config"):
    model=model.unload()
if hasattr(model,"lora_config"):
    model=model.unload()
peft_model = get_peft_model(model,lora_config)

peft_model.to(device)

peft_model.print_trainable_parameters()

trainable params: 2,364,678 || all params: 127,014,924 || trainable%: 1.8617


In [11]:
for name, param in model.named_parameters():
    print(name)
 

roberta.embeddings.word_embeddings.weight
roberta.embeddings.position_embeddings.weight
roberta.embeddings.token_type_embeddings.weight
roberta.embeddings.LayerNorm.weight
roberta.embeddings.LayerNorm.bias
roberta.encoder.layer.0.attention.self.query.base_layer.weight
roberta.encoder.layer.0.attention.self.query.base_layer.bias
roberta.encoder.layer.0.attention.self.query.lora_A.default.weight
roberta.encoder.layer.0.attention.self.query.lora_B.default.weight
roberta.encoder.layer.0.attention.self.key.base_layer.weight
roberta.encoder.layer.0.attention.self.key.base_layer.bias
roberta.encoder.layer.0.attention.self.key.lora_A.default.weight
roberta.encoder.layer.0.attention.self.key.lora_B.default.weight
roberta.encoder.layer.0.attention.self.value.base_layer.weight
roberta.encoder.layer.0.attention.self.value.base_layer.bias
roberta.encoder.layer.0.attention.self.value.lora_A.default.weight
roberta.encoder.layer.0.attention.self.value.lora_B.default.weight
roberta.encoder.layer.0.atte

In [12]:
from torch.nn import functional as F
from transformers import Trainer

class FocalLoss(torch.nn.Module):
    def __init__(self, gamma=2.0):
        super(FocalLoss, self).__init__()
        self.gamma = gamma

    def forward(self, logits, targets):
        probs = F.softmax(logits, dim=-1)
        probs = probs.gather(dim=-1, index=targets.unsqueeze(-1))
        probs = probs.squeeze(-1)
        loss = -((1 - probs) ** self.gamma) * torch.log(probs)
        return loss.mean()

class CustomTrainer(Trainer):
    def compute_loss(self, model, inputs, return_outputs=False, **kwargs):
        labels = inputs.pop("labels")
        outputs = model(**inputs)
        logits = outputs.logits
        loss_fn = FocalLoss(gamma=2.0)
        loss = loss_fn(logits, labels)
        return (loss, outputs) if return_outputs else loss


In [13]:
from torch.optim import AdamW
training_args = TrainingArguments(
    output_dir="./results",
    eval_strategy="epoch",
    save_strategy="epoch",
    learning_rate=1e-5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=8,
    weight_decay=0.01,
    logging_dir="./logs",
    logging_steps=10,
    push_to_hub=False,
    label_names=["labels"],
    optim="adamw_torch",  
    lr_scheduler_type="cosine"  
)

num_training_steps = len(train_dataloader) * training_args.num_train_epochs

lr_scheduler = get_scheduler(
    name="cosine",
    optimizer=AdamW(peft_model.parameters(), lr=training_args.learning_rate),
    num_warmup_steps=int(0.1 * num_training_steps),
    num_training_steps=num_training_steps,
)

trainer = CustomTrainer(
    model=peft_model,
    args=training_args,
    train_dataset=tokenized_dataset["train"],
    eval_dataset=tokenized_dataset["validation"],
    optimizers=(AdamW(peft_model.parameters(), lr=training_args.learning_rate), lr_scheduler),
)



trainer.train()

Epoch,Training Loss,Validation Loss
1,1.1936,1.209208
2,1.1904,1.200664
3,1.1846,1.169435
4,1.1018,1.158867
5,1.1463,1.135755
6,1.1004,1.125684
7,1.1085,1.12172
8,1.1981,1.133045


TrainOutput(global_step=5136, training_loss=1.1567948608197898, metrics={'train_runtime': 3413.988, 'train_samples_per_second': 24.063, 'train_steps_per_second': 1.504, 'total_flos': 2.2212651732074496e+16, 'train_loss': 1.1567948608197898, 'epoch': 8.0})

In [14]:

accuracy_metric = evaluate.load("accuracy")

def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    return accuracy_metric.compute(predictions=predictions, references=labels)


trainer = Trainer(
    model=peft_model,
    args=training_args,
    train_dataset=tokenized_dataset["train"],
    eval_dataset=tokenized_dataset["validation"],
    compute_metrics=compute_metrics  
)


eval_results = trainer.evaluate()
accuracy = eval_results["eval_accuracy"]
print(f"Trained Model Accuracy: {accuracy:.4f}")


Trained Model Accuracy: 0.2671


In [9]:
from peft import PeftModel

peft_model.save_pretrained("peft_model")


NameError: name 'peft_model' is not defined

In [4]:
from peft import PeftModel
base_model = AutoModelForSequenceClassification.from_pretrained("roberta-base",num_labels=6)
lora_model = PeftModel.from_pretrained(base_model, "peft_model")
tokenizer = AutoTokenizer.from_pretrained("roberta-base")


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [5]:
from transformers import pipeline  
id2label = {
    0: "True",
    1: "Mostly-True",
    2: "Half-True",
    3: "Barely-True",
    4: "False",
    5: "Pants-on-Fire"
}

classifier = pipeline("text-classification", model=lora_model, tokenizer=tokenizer)  

text = "I will become the new AIML Lead"
result = classifier(text)[0]  
predicted_label = id2label[int(result['label'].split('_')[-1])]  
confidence = result['score']

print(f"Predicted Label: {predicted_label} (Confidence: {confidence:.2f})")

Device set to use cuda:0
The model 'PeftModelForSequenceClassification' is not supported for text-classification. Supported models are ['AlbertForSequenceClassification', 'BartForSequenceClassification', 'BertForSequenceClassification', 'BigBirdForSequenceClassification', 'BigBirdPegasusForSequenceClassification', 'BioGptForSequenceClassification', 'BloomForSequenceClassification', 'CamembertForSequenceClassification', 'CanineForSequenceClassification', 'LlamaForSequenceClassification', 'ConvBertForSequenceClassification', 'CTRLForSequenceClassification', 'Data2VecTextForSequenceClassification', 'DebertaForSequenceClassification', 'DebertaV2ForSequenceClassification', 'DiffLlamaForSequenceClassification', 'DistilBertForSequenceClassification', 'ElectraForSequenceClassification', 'ErnieForSequenceClassification', 'ErnieMForSequenceClassification', 'EsmForSequenceClassification', 'FalconForSequenceClassification', 'FlaubertForSequenceClassification', 'FNetForSequenceClassification', 'Fun

Predicted Label: True (Confidence: 0.25)


In [6]:
from transformers import pipeline  
id2label = {
    0: "True",
    1: "Mostly-True",
    2: "Half-True",
    3: "Barely-True",
    4: "False",
    5: "Pants-on-Fire"
}

classifier = pipeline("text-classification", model=lora_model, tokenizer=tokenizer)  

text = "I will not become the new AIML Lead"
result = classifier(text)[0]  
predicted_label = id2label[int(result['label'].split('_')[-1])]  
confidence = result['score']

print(f"Predicted Label: {predicted_label} (Confidence: {confidence:.2f})")

Device set to use cuda:0
The model 'PeftModelForSequenceClassification' is not supported for text-classification. Supported models are ['AlbertForSequenceClassification', 'BartForSequenceClassification', 'BertForSequenceClassification', 'BigBirdForSequenceClassification', 'BigBirdPegasusForSequenceClassification', 'BioGptForSequenceClassification', 'BloomForSequenceClassification', 'CamembertForSequenceClassification', 'CanineForSequenceClassification', 'LlamaForSequenceClassification', 'ConvBertForSequenceClassification', 'CTRLForSequenceClassification', 'Data2VecTextForSequenceClassification', 'DebertaForSequenceClassification', 'DebertaV2ForSequenceClassification', 'DiffLlamaForSequenceClassification', 'DistilBertForSequenceClassification', 'ElectraForSequenceClassification', 'ErnieForSequenceClassification', 'ErnieMForSequenceClassification', 'EsmForSequenceClassification', 'FalconForSequenceClassification', 'FlaubertForSequenceClassification', 'FNetForSequenceClassification', 'Fun

Predicted Label: True (Confidence: 0.28)
