In [1]:
import os
# os.environ["CUDA_VISIBLE_DEVICES"] = "1"



In [2]:
import numpy as np
import torch

from transformers import AutoTokenizer, AutoModelForQuestionAnswering, TrainingArguments, Trainer
from torch.utils.data import Dataset
import logging

from datasets import load_dataset

# Load the 'mrpc' configuration of the 'glue' dataset via `datasets.load_dataset`.
# The 'train' and 'validation' splits of this object are the ones used later in the notebook.
raw_datasets  = load_dataset("glue", 'mrpc')

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
from transformers import AutoTokenizer, AutoModelForMaskedLM, AutoConfig
# from roberta import RobertaForSequenceClassification
# from modeling import CLMSequenceClassification


#model_name = "openai-community/gpt2-medium"
model_name = "HuggingFaceTB/SmolLM2-360M"

tokenizer = AutoTokenizer.from_pretrained(model_name)
# Reuse the EOS token for padding (presumably the checkpoint ships without a
# dedicated pad token — confirm against the tokenizer config).
tokenizer.pad_token = tokenizer.eos_token

import torch
import torch.nn as nn
from transformers import AutoModelForSequenceClassification
from transformers.activations import ACT2FN
import random

# Prefer the GPU but fall back to the CPU so this cell does not crash on a
# machine without CUDA.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Two-way classification head on top of the pretrained decoder; the number of
# labels is passed here rather than by editing a separate config object.
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2).to(device)
# The model config must agree with the tokenizer on which id is padding.
model.config.pad_token_id = tokenizer.eos_token_id
import RoCoFT

# Apply RoCoFT's 'row' method with rank 3 to the model. The return value is not
# captured and `model` is used afterwards, so this presumably modifies the model
# in place — the details live in the RoCoFT package, not in this notebook.
RoCoFT.PEFT(model, method='row', rank=3)

Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at HuggingFaceTB/SmolLM2-360M and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [5]:
from transformers import AutoTokenizer, DataCollatorWithPadding


# Pad on the left so each prompt's final token stays at the end of the padded row.
tokenizer.padding_side = 'left'
# Use the EOS token as the padding token.
tokenizer.pad_token = tokenizer.eos_token
# NOTE(review): the decoded validation example shown later in this notebook ends
# at "Answer:" with no EOS text, so this attribute may have no effect for this
# tokenizer — confirm before relying on it.
tokenizer.add_eos_token = True
# Raw text columns to drop once tokenized (passed as remove_columns to the map call below).
# Earlier variant: col_to_delete = ['idx']
col_to_delete = ['sentence1','sentence2']

def preprocessing_function(examples):
    """Turn a batch of sentence pairs into tokenized paraphrase-question prompts.

    Args:
        examples: batch mapping with parallel 'sentence1' and 'sentence2'
            sequences (the form `Dataset.map(..., batched=True)` passes in).

    Returns:
        Whatever the module-level `tokenizer` returns for the list of prompts,
        called with truncation enabled and a 512-token cap.
    """
    question = "Do these two sentences mean the same thing? Answer:"
    prompts = []
    for first, second in zip(examples['sentence1'], examples['sentence2']):
        # Both sentences are followed by the fixed question; the prompt ends on
        # "Answer:" so the final token is the same for every example.
        prompts.append(f"Sentence 1: {first} Sentence 2: {second} " + question)
    return tokenizer(prompts, truncation=True, max_length=512)

# Tokenize every split in batches and drop the raw sentence columns afterwards.
tokenized_datasets = raw_datasets.map(
    preprocessing_function,
    batched=True,
    remove_columns=col_to_delete,
)
# Have the datasets hand back torch tensors when indexed.
tokenized_datasets.set_format("torch")

# Collator that pads each batch dynamically, using the tokenizer's padding settings.
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

Map: 100%|██████████| 3668/3668 [00:00<00:00, 14097.71 examples/s]
Map: 100%|██████████| 408/408 [00:00<00:00, 14794.72 examples/s]
Map: 100%|██████████| 1725/1725 [00:00<00:00, 16225.31 examples/s]


In [6]:
# Sanity check: show the text of a single token id (the recorded output is ' Answer').
tokenizer.decode(19842)

' Answer'

In [7]:
# Spot-check one tokenized validation prompt by decoding it back to text.
# Index the row first, then the column, so only example 10 is materialised
# instead of the whole 'input_ids' column.
tokenizer.decode(tokenized_datasets['validation'][10]['input_ids'])

'Sentence 1: The delegates said raising and distributing funds has been complicated by the U.S. crackdown on jihadi charitable foundations , bank accounts of terror-related organizations and money transfers . Sentence 2: Bin Laden ’ s men pointed out that raising and distributing funds has been complicated by the U.S. crackdown on jihadi charitable foundations , bank accounts of terror-related organizations and money transfers . Do these two sentences mean the same thing? Answer:'

In [8]:
import evaluate
import numpy as np
from sklearn import metrics
import torch
import numpy as np

def compute_metrics(eval_pred):
    """Score a batch of evaluation predictions for the Trainer.

    Args:
        eval_pred: pair of (logits, labels). The predicted class is the argmax
            over the last axis of `logits` (assumes logits are laid out as
            (n_examples, n_classes) — confirm against the model output).

    Returns:
        dict with macro-averaged "precision", "recall" and "f1-score", plus
        plain "accuracy", each computed with `sklearn.metrics`.
    """
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)

    # Scorers that share the same macro-averaged call signature.
    macro_scorers = {
        "precision": metrics.precision_score,
        "recall": metrics.recall_score,
        "f1-score": metrics.f1_score,
    }
    scores = {
        key: scorer(labels, predictions, average="macro")
        for key, scorer in macro_scorers.items()
    }
    # Accuracy takes no averaging argument, so it is added separately (and last,
    # which keeps the key order of the returned dict unchanged).
    scores['accuracy'] = metrics.accuracy_score(labels, predictions)
    return scores

In [9]:
from transformers import TrainingArguments, Trainer

import time
from transformers import Trainer, TrainingArguments
# Hyperparameters for the MRPC fine-tuning run.
training_args = TrainingArguments(
    output_dir='dir',
    learning_rate=5e-4,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    gradient_accumulation_steps=4,  # 4 x 4 = 16 sequences per optimizer step per device
    num_train_epochs=10,
    eval_strategy="steps",
    # Explicit rather than implied by logging_steps; same evaluation cadence as before.
    eval_steps=100,
    save_strategy="steps",
    # Checkpoint on every evaluation step. load_best_model_at_end can only reload
    # a checkpoint that was actually written; the previous save_steps=1000000000
    # never saved one, so the "best model" flag was silently a no-op.
    # NOTE(review): confirm checkpoints of the RoCoFT-modified model reload cleanly.
    save_steps=100,
    # Bounds disk use: only the most recent checkpoints (and the best one) are kept.
    save_total_limit=2,
    weight_decay=0.1,
    logging_steps=100,
    max_grad_norm=1,
    # With no metric_for_best_model set, Trainer documents evaluation loss as
    # the metric used to pick the best checkpoint.
    load_best_model_at_end=True,
    lr_scheduler_type="cosine",  # alternatives include 'linear', 'cosine_with_restarts', 'polynomial'
    # warmup_steps takes precedence over warmup_ratio, so the former
    # warmup_ratio=0.1 had no effect and has been dropped.
    warmup_steps=100,
    label_smoothing_factor=0.1,
)

# Wire the model, hyperparameters, data splits, collator and metric function into a Trainer.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["validation"],  # scored by compute_metrics at each evaluation

    data_collator=data_collator,  # pads each batch with the tokenizer's padding settings
    compute_metrics=compute_metrics
)

In [10]:
# Run fine-tuning. As the last expression of the cell, the returned TrainOutput is displayed.
trainer.train()

Step,Training Loss,Validation Loss,Precision,Recall,F1-score,Accuracy
100,0.8048,0.626935,0.844444,0.511628,0.430622,0.691176
200,0.6032,0.535032,0.773691,0.643077,0.653482,0.759804
300,0.5382,0.485846,0.793262,0.72564,0.74424,0.801471
400,0.4898,0.469419,0.781866,0.724139,0.740988,0.796569
500,0.4607,0.470279,0.792279,0.800408,0.796021,0.821078
600,0.3785,0.472966,0.793638,0.777778,0.784681,0.818627
700,0.3747,0.469531,0.832128,0.807619,0.817894,0.848039
800,0.3019,0.655421,0.833013,0.716429,0.739924,0.808824
900,0.3164,0.570778,0.817512,0.75844,0.777232,0.823529
1000,0.2726,0.5674,0.800651,0.807869,0.804018,0.828431


TrainOutput(global_step=2290, training_loss=0.318262771539813, metrics={'train_runtime': 1697.1853, 'train_samples_per_second': 21.612, 'train_steps_per_second': 1.349, 'total_flos': 16990198986240.0, 'train_loss': 0.318262771539813, 'epoch': 9.959651035986914})