In [None]:
!pip install torch bitsandbytes datasets==2.13.1 scipy
!pip install accelerate
!pip install transformers
!pip install peft
!pip install trl

In [None]:
import argparse
import bitsandbytes as bnb
from datasets import load_dataset
from functools import partial
import os
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training, AutoPeftModelForCausalLM
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, set_seed, Trainer, TrainingArguments, BitsAndBytesConfig, \
    DataCollatorForLanguageModeling, Trainer, TrainingArguments
from datasets import load_dataset

# Important Functions

In [None]:
def train(model, tokenizer, dataset, output_dir):
    """Attach LoRA adapters to ``model``, fine-tune it and save the result.

    The model is prepared for k-bit training, wrapped in a PEFT model whose
    target modules come from ``find_all_linear_names``, and trained with a
    Hugging Face ``Trainer`` for a fixed ``max_steps=20``. The trained PEFT
    model is then written to ``output_dir``.

    :param model: Model to fine-tune
    :param tokenizer: Tokenizer handed to the language-modeling data collator
    :param dataset: Training dataset passed to the ``Trainer``
    :param output_dir: Directory that receives the saved model. The
        ``TrainingArguments`` themselves use the fixed directory ``"outputs"``.
    """
    # Gradient checkpointing reduces memory usage during fine-tuning; PEFT then
    # prepares the model for k-bit training.
    model.gradient_checkpointing_enable()
    model = prepare_model_for_kbit_training(model)

    # Names of the modules LoRA will be applied to
    modules = find_all_linear_names(model)
    print("bunlar " + str(modules))

    # Wrap the model with the LoRA configuration and report the trainable share
    model = get_peft_model(model, create_peft_config(modules))
    print_trainable_parameters(model)

    # Training parameters
    training_args = TrainingArguments(
        per_device_train_batch_size=1,
        gradient_accumulation_steps=4,
        warmup_steps=2,
        max_steps=20,
        learning_rate=2e-4,
        fp16=True,
        logging_steps=1,
        output_dir="outputs",
        optim="paged_adamw_8bit",
    )
    collator = DataCollatorForLanguageModeling(tokenizer, mlm=False)
    trainer = Trainer(
        model=model,
        train_dataset=dataset,
        args=training_args,
        data_collator=collator,
    )

    # Disabled for training; re-enable for inference to speed up predictions
    model.config.use_cache = False

    ### SOURCE https://github.com/artidoro/qlora/blob/main/qlora.py
    # Report how many parameter elements are stored in each dtype before training
    numel_by_dtype = {}
    for _, param in model.named_parameters():
        numel_by_dtype[param.dtype] = numel_by_dtype.get(param.dtype, 0) + param.numel()
    total_numel = sum(numel_by_dtype.values())
    for dtype, count in numel_by_dtype.items():
        print(dtype, count, count / total_numel)

    # Launch training
    print("Training...")
    run_training = True
    if run_training:
        outcome = trainer.train()
        metrics = outcome.metrics
        trainer.log_metrics("train", metrics)
        trainer.save_metrics("train", metrics)
        trainer.save_state()
        print(metrics)
    ###

    # Saving model
    print("Saving last checkpoint of the model...")
    os.makedirs(output_dir, exist_ok=True)
    trainer.model.save_pretrained(output_dir)

    # Drop the local references and release cached CUDA memory before merging
    del model
    del trainer
    torch.cuda.empty_cache()



def create_prompt_formats_SNLI(sample):
    """
    Build the instruction-style prompt text for one SNLI sample.

    The 'hypothesis', 'premise' and 'label' fields are rendered into blurb,
    instruction, input, response and end sections. The sections are joined
    with two newline characters and stored under sample["text"]. The label is
    converted to text through the module-level ``mapp`` dictionary.

    :param sample: Sample dictionary with 'premise', 'hypothesis' and 'label'
    :return: The same dictionary with the added "text" entry
    """
    INTRO_BLURB = "Below is an NLI instruction that describes a NLI task. ."
    INSTRUCTION_KEY = "### Instruction:"
    INPUT_KEY = "### Input:"
    RESPONSE_KEY = "### Response:"
    END_KEY = "### End"

    sections = (
        INTRO_BLURB,
        f"{INSTRUCTION_KEY}\n Detect whether the given “hypothesis” logically follows from the 'premise'",
        f"{INPUT_KEY}\n Hypothesis: {sample['hypothesis']}, Premise:{sample['premise']}  ",
        f"{RESPONSE_KEY}\n{mapp[sample['label']]}",
        END_KEY,
    )

    # Empty sections are skipped before joining
    sample["text"] = "\n\n".join(section for section in sections if section)
    return sample

# SOURCE https://github.com/databrickslabs/dolly/blob/master/training/trainer.py
def get_max_length(model, default_length=1024):
    """
    Look up the maximum sequence length from the model's config.

    The config attributes "n_positions", "max_position_embeddings" and
    "seq_length" are checked in that order; the first one holding a truthy
    value wins.

    :param model: Object exposing a ``config`` attribute
    :param default_length: Value returned when none of the attributes is set
    :return: The maximum length found, or ``default_length``
    """
    for length_setting in ("n_positions", "max_position_embeddings", "seq_length"):
        max_length = getattr(model.config, length_setting, None)
        if max_length:
            print(f"Found max length: {max_length}")
            return max_length
    print(f"Using default max length: {default_length}")
    return default_length


def preprocess_batch(batch, tokenizer, max_length):
    """
    Tokenize the "text" entries of a batch with truncation enabled.

    :param batch: Mapping with a "text" entry
    :param tokenizer: Callable tokenizer
    :param max_length: Passed to the tokenizer as ``max_length``
    :return: Whatever the tokenizer returns for the batch
    """
    texts = batch["text"]
    encoded = tokenizer(texts, truncation=True, max_length=max_length)
    return encoded


# SOURCE https://github.com/databrickslabs/dolly/blob/master/training/trainer.py
def preprocess_dataset(tokenizer: AutoTokenizer, max_length: int, seed, dataset):
    """Format, tokenize, filter and shuffle a dataset so it is ready for training

    :param tokenizer (AutoTokenizer): Model Tokenizer
    :param max_length (int): Maximum number of tokens to emit from tokenizer
    :param seed: Seed passed to the final shuffle
    :param dataset: Dataset holding the 'premise', 'hypothesis', 'label' and
        '__index_level_0__' columns
    :return: The processed dataset
    """
    print("Preprocessing dataset...")

    # Step 1: add the formatted prompt to each sample (one sample at a time)
    with_prompts = dataset.map(create_prompt_formats_SNLI)

    # Step 2: tokenize in batches and drop the raw columns, including the
    # freshly added 'text'
    raw_columns = ['premise', 'hypothesis', 'label', '__index_level_0__', 'text']
    tokenize = partial(preprocess_batch, max_length=max_length, tokenizer=tokenizer)
    tokenized = with_prompts.map(tokenize, batched=True, remove_columns=raw_columns)

    # Step 3: keep only samples whose input_ids are shorter than max_length
    kept = tokenized.filter(lambda row: len(row["input_ids"]) < max_length)

    # Step 4: shuffle with the given seed
    return kept.shuffle(seed=seed)

def create_bnb_config():
    """
    Build the bitsandbytes quantization config used to load the model:
    4-bit loading, NF4 quantization type, double quantization enabled and
    bfloat16 as compute dtype.
    """
    quantization_options = dict(
        load_in_4bit=True,
        bnb_4bit_use_double_quant=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.bfloat16,
    )
    return BitsAndBytesConfig(**quantization_options)

def create_peft_config(modules):
    """
    Build the LoRA configuration used for parameter-efficient fine-tuning.

    :param modules: Names of the modules to apply Lora to
    :return: ``LoraConfig`` with r=16, lora_alpha=64, dropout 0.1, no bias
        training and the "CAUSAL_LM" task type
    """
    return LoraConfig(
        task_type="CAUSAL_LM",
        target_modules=modules,
        r=16,  # dimension of the updated matrices
        lora_alpha=64,  # parameter for scaling
        lora_dropout=0.1,  # dropout probability for layers
        bias="none",
    )

def find_all_linear_names(model):
    """
    Collect the names of all ``bnb.nn.Linear4bit`` modules in the model.

    Only the last component of each dotted module path is kept, and
    'lm_head' is excluded. The result is meant to be passed to
    LoraConfig(target_modules=...).

    :param model: Object exposing ``named_modules()``
    :return: List of unique module names
    """
    target_cls = bnb.nn.Linear4bit
    lora_module_names = {
        name.rsplit('.', 1)[-1]
        for name, module in model.named_modules()
        if isinstance(module, target_cls)
    }
    print("neymis bu lora names..."+str(lora_module_names))
    # needed for 16-bit
    lora_module_names.discard('lm_head')
    return list(lora_module_names)

def print_trainable_parameters(model, use_4bit=False):
    """
    Prints the number of trainable parameters in the model.

    :param model: Object exposing ``named_parameters()``; each parameter must
        provide ``numel()`` and ``requires_grad``
    :param use_4bit: When True, the trainable count is halved before printing
    """
    trainable_params = 0
    all_param = 0
    for _, param in model.named_parameters():
        num_params = param.numel()
        # if using DS Zero 3 and the weights are initialized empty
        if num_params == 0 and hasattr(param, "ds_numel"):
            num_params = param.ds_numel

        all_param += num_params
        if param.requires_grad:
            trainable_params += num_params
    if use_4bit:
        # Integer division keeps the count an int: "/=" produced a float,
        # which the ",d" format spec below rejects with ValueError.
        trainable_params //= 2
    # Guard against a model without parameters (division by zero)
    trainable_pct = 100 * trainable_params / all_param if all_param else 0.0
    print(
        f"all params: {all_param:,d} || trainable params: {trainable_params:,d} || trainable%: {trainable_pct}"
    )
def load_model(model_name, bnb_config):
    """
    Load a causal language model with the given quantization config, plus
    its tokenizer.

    :param model_name: Model identifier passed to ``from_pretrained``
    :param bnb_config: Quantization config for the model
    :return: Tuple ``(model, tokenizer)``
    """
    # Same memory budget for every visible GPU
    per_gpu_budget = "40960MB"
    memory_map = {gpu_index: per_gpu_budget for gpu_index in range(torch.cuda.device_count())}

    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        quantization_config=bnb_config,
        device_map="auto",  # dispatch the model efficiently on the available resources
        max_memory=memory_map,
    )

    tokenizer = AutoTokenizer.from_pretrained(model_name, use_auth_token=True)
    # Needed for LLaMA tokenizer
    tokenizer.pad_token = tokenizer.eos_token

    return model, tokenizer


# Load Dataset

In [None]:
import pandas as pd
dataset = load_dataset("snli")
snli_sampled=pd.DataFrame(dataset["train"])
snli_sampled=snli_sampled.sample(frac=0.01, random_state=123)

Downloading builder script:   0%|          | 0.00/3.82k [00:00<?, ?B/s]

Downloading metadata:   0%|          | 0.00/1.90k [00:00<?, ?B/s]

Downloading readme:   0%|          | 0.00/14.1k [00:00<?, ?B/s]

Downloading and preparing dataset snli/plain_text to /root/.cache/huggingface/datasets/snli/plain_text/1.0.0/1f60b67533b65ae0275561ff7828aad5ee4282d0e6f844fd148d05d3c6ea251b...


Downloading:   0%|          | 0.00/1.93k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.26M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/65.9M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.26M [00:00<?, ?B/s]

Dataset snli downloaded and prepared to /root/.cache/huggingface/datasets/snli/plain_text/1.0.0/1f60b67533b65ae0275561ff7828aad5ee4282d0e6f844fd148d05d3c6ea251b. Subsequent calls will reuse this data.


  0%|          | 0/3 [00:00<?, ?it/s]

# Organize Dataset

In [None]:
from datasets import Dataset

# Label id -> answer text inserted into the prompt by create_prompt_formats_SNLI
mapp={0: "Entailment: premise entails hypothesis",
      1: "Neutral: there is no relation between premise and hypothesis",
      2: "Contradiction: premise contradicts hypothesis"}

# Drop rows labeled -1: they have no entry in mapp
snli_sampled= snli_sampled[snli_sampled.label>-1]

# First 4000 sampled rows for training, the remainder for testing
train_df=snli_sampled[:4000]
test_df=snli_sampled[4000:]

# The datasets are named train2 / test2 because the preprocessing, inference
# and evaluation cells further down refer to them by these names; the previous
# names (train_df2 / test_df2) made this cell fail with NameError on a fresh kernel.
train2=Dataset.from_pandas(train_df).map(create_prompt_formats_SNLI)
test2=Dataset.from_pandas(test_df).map(create_prompt_formats_SNLI)

train2.shape, test2.shape

Map:   0%|          | 0/4000 [00:00<?, ? examples/s]

Map:   0%|          | 0/1497 [00:00<?, ? examples/s]

# Model Loading

In [None]:
import os
from getpass import getpass

from huggingface_hub import login

# SECURITY: access tokens must never be hard-coded in a notebook. The tokens
# that used to be embedded in this cell have been exposed and should be revoked.
# The read token is taken from the HF_TOKEN environment variable, with an
# interactive (non-echoing) prompt as fallback.
access_token_read = os.environ.get("HF_TOKEN") or getpass("Hugging Face read token: ")
# Optional write token; it is not used for the login below.
access_token_write = os.environ.get("HF_WRITE_TOKEN")
login(token = access_token_read)

model_name = "daryl149/llama-2-7b-chat-hf"
bnb_config = create_bnb_config()
model, tokenizer = load_model(model_name, bnb_config)

Token will not been saved to git credential helper. Pass `add_to_git_credential=True` if you want to set the git credential as well.
Token is valid (permission: read).
Your token has been saved to /root/.cache/huggingface/token
Login successful


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [None]:
## Preprocess dataset
max_length = get_max_length(model)
trained2_pro = preprocess_dataset(tokenizer, max_length, 42, train2)

Found max lenth: 2048
Preprocessing dataset...


Map:   0%|          | 0/4000 [00:00<?, ? examples/s]

Map:   0%|          | 0/4000 [00:00<?, ? examples/s]

Filter:   0%|          | 0/4000 [00:00<?, ? examples/s]

# Train trainer

In [None]:
output_dir = "results/llama2/final_checkpoint"
train(model, tokenizer, trained2_pro, output_dir)

In [None]:
output_dir

'results/llama2/final_checkpoint'

# Merging and saving

In [None]:
tokenizer

In [None]:
model = AutoPeftModelForCausalLM.from_pretrained(output_dir, device_map="auto", torch_dtype=torch.bfloat16)
model = model.merge_and_unload()
output_merged_dir = "results/llama2/final_merged_checkpoint"
os.makedirs(output_merged_dir, exist_ok=True)
model.save_pretrained(output_merged_dir, safe_serialization=True)

# save tokenizer for easy inference
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.save_pretrained(output_merged_dir)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

('results/llama2/final_merged_checkpoint/tokenizer_config.json',
 'results/llama2/final_merged_checkpoint/special_tokens_map.json',
 'results/llama2/final_merged_checkpoint/tokenizer.json')

# Inference and Evaluation

In [None]:
from transformers import AutoTokenizer
import transformers
import torch

model_path = "results/llama2/final_merged_checkpoint"
tokenizer = AutoTokenizer.from_pretrained(model_path)

pipeline = transformers.pipeline(
    "text-generation",
    model=model_path,
    tokenizer=tokenizer,
    torch_dtype=torch.float16,
    device_map="auto",
    #device='cuda:0'
)

sequences = pipeline(
    'I liked "Breaking Bad" and "Band of Brothers". Do you have any recommendations of other shows I might like?\n',
    do_sample=True,
    top_k=10,
    num_return_sequences=1,
    eos_token_id=tokenizer.eos_token_id,
    max_length=200,
)
for seq in sequences:
    print(f"Result: {seq['generated_text']}")

In [None]:
print(test2["text"][0])

Below is an NLI instruction that describes a NLI task. .

### Instruction:
 Detect whether the given “hypothesis” logically follows from the 'premise'

Input:
 Hypothesis: As it is their first time in the United states,the foreign exchange students are enjoying on the streets of New York., Premise:A group of foreign exchange students having a good time and enjoying the city life in the astonishingly busy streets of New York as they enjoy their first time being in the United States.  

### Response:
Entailment: premise entails hypothesis

### End


In [None]:
# Marker that separates the prompt from the answer in the formatted samples
RESPONSE_MARKER = "### Response:"

# One (prompt, ground-truth response, pipeline output) tuple per test sample
topla=[]
for i,t in enumerate(test2['text']):
  if i%50==0:
    print(i, "/",len(test2))
  # Split on the full marker (including the colon) and end the prompt with the
  # newline that follows it in the formatted samples, so the model sees the
  # same layout it was fine-tuned on. The previous " ### Response" suffix
  # dropped the colon and added a leading space.
  prompt, marker, out_real = t.partition(RESPONSE_MARKER)
  inp = prompt + marker + "\n"
  sequences = pipeline(
    inp,
    do_sample=False,  # greedy decoding keeps the evaluation reproducible
    num_return_sequences=1,
    eos_token_id=tokenizer.eos_token_id,
    # Budget for the answer only; max_length=200 also counted the prompt
    # tokens, leaving little or no room after long prompts.
    max_new_tokens=32,
  )
  topla.append((inp, out_real,sequences))

In [None]:
sequences[0]["generated_text"]

"Below is an NLI instruction that describes a NLI task. .\n\n### Instruction:\n Detect whether the given “hypothesis” logically follows from the 'premise'\n\nInput:\n Hypothesis: As it is their first time in the United states,the foreign exchange students are enjoying on the streets of New York., Premise:A group of foreign exchange students having a good time and enjoying the city life in the astonishingly busy streets of New York as they enjoy their first time being in the United States.  \n\n ### Response:\nContradiction: premise contradicts hypothesis\n\n### End:\nNeutral: there is no relation between premise and hypothesis\n\n### End:\nNeutral: there is no relation between premise and hypothesis\n\n### End:\nNeutral: there is no relation between premise and hypothesis\n\n### End:"

In [None]:
pd.DataFrame(topla).to_csv("testoutllm.csv")

In [None]:
q=pd.DataFrame(topla)

In [None]:
import pandas as pd

In [None]:
df=pd.read_csv("testoutllm.csv")

In [None]:
df.head(10)

Unnamed: 0.1,Unnamed: 0,0,1,2
0,0,Below is an NLI instruction that describes a N...,:\nEntailment: premise entails hypothesis\n\n#...,"[{'generated_text': ""Below is an NLI instructi..."
1,1,Below is an NLI instruction that describes a N...,:\nNeutral: there is no relation between premi...,"[{'generated_text': ""Below is an NLI instructi..."
2,2,Below is an NLI instruction that describes a N...,:\nEntailment: premise entails hypothesis\n\n#...,"[{'generated_text': ""Below is an NLI instructi..."
3,3,Below is an NLI instruction that describes a N...,:\nEntailment: premise entails hypothesis\n\n#...,"[{'generated_text': ""Below is an NLI instructi..."
4,4,Below is an NLI instruction that describes a N...,:\nEntailment: premise entails hypothesis\n\n#...,"[{'generated_text': ""Below is an NLI instructi..."
5,5,Below is an NLI instruction that describes a N...,:\nNeutral: there is no relation between premi...,"[{'generated_text': ""Below is an NLI instructi..."
6,6,Below is an NLI instruction that describes a N...,:\nEntailment: premise entails hypothesis\n\n#...,"[{'generated_text': ""Below is an NLI instructi..."
7,7,Below is an NLI instruction that describes a N...,:\nEntailment: premise entails hypothesis\n\n#...,"[{'generated_text': ""Below is an NLI instructi..."
8,8,Below is an NLI instruction that describes a N...,:\nEntailment: premise entails hypothesis\n\n#...,"[{'generated_text': ""Below is an NLI instructi..."
9,9,Below is an NLI instruction that describes a N...,:\nNeutral: there is no relation between premi...,"[{'generated_text': ""Below is an NLI instructi..."


In [None]:
q.head(50)

In [None]:
q.columns= ["inp","real","pred"]

In [None]:
def extract_answer(ans):
  """
  Extract the predicted label from a generated text.

  Only the text between the first "### Response" marker and the following
  "### End" marker (if any) is inspected, and the label mentioned first wins.
  Checking the labels in a fixed order over the whole continuation reported
  "Neutral" whenever the model kept generating further answers after its
  first one.

  :param ans: Full generated text, prompt included
  :return: "Neutral", "Contradiction", "Entailment", or "" when no label (or
      no response marker) is found
  """
  parts = ans.split("### Response")
  if len(parts) < 2:
    return ""
  response = parts[1].split("### End", 1)[0].lower()
  hits = []
  for label in ("Neutral", "Contradiction", "Entailment"):
    position = response.find(label.lower())
    if position != -1:
      hits.append((position, label))
  return min(hits)[1] if hits else ""

def extract_real(a):
  """
  Map a ground-truth response string to its label name.

  The check is case-insensitive and tries "Neutral", "Contradiction" and
  "Entailment" in that order; "" is returned when none of them occurs.
  """
  lowered = a.lower()
  for label in ("Neutral", "Contradiction", "Entailment"):
    if label.lower() in lowered:
      return label
  return ""


In [None]:
q["pred2"]=q.pred.apply(lambda x: extract_answer(x[0]['generated_text']))

In [None]:
q["real2"]=q.real.apply(lambda x: extract_real(x))

In [None]:
result= list(zip(q.real2, q.pred2))

In [None]:
res2=[a==b for a,b in result]
sum(res2), len(res2), sum(res2) / len(res2),

(426, 1497, 0.2845691382765531)

In [None]:
result

# diger bazı kodlar

In [None]:
#%%capture
!pip install transformers==4.29.0  peft datasets==2.12.0

In [None]:
#pip install peft

In [None]:
!pip list |egrep transformers
!pip list |egrep peft
!pip list |egrep ^datasets


transformers                     4.29.0
peft                             0.4.0
datasets                         2.12.0


In [None]:
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
from transformers import (default_data_collator,
                          get_linear_schedule_with_warmup)
from peft import (get_peft_config,
                  get_peft_model,
                  get_peft_model_state_dict,
                  LoraConfig,
                  TaskType)
import torch, os
from torch.utils.data import DataLoader
from tqdm import tqdm
import pandas as pd
from datasets import (load_dataset,
                      Dataset, DatasetDict)

# Loading SNLI dataset from huggingface dataset hub

In [None]:
dataset = load_dataset("snli")
snli_sampled=pd.DataFrame(dataset["train"])
snli_sampled=snli_sampled.sample(frac=0.01, random_state=123)

Downloading builder script:   0%|          | 0.00/3.82k [00:00<?, ?B/s]

Downloading metadata:   0%|          | 0.00/1.90k [00:00<?, ?B/s]

Downloading readme:   0%|          | 0.00/14.1k [00:00<?, ?B/s]

Downloading and preparing dataset snli/plain_text to /root/.cache/huggingface/datasets/snli/plain_text/1.0.0/1f60b67533b65ae0275561ff7828aad5ee4282d0e6f844fd148d05d3c6ea251b...


Downloading:   0%|          | 0.00/1.93k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.26M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/65.9M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.26M [00:00<?, ?B/s]

Dataset snli downloaded and prepared to /root/.cache/huggingface/datasets/snli/plain_text/1.0.0/1f60b67533b65ae0275561ff7828aad5ee4282d0e6f844fd148d05d3c6ea251b. Subsequent calls will reuse this data.


  0%|          | 0/3 [00:00<?, ?it/s]

In [None]:
snli_sampled.shape

(5502, 3)

In [None]:
snli_sampled.label.value_counts()

 2    1866
 0    1828
 1    1803
-1       5
Name: label, dtype: int64

In [None]:
snli_sampled= snli_sampled[snli_sampled.label>-1]

In [None]:
names=dataset["train"].features["label"].names
names

['entailment', 'neutral', 'contradiction']

In [None]:
mapp=dict(enumerate(names))
mapp

{0: 'entailment', 1: 'neutral', 2: 'contradiction'}

## Preparing dataset for training

In [None]:
snli_sampled_df= pd.DataFrame(snli_sampled)
snli_sampled_df["text"]= snli_sampled_df\
      .apply(lambda x: "S1:" +x.premise
             +" S2:"+x.hypothesis+
             ". The relation between S1 and S2 is labeled "+
             "as entailment, neutral or contradiction ?",
            axis=1)
snli_sampled_df["label"]=snli_sampled_df\
        .apply(lambda x: f"It is {mapp[x.label]}",
               axis=1)

In [None]:
snli_sampled_df.head(3)

Unnamed: 0,premise,hypothesis,label,text
190268,Two firefighters clad in protective gear are e...,Two firefighters are entering a house.,It is entailment,S1:Two firefighters clad in protective gear ar...
300525,Two men work together on a construction project.,Two men are working.,It is entailment,S1:Two men work together on a construction pro...
465851,Three men in uniform walk around town.,Three men rob the residents.,It is contradiction,S1:Three men in uniform walk around town. S2:T...


In [None]:
CUT=snli_sampled_df.shape[0]*7//10
print(f"Training set size is {CUT}")
print(f"Validation set size is \
        {snli_sampled_df.shape[0]-CUT}")
print(f"Total size is {snli_sampled_df.shape[0]}")

snli_sampled_dict= DatasetDict({"train":
                         Dataset.from_pandas(snli_sampled_df[:CUT]),
                        "validation":
                         Dataset.from_pandas(snli_sampled_df[CUT:]),
                        })

Training set size is 3847
Validation set size is         1650
Total size is 5497


In [None]:
def preprocess_function(examples):
    """
    Tokenize a batch of prompts ("text") and answers ("label").

    Relies on the module-level ``tokenizer``, ``max_length`` (prompt length)
    and ``max_target_len`` (answer length). Both sides are padded and
    truncated to their maximum length and returned as PyTorch tensors.

    :param examples: Batch with "text" and "label" entries
    :return: Tokenized prompts with an added "labels" entry
    """
    prompts = examples["text"]
    answers = examples["label"]
    shared_options = dict(padding="max_length", truncation=True, return_tensors="pt")

    model_inputs = tokenizer(prompts, max_length=max_length, **shared_options)
    answer_ids = tokenizer(answers, max_length=max_target_len, **shared_options)["input_ids"]

    # Replace padding positions in the targets with -100
    # (presumably so the loss ignores them; confirm against the model's loss)
    answer_ids[answer_ids == tokenizer.pad_token_id] = -100
    model_inputs["labels"] = answer_ids
    return model_inputs

In [None]:
model_name_or_path="google/flan-t5-base" # 250M parameters
#model_name_or_path="google/flan-t5-large" # 780M parameters
#model_name_or_path="google/flan-t5-xl" # 3B parameters
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

Downloading (…)okenizer_config.json:   0%|          | 0.00/2.54k [00:00<?, ?B/s]

Downloading spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/2.42M [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/2.20k [00:00<?, ?B/s]

In [None]:
max_length = 150
max_target_len=10

snli_processed = snli_sampled_dict.map(
    preprocess_function,
    batched=True,
    num_proc=1,
    remove_columns=snli_sampled_dict["train"].column_names,
    load_from_cache_file=False,
  )
train_dataset = snli_processed["train"]
eval_dataset = snli_processed["validation"]

Map:   0%|          | 0/3847 [00:00<?, ? examples/s]

Map:   0%|          | 0/1650 [00:00<?, ? examples/s]

In [None]:
pd.DataFrame(train_dataset).head(3)

Unnamed: 0,input_ids,attention_mask,labels
0,"[180, 536, 10, 382, 210, 32, 29764, 3, 4651, 2...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...","[94, 19, 3, 35, 5756, 297, 1, -100, -100, -100]"
1,"[180, 536, 10, 382, 210, 32, 1076, 161, 544, 3...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...","[94, 19, 3, 35, 5756, 297, 1, -100, -100, -100]"
2,"[180, 536, 10, 11889, 15, 15, 1076, 16, 7117, ...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...","[94, 19, 27252, 1, -100, -100, -100, -100, -10..."


In [None]:
batch_size = 32
train_dataloader = DataLoader(
    train_dataset,
    shuffle=True,
    collate_fn=default_data_collator,
    batch_size=batch_size,
    pin_memory=True
)
eval_dataloader = DataLoader(
    eval_dataset,
    collate_fn=default_data_collator,
    batch_size=batch_size,
    pin_memory=True)

# Initializing and Training PEFT model with LoRA

In [None]:
# creating model with peft
# if you want to train the pipline in vanilla mode, set with_peft=False
with_peft=True
model = AutoModelForSeq2SeqLM.from_pretrained(model_name_or_path)
lr=2e-5
if with_peft:
  lr=1e-3
  peft_config = LoraConfig(task_type=TaskType.SEQ_2_SEQ_LM,
                           inference_mode=False,
                           r=8,
                           lora_alpha=32,
                           lora_dropout=0.1)
  model = get_peft_model(model, peft_config)
  model.print_trainable_parameters()

trainable params: 884,736 || all params: 248,462,592 || trainable%: 0.3560841867092814


[]

In [None]:
# Fine-tune `model` for `num_epochs` epochs with AdamW and a linear learning-rate
# schedule (no warmup), evaluating on `eval_dataloader` after every epoch.
# Relies on `model`, `lr`, `tokenizer`, `train_dataloader` and `eval_dataloader`
# defined in earlier cells.
device="cuda"
model = model.to(device)
num_epochs = 3

# optimizer and lr scheduler
optimizer = torch.optim.AdamW(model.parameters(), lr=lr)
# The schedule spans every optimizer step: batches per epoch * epochs
lr_scheduler = get_linear_schedule_with_warmup(
    optimizer=optimizer,
    num_warmup_steps=0,
    num_training_steps=(len(train_dataloader) * num_epochs),
)

import time
st = time.time()  # wall-clock start, used for elapsed_time below
for epoch in range(num_epochs):
    # ---- training pass ----
    model.train()
    total_loss = 0
    for step, batch in enumerate(tqdm(train_dataloader)):
        batch = {k: v.to(device) for k, v in batch.items()}
        outputs = model(**batch)
        loss = outputs.loss
        # detach() so the running sum does not keep the autograd graph alive
        total_loss += loss.detach().float()
        loss.backward()
        optimizer.step()
        lr_scheduler.step()  # the scheduler advances once per batch
        optimizer.zero_grad()

    # ---- evaluation pass ----
    model.eval()
    eval_loss = 0
    # Reset every epoch: after the loop this holds the final epoch's predictions only
    eval_preds = []
    for step, batch in enumerate(tqdm(eval_dataloader)):
        batch = {k: v.to(device) for k, v in batch.items()}
        with torch.no_grad():
            outputs = model(**batch)
        loss = outputs.loss
        eval_loss += loss.detach().float()
        # Predictions are the argmax of the logits of this forward pass, not the
        # output of generate().
        # NOTE(review): the batch presumably includes `labels` (the loss is read
        # above), so these look teacher-forced; accuracy computed from them may
        # differ from free generation -- confirm.
        eval_preds.extend(
            tokenizer.batch_decode(
                torch.argmax(outputs.logits, -1)\
                .detach().cpu().numpy(),
                skip_special_tokens=True)
        )
    # Per-batch averages for this epoch
    eval_loss_avg = eval_loss / len(eval_dataloader)
    train_loss_avg = total_loss / len(train_dataloader)
    print(f"{epoch=}-> {train_loss_avg=}\t {eval_loss_avg=}")
et = time.time()
elapsed_time = et - st  # seconds spent on training plus evaluation

100%|██████████| 121/121 [01:41<00:00,  1.19it/s]
100%|██████████| 52/52 [00:20<00:00,  2.48it/s]


epoch=0-> train_loss_avg=tensor(0.3133, device='cuda:0')	 eval_loss_avg=tensor(0.1024, device='cuda:0')


100%|██████████| 121/121 [01:40<00:00,  1.20it/s]
100%|██████████| 52/52 [00:20<00:00,  2.49it/s]


epoch=1-> train_loss_avg=tensor(0.1119, device='cuda:0')	 eval_loss_avg=tensor(0.0852, device='cuda:0')


100%|██████████| 121/121 [01:41<00:00,  1.20it/s]
100%|██████████| 52/52 [00:20<00:00,  2.49it/s]

epoch=2-> train_loss_avg=tensor(0.0946, device='cuda:0')	 eval_loss_avg=tensor(0.0861, device='cuda:0')





In [None]:
zipped=zip(eval_preds, snli_sampled_dict["validation"]["label"])
q=[real.strip() in pred.strip() for pred,real in zipped]
print(f"{model_name_or_path=}")
print(f"{num_epochs=}")
print(f"{elapsed_time=:.2f} seconds"
     + (" with PEFT" if with_peft else  " without PEFT"))
print(f"Accuracy:{sum(q)/len(q):.2f}")

model_name_or_path='google/flan-t5-base'
num_epochs=3
elapsed_time=366.41 seconds with PEFT
Accuracy:0.87


# Saving PEFT models

In [None]:
# saving model
peft_model_path="my_lora_model"
model.save_pretrained(peft_model_path)

In [None]:
!ls -lh $peft_model_path

total 3.5M
-rw-r--r-- 1 root root  332 May 28 09:49 adapter_config.json
-rw-r--r-- 1 root root 3.5M May 28 09:49 adapter_model.bin


# Şuna Bir bakalimü

merge filan

In [None]:
# NOTE(review): this cell repeats the "Merging and saving" cell of the Llama-2
# part. It uses `output_dir`, `model_name` and `AutoPeftModelForCausalLM`, none
# of which are defined or imported in this flan-T5 part, so it only runs if the
# Llama-2 cells were executed in the same kernel. It also rebinds `model` and
# `tokenizer`; the inference cell below calls `tokenizer(my_text, ...)` and would
# then use the tokenizer loaded from `model_name` instead of the flan-T5 one.
# Consider removing this cell from this section.

# Load the saved adapter from output_dir and merge it into the base weights
model = AutoPeftModelForCausalLM.from_pretrained(output_dir, device_map="auto", torch_dtype=torch.bfloat16)
model = model.merge_and_unload()

# Write the merged model in safetensors format
output_merged_dir = "results/llama2/final_merged_checkpoint"
os.makedirs(output_merged_dir, exist_ok=True)
model.save_pretrained(output_merged_dir, safe_serialization=True)

# save tokenizer for easy inference
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.save_pretrained(output_merged_dir)

# Loading Saved PEFT model

In [None]:
from peft import PeftModel, PeftConfig
config = PeftConfig.from_pretrained(peft_model_path)
config

PeftConfig(peft_type='LORA', base_model_name_or_path='google/flan-t5-base', task_type='SEQ_2_SEQ_LM', inference_mode=True)

In [None]:
model = AutoModelForSeq2SeqLM.from_pretrained(config.base_model_name_or_path)
model = PeftModel.from_pretrained(model, peft_model_path)
model.eval()

In [None]:
my_text= snli_sampled_dict["validation"]["text"][0]
my_label= snli_sampled_dict["validation"]["label"][0]
print(f"{my_text=}")
print(f"{my_label=}")

my_text='S1:The young man wearing a blue sweatshirt and blue jeans is hopping over the railing on the fence. S2:The young man is hopping over the railing to save the young girl from drowning in the pool.. The relation between S1 and S2 is labeled as entailment, neutral or contradiction ?'
my_label='It is neutral'


In [None]:
inputs = tokenizer(my_text, return_tensors="pt")
print(inputs)

{'input_ids': tensor([[  180,   536,    10,   634,  1021,   388,  5119,     3,     9,  1692,
         10242,  9486,    11,  1692, 14118,    19,     3, 21714,   147,     8,
          6579,    53,    30,     8,  8227,     5,   180,   357,    10,   634,
          1021,   388,    19,     3, 21714,   147,     8,  6579,    53,    12,
          1097,     8,  1021,  3202,    45, 24614,    53,    16,     8,  2201,
             5,     5,    37,  4689,   344,   180,   536,    11,   180,   357,
            19,  3783,    15,    26,    38,     3,    35,  5756,   297,     6,
          7163,    42, 27252,     3,    58,     1]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
         1, 1, 1, 1]])}


In [None]:
with torch.no_grad():
    outputs = model.generate(input_ids=inputs["input_ids"], max_new_tokens=10)
    print(outputs)
    print(tokenizer.batch_decode(outputs.detach().cpu().numpy(), skip_special_tokens=True))

tensor([[   0,   94,   19, 7163,    1]])
['It is neutral']


# Train with QLoRA

In [None]:
!pip install bitsandbytes

Collecting bitsandbytes
  Downloading bitsandbytes-0.41.1-py3-none-any.whl (92.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m92.6/92.6 MB[0m [31m19.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: bitsandbytes
Successfully installed bitsandbytes-0.41.1


In [None]:
from transformers import BitsAndBytesConfig
from peft import prepare_model_for_kbit_training

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)

model = AutoModelForSeq2SeqLM.from_pretrained(model_name_or_path,
                                              quantization_config=bnb_config,
                                              device_map={"":0})

model.gradient_checkpointing_enable()
model = prepare_model_for_kbit_training(model)
model = get_peft_model(model, peft_config)

In [None]:
model.print_trainable_parameters()

trainable params: 884,736 || all params: 248,462,592 || trainable%: 0.3560841867092814


In [None]:
# Same training/evaluation loop as in the LoRA section, now applied to the
# quantized model created in the previous cell. Relies on `model`, `lr`,
# `tokenizer`, `train_dataloader` and `eval_dataloader` from earlier cells.
device="cuda"
# NOTE(review): the model was loaded with a bitsandbytes quantization config and
# device_map={"":0}; confirm that calling .to(device) on it is needed and
# supported by the installed transformers version.
model = model.to(device)
num_epochs = 3

# optimizer and lr scheduler
optimizer = torch.optim.AdamW(model.parameters(), lr=lr)
# The schedule spans every optimizer step: batches per epoch * epochs
lr_scheduler = get_linear_schedule_with_warmup(
    optimizer=optimizer,
    num_warmup_steps=0,
    num_training_steps=(len(train_dataloader) * num_epochs),
)

import time
st = time.time()  # wall-clock start, used for elapsed_time below
for epoch in range(num_epochs):
    # ---- training pass ----
    model.train()
    total_loss = 0
    for step, batch in enumerate(tqdm(train_dataloader)):
        batch = {k: v.to(device) for k, v in batch.items()}
        outputs = model(**batch)
        loss = outputs.loss
        # detach() so the running sum does not keep the autograd graph alive
        total_loss += loss.detach().float()
        loss.backward()
        optimizer.step()
        lr_scheduler.step()  # the scheduler advances once per batch
        optimizer.zero_grad()

    # ---- evaluation pass ----
    model.eval()
    eval_loss = 0
    # Reset every epoch: after the loop this holds the final epoch's predictions only
    eval_preds = []
    for step, batch in enumerate(tqdm(eval_dataloader)):
        batch = {k: v.to(device) for k, v in batch.items()}
        with torch.no_grad():
            outputs = model(**batch)
        loss = outputs.loss
        eval_loss += loss.detach().float()
        # Predictions are the argmax of the logits of this forward pass, not the
        # output of generate().
        # NOTE(review): the batch presumably includes `labels` (the loss is read
        # above), so these look teacher-forced -- confirm before comparing the
        # resulting accuracy with generation-based results.
        eval_preds.extend(
            tokenizer.batch_decode(
                torch.argmax(outputs.logits, -1)\
                .detach().cpu().numpy(),
                skip_special_tokens=True)
        )
    # Per-batch averages for this epoch
    eval_loss_avg = eval_loss / len(eval_dataloader)
    train_loss_avg = total_loss / len(train_dataloader)
    print(f"{epoch=}-> {train_loss_avg=}\t {eval_loss_avg=}")
et = time.time()
elapsed_time = et - st  # seconds spent on training plus evaluation

  0%|          | 0/121 [00:00<?, ?it/s]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
100%|██████████| 121/121 [02:23<00:00,  1.19s/it]
100%|██████████| 52/52 [00:20<00:00,  2.59it/s]


epoch=0-> train_loss_avg=tensor(0.3276, device='cuda:0')	 eval_loss_avg=tensor(0.0943, device='cuda:0')


100%|██████████| 121/121 [02:26<00:00,  1.21s/it]
100%|██████████| 52/52 [00:20<00:00,  2.60it/s]


epoch=1-> train_loss_avg=tensor(0.1111, device='cuda:0')	 eval_loss_avg=tensor(0.0894, device='cuda:0')


100%|██████████| 121/121 [02:26<00:00,  1.21s/it]
100%|██████████| 52/52 [00:20<00:00,  2.60it/s]

epoch=2-> train_loss_avg=tensor(0.0984, device='cuda:0')	 eval_loss_avg=tensor(0.0839, device='cuda:0')





In [None]:
zipped=zip(eval_preds, snli_sampled_dict["validation"]["label"])
q=[real.strip() in pred.strip() for pred,real in zipped]
print(f"{model_name_or_path=}")
print(f"{num_epochs=}")
print(f"{elapsed_time=:.2f} seconds"
     + (" with PEFT" if with_peft else  " without PEFT"))
print(f"Accuracy:{sum(q)/len(q):.2f}")

model_name_or_path='google/flan-t5-base'
num_epochs=3
elapsed_time=495.84 seconds with PEFT
Accuracy:0.86
