# Install Libraries

In [None]:
# Install xTuring (used below for dataset wrapping and LoRA fine-tuning) and the `datasets` package.
!pip install -q xturing datasets

In [None]:
!pip install -q bitsandbytes==0.44.0  accelerate==1.1.1, peft==0.11.0

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/251.2 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m251.2/251.2 kB[0m [31m9.8 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
# Force Triton to version 3.1.0 (the quotes around the version are consumed by the shell).
# NOTE(review): presumably needed for compatibility with the pinned packages installed earlier — confirm.
!pip install -U triton=='3.1.0'

# Process data

In [None]:
# For the original dataset, download train.csv and dev.csv from the CodaLab link in the
# GitHub repository https://github.com/kelvinguu/qanli, then load and filter them here.
import pandas as pd

df1 = pd.read_csv('train.csv', header=0, sep='\t', encoding='utf-8')
df2 = pd.read_csv('dev.csv', header=0, sep='\t', encoding='utf-8')
# ignore_index avoids duplicate row labels once the two splits are stacked.
df = pd.concat([df1, df2], ignore_index=True)

# First pass: mask the short answer inside the long (turker) answer and keep only rows
# where a mask was produced, i.e. the short answer occurs verbatim in the long answer.
df['turker_answer_mod'] = df.apply(
    lambda x: x['turker_answer'].replace(str(x['answer']), 'MASK'), axis=1
)
# .copy() makes the filtered frame independent, so the column assignments below do not
# trigger pandas' SettingWithCopyWarning.
df = df[df['turker_answer_mod'].str.contains('MASK')].copy()
df['turker_answer_mod'] = df['turker_answer_mod'].astype(str)
df['question'] = df['question'].astype(str)

# Second pass: keep the original long answer when it contains the short answer, otherwise
# tag the row with the "NONEE" sentinel and drop it.
df['turker_answer_mod'] = df.apply(
    lambda x: x['turker_answer'] if str(x['answer']) in x['turker_answer'] else "NONEE",
    axis=1,
)
df = df[~df['turker_answer_mod'].str.contains('NONEE')].copy()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['turker_answer_mod'] = df['turker_answer_mod'].astype('unicode')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['question'] = df['question'].astype('unicode')


In [None]:
# Turn every usable row into a (prompt, completion) pair:
#   prompt     = question + <BSA> short answer <ESA>
#   completion = <BLA> long answer <ELA>
inputs, outputs = [], []
for _, record in df.iterrows():
    question = record['question']
    short_answer = record['answer']
    long_answer = record['turker_answer']
    # Rows with an empty string in any of the three fields are skipped.
    if question == '' or short_answer == '' or long_answer == '':
        continue
    inputs.append(str(question) + '<BSA>' + str(short_answer) + "<ESA>")
    outputs.append('<BLA>' + long_answer + '<ELA>')

In [None]:
import random

# Subsample at most SAMPLE_SIZE prompt/completion pairs.
# - The size is capped at the number of available pairs, because random.sample raises
#   ValueError when asked for more items than the population holds.
# - A dedicated, seeded generator makes the subsample identical across kernel restarts
#   without touching the global `random` state.
SAMPLE_SIZE = 30000
SAMPLE_SEED = 42
random_indices = random.Random(SAMPLE_SEED).sample(
    range(len(inputs)), min(SAMPLE_SIZE, len(inputs))
)

# Apply the same indices to both lists so prompts and completions stay aligned.
inputs = [inputs[i] for i in random_indices]
outputs = [outputs[i] for i in random_indices]

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the pairs for evaluation. A fixed random_state keeps the split identical
# across kernel restarts, so the X_test used for generation later never overlaps the data a
# previously saved adapter was trained on.
X_train, X_test, y_train, y_test = train_test_split(
    inputs, outputs, test_size=0.20, random_state=42
)

In [None]:
# Column-oriented dicts: "text" holds the prompts, "target" the expected completions.
dataset = dict(text=X_train, target=y_train)

val_dataset = dict(text=X_test, target=y_test)

In [None]:
from xturing.datasets.instruction_dataset import InstructionDataset
from xturing.datasets.text_dataset import TextDataset

# Wrap the training and validation dicts (in that order) as xTuring text datasets.
instruction_dataset, val_instruction_dataset = (
    TextDataset(split) for split in (dataset, val_dataset)
)

[2025-04-01 13:14:57,514] [INFO] [real_accelerator.py:110:get_accelerator] Setting ds_accelerator to cuda (auto detect)


# Load and Train Model

In [None]:
from xturing.models.base import BaseModel

# Use "llama_lora_int8" instead to train in 8 bit precision and save on resources.
MODEL_KEY = "llama_lora"
model = BaseModel.create(MODEL_KEY)

Loading checkpoint shards:   0%|          | 0/33 [00:00<?, ?it/s]

trainable params: 4194304 || all params: 6742609920 || trainable%: 0.06220594176090199


In [None]:
finetuning_config = model.finetuning_config()

# Hyper-parameter overrides, applied in order as attributes on the config object.
overrides = {
    "batch_size": 4,
    "learning_rate": 1e-3,
    "weight_decay": 0,
    "max_grad_norm": 50.0,
    "optimizer_name": "adamw",
    "num_train_epochs": 1,
    "max_length": 200,
}
for option, value in overrides.items():
    setattr(finetuning_config, option, value)

In [None]:
# Fine-tune on the training split only; the validation dataset built earlier is not passed here.
model.finetune(dataset=instruction_dataset)

In [None]:
# Persist the fine-tuned model to ./saved_model; the generation cells below load the
# tokenizer and the adapter from this directory.
model.save("saved_model")

  Remove the following fields from the adapter config JSON file:
  
  "enable_lora": null,
  "merge_weights": false,


# Generate with Transformers and PEFT

In [None]:
import torch
import transformers
from transformers import LlamaTokenizer, LlamaConfig, LlamaForCausalLM
from peft import PeftModel

# Directory written by the fine-tuning step.
path = './saved_model'

# The tokenizer comes from the saved directory.
tokenizer = LlamaTokenizer.from_pretrained(path)

# The base weights come from the hub checkpoint, loaded in float16 with automatic device placement.
base_model_kwargs = dict(
    load_in_8bit=False,
    torch_dtype=torch.float16,
    device_map='auto',
)
model = LlamaForCausalLM.from_pretrained('aleksickx/llama-7b-hf', **base_model_kwargs)




In [None]:
# Wrap the base model with the adapter stored under `path`.
# Note: `model` is rebound to the PEFT-wrapped model.
adapter_kwargs = {
    "torch_dtype": torch.float16,
    "device_map": 'auto',
}
model = PeftModel.from_pretrained(model, path, **adapter_kwargs)

In [None]:
from transformers import StoppingCriteria, StoppingCriteriaList
import torch
class StoppingCriteriaSub(StoppingCriteria):
    """Stop generation as soon as any stop string shows up in the decoded text.

    Only the first sequence of the batch (``input_ids[0]``) is inspected, and the whole
    sequence is decoded at every call, so a stop string that is already present in the
    prompt ends generation immediately.

    Args:
        stops: Iterable of stop strings.
        tokenizer: Object whose ``decode(ids)`` turns token ids into text.
    """

    def __init__(self, stops ,tokenizer):
        super().__init__()
        self.stops = list(stops)
        self.tokenizer = tokenizer

    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
        """Return True when the decoded first sequence contains one of the stop strings.

        ``**kwargs`` is accepted (and ignored) to match the base ``StoppingCriteria.__call__``
        signature, so extra keyword arguments forwarded by the caller do not raise TypeError.
        """
        # Decode once per step instead of once per stop string.
        decoded = self.tokenizer.decode(input_ids[0])
        return any(stop in decoded for stop in self.stops)

In [None]:
# Reuse the EOS token as the padding token (both the string and its id).
tokenizer.pad_token = tokenizer.eos_token
tokenizer.pad_token_id = tokenizer.eos_token_id

# Generation halts once the end-of-long-answer marker has been produced.
stop_markers = ['<ELA>']
stopping_criteria = StoppingCriteriaList([StoppingCriteriaSub(stop_markers, tokenizer)])

In [None]:
def generate(text):
    """Greedy-decode a continuation of ``text`` and return only the newly generated part.

    Relies on the module-level ``tokenizer``, ``model`` and ``stopping_criteria``.

    Args:
        text: Prompt string (the tokenizer truncates it to 1000 tokens).

    Returns:
        The decoded tokens after the prompt, with special tokens stripped.
    """
    # Send the inputs to the device the model lives on instead of a hard-coded 'cuda';
    # the model is loaded with device_map='auto'.
    encoded = tokenizer(text, return_tensors='pt',padding=True,max_length=1000,truncation=True).to(model.device)
    prompt_length = encoded['input_ids'].shape[1]
    with torch.no_grad():
        # do_sample=False selects greedy decoding, so the sampling-only knobs (top_k, top_p)
        # are not passed: they have no effect in this mode.
        # NOTE(review): max_length=500 counts prompt + generated tokens while the tokenizer
        # allows prompts of up to 1000 tokens — confirm that prompts stay well below 500.
        generated_ids = model.generate(
            **encoded,
            max_length=500,
            stopping_criteria=stopping_criteria,
            do_sample=False,
            use_cache=True,
        )
    # Drop the prompt tokens so only the continuation is decoded.
    return tokenizer.decode(generated_ids[0][prompt_length:], skip_special_tokens=True)

In [None]:
from tqdm import tqdm
# NOTE(review): these rebind the `inputs` / `outputs` lists built during preprocessing to
# empty lists; nothing in the visible part of this cell fills them — confirm they are needed.
inputs = []
outputs = []
# Generated long answers, one entry per non-empty test prompt.
long_answers = []
for inpt in tqdm(X_test):
    # Empty prompts are skipped, so long_answers can end up shorter than X_test / y_test.
    if inpt:
        text = generate(inpt)
        long_answers.append(text)