In [None]:
# Install Pytorch & other libraries
!pip install "torch==2.1.2" tensorboard

# Install Hugging Face libraries
!pip install  --upgrade \
  "transformers==4.36.2" \
  "datasets==2.16.1" \
  "accelerate==0.26.1" \
  "evaluate==0.4.1" \
  "bitsandbytes==0.42.0" \
  # "trl==0.7.10" # \
  # "peft==0.7.1" \

# install peft & trl from github
!pip install git+https://github.com/huggingface/trl@a3c5b7178ac4f65569975efadc97db2f3749c65e --upgrade
!pip install git+https://github.com/huggingface/peft@4a1559582281fc3c9283892caea8ccef1d6f5a4f --upgrade

In [None]:
import os
from getpass import getpass

from huggingface_hub import login

# Never hardcode the access token in the notebook: read it from the HF_TOKEN
# environment variable and fall back to an interactive, non-echoing prompt.
hf_token = os.environ.get("HF_TOKEN") or getpass("Hugging Face access token: ")

login(
  token=hf_token,
  add_to_git_credential=True  # also hand the token to the git credential helper
)

In [None]:
# Show the current GPU status before the model is loaded
! nvidia-smi

In [None]:
from datasets import load_dataset

# Read the training and evaluation records from local JSON files.
# Each file is loaded on its own and requested as the "train" split.
dataset = load_dataset(
    "json",
    data_files="train_dataset_newsgroup_with_noise.json",
    split="train",
)
eval_dataset = load_dataset(
    "json",
    data_files="test_dataset_newsgroup.json",
    split="train",
)

In [None]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from trl import setup_chat_format
from peft import LoraConfig, PeftConfig, PeftModel, get_peft_model, prepare_model_for_kbit_training

# Base checkpoint on the Hugging Face Hub
model_id = "codellama/CodeLlama-7b-hf"

# 4-bit quantization settings: NF4 with double quantization, fp16 compute
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.float16,
)

# Quantized base model with automatic device placement
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=bnb_config,
    torch_dtype=torch.float16,
    device_map="auto",
    # attn_implementation="sdpa",
)

# Matching tokenizer; pad on the right to prevent warnings
tokenizer = AutoTokenizer.from_pretrained(model_id)
tokenizer.padding_side = "right"

# Set the chat template to OAI ChatML; remove if you start from a fine-tuned model
model, tokenizer = setup_chat_format(model, tokenizer)

# Get the quantized model ready for k-bit training
model = prepare_model_for_kbit_training(model)

In [None]:
from peft import LoraConfig

# Adapter hyperparameters, based on the QLoRA paper & Sebastian Raschka experiment
peft_config = LoraConfig(
    task_type="CAUSAL_LM",
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
    # Only the attention projections are adapted; the MLP projections
    # ("up_proj", "gate_proj", "down_proj") are left out.
    target_modules=["q_proj", "v_proj", "k_proj", "o_proj"],
)

# Turn on gradient checkpointing, wrap the base model with the adapters,
# then report how many parameters are trainable.
model.gradient_checkpointing_enable()
peft_model = get_peft_model(model, peft_config)
peft_model.print_trainable_parameters()

In [None]:
# Drop the extra name for the base model; it was passed to get_peft_model() and is
# reached through `peft_model` from here on. pop() is used instead of `del model`
# so that re-running this cell does not raise NameError.
globals().pop("model", None)
from transformers import TrainingArguments

args = TrainingArguments(
    output_dir="code-llama-7b-Text-classification", # directory to save and repository id
    num_train_epochs=3,                     # number of training epochs
    per_device_train_batch_size=4,          # batch size per device during training
    gradient_accumulation_steps=4,          # number of batches accumulated before an optimizer update
    gradient_checkpointing=True,            # use gradient checkpointing to save memory
    optim="paged_adamw_8bit",               # paged 8-bit AdamW optimizer
    logging_steps=10,                       # log every 10 steps
    save_strategy="epoch",                  # save checkpoint every epoch
    learning_rate=2e-5,                     # learning rate
    bf16=False,                             # bfloat16 mixed precision disabled
    tf32=False,                             # tf32 precision disabled
    max_grad_norm=0.3,                      # max gradient norm based on QLoRA paper
    warmup_steps=100,                       # warmup steps; NOTE(review): not applied by the plain
                                            # "constant" scheduler below - use "constant_with_warmup"
                                            # if a warmup phase is actually wanted
    lr_scheduler_type="constant",           # use constant learning rate scheduler
    push_to_hub=True,                       # push model to hub
    report_to="tensorboard",                # report metrics to tensorboard
    # The trainer is built without an eval_dataset, so periodic evaluation must be
    # off; "steps" would fail at the first evaluation step. Switch back to "steps"
    # only together with passing eval_dataset to the trainer.
    evaluation_strategy="no",
    weight_decay=0.01
)

In [None]:
from trl import SFTTrainer

# Max sequence length for the model and for packing of the dataset
max_seq_length = 3072
# peft_model.peft_config.use_cache = False

# Supervised fine-tuning trainer over the packed training set
trainer = SFTTrainer(
    model=peft_model,
    tokenizer=tokenizer,
    args=args,
    peft_config=peft_config,
    train_dataset=dataset,
    # eval_dataset=eval_dataset,
    packing=True,
    max_seq_length=max_seq_length,
    dataset_kwargs=dict(
        add_special_tokens=False,   # We template with special tokens
        append_concat_token=False,  # No need to add additional separator token
    ),
)


In [None]:

# Check the GPU status again now that the model and trainer are set up
! nvidia-smi

In [None]:
# Run the fine-tuning loop with the trainer built above
trainer.train()
# Save the trained model through the trainer
# (presumably into args.output_dir - TODO confirm)
trainer.save_model()


In [None]:

# del peft_model
# del trainer

In [None]:
# torch.cuda.empty_cache()

In [None]:
# del peft_model
# del trainer
# torch.cuda.empty_cache()

In [None]:
# import torch
# from peft import AutoPeftModelForCausalLM
# from transformers import AutoTokenizer, pipeline, BitsAndBytesConfig

# peft_model_id = "./code-llama-7b-Text-classification/checkpoint-144"
# # peft_model_id = args.output_dir
# bnb_config = BitsAndBytesConfig(
#     load_in_4bit=True, bnb_4bit_use_double_quant=True, bnb_4bit_quant_type="nf4", bnb_4bit_compute_dtype=torch.float16
# )

# # Load Model with PEFT adapter
# model = AutoPeftModelForCausalLM.from_pretrained(
#   peft_model_id,
#   device_map="auto",
#   torch_dtype=torch.float16,
#   quantization_config=bnb_config
# )
# tokenizer = AutoTokenizer.from_pretrained(peft_model_id)
# # load into pipeline
# pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)

In [None]:
# from tqdm import tqdm
# from datasets import load_dataset


# NUM_LABELS = 6
# id2label = {
#     0:"bydate_sci.space",
#     1:"bydate_sci.med",
#     2:"bydate_sci.electronics",
#     3:'bydate_talk.politics.guns',
#     4:'bydate_talk.politics.mideast',
#     5:'bydate_talk.politics.misc'

# }
# label2id={
#     "bydate_sci.space":0,
#     "bydate_sci.med":1,
#     "bydate_sci.electronics":2,
#     'bydate_talk.politics.guns':3,
#     'bydate_talk.politics.mideast':4,
#     'bydate_talk.politics.misc':5
# }


# def evaluate(sample):
#     prompt = pipe.tokenizer.apply_chat_template(sample["messages"][:2], tokenize=False, add_generation_prompt=True)
#     outputs = pipe(prompt, max_new_tokens=256, do_sample=True, temperature=0.7, top_k=50, top_p=0.95, eos_token_id=pipe.tokenizer.eos_token_id, pad_token_id=pipe.tokenizer.pad_token_id)
#     predicted_answer = outputs[0]['generated_text'][len(prompt):].strip()
#     if label2id[predicted_answer] == sample["messages"][2]["content"]:
#         return 1
#     else:
#         return 0

# success_rate = []
# number_of_eval_samples = 1000
# # iterate over eval dataset and predict
# eval_dataset = load_dataset("json", data_files="test_dataset_newsgroup.json", split="train").shuffle().select(range(number_of_eval_samples))



In [None]:
# for s in tqdm(eval_dataset):
#     success_rate.append(evaluate(s))

# # compute accuracy
# accuracy = sum(success_rate)/len(success_rate)

# print(f"Accuracy: {accuracy*100:.2f}%")