In [None]:
# Install Pytorch & other libraries
!pip install "torch==2.1.2" tensorboard

# Install Hugging Face libraries
!pip install  --upgrade \
  "datasets==2.16.1" \
  "accelerate==0.26.1" \
  "evaluate==0.4.1" \
  "bitsandbytes==0.42.0" \
  # "trl==0.7.10" # \
  # "peft==0.7.1" \
  #

# install peft & trl from github
!pip install git+https://github.com/huggingface/trl.git
!pip install git+https://github.com/huggingface/peft.git
! pip install --no-cache git+https://github.com/huggingface/transformers.git

In [None]:
# from trl import setup_chat_format, SFTTrainer

In [None]:
# Log in to the Hugging Face Hub from inside the notebook.
# The training arguments further down set push_to_hub=True, which uploads to the Hub.
from huggingface_hub import notebook_login
notebook_login()


In [None]:
from transformers import AutoTokenizer
from datasets import load_dataset, Dataset

# Load the 20 Newsgroups train/test splits.
combined_dataset = load_dataset("SetFit/20_newsgroups")
train_data = combined_dataset["train"]
test_data = combined_dataset["test"]

# Initialize the (slow) tokenizer and register `<|endoftext|>` as its padding token.
tokenizer = AutoTokenizer.from_pretrained(
    "openlm-research/open_llama_3b_v2",
    use_fast=False,
    trust_remote_code=True,
    pad_token="<|endoftext|>",
)
# Defensive fallback: only adds `[PAD]` if no padding token ended up registered.
if tokenizer.pad_token_id is None:
    tokenizer.add_special_tokens({'pad_token': '[PAD]'})

# Set maximum token length
max_token_length = 512


def fits_max_token_length(examples):
    """Return one bool per example in the batch.

    An example is kept (True) when its unpadded, untruncated encoding has at
    most ``max_token_length`` token ids.
    """
    encoded = tokenizer(examples["text"])
    return [len(ids) <= max_token_length for ids in encoded["input_ids"]]


def tokenize_function(examples):
    """Tokenize a batch of texts, padding/truncating each to ``max_token_length`` tokens."""
    return tokenizer(examples["text"], padding="max_length", truncation=True, max_length=max_token_length)


# Drop examples longer than the limit. `Dataset.filter` runs in batches and
# keeps the result as a `Dataset`, so no Python-side lists or dicts have to be
# rebuilt. Only the `text` and `label` columns are carried forward.
train_dataset = train_data.filter(fits_max_token_length, batched=True).select_columns(["text", "label"])
eval_dataset = test_data.filter(fits_max_token_length, batched=True).select_columns(["text", "label"])

# Tokenize and pad datasets
tokenized_dataset_train = train_dataset.map(tokenize_function, batched=True)
tokenized_dataset_test = eval_dataset.map(tokenize_function, batched=True)

# Print the tokenized datasets
print(tokenized_dataset_train, tokenized_dataset_test)



Repo card metadata block was not found. Setting CardData to empty.


You are using the default legacy behaviour of the <class 'transformers.models.llama.tokenization_llama.LlamaTokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565 - if you loaded a llama tokenizer from a GGUF file you can ignore this message


Token indices sequence length is longer than the specified maximum sequence length for this model (2881 > 2048). Running this sequence through the model will result in indexing errors


Map:   0%|          | 0/10019 [00:00<?, ? examples/s]

Map:   0%|          | 0/6707 [00:00<?, ? examples/s]

Dataset({
    features: ['text', 'label', 'input_ids', 'attention_mask'],
    num_rows: 10019
}) Dataset({
    features: ['text', 'label', 'input_ids', 'attention_mask'],
    num_rows: 6707
})


In [None]:
# Newsgroup names listed in class-id order: the position of a name in this
# tuple is the integer label it maps to.
_NEWSGROUP_NAMES = (
    "alt.atheism",
    "comp.graphics",
    "comp.os.ms-windows.misc",
    "comp.sys.ibm.pc.hardware",
    "comp.sys.mac.hardware",
    "comp.windows.x",
    "misc.forsale",
    "rec.autos",
    "rec.motorcycles",
    "rec.sport.baseball",
    "rec.sport.hockey",
    "sci.crypt",
    "sci.electronics",
    "sci.med",
    "sci.space",
    "soc.religion.christian",
    "talk.politics.guns",
    "talk.politics.mideast",
    "talk.politics.misc",
    "talk.religion.misc",
)

# Integer class id -> newsgroup name.
id2label = dict(enumerate(_NEWSGROUP_NAMES))

# Newsgroup name -> integer class id (exact inverse of `id2label`).
label2id = {name: class_id for class_id, name in id2label.items()}

# Number of target classes.
NUM_LABELS = len(id2label)


In [None]:
# Shell out to nvidia-smi to show the GPU, driver version and current memory usage.
! nvidia-smi

In [None]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, AutoModelForSequenceClassification,LlamaTokenizer, LlamaForCausalLM,LlamaForSequenceClassification
# from trl import setup_chat_format
from peft import LoraConfig, PeftConfig, PeftModel, get_peft_model, prepare_model_for_kbit_training

# Hugging Face model id
model_id = "openlm-research/open_llama_3b_v2" # or `mistralai/Mistral-7B-v0.1`

# BitsAndBytesConfig int-4 config: NF4 weights, double quantization, fp16 compute.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
)

# Load the base model with a sequence-classification head.
model = LlamaForSequenceClassification.from_pretrained(
    model_id,
    # Class count and human-readable label names come from the label-map cell,
    # so they are stored in the model config and cannot drift from it.
    num_labels=NUM_LABELS,
    id2label=id2label,
    label2id=label2id,
    # Passing the config is what actually applies the 4-bit settings above;
    # without it the model is loaded unquantized.
    quantization_config=bnb_config,
    torch_dtype=torch.float16,
    device_map="auto",
)

# The tokenizer was given an extra padding token, so grow the embedding matrix
# to match its vocabulary size.
model.resize_token_embeddings(len(tokenizer))

# The classification head pools the hidden state of the last non-padding token,
# which it locates through `config.pad_token_id`. Point it at the id the
# tokenizer really pads with.
model.config.pad_token_id = tokenizer.pad_token_id

# Prepare the quantized model for adapter training.
model = prepare_model_for_kbit_training(model)

In [None]:
# Print the model's module structure for inspection.
print(model)

In [None]:
from peft import LoraConfig

# Turn on gradient checkpointing on the base model before adapters are attached.
model.gradient_checkpointing_enable()

# LoRA hyper-parameters, based on the QLoRA paper & Sebastian Raschka's experiment.
lora_settings = dict(
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
    target_modules="all-linear",
    task_type="SEQ_CLS",
)
peft_config = LoraConfig(**lora_settings)

# Wrap the base model with the LoRA adapters and report the trainable parameter count.
peft_model = get_peft_model(model, peft_config)
peft_model.print_trainable_parameters()

trainable params: 25,541,440 || all params: 3,349,733,760 || trainable%: 0.7625


In [None]:
# del model
from transformers import TrainingArguments


# Training configuration for the LoRA fine-tuning run.
args = TrainingArguments(
    # Local checkpoint directory. With push_to_hub=True and no explicit
    # hub_model_id, the Hub repository name appears to be taken from the last
    # path component -- NOTE(review): confirm against the pushed repo URL.
    output_dir="openlm-research/open_llama_3b_v2_new_newsgroup_full",
    num_train_epochs=3,
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,  # 2 x 4 = 8 examples per optimizer step per device
    gradient_checkpointing=True,
    optim="adamw_torch_fused",
    logging_steps=100,
    # save_strategy="epoch",
    warmup_steps=500,
    bf16=False,
    tf32=False,
    push_to_hub=True,
    report_to="tensorboard",
    disable_tqdm=False,
    # load_best_model_at_end=True,
    # metric_for_best_model="accuracy",
    # Evaluation during training stays disabled. That is the default, so the
    # deprecated `evaluation_strategy` keyword is intentionally not passed.
)



In [None]:
from transformers import Trainer
# Build the Trainer around the LoRA-wrapped model.
# The base `model` name is deliberately left defined: `peft_model` holds a
# reference to it, so deleting the name would free no memory and would only
# make this cell (and earlier cells that use `model`) fail when re-run.
trainer = Trainer(
    model=peft_model,
    args=args,
    train_dataset=tokenized_dataset_train,
    # eval_dataset=tokenized_dataset_test,
)

In [None]:
import os
# Run the fine-tuning loop.
trainer.train()

# Save the trained model locally. The tokenizer is saved next to it because its
# vocabulary was extended with a padding token, so it has to be reloaded
# together with the resized model.
local_save_dir = "./openlm-research/open_llama_3b_v2-Text-Classifier_new"
trainer.save_model(local_save_dir)
tokenizer.save_pretrained(local_save_dir)

# Upload to the Hub. The target repository is determined by the training
# arguments; the argument of `push_to_hub` is only the commit message.
trainer.push_to_hub(commit_message="Add open_llama_3b_v2 LoRA classifier fine-tuned on 20 Newsgroups")

`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.




Step,Training Loss
100,3.7631
200,2.2195
300,1.5671
400,1.4769
500,1.3854
600,1.2016
700,1.0425
800,1.0688
900,0.9668
1000,0.9024


































CommitInfo(commit_url='https://huggingface.co/Noodle-bg/open_llama_3b_v2_new_newsgroup_full/commit/2b425ada214d4ad4c6810bf4768185a4f90ff61c', commit_message='openlm-research/open_llama_3b_v2-Classifier_new', commit_description='', oid='2b425ada214d4ad4c6810bf4768185a4f90ff61c', pr_url=None, pr_revision=None, pr_num=None)

In [None]:
# tokenized_dataset_train

In [None]:
# import torch
# torch.cuda.empty_cache()
# results = trainer.evaluate()

In [None]:
# Shell out to nvidia-smi again to inspect GPU memory usage at this point in the notebook.
!nvidia-smi

Fri Jul  5 04:08:24 2024       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.54.14              Driver Version: 550.54.14      CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |


|   0  Quadro RTX 6000                Off |   00000000:65:00.0 Off |                  Off |
| 33%   51C    P2             69W /  260W |   14968MiB /  24576MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                                                         
+-----------------------------------------------------------------------------------------+
| Processes:                                                                              |
|  GPU   GI   CI        PID   Type   Process name                              GPU Memory |
|        ID   ID                                                               Usage      |
|    0   N/A  N/A      2571      G   /usr/lib/xorg/Xorg                             99MiB |
|    0   N/A  N/A      2764      G   /usr/bin/gnome-shell               

In [None]:
# # free the memory again
# # del model
# del trainer
# torch.cuda.empty_cache()

In [None]:
# import torch
# from peft import AutoPeftModelForCausalLM
# from transformers import AutoTokenizer, pipeline, BitsAndBytesConfig, AutoModelForCausalLM
# from peft import PeftModel, PeftConfig
# from trl import setup_chat_format
# peft_model_id ="code-llama-7b-Text-classification-20_newsgroup"
# # peft_model_id = args.output_dir
# bnb_config = BitsAndBytesConfig(
#     load_in_4bit=True, bnb_4bit_use_double_quant=True, bnb_4bit_quant_type="nf4", bnb_4bit_compute_dtype=torch.float16
# )

# # # Load Model with PEFT adapter
# # model = PeftModel.from_pretrained(
# #   peft_model_id,
# #   device_map="auto",
# #   torch_dtype=torch.float16,
# #   quantization_config=bnb_config
# # )
# model = AutoModelForCausalLM.from_pretrained("openlm-research/open_llama_3b_v2",
#                                                     device_map="auto",
#                                                     torch_dtype=torch.float16,
#                                                     quantization_config=bnb_config)
# tokenizer = AutoTokenizer.from_pretrained("openlm-research/open_llama_3b_v2")
# tokenizer.padding_side= 'right'
# model, tokenizer = setup_chat_format(model, tokenizer)
# model = PeftModel.from_pretrained(model, "open_llama_3b_v2-20_newsgroup_full")

# # load into pipeline
# pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)

In [None]:
# Mount Google Drive when running on Google Colab.
# `google.colab` only exists inside Colab, so on any other machine the import
# is skipped instead of stopping the notebook with an ImportError.
try:
    from google.colab import drive
except ImportError:
    drive = None
    print("Not running on Google Colab; skipping Google Drive mount.")

if drive is not None:
    drive.mount('/content/drive')

In [None]:
# pipe = pipeline("text-classification", model=model, tokenizer=tokenizer)

In [None]:
# from datasets import load_dataset
# from random import randint
# # del model
# # Load our test dataset
# eval_dataset = load_dataset("json", data_files="test_dataset_20_newsgroups.json", split="train")
# rand_idx = randint(0, len(eval_dataset))

# # Test on sample
# prompt = pipe.tokenizer.apply_chat_template(eval_dataset[rand_idx]["messages"][:2], tokenize=False, add_generation_prompt=True)
# outputs = pipe(prompt, top_k=50, eos_token_id=pipe.tokenizer.eos_token_id, pad_token_id=pipe.tokenizer.pad_token_id)

# print(f"Query:\n{eval_dataset[rand_idx]['messages'][1]['content']}")
# print(f"Original Answer:\n{eval_dataset[rand_idx]['messages'][2]['content']}")
# print(f"Generated Answer:\n{outputs[0]['generated_text'][len(prompt):].strip()}")

In [None]:
# from tqdm import tqdm
# from datasets import load_dataset


# def evaluate(sample):
#     prompt = pipe.tokenizer.apply_chat_template(sample["messages"][:2], tokenize=False, add_generation_prompt=True)
#     outputs = pipe(prompt, max_new_tokens=256, do_sample=True, temperature=0.3, top_k=50, top_p=0.95, eos_token_id=pipe.tokenizer.eos_token_id, pad_token_id=pipe.tokenizer.pad_token_id)
#     predicted_answer = outputs[0]['generated_text'][len(prompt):].strip()
#     if int(predicted_answer) == int(sample["messages"][2]["content"]):
#         return 1
#     else:
#         return 0

# success_rate = []
# number_of_eval_samples = 1000
# # iterate over eval dataset and predict
# eval_dataset = load_dataset("json", data_files="test_dataset_20_newsgroups.json", split="train").shuffle().select(range(number_of_eval_samples))



In [None]:
# with torch.no_grad():
#     for s in eval_dataset:
#         success_rate.append(evaluate(s))

# # compute accuracy
# accuracy = sum(success_rate)/len(success_rate)

# print(f"Accuracy: {accuracy*100:.2f}%")