<a href="https://colab.research.google.com/github/PranavDarshan/AutoGrader/blob/main/finetuning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# %%capture
!pip install accelerate peft bitsandbytes transformers trl

Collecting accelerate
  Downloading accelerate-0.30.1-py3-none-any.whl (302 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m302.6/302.6 kB[0m [31m6.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting peft
  Downloading peft-0.11.1-py3-none-any.whl (251 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m251.6/251.6 kB[0m [31m8.9 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting bitsandbytes
  Downloading bitsandbytes-0.43.1-py3-none-manylinux_2_24_x86_64.whl (119.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m119.8/119.8 MB[0m [31m8.3 MB/s[0m eta [36m0:00:00[0m
Collecting trl
  Downloading trl-0.9.4-py3-none-any.whl (226 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m226.7/226.7 kB[0m [31m22.4 MB/s[0m eta [36m0:00:00[0m
Collecting datasets (from trl)
  Downloading datasets-2.19.2-py3-none-any.whl (542 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m542.1/542.1 kB[0m [31m41.1 MB

In [None]:
import os
import torch
from datasets import load_dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
    pipeline,
    logging,
)
from peft import LoraConfig
from trl import SFTTrainer

In [None]:
# --- Notebook configuration -------------------------------------------------
# Hugging Face Hub id of the base checkpoint to fine-tune.
base_model = "NousResearch/Llama-2-7b-chat-hf"

# Hugging Face Hub id of the instruction dataset used for fine-tuning.
os_dataset = "NiharMandahas/Os_evaluator"

# Local directory name under which the fine-tuned model is saved.
new_model = "llama-2-7b-chat-evaluator"

In [None]:
# Fetch only the "train" split of the instruction dataset from the Hub.
dataset = load_dataset(path=os_dataset, split="train")



The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Downloading readme:   0%|          | 0.00/595 [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/68.4k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/31.3k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/62 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/27 [00:00<?, ? examples/s]

In [None]:
# Dtype handed to bitsandbytes as the 4-bit compute dtype.
compute_dtype = torch.float16

# 4-bit loading settings: NF4 quantization type, float16 compute,
# double quantization switched off.
quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=False,
    bnb_4bit_compute_dtype=compute_dtype,
)

In [None]:
# Load the base checkpoint with the 4-bit quantization settings.
# The device map sends the root module ("") to device index 0.
model = AutoModelForCausalLM.from_pretrained(
    base_model,
    device_map={"": 0},
    quantization_config=quant_config,
)

# Config overrides applied before training.
# NOTE(review): use_cache is presumably disabled because generation caching
# is not needed while training — confirm before relying on it for inference.
model.config.pretraining_tp = 1
model.config.use_cache = False

config.json:   0%|          | 0.00/583 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/26.8k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.98G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/3.50G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/200 [00:00<?, ?B/s]

In [None]:
# Tokenizer that matches the base checkpoint.
# NOTE(review): trust_remote_code=True allows code shipped in the Hub repo to
# run locally — confirm this checkpoint actually needs it.
tokenizer = AutoTokenizer.from_pretrained(
    base_model,
    trust_remote_code=True,
)

# Pad on the right, reusing the end-of-sequence token as the padding token.
tokenizer.padding_side = "right"
tokenizer.pad_token = tokenizer.eos_token

tokenizer_config.json:   0%|          | 0.00/746 [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/21.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/435 [00:00<?, ?B/s]

In [None]:
# LoRA adapter settings for a causal language model: rank 64, alpha 16,
# 10% dropout on the adapter path, and no bias terms trained.
peft_params = LoraConfig(
    task_type="CAUSAL_LM",
    r=64,
    lora_alpha=16,
    lora_dropout=0.1,
    bias="none",
)

In [None]:
# Hyperparameters for the fine-tuning run, grouped by concern.
training_params = TrainingArguments(
    # Output location, logging and checkpoint cadence
    output_dir="./results",
    report_to="tensorboard",
    logging_steps=25,
    save_steps=25,
    # Run length and batching (max_steps=-1 leaves the length to the epoch count)
    num_train_epochs=1,
    max_steps=-1,
    per_device_train_batch_size=4,
    gradient_accumulation_steps=1,
    group_by_length=True,
    # Optimizer and learning-rate schedule
    optim="paged_adamw_32bit",
    learning_rate=2e-4,
    weight_decay=0.001,
    max_grad_norm=0.3,
    warmup_ratio=0.03,
    lr_scheduler_type="constant",
    # Both half-precision training modes are switched off
    fp16=False,
    bf16=False,
)

In [None]:


# Build the supervised fine-tuning trainer: the quantized base model, the
# LoRA settings, and the dataset's "Text" column as the training text.
trainer = SFTTrainer(
    model=model,
    train_dataset=dataset,
    peft_config=peft_params,
    dataset_text_field="Text",
    max_seq_length=None,
    tokenizer=tokenizer,
    args=training_params,
    packing=False,
)

# Run the fine-tuning loop. Constructing the trainer only prepares the model
# and dataset; without this call the cells that save and upload the model
# would export an adapter that was never trained.
trainer.train()




Deprecated positional argument(s) used in SFTTrainer, please use the SFTConfig to set these arguments instead.


Map:   0%|          | 0/62 [00:00<?, ? examples/s]

Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


In [None]:
# Write the trainer's model into the `new_model` directory.
trainer.model.save_pretrained(new_model)
# Save the tokenizer alongside it. Kept as the cell's last expression so the
# notebook echoes the tuple of written file paths.
trainer.tokenizer.save_pretrained(new_model)




('llama-2-7b-chat-evaluator/tokenizer_config.json',
 'llama-2-7b-chat-evaluator/special_tokens_map.json',
 'llama-2-7b-chat-evaluator/tokenizer.model',
 'llama-2-7b-chat-evaluator/added_tokens.json',
 'llama-2-7b-chat-evaluator/tokenizer.json')

In [None]:
# Dump the in-memory model's state dict to a single file in the working directory.
# NOTE(review): this is separate from the save_pretrained() export — confirm
# the extra file is actually needed.
model_save_name = 'Evaluator_trial.pt'
path = model_save_name
torch.save(model.state_dict(), path)

In [None]:
# Silence everything below CRITICAL from transformers while generating.
logging.set_verbosity(logging.CRITICAL)

# Sample evaluation prompt: a question, a user answer, and the grading request.
# The backslash before "evaluate" is written as an explicit "\\" so the string
# keeps the same characters as before without relying on the invalid "\e"
# escape sequence, which newer Python versions warn about.
prompt = (
    "Question:What is caching? Answer:Paging is a technique of memery management. "
    "\\evaluate this answer given by the user with reference to the question "
    "and give a score from 1-5 and be conseravtive in giving marks"
)

# Text-generation pipeline over the in-memory model; max_length=300 is passed
# through as the generation length limit.
pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer, max_length=300)

# Wrap the prompt in [INST] ... [/INST] markers and print the generated text.
result = pipe(f"<s>[INST] {prompt} [/INST]")
print(result[0]['generated_text'])

<s>[INST] Question:What is caching? Answer:Paging is a technique of memery management. \evaluate this answer given by the user with reference to the question and give a score from 1-5 and be conseravtive in giving marks [/INST]  Thank you for providing the answer. Here's my evaluation of the answer:

Question: What is caching?

User Answer: Paging is a technique of memory management.

Score: 2/5

Reasoning:

While paging is a technique used in computer science, it is not directly related to caching. Caching is a technique used to improve the performance of computer systems by storing frequently accessed data in a faster, more accessible location. Paging, on the other hand, is a technique used to manage memory by dividing it into smaller blocks and assigning each block to a specific page table.

The user's answer is partially correct, as paging is a technique used in memory management. However, the answer does not fully address the question asked, which is specifically about caching. Th

In [None]:
# Generate a model card for this run, then upload the model and the tokenizer
# to the Hub repository named below.
# NOTE(review): pushing presumably needs an authenticated session with write
# access to this repo; an earlier cell's output reported that no HF_TOKEN
# secret was configured — confirm how authentication is provided.
trainer.create_model_card()
repo="NiharMandahas/Os_script_evaluator"
trainer.model.push_to_hub(repo_id=repo)
# Kept as the cell's last expression so the notebook displays the returned CommitInfo.
trainer.tokenizer.push_to_hub(repo_id=repo)




adapter_model.safetensors:   0%|          | 0.00/134M [00:00<?, ?B/s]

README.md:   0%|          | 0.00/5.17k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/NiharMandahas/Os_script_evaluator/commit/3d2d7eb48d2f21581fa4e8075e71850b38ecc8aa', commit_message='Upload tokenizer', commit_description='', oid='3d2d7eb48d2f21581fa4e8075e71850b38ecc8aa', pr_url=None, pr_revision=None, pr_num=None)