In [None]:
!pip install peft trl transformers torch bitsandbytes datasets
!pip install -U bitsandbytes
!pip install accelerate



In [None]:
import re
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, TrainingArguments
from peft import LoraConfig, PeftModel
from trl import SFTTrainer

# Constants
DEVICE = "cuda:0" if torch.cuda.is_available() else "cpu"
MODEL_NAME = "meta-llama/Llama-2-7b-hf"
DEFAULT_SYSTEM_PROMPT = "Explain the main points of the given articles."

def extract_instruction_response(content):
    """Parse ``<s>[INST] ... [/INST] ... </s>`` records out of a text blob.

    Args:
        content: Raw text containing zero or more records. Records may span
            multiple lines.

    Returns:
        A list of dicts, one per record in order of appearance, each with the
        keys ``"instruction"`` and ``"response"`` holding the whitespace-trimmed
        text found before and after the ``[/INST]`` marker.
    """
    # DOTALL lets "." cross newlines so multi-line instructions/responses match.
    record_regex = re.compile(r"<s>\[INST\](.*?)\[/INST\](.*?)</s>", re.DOTALL)

    pairs = []
    for found in record_regex.finditer(content):
        raw_instruction, raw_response = found.groups()
        pairs.append(
            {"instruction": raw_instruction.strip(), "response": raw_response.strip()}
        )
    return pairs

def generate_training_prompt(instruction: str, response: str, system_prompt: str = DEFAULT_SYSTEM_PROMPT) -> str:
    """Render one instruction/response pair as a single training text.

    Args:
        instruction: The user request; surrounding whitespace is trimmed.
        response: The target answer; surrounding whitespace is trimmed.
        system_prompt: Text placed after ``### Instruction:``.

    Returns:
        The prompt with ``### Instruction:``, ``### Input:`` and
        ``### Response:`` sections, stripped of leading/trailing whitespace.
    """
    user_input = instruction.strip()
    target_answer = response.strip()
    rendered = f"""### Instruction: {system_prompt}

### Input:
{user_input}

### Response:
{target_answer}
"""
    # The final strip removes the trailing newline of the template (and the
    # blank response line when the response is empty).
    return rendered.strip()

# Load and process the data
file_path = 'llama_formatted_data.txt'

# Read with an explicit encoding so parsing does not depend on the platform's
# default locale encoding.
with open(file_path, 'r', encoding='utf-8') as file:
    content = file.read()

data = extract_instruction_response(content)

# Fail fast: if the file does not match the expected record format the parse is
# empty, which would otherwise only surface later as an obscure training error.
if not data:
    raise ValueError(
        f"No '<s>[INST] ... [/INST] ... </s>' records found in {file_path!r}; "
        "check the file format."
    )

# Convert the data into training format
formatted_data = [generate_training_prompt(entry['instruction'], entry['response']) for entry in data]

# Split the data into training and validation sets (80-20 split).
# NOTE(review): the split is positional (no shuffling), so the validation set is
# the last 20% of the file - confirm that ordering does not bias the evaluation.
train_size = int(0.8 * len(formatted_data))
train_data = formatted_data[:train_size]
validation_data = formatted_data[train_size:]

# Processed data in dictionary form
dataset = {
    "train": train_data,
    "validation": validation_data
}


In [None]:
# Authenticate this session with the Hugging Face Hub.
# NOTE(review): presumably required because the Llama-2 weights are access-restricted - confirm.
from huggingface_hub import login
login()
# login() renders an input widget; paste your Hugging Face access token there.
# Do not hardcode the token in the notebook.

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [None]:
from datasets import Dataset

# Wrap every prompt string of each split in a {"text": ...} record, then build
# one Dataset object per split from those records.
train_records = [{"text": prompt_text} for prompt_text in dataset["train"]]
validation_records = [{"text": prompt_text} for prompt_text in dataset["validation"]]

train_dataset = Dataset.from_list(train_records)
validation_dataset = Dataset.from_list(validation_records)

# Define the PEFT (LoRA) configuration.
# The adapter is deliberately tiny: rank 2, with alpha set to twice the rank.
lora_r = 2
lora_alpha = 4
lora_dropout = 0.1
# Names of the sub-modules that receive LoRA adapters.
# NOTE(review): these look like the attention (q/k/v/o) and MLP (gate/up/down)
# projections of the Llama architecture - confirm against the loaded model.
lora_target_modules = [
    "q_proj",
    "up_proj",
    "o_proj",
    "k_proj",
    "down_proj",
    "gate_proj",
    "v_proj",
]

# Bundle the hyperparameters above; bias terms are not adapted (bias="none")
# and the task type is causal language modelling.
peft_config = LoraConfig(
    r=lora_r,
    lora_alpha=lora_alpha,
    lora_dropout=lora_dropout,
    target_modules=lora_target_modules,
    bias="none",
    task_type="CAUSAL_LM",
)

# Directory that receives checkpoints and the final saved model; the merge cell
# later in this notebook loads the adapter from the same path ("experiments").
OUTPUT_DIR = "experiments"

# Trainer hyperparameters. With a per-device batch of 1 and no gradient
# accumulation, each optimizer step sees a single example per device.
training_arguments = TrainingArguments(
    per_device_train_batch_size=1,  # Adjusted to prevent OOM
    gradient_accumulation_steps=1,
    optim="paged_adamw_32bit",  # NOTE(review): presumably the bitsandbytes paged AdamW - confirm
    logging_steps=32,  # training loss is logged every 32 steps
    learning_rate=1e-4,
    fp16=True,
    max_grad_norm=0.3,
    num_train_epochs=2,
    evaluation_strategy="steps",  # NOTE(review): newer transformers releases may name this `eval_strategy` - confirm for the installed version
    eval_steps=32,  # Evaluate every 32 steps (same cadence as logging_steps)
    warmup_ratio=0.05,
    save_strategy="epoch",
    group_by_length=True,
    output_dir=OUTPUT_DIR,
    report_to="tensorboard",
    save_safetensors=True,
    lr_scheduler_type="cosine",
    seed=42,  # fixed seed for reproducibility
)

# Informational only: DEVICE is just printed here; actual weight placement is
# decided by device_map="auto" in the model load below.
print(f"Using device: {DEVICE}")  # Check which device is being used

# Initialize the model and tokenizer
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

# Reuse the EOS token as the padding token.
# NOTE(review): presumably because this tokenizer ships without a pad token - confirm.
tokenizer.pad_token = tokenizer.eos_token

# Load the base model in plain float16.
# NOTE(review): BitsAndBytesConfig is imported at the top of the notebook but is
# not used here, so no quantization is applied.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    torch_dtype=torch.float16,
    device_map="auto"  # Automatically manage device allocation
)

# Build the supervised fine-tuning trainer from the Dataset objects, the LoRA
# config and the training arguments defined above.
# NOTE(review): the captured run log reports that some of these SFTTrainer
# arguments are deprecated in favour of SFTConfig - revisit when upgrading trl.
trainer = SFTTrainer(
    model=model,
    train_dataset=train_dataset,  # Dataset with a single "text" column
    eval_dataset=validation_dataset,  # Dataset with a single "text" column
    peft_config=peft_config,
    dataset_text_field="text",
    max_seq_length=128,  # NOTE(review): longer examples are presumably truncated to 128 tokens - confirm
    tokenizer=tokenizer,
    args=training_arguments,
)

# Run training, then save the result.
# NOTE(review): save_model() is called without a path; presumably it writes to
# training_arguments.output_dir - confirm.
trainer.train()
trainer.save_model()




Using device: cuda:0


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]


Deprecated positional argument(s) used in SFTTrainer, please use the SFTConfig to set these arguments instead.


Map:   0%|          | 0/1399 [00:00<?, ? examples/s]

Map:   0%|          | 0/350 [00:00<?, ? examples/s]

We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.43. Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/v4.41.3/en/internal/generation_utils#transformers.Cache)


Step,Training Loss,Validation Loss
32,2.2347,2.290883
64,2.346,2.067358
96,1.7182,1.467019
128,1.2807,1.258866
160,1.066,1.177631
192,0.9153,1.178319
224,0.9147,1.167646
256,0.7857,1.220733
288,0.7978,1.149394
320,0.5779,1.176408


Step,Training Loss,Validation Loss
32,2.2347,2.290883
64,2.346,2.067358
96,1.7182,1.467019
128,1.2807,1.258866
160,1.066,1.177631
192,0.9153,1.178319
224,0.9147,1.167646
256,0.7857,1.220733
288,0.7978,1.149394
320,0.5779,1.176408


In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel
import torch
from trl import setup_chat_format

# Define your base model and new model paths
base_model = "meta-llama/Llama-2-7b-hf"  # Replace with your base model path
new_model = "experiments"     # Replace with your new model path (adapter directory)

# Reload the tokenizer exactly as it was used for training (plain base vocabulary).
tokenizer = AutoTokenizer.from_pretrained(base_model)

# Load the base model in float16 with low CPU memory usage.
# NOTE(review): trust_remote_code=True permits running code shipped in the model
# repository; presumably unnecessary for Llama-2 - consider dropping it.
base_model_reload = AutoModelForCausalLM.from_pretrained(
    base_model,
    return_dict=True,
    low_cpu_mem_usage=True,
    torch_dtype=torch.float16,
    device_map="balanced",  # Use "cpu" or set to "cuda:0" if you want to load on the first GPU
    trust_remote_code=True,
)

# setup_chat_format() is deliberately NOT applied here. The adapter was trained
# on the unmodified tokenizer/model with the "### Instruction / ### Input /
# ### Response" template. Applying the chat format after training adds new
# special tokens and resizes the embeddings, so the merged model would contain
# untrained embedding rows and a BOS/EOS setup that differs from training.

# Attach the trained LoRA adapter to the base model
model = PeftModel.from_pretrained(base_model_reload, new_model)

# Merge LoRA layers into the base weights and drop the adapter wrappers
merged_model = model.merge_and_unload()

# Save the merged model
merged_model.save_pretrained("merged_LLaMa2_7B_Chat-finetuned", safe_serialization=True)

# Save the tokenizer next to the merged weights so both load from one directory
tokenizer.save_pretrained("merged_LLaMa2_7B_Chat-finetuned")

print("Merged model and tokenizer saved successfully.")


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Merged model and tokenizer saved successfully.


In [None]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# Directory holding the merged model and tokenizer written by the merge step
OUTPUT_DIR = "merged_LLaMa2_7B_Chat-finetuned"  # Adjust this to your local path

# Prefer the first GPU when CUDA is available, otherwise fall back to the CPU
DEVICE = "cuda:0" if torch.cuda.is_available() else "cpu"

# Tokenizer comes from the same local directory as the weights
tokenizer = AutoTokenizer.from_pretrained(OUTPUT_DIR)

# Half precision on the GPU, full precision on the CPU
model_dtype = torch.float16 if DEVICE == "cuda:0" else torch.float32

# Load the merged weights with reduced CPU memory usage, then place them on DEVICE
model = AutoModelForCausalLM.from_pretrained(
    OUTPUT_DIR,
    low_cpu_mem_usage=True,
    torch_dtype=model_dtype,
)
model = model.to(DEVICE)

def generate_prompt(instruction: str, system_prompt: str = "You are a helpful assistant.") -> str:
    """Build the inference prompt, ending at an empty ``### Response:`` section.

    Args:
        instruction: The user request; surrounding whitespace is trimmed.
        system_prompt: Text placed after ``### Instruction:``.

    Returns:
        The prompt with ``### Instruction:``, ``### Input:`` and a trailing
        ``### Response:`` header, stripped of leading/trailing whitespace.

    NOTE(review): the training prompts in this notebook were built with a
    different system prompt (DEFAULT_SYSTEM_PROMPT, "Explain the main points of
    the given articles."). Confirm whether callers should pass that value here
    so inference matches the training template.
    """
    cleaned_instruction = instruction.strip()
    rendered = f"""### Instruction: {system_prompt}

### Input:
{cleaned_instruction}

### Response:
"""
    # Dropping the trailing newline leaves the prompt ending in "### Response:".
    return rendered.strip()

def summarize(model, text: str):
    """Greedily generate a continuation of ``text`` and return it decoded.

    Args:
        model: Causal language model exposing ``generate`` and ``device``.
        text: Fully formatted prompt to continue.

    Returns:
        The first generated sequence decoded to a string with special tokens
        removed. The prompt is part of the returned text, followed by up to
        256 newly generated tokens.

    Note:
        Uses the module-level ``tokenizer`` for encoding and decoding.
    """
    # Put the inputs on whatever device the model actually lives on, rather
    # than relying on a separate global staying in sync with it.
    inputs = tokenizer(text, return_tensors="pt").to(model.device)

    with torch.inference_mode():
        outputs = model.generate(
            **inputs,
            max_new_tokens=256,  # Limit the number of tokens generated
            # Greedy decoding for deterministic output. No `temperature` is
            # passed: it only applies when sampling, and combining it with
            # do_sample=False is contradictory and triggers a warning.
            do_sample=False,
        )
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

# Example usage: format one question with the prompt template, run greedy
# generation on the merged model, and print the decoded text (the prompt is
# echoed back as part of the output).
instruction = "Explain the main points of Article 101."
prompt = generate_prompt(instruction)
summary = summarize(model, prompt)
print(summary)

# Optional: release cached GPU memory held by PyTorch once generation is done
torch.cuda.empty_cache()


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]



### Instruction: You are a helpful assistant.

### Input:
Explain the main points of Article 101.

### Response:
(b) he is a citizen of Pakistan;

(c) he is not less than eighteen years of age;

(d) his name appears on the electoral roll; and

(e) he is not declared by a competent court to be of unsound mind.

(2) A person shall not be qualified for election as a member of 2[Majlis-

e-Shoora (Parliament)] if—

(a) he does not live for twelve months immediately before the

day of election as an ordinary resident in the constituency for

which he stands; or

(b) he does not pay his annual income tax consolidated

duties for the year immediately preceding the day of election:

Provided that this clause shall not apply to the President and

a member of the National Assembly disqualified from standing

for election as a member of the Assembly.

(3) No person shall be qualified for election as a member of 2[Majlis-

e-Shoora (Parliament)] if he holds any office other than the following

off

In [None]:
# Second example: a free-form question, run through the same prompt template
# and generation helper as the previous cell.
instruction = "i have committed theft, what does the constitution say about that."
prompt = generate_prompt(instruction)
summary = summarize(model, prompt)
print(summary)

### Instruction: You are a helpful assistant.

### Input:
i have committed theft, what does the constitution say about that.

### Response:
it provides that any person who is guilty of

theft shall be punished with imprisonment of either

description for a term which may extend to

three years, or with fine, or with both.

(iv) "goods" includes all materials, commodities and

articles whatsoever, except money;

(v) "house-tiller" means any person who tills the

land of another for the purpose of agriculture or

horticulture and includes an agricultural labourer;

(vi) "judge" means judge of a High Court and includes

the Chief Justice of a High Court and, when

occasion requires, the Appellate Division;

(vii) "jurisdiction" means jurisdiction over the

entirety of a suit, appeal or application and

includes geographical jurisdiction and

jurisdiction as to the nature or the stage of the

proceedings;

(viii) "law" includes any custom or usage having the

force of law, but does not inc