In [None]:
# Authenticate this session with the Hugging Face Hub before touching gated
# models or pushing anything; login() is called without arguments here.
from huggingface_hub import login
login()

In [None]:
# Snapshot the currently installed package versions into requirements2.txt.
!pip freeze > requirements2.txt

In [None]:
!pip install -q -U transformers accelerate git+https://github.com/huggingface/peft.git
!pip install -q datasets bitsandbytes einops wandb

In [None]:
!pip install trl==0.7.11

## Dataset



In [None]:
from datasets import load_dataset

# Earlier experiments used "timdettmers/openassistant-guanaco"
# (### Human ... ### Assistant formatted turns):
# dataset_name = "timdettmers/openassistant-guanaco"

# Hub dataset repository and the JSONL file inside it that holds the samples.
# Other file names tried before: 'Long Exemplary Instruction Tasks.jsonl', 'traini.jsonl'.
dataset_name = 'Lennard-Heuer/DAIM-LLM'
data_files = 'fine_tuning_data (14).jsonl'

# No split is requested, so the returned object is indexed by split name later on.
dataset_train = load_dataset(
    path=dataset_name,
    data_files=data_files,
)

In [None]:
# Reference only (not executed): the dataset-loading code this notebook started from.
# for reference: from datasets import load_dataset

# for reference:  #dataset_name = "timdettmers/openassistant-guanaco" ###Human ,.,,,,,, ###Assistant

# for reference:  dataset_name = 'AlexanderDoria/novel17_test' #french novels
# for reference:  dataset = load_dataset(dataset_name, split="train")

In [None]:
# Print the loaded dataset object to inspect what was loaded.
print(dataset_train)

In [None]:
import torch
from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

# Checkpoint that gets fine-tuned.
model_name = "meta-llama/Meta-Llama-3-8B"

# Fetch the checkpoint's configuration and set max_position_embeddings to 4096
# before any weights are instantiated.
config = AutoConfig.from_pretrained(model_name)
config.max_position_embeddings = 4096

# Instantiate the causal-LM from the checkpoint with the adjusted configuration.
model = AutoModelForCausalLM.from_pretrained(model_name, config=config, trust_remote_code=True)

# Switch off `use_cache` on the model config for training.
model.config.use_cache = False

## Loading the model

Let's also load the tokenizer below

In [None]:
# Load the tokenizer that belongs to `model_name`.
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
# Reuse the end-of-sequence token as the padding token.
# NOTE(review): with pad == eos, a collator that masks padding may also mask
# real EOS tokens from the loss — confirm this is acceptable for this run.
tokenizer.pad_token = tokenizer.eos_token

Old stuff: the LoRA configuration in the next cell is commented out and kept only for reference.

In [None]:
# Legacy LoRA setup, intentionally left commented out (nothing in this cell runs).
# from peft import LoraConfig, get_peft_model

# lora_alpha = 16
# lora_dropout = 0.1
# lora_r = 64

#peft_config = LoraConfig(
#    lora_alpha=lora_alpha,
#    lora_dropout=lora_dropout,
#    r=lora_r,
#    bias="none",
#    task_type="CAUSAL_LM"
#)

In [None]:
n_freeze = 28 # you can play with this parameter

# Start from a fully frozen network ...
model.requires_grad_(False)

# ... then re-enable gradients for the output head and for every decoder
# layer from index n_freeze onward.
model.lm_head.requires_grad_(True)
for decoder_layer in model.model.layers[n_freeze:]:
    decoder_layer.requires_grad_(True)

## Loading the trainer

Here we use the [`SFTTrainer` from the TRL library](https://huggingface.co/docs/trl/main/en/sft_trainer), a wrapper around the transformers `Trainer` that makes it easy to fine-tune models on instruction-based datasets using PEFT adapters. Let's first define the training arguments below.

Note: batch size / gradient accumulation steps used to be 4/4.

In [None]:
pip install accelerate -U transformers[torch]

In [None]:
from transformers import TrainingArguments

# --- Tunable hyper-parameters -------------------------------------------
output_dir = "./results"              # checkpoints and the final model go here
per_device_train_batch_size = 1
gradient_accumulation_steps = 1
optim = "paged_adamw_32bit"
save_steps = 100                      # checkpoint interval (optimizer steps)
logging_steps = 10                    # logging interval (optimizer steps)
learning_rate = 2e-4
max_grad_norm = 0.3                   # gradient clipping threshold
max_steps = 100                       # total number of optimizer steps
warmup_ratio = 0.03
# The plain "constant" schedule takes no warmup argument, so `warmup_ratio`
# was silently ignored; "constant_with_warmup" makes the setting effective.
lr_scheduler_type = "constant_with_warmup"
max_seq_length = 512                  # consumed by the trainer cell, not by TrainingArguments

training_arguments = TrainingArguments(
    output_dir=output_dir,
    per_device_train_batch_size=per_device_train_batch_size,
    gradient_accumulation_steps=gradient_accumulation_steps,
    optim=optim,
    save_steps=save_steps,
    logging_steps=logging_steps,
    learning_rate=learning_rate,
    fp16=True,
    max_grad_norm=max_grad_norm,
    max_steps=max_steps,
    warmup_ratio=warmup_ratio,
    group_by_length=True,
    lr_scheduler_type=lr_scheduler_type,
)

Then finally pass everything to the trainer.

In [None]:
# Pick the 'train' split out of the loaded dataset object.
train_dataset = dataset_train['train']

In [None]:
# Display the training split's summary as the cell output.
train_dataset

In [None]:
# Upgrade the trl package to the latest version
!pip install --upgrade trl

In [None]:
# Only SFTTrainer is needed. The previously imported (and unused) SFTConfig
# does not exist in the pinned trl==0.7.11 and made this cell fail on import.
from trl import SFTTrainer
from transformers import TrainingArguments

# Supervised fine-tuning on the "text" column of the training split,
# truncated to `max_seq_length` tokens.
# NOTE(review): newer TRL releases move `dataset_text_field` / `max_seq_length`
# into SFTConfig and rename `tokenizer` — confirm against the installed version.
trainer = SFTTrainer(
    model=model,
    args=training_arguments,
    train_dataset=train_dataset,
    tokenizer=tokenizer,
    dataset_text_field="text",
    max_seq_length=max_seq_length,
)

We will also pre-process the model by upcasting the layer norms in float 32 for more stable training

## Train the model

Now let's train the model! Simply call `trainer.train()`

In [None]:
# Run the fine-tuning loop configured by `training_arguments`.
trainer.train()

During training, the model should converge nicely as follows:

![image](https://huggingface.co/datasets/trl-internal-testing/example-images/resolve/main/images/loss-falcon-7b.png)



In [None]:
# Persist the fine-tuned weights and the tokenizer side by side.
for artifact in (model, tokenizer):
    artifact.save_pretrained(output_dir)

# Round-trip through disk: rebuild both objects from the files just written.
model = AutoModelForCausalLM.from_pretrained(output_dir)
tokenizer = AutoTokenizer.from_pretrained(output_dir)

# Publish the reloaded model and tokenizer to the same Hub repository.
llama3_repo_id = "Lennard-Heuer/Llama3-FT_V4"
model.push_to_hub(llama3_repo_id)
tokenizer.push_to_hub(llama3_repo_id)

In [None]:
from transformers import Trainer

# `trainer` is the trainer instance that holds the fine-tuned model.

# If the trainer keeps the model inside a parallel/distributed wrapper (which
# exposes the real model as `.module`), unwrap it before saving.
model_to_save = trainer.model.module if hasattr(trainer.model, 'module') else trainer.model
model_to_save.save_pretrained("outputsV3")

# Requires a prior Hub login (`huggingface_hub.login()`).

# Push to the Hub. Replace "your_model_name" with the desired, unique repository name.
# The repository id goes to the model's own push_to_hub: Trainer.push_to_hub
# interprets its first positional argument as the commit message, so passing
# the name there would not select the target repository.
model_to_save.push_to_hub("your_model_name")


In [None]:
# Write the trainer's current model to the local directory "outputsV4".
trainer.model.save_pretrained("outputsV4")


In [None]:
# Re-authenticate with the Hugging Face Hub before the push cells below.
from huggingface_hub import login
login()

The `SFTTrainer` also takes care of properly saving only the adapters during training instead of saving the entire model.

In [None]:
# Trainer.push_to_hub treats its first positional argument as the commit
# message, so the repository id has to be given to the model/tokenizer
# push_to_hub calls instead to end up in "Lennard-Heuer/Llama3-FT-D".
trainer.model.push_to_hub("Lennard-Heuer/Llama3-FT-D")
tokenizer.push_to_hub("Lennard-Heuer/Llama3-FT-D")

In [None]:
# A distributed/parallel wrapper exposes the underlying model as `.module`;
# fall back to the trainer's model itself when there is no such attribute.
model_to_save = getattr(trainer.model, 'module', trainer.model)
model_to_save.save_pretrained("outputsV3")

In [None]:
# The only peft import earlier in the notebook is commented out, so bring the
# names into scope here; without this the cell fails with NameError.
from peft import LoraConfig, get_peft_model

# Read the LoRA configuration stored in 'outputsV3' and wrap the model with it.
# NOTE(review): this expects 'outputsV3' to contain an adapter_config.json —
# confirm that the directory was written from a PEFT model.
lora_config = LoraConfig.from_pretrained('outputsV3')
model = get_peft_model(model, lora_config)

Not in the 1littlecoder script

In [None]:
# LoraConfig / get_peft_model come from peft; they were used here without
# ever being imported, which raised NameError.
from peft import LoraConfig, get_peft_model
from transformers import AutoModel

# Load the base model from the local directory.
model = AutoModel.from_pretrained('outputsV3')

# Attach the saved adapter. load_adapter expects the adapter directory (or a
# Hub repo id), not the path of the adapter_config.json file inside it.
model.load_adapter('outputsV3')

# Load the LoRA configuration stored alongside the adapter.
lora_config = LoraConfig.from_pretrained('outputsV3')

# Wrap the model with the LoRA configuration.
# NOTE(review): calling get_peft_model after load_adapter may re-initialise the
# adapter that was just loaded — confirm whether both steps are really wanted.
model = get_peft_model(model, lora_config)


In [None]:
# Import the peft names locally: no active import for them exists earlier in
# the notebook, so the cell would otherwise stop with NameError.
from peft import LoraConfig, get_peft_model

# Re-read the LoRA configuration from 'outputsV3' and wrap the current model.
# NOTE(review): duplicates an earlier cell; running both wraps the model twice.
lora_config = LoraConfig.from_pretrained('outputsV3')
model = get_peft_model(model, lora_config)

In [None]:
# Preview only the first few samples instead of dumping the whole text column
# into the notebook output.
train_dataset[:5]['text']

In [None]:
text = "Mention a production planning model"
# Follow the model's actual placement instead of hard-coding "cuda:0": the
# model may have been reloaded on CPU by an earlier cell, and inputs on a
# different device than the weights make generate() fail.
device = model.device

# Tokenize the prompt, generate up to 50 new tokens and print the decoded text.
inputs = tokenizer(text, return_tensors="pt").to(device)
outputs = model.generate(**inputs, max_new_tokens=50)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))

In [None]:
# Authenticate with the Hugging Face Hub before pushing the model below.
from huggingface_hub import login
login()

In [None]:
# Upload the current `model` to the Hub repository "llama2-qlora-finetunined-XXX-1".
model.push_to_hub("llama2-qlora-finetunined-XXX-1")

In [None]:
# Hub repository that holds the fine-tuned adapter. Kept as a string: the bare
# path was parsed as arithmetic on undefined names and raised NameError.
hub_repo_id = "Lennard-Heuer/llama2-qlora-finetunined-XXX"
hub_repo_id

In [None]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

# Hub repository to load from.
model_name = "Lennard-Heuer/llama2-qlora-finetunined-XXX"

# 4-bit NF4 quantisation with float16 as the compute dtype.
quant_settings = {
    "load_in_4bit": True,
    "bnb_4bit_quant_type": "nf4",
    "bnb_4bit_compute_dtype": torch.float16,
}
bnb_config = BitsAndBytesConfig(**quant_settings)

# Load the causal-LM with the quantisation settings applied.
model = AutoModelForCausalLM.from_pretrained(model_name, quantization_config=bnb_config, trust_remote_code=True)

# Switch off `use_cache` on the model config.
model.config.use_cache = False

In [None]:
# Load the tokenizer stored under `model_name` and reuse its end-of-sequence
# token as the padding token.
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token

In [None]:
from transformers import AutoModel, AutoConfig, AutoModelForCausalLM
import json
import safetensors.torch
from huggingface_hub import hf_hub_download
from peft import PeftModel

# Hub repository that stores the adapter files.
adapter_repo_id = "Lennard-Heuer/llama2-qlora-finetunined-XXX"

# Load the base pre-trained LLAMA2 model with its language-modelling head, the
# same model class the other adapter-loading cells in this notebook use.
model = AutoModelForCausalLM.from_pretrained("TinyPixel/Llama-2-7B-bf16-sharded")

# The adapter files live in a Hub repository, not on local disk, so open()
# on "<repo id>/<file>" cannot find them. Download each file first and use
# the local path that hf_hub_download returns.
adapter_config_path = hf_hub_download(repo_id=adapter_repo_id, filename="adapter_config.json")
with open(adapter_config_path, "r") as file:
    adapter_config = json.load(file)

# safetensors.torch.load() expects raw bytes; load_file() is the variant that
# takes a file path. Kept so the adapter tensors can be inspected.
adapter_weights_path = hf_hub_download(repo_id=adapter_repo_id, filename="adapter_model.safetensors")
adapter_weights = safetensors.torch.load_file(adapter_weights_path)

# Attach the adapter the same way the rest of the notebook does. This replaces
# the load_adapter(..., config=...) call, whose keyword does not exist, and the
# load_state_dict(strict=False) call, which silently skips non-matching keys.
model = PeftModel.from_pretrained(model, adapter_repo_id)

# The model is now ready to use with the loaded adapter.

# Your model is now ready to use with the loaded adapters


In [None]:
from transformers import AutoModel, AutoConfig
import json
import safetensors.torch

# Load the base pre-trained LLAMA2 model (no adapter is attached in this cell).
model = AutoModel.from_pretrained("TinyPixel/Llama-2-7B-bf16-sharded")

# Adapter-configuration loading is disabled here (left commented out):
# with open("Lennard-Heuer/llama2-qlora-finetunined-XXX/adapter_config.json", "r") as file:
    # adapter_config = json.load(file)


In [None]:
import torch
import transformers
import peft
from peft import PeftModel, PeftConfig
from transformers import AutoModelForCausalLM, AutoTokenizer

In [None]:
!pip install -q peft transformers torch

In [None]:
# Hub repository id of the fine-tuned adapter, used by the loading cells below.
peft_model_id = "Lennard-Heuer/llama2-qlora-finetunined-XXX"

In [None]:
from transformers import BitsAndBytesConfig

# Read the adapter's configuration; it records which base model it belongs to.
config = PeftConfig.from_pretrained(peft_model_id)

# Load that base model in 8-bit. The quantisation request is expressed through
# BitsAndBytesConfig, as in the other loading cells of this notebook, instead
# of the deprecated bare `load_in_8bit=True` keyword.
model = AutoModelForCausalLM.from_pretrained(
    config.base_model_name_or_path,
    return_dict=True,
    quantization_config=BitsAndBytesConfig(load_in_8bit=True),
    device_map='auto',
)
tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)

# Load the Lora model
model = PeftModel.from_pretrained(model, peft_model_id)

In [None]:
# Display the loaded adapter configuration as the cell output.
config

In [None]:
# Tokenize the prompt and move the tensors to the device the model lives on,
# so that generate() does not receive CPU inputs for a GPU-resident model.
batch = tokenizer("“Training models with PEFT and LoRa is cool” ->: ", return_tensors='pt').to(model.device)

# Generate up to 50 new tokens under CUDA autocast.
with torch.cuda.amp.autocast():
  output_tokens = model.generate(**batch, max_new_tokens=50)

print('\n\n', tokenizer.decode(output_tokens[0], skip_special_tokens=True))

In [None]:
# TODO: unfinished assignment. The original line `config =` had no right-hand
# side and was a SyntaxError, so it is commented out until a value is decided.
# config =

In [None]:
import json

import torch
import peft

from huggingface_hub import hf_hub_download
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, AutoTokenizer

# "Lennard-Heuer/llama2-qlora-finetunined-XXX" is a Hub repository, not a local
# folder: download adapter_config.json and open the local path that is returned.
# `json` is imported here so the cell does not depend on an earlier cell.
adapter_config_path = hf_hub_download(
    repo_id="Lennard-Heuer/llama2-qlora-finetunined-XXX",
    filename="adapter_config.json",
)
with open(adapter_config_path, "r") as file:
    adapter_config = json.load(file)


In [None]:
import json
import safetensors.torch
from peft import PeftModel

# "path_to" is a placeholder for the local directory that contains
# adapter_config.json and adapter_model.safetensors — replace before running.

# Load adapter configuration (kept as a plain dict for inspection).
with open("path_to/adapter_config.json", "r") as file:
    adapter_config = json.load(file)

# Read the adapter tensors for inspection. load_file() takes a file path;
# safetensors.torch.load() expects raw bytes and fails on a path.
adapter_weights = safetensors.torch.load_file("path_to/adapter_model.safetensors")

# Attach the adapter to the already-loaded base `model`. PeftModel.from_pretrained
# reads both the config and the weights from the directory; get_peft_model does
# not accept a config dict plus a weights argument, and `peft_config` was undefined.
model = PeftModel.from_pretrained(model, "path_to")


In [None]:
text = "Mention a production planning model"
# Use the device the model actually sits on rather than a hard-coded "cuda:0",
# so the inputs always match the weights' placement.
device = model.device

# Tokenize the prompt, generate up to 50 new tokens and print the decoded text.
inputs = tokenizer(text, return_tensors="pt").to(device)
outputs = model.generate(**inputs, max_new_tokens=50)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))

**New Section**

In [None]:
# Snapshot the installed package versions into requirements2.txt
# (overwrites the file if it already exists, because of the `>` redirect).
!pip freeze > requirements2.txt

In [None]:
!pip install -q -U trl transformers accelerate git+https://github.com/huggingface/peft.git
!pip install -q datasets bitsandbytes einops wandb

In [None]:
!pip install -q peft transformers torch

In [None]:
# Hub repository id of the adapter that this section loads.
peft_model_id = "Lennard-Heuer/llama2-qlora-finetunined-XXX"

In [None]:
import torch
import transformers
import peft
from peft import PeftModel, PeftConfig
from transformers import AutoModelForCausalLM, AutoTokenizer

In [None]:
import accelerate
import bitsandbytes

In [None]:
pip install -i https://pypi.org/simple/ bitsandbytes

In [None]:
pip install accelerate

In [None]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

# Base checkpoint that the adapter is applied to later.
model_name = "TinyPixel/Llama-2-7B-bf16-sharded"

# 4-bit NF4 quantisation with float16 as the compute dtype.
quant_settings = {
    "load_in_4bit": True,
    "bnb_4bit_quant_type": "nf4",
    "bnb_4bit_compute_dtype": torch.float16,
}
bnb_config = BitsAndBytesConfig(**quant_settings)

# Load the causal-LM with the quantisation settings applied.
model = AutoModelForCausalLM.from_pretrained(model_name, quantization_config=bnb_config, trust_remote_code=True)

# Switch off `use_cache` on the model config.
model.config.use_cache = False

In [None]:
import torch
from peft import PeftConfig, PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer

# Adapter repository on the Hub.
peft_model_id = "Lennard-Heuer/llama2-qlora-finetunined-XXX"

# The adapter config names its base model; the tokenizer is taken from there.
config = PeftConfig.from_pretrained(peft_model_id)
base_model_name = config.base_model_name_or_path
tokenizer = AutoTokenizer.from_pretrained(base_model_name)

# Wrap the already-loaded `model` with the adapter from the Hub.
model = PeftModel.from_pretrained(model=model, model_id=peft_model_id)

In [None]:
!pip install -q peft transformers torch

In [None]:
import torch
import transformers
import peft
from peft import PeftModel, PeftConfig
from transformers import AutoModelForCausalLM, AutoTokenizer

In [None]:
# Hub repository id of the adapter (same value as assigned in earlier cells).
peft_model_id = "Lennard-Heuer/llama2-qlora-finetunined-XXX"

In [None]:
# Read the adapter's configuration from the repository named by peft_model_id.
config = PeftConfig.from_pretrained(peft_model_id)
# Load the Lora model
# NOTE(review): `model` may already be a PeftModel from an earlier cell; wrapping
# it again would nest adapters — confirm this runs on a freshly loaded base model.
model = PeftModel.from_pretrained(model, peft_model_id)