# **Installing and Importing Necessary Libraries**

In [1]:
!pip install -q -U transformers peft accelerate optimum

In [2]:
!pip install -q auto-gptq --extra-index-url https://huggingface.github.io/autogptq-index/whl/cu117/

In [3]:
from transformers import AutoModelForCausalLM, AutoTokenizer, GPTQConfig, set_seed
import torch


In [4]:
# Fix the random seed up front so repeated runs are comparable.
set_seed(seed=42)

# **Loading and Preparing a Quantized Model for Training**

In [5]:
from peft import prepare_model_for_kbit_training

# Hub id of the already-quantized (GPTQ) Llama-2 chat checkpoint.
model_id = "TheBloke/Llama-2-7b-Chat-GPTQ"

# Loading-time GPTQ options: 4-bit weights, exllama kernels disabled.
quantization_config_loading = GPTQConfig(disable_exllama=True, bits=4)

# device_map="auto" lets the library decide where to place the weights.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    quantization_config=quantization_config_loading,
)

You passed `quantization_config` to `from_pretrained` but the model you're loading already has a `quantization_config` attribute and has already quantized weights. However, loading attributes (e.g. disable_exllama, use_cuda_fp16) will be overwritten with the one you passed to `from_pretrained`. The rest will be ignored.


In [6]:
# Display the quantization settings that are actually in effect on the loaded model.
quant_config = model.config.quantization_config
quant_config.to_dict()

{'quant_method': <QuantizationMethod.GPTQ: 'gptq'>,
 'bits': 4,
 'tokenizer': None,
 'dataset': None,
 'group_size': 128,
 'damp_percent': 0.01,
 'desc_act': False,
 'sym': True,
 'true_sequential': True,
 'use_cuda_fp16': False,
 'model_seqlen': None,
 'block_name_to_quantize': None,
 'module_name_preceding_first_block': None,
 'batch_size': 1,
 'pad_token_id': None,
 'disable_exllama': True}

In [7]:
# Run PEFT's k-bit preparation helper on the quantized model before adapters are attached.
model = prepare_model_for_kbit_training(model=model)

# **Configuring and Initializing LoRA Parameters for the Model**

In [8]:
from peft import LoraConfig, get_peft_model
# LoRA adapter settings: rank-8 adapters on the k/o/q/v projection modules.
# NOTE(review): lora_alpha=512 with r=8 is a very large alpha-to-rank ratio —
# confirm it is intentional for this experiment.
config = LoraConfig(
    task_type="CAUSAL_LM",
    target_modules=["k_proj", "o_proj", "q_proj", "v_proj"],
    r=8,
    lora_alpha=512,
    lora_dropout=0.1,
    bias="none",
)

# Wrap the prepared model with the adapters, then report the trainable parameter count.
model = get_peft_model(model, config)
model.print_trainable_parameters()

trainable params: 8,388,608 || all params: 270,798,848 || trainable%: 3.097726619575575


In [9]:
# Load the tokenizer published alongside the quantized checkpoint.
tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path=model_id)

# **Loading Dataset**

In [10]:
from datasets import load_dataset

def tokenize_quotes(samples):
    """Tokenize the "quote" column of a batch of rows with the notebook's tokenizer."""
    return tokenizer(samples["quote"])


# Download the quotes dataset and tokenize it batch-wise in one pass.
data = load_dataset("Abirate/english_quotes").map(tokenize_quotes, batched=True)

# **Splitting Dataset**





In [11]:
# Hold out 20% of the "train" split for evaluation; the fixed seed keeps the split stable.
# Note: `data` is rebound to the split result, so re-running this cell alone
# would split the already-reduced train set again.
data = data["train"].train_test_split(test_size=0.2, shuffle=True, seed=42)
data

DatasetDict({
    train: Dataset({
        features: ['quote', 'author', 'tags', 'input_ids', 'attention_mask'],
        num_rows: 2006
    })
    test: Dataset({
        features: ['quote', 'author', 'tags', 'input_ids', 'attention_mask'],
        num_rows: 502
    })
})

# **Defining Training Arguments and Setting Up Trainer**

In [12]:
from transformers import Trainer, TrainingArguments, DataCollatorForLanguageModeling


# The Llama 2 tokenizer needs a pad token for batching; reuse EOS for padding.
tokenizer.pad_token = tokenizer.eos_token

# Short fine-tuning run: 100 optimizer steps, evaluating and logging every 20.
trainer = Trainer(
    model=model,
    train_dataset=data["train"],
    eval_dataset=data["test"],
    args=TrainingArguments(
        per_device_train_batch_size=1,
        per_device_eval_batch_size=1,
        evaluation_strategy="steps",
        eval_steps=20,
        max_steps=100,
        learning_rate=2e-4,
        logging_steps=20,
        output_dir="outputs",
        optim="adamw_hf"
    ),
    # mlm=False selects causal language modeling rather than masked LM.
    data_collator=DataCollatorForLanguageModeling(tokenizer, mlm=False)
)
model.config.use_cache = False  # silence the warnings. Please re-enable for inference!
train_result = trainer.train()

# No save_steps/save_strategy is configured and the run stops at max_steps=100,
# so training may end without ever writing a checkpoint (the default save
# interval is larger than this run). Persist the trained adapter explicitly so
# that `outputs` always contains the weights produced by THIS run.
trainer.save_model()

# Keep the TrainOutput summary as the cell's displayed result.
train_result

You're using a LlamaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Step,Training Loss,Validation Loss
20,6.4379,7.75543
40,7.7224,7.987885
60,7.81,7.08017
80,6.895,6.774339
100,6.7451,6.709042


TrainOutput(global_step=100, training_loss=7.122088928222656, metrics={'train_runtime': 405.5122, 'train_samples_per_second': 0.247, 'train_steps_per_second': 0.247, 'total_flos': 5477012987904.0, 'train_loss': 7.122088928222656, 'epoch': 0.05})

In [13]:


from huggingface_hub import HfApi
# Hub client reused by the later cells to create the repository and upload the output folder.
api = HfApi()

In [14]:
from huggingface_hub import notebook_login
# Renders the interactive Hugging Face login widget; authenticate here before
# the repo-creation and upload cells are run.
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [15]:
# Create the target model repository on the Hub.
# exist_ok=True keeps this cell re-runnable: without it, a second run fails
# because the repository already exists.
api.create_repo(
    "DrishtiSharma/llama-2-chat-gptq-english-quotes-test-lora-alpha-512",
    exist_ok=True,
)

RepoUrl('https://huggingface.co/DrishtiSharma/llama-2-chat-gptq-english-quotes-test-lora-alpha-512', endpoint='https://huggingface.co', repo_type='model', repo_id='DrishtiSharma/llama-2-chat-gptq-english-quotes-test-lora-alpha-512')

In [16]:
# Upload the Trainer's output folder to the root of the model repository.
# The folder path is the same relative "outputs" directory passed as
# `output_dir` to TrainingArguments, instead of a hard-coded absolute
# "/content/outputs" that only resolves when the working directory is /content.
# NOTE(review): everything under the folder is uploaded, including any
# optimizer/scheduler state and event logs left by earlier runs — confirm
# that is intended.
api.upload_folder(
    folder_path="outputs",
    path_in_repo=".",
    repo_id="DrishtiSharma/llama-2-chat-gptq-english-quotes-test-lora-alpha-512",
    repo_type="model",
)

optimizer.pt:   0%|          | 0.00/67.3M [00:00<?, ?B/s]

rng_state.pth:   0%|          | 0.00/14.6k [00:00<?, ?B/s]

adapter_model.bin:   0%|          | 0.00/33.6M [00:00<?, ?B/s]

scheduler.pt:   0%|          | 0.00/627 [00:00<?, ?B/s]

Upload 19 LFS files:   0%|          | 0/19 [00:00<?, ?it/s]

training_args.bin:   0%|          | 0.00/3.96k [00:00<?, ?B/s]

events.out.tfevents.1694639806.b04c78a5e2b1.723.0:   0%|          | 0.00/4.18k [00:00<?, ?B/s]

events.out.tfevents.1694639943.b04c78a5e2b1.5549.0:   0%|          | 0.00/4.55k [00:00<?, ?B/s]

events.out.tfevents.1694640132.b04c78a5e2b1.5549.1:   0%|          | 0.00/4.55k [00:00<?, ?B/s]

events.out.tfevents.1694640189.b04c78a5e2b1.6813.0:   0%|          | 0.00/4.98k [00:00<?, ?B/s]

events.out.tfevents.1694640653.b04c78a5e2b1.8924.0:   0%|          | 0.00/6.15k [00:00<?, ?B/s]

events.out.tfevents.1694641228.b04c78a5e2b1.11505.0:   0%|          | 0.00/5.12k [00:00<?, ?B/s]

events.out.tfevents.1694641365.b04c78a5e2b1.12272.0:   0%|          | 0.00/7.00k [00:00<?, ?B/s]

events.out.tfevents.1694642430.b04c78a5e2b1.16941.0:   0%|          | 0.00/7.00k [00:00<?, ?B/s]

events.out.tfevents.1694643232.b04c78a5e2b1.20474.0:   0%|          | 0.00/7.00k [00:00<?, ?B/s]

events.out.tfevents.1694643847.b04c78a5e2b1.23280.0:   0%|          | 0.00/7.00k [00:00<?, ?B/s]

events.out.tfevents.1694644451.b04c78a5e2b1.26039.0:   0%|          | 0.00/7.00k [00:00<?, ?B/s]

events.out.tfevents.1694645235.b04c78a5e2b1.29429.0:   0%|          | 0.00/7.00k [00:00<?, ?B/s]

events.out.tfevents.1694645927.b04c78a5e2b1.32525.0:   0%|          | 0.00/7.00k [00:00<?, ?B/s]

events.out.tfevents.1694646565.b04c78a5e2b1.35423.0:   0%|          | 0.00/7.00k [00:00<?, ?B/s]

'https://huggingface.co/DrishtiSharma/llama-2-chat-gptq-english-quotes-test-lora-alpha-512/tree/main/.'