## Installing and Importing the Dependencies

In [None]:
!pip install transformers
!pip install torch
!pip install peft
!pip install trl
!pip install accelerate
!pip install datasets
!pip install huggingface_hub
!pip install bitsandbytes
!pip install sentencepiece
!pip install einops

In [None]:
# Log in to the Hugging Face Hub; the CLI prompts interactively for an access token.
!huggingface-cli login


    _|    _|  _|    _|    _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|_|_|_|    _|_|      _|_|_|  _|_|_|_|
    _|    _|  _|    _|  _|        _|          _|    _|_|    _|  _|            _|        _|    _|  _|        _|
    _|_|_|_|  _|    _|  _|  _|_|  _|  _|_|    _|    _|  _|  _|  _|  _|_|      _|_|_|    _|_|_|_|  _|        _|_|_|
    _|    _|  _|    _|  _|    _|  _|    _|    _|    _|    _|_|  _|    _|      _|        _|    _|  _|        _|
    _|    _|    _|_|      _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|        _|    _|    _|_|_|  _|_|_|_|

    A token is already saved on your machine. Run `huggingface-cli whoami` to get more information or `huggingface-cli logout` if you want to log out.
    Setting a new token will erase the existing one.
    To login, `huggingface_hub` requires a token generated from https://huggingface.co/settings/tokens .
Token: 
Add token as git credential? (Y/n) n
Token is valid (permission: write).
Your token has been saved to /root/.c

In [None]:
from transformers import TrainingArguments, BitsAndBytesConfig, AutoModelForCausalLM, AutoTokenizer
from peft import LoraConfig, PeftModel
from datasets import load_dataset
import torch
from trl import SFTTrainer
from transformers import pipeline
from peft import prepare_model_for_kbit_training



### Downloading the Llama2 Model from HuggingFace

In [None]:
# Hub identifiers and local names used throughout the notebook.
model_name = "meta-llama/Llama-2-7b-chat-hf"  # base checkpoint to fine-tune
dataset = "PiyushLavaniya/HTML_Dataset_for_LLama2_Finetuning"  # Hub dataset id (later rebound to the loaded Dataset)
new_model = "Website_Designer-Finetuned_Llama2"  # name used when saving and pushing the fine-tuned model
output_dir = "Finedtuned_Llama2"  # passed to TrainingArguments as the output directory

#### Specifying Quantization Config

In [None]:
# Quantization settings handed to `from_pretrained`: 4-bit weights using the
# "nf4" quant type, with float16 as the compute dtype.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
)

In [None]:
# Use the first CUDA device when one is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    DEVICE = 'cuda:0'
else:
    DEVICE = 'cpu'

In [None]:
# Load the base checkpoint with the quantization settings above, placed on DEVICE.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map=DEVICE,
    quantization_config=bnb_config,
)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [None]:
# Disable `use_cache` on the model config; the warning printed later in this
# notebook notes that `use_cache=True` is incompatible with gradient checkpointing.
model.config.use_cache = False

In [None]:
# Tokenizer for the base checkpoint.
tokenizer = AutoTokenizer.from_pretrained(model_name)
# Pad on the right-hand side and reuse the EOS token as the padding token.
tokenizer.padding_side = 'right'
tokenizer.pad_token = tokenizer.eos_token

### Testing the Base Model before Finetuning

In [None]:
# Test prompt for the base model (adjacent literals concatenate into one string).
prompt = (
    'Write an HTML template to display a greeting message. '
    'The message should include the name of the user who visits the website.'
)

In [None]:
# Text-generation pipeline around the loaded model; output is capped at 200 tokens
# via `max_length`.
pipe = pipeline(
    task='text-generation',
    model=model,
    tokenizer=tokenizer,
    max_length=200,
)

In [None]:
# Wrap the prompt in [INST] ... [/INST] tags, generate, and print the first result's text.
wrapped_prompt = f"[INST] {prompt} [/INST]"
result = pipe(wrapped_prompt)
print(result[0]['generated_text'])

[INST] Write an HTML template to display a greeting message. The message should include the name of the user who visits the website. [/INST]  Sure! Here is an HTML template that displays a greeting message with the name of the user who visits the website:
```
<!DOCTYPE html>
<html>
<head>
  <title>Welcome to My Website</title>
</head>
<body>
  <h1>Welcome, <span id="user-name">{{ user_name }}</span>!</h1>
  <p>You are visitor number {{ visitor_num }} today.</p>
</body>
</html>
```
Here's how the template works:

* The `<!DOCTYPE html>` declaration specifies the document type and version of HTML being used.
* The `<html>` element is the root element of the HTML document


### Downloading the Dataset for Finetuning
#### This dataset is already preprocessed according to the prompt template of the Llama 2 model. If you want to know how to preprocess the dataset yourself, refer to the following link:
https://github.com/PiyushLavaniya/Website_Designer_Llama2-Finetuned/blob/main/Preparing_dataset_for_llama2_Finetuning.ipynb

In [None]:
# Load the training split from the Hub. `dataset` starts out as the repo-id
# string and is rebound to the loaded Dataset here, so only load while it is
# still a string; re-running this cell would otherwise pass a Dataset object
# to `load_dataset`.
if isinstance(dataset, str):
    dataset = load_dataset(dataset, split = 'train')

In [None]:
# Display the dataset summary (feature names and number of rows).
dataset

Dataset({
    features: ['text'],
    num_rows: 2000
})

In [None]:
# Turn on gradient checkpointing, then run PEFT's k-bit training preparation
# on the quantized model and rebind `model` to the result.
# NOTE(review): prepare_model_for_kbit_training may already enable gradient
# checkpointing by default — confirm against the installed PEFT version.
model.gradient_checkpointing_enable()
model = prepare_model_for_kbit_training(model)

## Finetuning the Llama2 for Website Designing

In [None]:
from trl import SFTTrainer

### Specifying the LoRA Config for training

In [None]:
# LoRA settings for causal-LM fine-tuning: adapters on the four attention
# projection modules, with no bias parameters trained.
peft_config = LoraConfig(
    task_type='CAUSAL_LM',
    target_modules=['q_proj', 'k_proj', 'v_proj', 'o_proj'],
    r=64,
    lora_alpha=16,
    lora_dropout=0.1,
    bias='none',
)

### Setting up Hyperparameters

In [None]:
# Hyperparameters for the fine-tuning run, grouped by concern.
training_arguments = TrainingArguments(
    # Output and logging
    output_dir=output_dir,
    report_to='tensorboard',
    logging_steps=20,
    # Run length: one epoch; max_steps=-1 does not cap the step count
    num_train_epochs=1,
    max_steps=-1,
    # Batching
    per_device_train_batch_size=4,
    gradient_accumulation_steps=2,
    group_by_length=True,
    # Optimizer and learning-rate schedule
    optim='paged_adamw_32bit',
    learning_rate=2e-4,
    lr_scheduler_type='cosine',
    warmup_ratio=0.03,
    max_grad_norm=0.3,
    fp16=False,
    # Evaluation is currently disabled; the options previously sketched were:
    #   evaluation_strategy='steps', eval_steps=0.2
    # Original note said the seed matches the one used for data shuffling;
    # no shuffling step appears in this notebook.
    seed=42,
)

# Keep `use_cache` off on the model config for training.
model.config.use_cache = False

### Creating the trainer Object

In [None]:
# Supervised fine-tuning trainer: trains LoRA adapters on the dataset's "text" column.
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,            # the Llama tokenizer configured above
    args=training_arguments,        # hyperparameters defined above
    peft_config=peft_config,        # LoRA configuration defined above
    train_dataset=dataset,
    dataset_text_field="text",
    packing=False,                  # examples are not packed together
    # NOTE(review): None probably falls back to TRL's default length cap rather
    # than meaning "unlimited" — confirm for the installed TRL version.
    max_seq_length=None,
)



### Start training

In [None]:
# Run the fine-tuning loop; as the last expression, its TrainOutput summary is displayed.
trainer.train()

You're using a LlamaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Step,Training Loss
20,1.6394
40,1.2732
60,0.9586
80,0.9626
100,1.0508
120,0.8671
140,0.97
160,0.8525
180,0.9203
200,0.9998


TrainOutput(global_step=250, training_loss=1.0310461044311523, metrics={'train_runtime': 3526.4937, 'train_samples_per_second': 0.567, 'train_steps_per_second': 0.071, 'total_flos': 9813687761633280.0, 'train_loss': 1.0310461044311523, 'epoch': 1.0})

In [None]:
# Save the trained model held by the trainer to the local path `new_model`;
# that path is loaded later with PeftModel.from_pretrained.
trainer.model.save_pretrained(new_model)

## Inference using the Finetuned Model

In [None]:
# Same test prompt as used on the base model, so the two outputs can be compared.
prompt = ' '.join([
    'Write an HTML template to display a greeting message.',
    'The message should include the name of the user who visits the website.',
])

In [None]:
# Training leaves the model in train mode, which keeps dropout (including the
# LoRA dropout) active during generation. The `use_cache` / gradient
# checkpointing warning in this notebook's inference output is consistent with
# that. Switch to eval mode before building the inference pipeline.
model.eval()
pipe = pipeline(task = 'text-generation', model = model, tokenizer = tokenizer, max_length = 200)

In [None]:
# Wrap the prompt in [INST] ... [/INST] tags, generate, and print the first result's text.
wrapped_prompt = f"[INST] {prompt} [/INST]"
result = pipe(wrapped_prompt)
print(result[0]['generated_text'])

`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...


[INST] Write an HTML template to display a greeting message. The message should include the name of the user who visits the website. [/INST] <!DOCTYPE html>
<html>
<head>
  <title>Greeting Message</title>
</head>
<body>
  <h1>Hello, <username>!</h1>
</body>
</html> 


## Merging the Base Model and Finetuned Model

#### To push the model to HuggingFace Hub, we need to merge the Base Model with our Finetuned Adapters.

In [None]:
import gc

# Drop the references to the training-time model, pipeline, and trainer so
# their memory can be reclaimed before the base model is reloaded.
del model, pipe, trainer
gc.collect()
gc.collect()
# Release PyTorch's cached, now-unused CUDA memory as well; this does nothing
# if CUDA was never initialized.
torch.cuda.empty_cache()

In [None]:
# Reload the base checkpoint without quantization, in float16, on DEVICE; the
# saved adapters are attached to this copy in the next step.
base_model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map=DEVICE,
    torch_dtype=torch.float16,
    return_dict=True,
    low_cpu_mem_usage=True,
)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [None]:
# Attach the adapters saved under `new_model` to the freshly loaded base model.
model = PeftModel.from_pretrained(base_model, new_model)

In [None]:
# Merge the adapter weights into the base model and rebind `model` to the merged result.
model = model.merge_and_unload()

In [None]:
# Reload the tokenizer that is pushed alongside the merged model. It is loaded
# the same way as in the earlier tokenizer cell: `trust_remote_code` was not
# needed there, so it is not enabled here either (it would permit running
# code shipped in the Hub repository).
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = 'right'

In [None]:
# Log in to the Hugging Face Hub again before uploading (the recorded output shows a write-permission token).
!huggingface-cli login


    _|    _|  _|    _|    _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|_|_|_|    _|_|      _|_|_|  _|_|_|_|
    _|    _|  _|    _|  _|        _|          _|    _|_|    _|  _|            _|        _|    _|  _|        _|
    _|_|_|_|  _|    _|  _|  _|_|  _|  _|_|    _|    _|  _|  _|  _|  _|_|      _|_|_|    _|_|_|_|  _|        _|_|_|
    _|    _|  _|    _|  _|    _|  _|    _|    _|    _|    _|_|  _|    _|      _|        _|    _|  _|        _|
    _|    _|    _|_|      _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|        _|    _|    _|_|_|  _|_|_|_|

    A token is already saved on your machine. Run `huggingface-cli whoami` to get more information or `huggingface-cli logout` if you want to log out.
    Setting a new token will erase the existing one.
    To login, `huggingface_hub` requires a token generated from https://huggingface.co/settings/tokens .
Token: 
Add token as git credential? (Y/n) n
Token is valid (permission: write).
Your token has been saved to /root/.c

In [None]:
# Upload the merged model and then the tokenizer to the Hub repo named by `new_model`.
# `use_temp_dir = False` is passed to both calls.
# NOTE(review): presumably this stages files in a local directory named after the repo instead of a temp dir — confirm.
model.push_to_hub(new_model, use_temp_dir = False)
tokenizer.push_to_hub(new_model, use_temp_dir = False)

model-00001-of-00003.safetensors:   0%|          | 0.00/4.94G [00:00<?, ?B/s]

model-00003-of-00003.safetensors:   0%|          | 0.00/3.59G [00:00<?, ?B/s]

model-00002-of-00003.safetensors:   0%|          | 0.00/4.95G [00:00<?, ?B/s]

Upload 3 LFS files:   0%|          | 0/3 [00:00<?, ?it/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/PiyushLavaniya/Website_Designer-Finetuned_Llama2/commit/3d0ca3edb066d5d9606dbe32e5278b32abc63be7', commit_message='Upload tokenizer', commit_description='', oid='3d0ca3edb066d5d9606dbe32e5278b32abc63be7', pr_url=None, pr_revision=None, pr_num=None)

## If you want to download and test out the Model yourself, refer to the following link:
https://huggingface.co/PiyushLavaniya/Website_Designer-Finetuned_Llama2