In [None]:
!pip install accelerate peft bitsandbytes transformers trl

: 

In [None]:
# from huggingface_hub import notebook_login
# notebook_login()

: 

In [None]:
# load the required packages.

import torch
from datasets import load_dataset, Dataset
from peft import LoraConfig, AutoPeftModelForCausalLM
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, TrainingArguments
from trl import SFTTrainer
import os

: 

In [None]:
# Dataset identifier handed to `prepare_train_data`.
dataset = "burkelibbey/colors"
# Identifier of the checkpoint that is loaded for fine-tuning and for the merge step.
model_id = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
# Used as the trainer's `output_dir`.
output_model = "tinyllama-colorist-v1"

: 

: 

### Data preparation


In [None]:
# Each training example is rendered as one user turn followed by one assistant turn,
# with every turn closed by the `</s>` marker.

def formatted_train(input, response) -> str:
    """Render a (prompt, answer) pair as a single training string.

    Args:
        input: Text placed in the `<|user|>` turn.
        response: Text placed in the `<|assistant|>` turn.

    Returns:
        The two turns joined into one string, each terminated by `</s>`.
    """
    template = "<|user|>\n{}</s>\n<|assistant|>\n{}</s>"
    return template.format(input, response)

: 

In [None]:
def prepare_train_data(data_id):
    """Load the training split of `data_id` and add a chat-formatted `text` column.

    The template is taken from `formatted_train` so the prompt format is defined in
    exactly one place instead of being duplicated inline.

    Args:
        data_id: Dataset identifier passed to `load_dataset`. The split must contain
            `description` and `color` columns.

    Returns:
        A `Dataset` built from the original columns plus a `text` column holding the
        formatted user/assistant example for each row.
    """
    data = load_dataset(data_id, split="train")
    data_df = data.to_pandas()
    # Plain iteration over the two columns avoids the per-row Series construction of
    # `DataFrame.apply(axis=1)`.
    data_df["text"] = [
        formatted_train(description, color)
        for description, color in zip(data_df["description"], data_df["color"])
    ]
    return Dataset.from_pandas(data_df)

: 

In [None]:
# Load the training split and add the chat-formatted `text` column used for fine-tuning.
data = prepare_train_data(dataset)

: 

In [None]:
data

: 

In [None]:
data[0]

: 

### Loading the model (chat version, not the base version)


In [None]:
def get_model_and_tokenizer(mode_id):
    """Load a 4-bit quantized causal language model together with its tokenizer.

    Args:
        mode_id: Model identifier forwarded to both `from_pretrained` calls.

    Returns:
        A `(model, tokenizer)` tuple.
    """
    # 4-bit NF4 weights with double quantization; computation runs in float16.
    quantization = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype="float16",
        bnb_4bit_use_double_quant=True,
    )

    tok = AutoTokenizer.from_pretrained(mode_id)
    # Padding reuses the end-of-sequence token.
    tok.pad_token = tok.eos_token

    lm = AutoModelForCausalLM.from_pretrained(
        mode_id,
        quantization_config=quantization,
        device_map="auto",
    )
    lm.config.use_cache = False
    lm.config.pretraining_tp = 1
    return lm, tok

: 

In [None]:
# !pip install -i https://test.pypi.org/simple/bitsandbytes

: 

In [None]:
# Load the 4-bit quantized model and its tokenizer for `model_id`.
model, tokenizer = get_model_and_tokenizer(model_id)

: 

### Setting up the LoRA


In [None]:
# LoRA adapter settings handed to the trainer.
peft_config = LoraConfig(
    task_type="CAUSAL_LM",
    r=8,
    lora_alpha=16,
    lora_dropout=0.05,
    bias="none",
)

: 

In [None]:
# Training length is controlled by `max_steps` alone. The previous `num_train_epochs=3`
# was removed because a positive `max_steps` overrides it, so it never had any effect.
training_arguments = TrainingArguments(
    output_dir=output_model,
    # 16 samples x 4 accumulation steps = 64 samples per optimizer step (per device).
    per_device_train_batch_size=16,
    gradient_accumulation_steps=4,
    optim="paged_adamw_32bit",
    learning_rate=2e-4,
    lr_scheduler_type="cosine",
    save_strategy="epoch",
    logging_steps=10,
    max_steps=250,
    fp16=True,
    # push_to_hub=True
)

: 

In [None]:
# Supervised fine-tuning on the `text` column of `data`, training LoRA adapters
# described by `peft_config`; examples are not packed together.
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    args=training_arguments,
    peft_config=peft_config,
    train_dataset=data,
    dataset_text_field="text",
    max_seq_length=1024,
    packing=False,
)

: 

In [None]:
# Start fine-tuning with the settings from `training_arguments`.
trainer.train()

: 

: 

### Merging the LoRA with the base model


In [None]:
from peft import AutoPeftModelForCausalLM, PeftModel
from transformers import AutoModelForCausalLM
import torch
import os

# Reload the base model in float16 (not quantized) so the adapter can be merged into it.
# NOTE(review): `trust_remote_code=True` permits running code shipped in the model repo;
# presumably unnecessary for this model — confirm and drop.
model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float16, load_in_8bit=False,
                                             device_map="auto",
                                             trust_remote_code=True)

# Checkpoints are written under the trainer's `output_dir` (`output_model`), relative to
# the working directory. Resolving the path from it keeps this cell working outside
# Colab, where the previous hard-coded "/content/..." path does not exist.
# 250 is the `max_steps` value used for training.
model_path = os.path.join(output_model, "checkpoint-250")

peft_model = PeftModel.from_pretrained(model, model_path, from_transformers=True, device_map="auto")

# Fold the adapter weights into the base model.
model = peft_model.merge_and_unload()

: 

In [None]:
model

: 

In [None]:
import os

# Never embed access tokens in a notebook: the token that used to be hard-coded here
# is exposed and should be revoked. The token is now read from the environment.
# When `HF_TOKEN` is unset this passes `token=None`; presumably the hub client then
# falls back to a cached login (e.g. `notebook_login()`) — confirm.
hf_token = os.environ.get("HF_TOKEN")
hub_repo = "soheilfm/tinyllama-colorist-v0"

model.push_to_hub(hub_repo, token=hf_token)  # Online saving
tokenizer.push_to_hub(hub_repo, token=hf_token)  # Online saving

### Inference from the LLM


In [None]:
from transformers import GenerationConfig
from time import perf_counter

def generate_response(user_input):
  """Generate a reply to `user_input` and print it together with the elapsed time.

  Uses the notebook-level `model`, `tokenizer` and `formatted_prompt`.

  Args:
    user_input: Text placed in the user turn of the prompt.

  Returns:
    None. The decoded sequence (which includes the prompt text) and the timing are
    printed.
  """
  prompt = formatted_prompt(user_input)

  # NOTE(review): `penalty_alpha` is a contrastive-search setting; combined with
  # `do_sample=True` it presumably has no effect — confirm and drop one of them.
  generation_config = GenerationConfig(penalty_alpha=0.6,do_sample = True,
      top_k=5,temperature=0.5,repetition_penalty=1.2,
      max_new_tokens=12,pad_token_id=tokenizer.eos_token_id
  )
  start_time = perf_counter()

  # Tokenize once (an earlier, unused tokenization was removed) and place the tensors
  # on the model's own device instead of assuming CUDA.
  inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

  outputs = model.generate(**inputs, generation_config=generation_config)
  print(tokenizer.decode(outputs[0], skip_special_tokens=True))
  output_time = perf_counter() - start_time
  print(f"Time taken for inference: {round(output_time,2)} seconds")

In [None]:
def formatted_prompt(question) -> str:
    """Wrap `question` in a user turn and open the assistant turn for generation.

    Args:
        question: Text placed in the `<|user|>` turn.

    Returns:
        The prompt string, ending with the `<|assistant|>` marker.
    """
    template = "<|user|>\n{}</s>\n<|assistant|>"
    return template.format(question)

In [None]:
def print_color_space(hex_color):
    """Print `hex_color` followed by a terminal swatch filled with that color.

    Args:
        hex_color: Hex color string such as '#ffd077'; leading '#' characters are
            ignored when parsing, and the first six remaining characters are read
            as the red, green and blue bytes.

    Raises:
        ValueError: If any of the three two-character groups is not valid hex.
    """
    digits = hex_color.lstrip('#')
    r, g, b = tuple(int(digits[pos:pos + 2], 16) for pos in range(0, 6, 2))
    # 24-bit background-color escape sequence, a run of spaces, then a reset.
    swatch = f'\033[48;2;{r};{g};{b}m           \033[0m'
    print(f'{hex_color}: {swatch}')

In [None]:
generate_response(user_input='Light Orange color')

<|user|>
Light Orange color 
<|assistant|>
#ffd077 : A vibrant
Time taken for inference: 0.74 seconds


In [None]:
print_color_space('#ffd077')


#ffd077: [48;2;255;208;119m           [0m


In [None]:
import os

import requests

API_URL = "https://api-inference.huggingface.co/models/soheilfm/tinyllama-colorist-v1"
# The API token is read from the environment. The token that used to be hard-coded
# here is exposed and should be revoked.
headers = {"Authorization": f"Bearer {os.environ['HF_TOKEN']}"}

def query(payload):
    """POST `payload` as JSON to `API_URL` and return the decoded JSON response."""
    response = requests.post(API_URL, headers=headers, json=payload)
    return response.json()

output = query({
    "inputs": "Light Orange color",
})

In [None]:
import os

import requests

API_URL = "https://api-inference.huggingface.co/models/soheilfm/tinyllama-colorist-v0"
# The API token is read from the environment. The token that used to be hard-coded
# here is exposed and should be revoked.
headers = {"Authorization": f"Bearer {os.environ['HF_TOKEN']}"}

def query(payload):
    """POST `payload` as JSON to `API_URL` and return the decoded JSON response."""
    response = requests.post(API_URL, headers=headers, json=payload)
    return response.json()

# The prompt follows the user/assistant turn layout used for training.
output = query({
    "inputs": "<|user|>\n'Light Orange color'</s>\n<|assistant|>",
})

In [None]:
output

[{'generated_text': "<|user|>\n'Light Orange color'</s>\n<|assistant|>\n#f0b066 => A vibrant, warm shade of orange with a touch of yellow, reminiscent of sun-kissed citrus fruits or a warm autumn day. It's a bright and cheerful color that is sure to catch the eye.\n\n#f0b066"}]

In [None]:
print_color_space('#f0b066')

#f0b066: [48;2;240;176;102m           [0m


In [None]:
import os

import requests

API_URL = "https://rq1xy8t2nomy0oy2.us-east-1.aws.endpoints.huggingface.cloud"
# The API token is read from the environment rather than from a placeholder literal
# that has to be edited by hand (and risks being committed once filled in).
headers = {
    "Accept" : "application/json",
    "Authorization": f"Bearer {os.environ['HF_TOKEN']}",
    "Content-Type": "application/json"
}

def query(payload):
    """POST `payload` as JSON to `API_URL` and return the decoded JSON response."""
    response = requests.post(API_URL, headers=headers, json=payload)
    return response.json()

# The prompt follows the user/assistant turn layout used for training.
output = query({
    "inputs": "<|user|>\n'Light Orange color'</s>\n<|assistant|>",
    "parameters": {}
})

In [None]:
output

[{'generated_text': "<|user|>\n'Light Orange color'</s>\n<|assistant|>\n#f0b06000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000"}]