In [1]:
import torch

print(torch.cuda.is_available())
print(torch.cuda.current_device())
print(torch.cuda.get_device_name(0))
print(torch.cuda.device_count())

True
0
NVIDIA GeForce GTX 1070
1


In [3]:
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, GemmaTokenizer

model_id = "google/gemma-2b"
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)

tokenizer = AutoTokenizer.from_pretrained(model_id, token='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')
model = AutoModelForCausalLM.from_pretrained(model_id, quantization_config=bnb_config, device_map={"":0}, token='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')

Downloading shards: 100%|██████████| 2/2 [01:11<00:00, 35.60s/it]
Loading checkpoint shards: 100%|██████████| 2/2 [00:02<00:00,  1.12s/it]


In [4]:
text = "Question: What is a workbench in Red Hat OpenShift AI?\nAnswer:"
device = "cuda:0"
inputs = tokenizer(text, return_tensors="pt").to(device)

outputs = model.generate(**inputs, max_new_tokens=50)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))

Question: What is a workbench in Red Hat OpenShift AI?
Answer: A workbench is a collection of Jupyter notebooks that you can use to explore and experiment with AI models. Workbenches are available in the Red Hat OpenShift AI console.

Question: What is a Jupyter notebook?
Answer: A Jupyter notebook is


In [5]:
from peft import LoraConfig

lora_config = LoraConfig(
    r=8,
    target_modules=["q_proj", "o_proj", "k_proj", "v_proj", "gate_proj", "up_proj", "down_proj"],
    task_type="CAUSAL_LM",
)

In [12]:
from datasets import load_dataset

dataset = load_dataset("json", data_files="synthetic.json")
# data = data.map(lambda samples: tokenizer(samples["quote"]), batched=True)

# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_id, token='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')

# Define a function to tokenize both question and answer
def tokenize_function(batch):
    tokenized_batch = tokenizer(batch["question"], batch["answer"], padding="max_length", truncation=True, max_length=128)
    return tokenized_batch

# Tokenize both questions and answers
tokenized_dataset = dataset.map(tokenize_function, batched=True)

# Access tokenized data
print(tokenized_dataset['train'][0])

Generating train split: 100 examples [00:00, 10746.36 examples/s]
Map: 100%|██████████| 100/100 [00:00<00:00, 6867.58 examples/s]

{'question': 'How would you describe a workbench in Red Hat OpenShift AI?', 'answer': 'In this setup, a workbench is an instance of an environment where development and experimentation occur, enabling the selection of a notebook image for data science.', 'input_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2299, 1134, 692, 12637, 476, 182459, 575, 4075, 20825, 6376, 34030, 16481, 235336, 2, 886, 736, 10021, 235269, 476, 182459, 603, 671, 5566, 576, 671, 4473, 1570, 3505, 578, 80848, 5489, 235269, 34500, 573, 8492, 576, 476, 31916, 2416, 604, 1423, 8042, 235265], 'attention_mask': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0




In [13]:
def formatting_func(example):
    text = f"Question: {example['question'][0]}\nAnswer: {example['answer'][0]}<eos>"
    return [text]
formatting_func(tokenized_dataset["train"])

['Question: How would you describe a workbench in Red Hat OpenShift AI?\nAnswer: In this setup, a workbench is an instance of an environment where development and experimentation occur, enabling the selection of a notebook image for data science.<eos>']

In [14]:
import transformers
from trl import SFTTrainer

trainer = SFTTrainer(
    model=model,
    train_dataset=tokenized_dataset["train"],
    args=transformers.TrainingArguments(
        per_device_train_batch_size=1,
        gradient_accumulation_steps=4,
        warmup_steps=2,
        max_steps=20,
        # Copied from other hugging face tuning blog posts
        learning_rate=2e-4,
        fp16=True,
        # It makes training faster
        logging_steps=1,
        output_dir="outputs",
        optim="paged_adamw_8bit"
    ),
    peft_config=lora_config,
    formatting_func=formatting_func,
    packing=False
)
trainer.train()

Map: 100%|██████████| 100/100 [00:00<00:00, 25940.40 examples/s]


Step,Training Loss
1,0.8818
2,0.8818
3,0.8553
4,0.7908
5,0.7255
6,0.6602
7,0.5985
8,0.5449
9,0.5009
10,0.4603


TrainOutput(global_step=20, training_loss=0.48610656931996343, metrics={'train_runtime': 11.3848, 'train_samples_per_second': 7.027, 'train_steps_per_second': 1.757, 'total_flos': 11711138611200.0, 'train_loss': 0.48610656931996343, 'epoch': 20.0})

In [35]:
text = "Question: What is a workbench in Red Hat OpenShift AI?\nAnswer:"
device = "cuda:0"
inputs = tokenizer(text, return_tensors="pt").to(device)
outputs = model.generate(**inputs, max_new_tokens=50)

In [36]:
print(tokenizer.decode(outputs[0], skip_special_tokens=True))

Question: What is a workbench in Red Hat OpenShift AI?
Answer: In this context, a workbench is a development environment that provides a set of tools and resources for creating and testing machine learning models.
