In [1]:
import torch

print(torch.cuda.is_available())
print(torch.cuda.current_device())
print(torch.cuda.get_device_name(0))
print(torch.cuda.device_count())

True
0
NVIDIA GeForce GTX 1070
1


In [2]:
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, GemmaTokenizer

model_id = "google/gemma-2b"
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)

tokenizer = AutoTokenizer.from_pretrained(model_id, token='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')
model = AutoModelForCausalLM.from_pretrained(model_id, quantization_config=bnb_config, device_map={"":0}, token='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')

  from .autonotebook import tqdm as notebook_tqdm
Loading checkpoint shards: 100%|██████████| 2/2 [00:06<00:00,  3.05s/it]


In [3]:
text = "Question: What is a workbench in Red Hat OpenShift AI?\nAnswer:"
device = "cuda:0"
inputs = tokenizer(text, return_tensors="pt").to(device)

outputs = model.generate(**inputs, max_new_tokens=50)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))

Question: What is a workbench in Red Hat OpenShift AI?
Answer: A workbench is a collection of Jupyter notebooks that you can use to explore and experiment with AI models. Workbenches are available in the Red Hat OpenShift AI console.

Question: What is a Jupyter notebook?
Answer: A Jupyter notebook is


In [4]:
text = "Question: What serving options does OpenShift AI provide?\nAnswer:"
device = "cuda:0"
inputs = tokenizer(text, return_tensors="pt").to(device)

outputs = model.generate(**inputs, max_new_tokens=80)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))

Question: What serving options does OpenShift AI provide?
Answer: OpenShift AI provides a variety of serving options, including:

* <strong>OpenShift Container Platform</strong>: OpenShift Container Platform is a fully managed Kubernetes-based container orchestration platform that automates the deployment, scaling, and management of containerized applications. It provides a unified management interface for container orchestration, container registry, and container registry management.
* <strong>OpenShift Container Registry</strong>


In [5]:
text = "Question: What are the components of an OpenShift AI data science project?\nAnswer:"
device = "cuda:0"
inputs = tokenizer(text, return_tensors="pt").to(device)

outputs = model.generate(**inputs, max_new_tokens=80)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))

Question: What are the components of an OpenShift AI data science project?
Answer: The components of an OpenShift AI data science project are:

* <strong>Data</strong>: The data that is used to train the model.
* <strong>Model</strong>: The model that is trained on the data.
* <strong>Deployment</strong>: The deployment of the model to the production environment.
* <strong>Monitoring</strong>: The monitoring of the model to ensure it is performing as


In [6]:
from peft import LoraConfig

lora_config = LoraConfig(
    r=8,
    target_modules=["q_proj", "o_proj", "k_proj", "v_proj", "gate_proj", "up_proj", "down_proj"],
    task_type="CAUSAL_LM",
)

In [7]:
# from datasets import load_dataset
# dataset = load_dataset("json", data_files="synthetic_component_data.json")

from datasets import load_dataset, concatenate_datasets
dataset1 = load_dataset("json", data_files="synthetic_component_data.json")
dataset2 = load_dataset("json", data_files="synthetic_model_serve_data.json")
dataset3 = load_dataset("json", data_files="synthetic_workbench_data.json")

print(dataset1)
print(dataset2)
print(dataset3)

combined_dataset = concatenate_datasets([dataset1['train'], dataset2['train'], dataset3['train']])
shuffled_combined_dataset = combined_dataset.shuffle(seed=42)
print(shuffled_combined_dataset)

DatasetDict({
    train: Dataset({
        features: ['question', 'answer'],
        num_rows: 68
    })
})
DatasetDict({
    train: Dataset({
        features: ['question', 'answer'],
        num_rows: 85
    })
})
DatasetDict({
    train: Dataset({
        features: ['question', 'answer'],
        num_rows: 100
    })
})
Dataset({
    features: ['question', 'answer'],
    num_rows: 253
})


In [9]:
# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_id, token='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')

# Define a function to tokenize both question and answer
def tokenize_function(batch):
    tokenized_batch = tokenizer(batch["question"], batch["answer"], padding="max_length", truncation=True, max_length=128)
    return tokenized_batch

# Tokenize both questions and answers
tokenized_dataset = shuffled_combined_dataset.map(tokenize_function, batched=True)

# Access tokenized data
print(tokenized_dataset[0])

{'question': 'What elements are indispensable for the architecture of an OpenShift AI data science project?', 'answer': 'Indispensable elements for the architecture of a data science project are workbenches, pipelines, model server, cluster storage, data connections, and permissions.', 'input_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 1841, 6635, 708, 46314, 604, 573, 16333, 576, 671, 6376, 34030, 16481, 1423, 8042, 3542, 235336, 2, 2230, 502, 18967, 887, 6635, 604, 573, 16333, 576, 476, 1423, 8042, 3542, 708, 1160, 2352, 2127, 235269, 88364, 235269, 2091, 6934, 235269, 16952, 7387, 235269, 1423, 17922, 235269, 578, 36058, 235265], 'attention_mask': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

In [11]:
def formatting_func(example):
    text = f"Question: {example['question'][0]}\nAnswer: {example['answer'][0]}<eos>"
    return [text]
formatting_func(tokenized_dataset)

['Question: What elements are indispensable for the architecture of an OpenShift AI data science project?\nAnswer: Indispensable elements for the architecture of a data science project are workbenches, pipelines, model server, cluster storage, data connections, and permissions.<eos>']

In [13]:
import transformers
from trl import SFTTrainer

trainer = SFTTrainer(
    model=model,
    train_dataset=tokenized_dataset,
    args=transformers.TrainingArguments(
        per_device_train_batch_size=1,
        gradient_accumulation_steps=4,
        warmup_steps=2,
        max_steps=25,
        # Copied from other hugging face tuning blog posts
        learning_rate=2e-4,
        fp16=True,
        # It makes training faster
        logging_steps=1,
        output_dir="outputs",
        optim="paged_adamw_8bit"
    ),
    peft_config=lora_config,
    formatting_func=formatting_func,
    packing=False
)
trainer.train()

Map: 100%|██████████| 253/253 [00:00<00:00, 62457.85 examples/s]


Step,Training Loss
1,0.8401
2,0.8401
3,0.8175
4,0.7643
5,0.71
6,0.6565
7,0.6003
8,0.5418
9,0.4842
10,0.4311


TrainOutput(global_step=25, training_loss=0.3954344913363457, metrics={'train_runtime': 44.9731, 'train_samples_per_second': 2.224, 'train_steps_per_second': 0.556, 'total_flos': 16431444480000.0, 'train_loss': 0.3954344913363457, 'epoch': 25.0})

In [14]:
text = "Question: What is a workbench in Red Hat OpenShift AI?\nAnswer:"
device = "cuda:0"
inputs = tokenizer(text, return_tensors="pt").to(device)
outputs = model.generate(**inputs, max_new_tokens=50)

print(tokenizer.decode(outputs[0], skip_special_tokens=True))

Question: What is a workbench in Red Hat OpenShift AI?
Answer: A workbench is a collection of data science tools that you can use to build data science models. Workbenches are organized into workspaces, which are collections of workbenches.


In [15]:
text = "Question: In OpenShift AI, what are the options for serving models?\nAnswer:"
device = "cuda:0"
inputs = tokenizer(text, return_tensors="pt").to(device)
outputs = model.generate(**inputs, max_new_tokens=80)

print(tokenizer.decode(outputs[0], skip_special_tokens=True))

Question: In OpenShift AI, what are the options for serving models?
Answer: The options for serving models are model server, data science server, and data science server cluster.


In [16]:
text = "Question: What are the components of an OpenShift AI data science project?\nAnswer:"
device = "cuda:0"
inputs = tokenizer(text, return_tensors="pt").to(device)
outputs = model.generate(**inputs, max_new_tokens=80)

print(tokenizer.decode(outputs[0], skip_special_tokens=True))

Question: What are the components of an OpenShift AI data science project?
Answer: The components of an OpenShift AI data science project are data connections, pipelines, model server, model server permissions, model server access, model server permissions, model server connections, model server permissions, model server connections, model server permissions, model server connections, model server permissions, model server connections, model server permissions, model server connections, model server permissions, model server connections, model server permissions, model
