In [2]:
import torch

print(torch.cuda.is_available())
print(torch.cuda.current_device())
print(torch.cuda.get_device_name(0))
print(torch.cuda.device_count())

True
0
NVIDIA GeForce GTX 1070
1


In [3]:
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, GemmaTokenizer

model_id = "google/gemma-2b"
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)

tokenizer = AutoTokenizer.from_pretrained(model_id, token='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')
model = AutoModelForCausalLM.from_pretrained(model_id, quantization_config=bnb_config, device_map={"":0}, token='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')

  from .autonotebook import tqdm as notebook_tqdm
Downloading shards: 100%|██████████| 2/2 [01:07<00:00, 33.81s/it]
Loading checkpoint shards: 100%|██████████| 2/2 [00:02<00:00,  1.08s/it]


In [4]:
text = "Question: What is a workbench in Red Hat OpenShift AI?\nAnswer:"
device = "cuda:0"
inputs = tokenizer(text, return_tensors="pt").to(device)

outputs = model.generate(**inputs, max_new_tokens=50)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))

Question: What is a workbench in Red Hat OpenShift AI?
Answer: A workbench is a collection of Jupyter notebooks that you can use to explore and experiment with AI models. Workbenches are available in the Red Hat OpenShift AI console.

Question: What is a Jupyter notebook?
Answer: A Jupyter notebook is


In [5]:
text = "Question: What serving options does OpenShift AI provide?\nAnswer:"
device = "cuda:0"
inputs = tokenizer(text, return_tensors="pt").to(device)

outputs = model.generate(**inputs, max_new_tokens=80)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))

Question: What serving options does OpenShift AI provide?
Answer: OpenShift AI provides a variety of serving options, including:

* <strong>OpenShift Container Platform</strong>: OpenShift Container Platform is a fully managed Kubernetes-based container orchestration platform that automates the deployment, scaling, and management of containerized applications. It provides a unified management interface for container orchestration, container registry, and container registry management.
* <strong>OpenShift Container Registry</strong>


In [6]:
text = "Question: What are the components of an OpenShift AI data science project?\nAnswer:"
device = "cuda:0"
inputs = tokenizer(text, return_tensors="pt").to(device)

outputs = model.generate(**inputs, max_new_tokens=80)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))

Question: What are the components of an OpenShift AI data science project?
Answer: The components of an OpenShift AI data science project are:

* <strong>Data</strong>: The data that is used to train the model.
* <strong>Model</strong>: The model that is trained on the data.
* <strong>Deployment</strong>: The deployment of the model to the production environment.
* <strong>Monitoring</strong>: The monitoring of the model to ensure it is performing as


In [7]:
from peft import LoraConfig

lora_config = LoraConfig(
    r=8,
    target_modules=["q_proj", "o_proj", "k_proj", "v_proj", "gate_proj", "up_proj", "down_proj"],
    task_type="CAUSAL_LM",
)

In [8]:
# from datasets import load_dataset
# dataset = load_dataset("json", data_files="synthetic_component_data.json")

from datasets import load_dataset, concatenate_datasets
dataset1 = load_dataset("json", data_files="synthetic_component_data copy.json")
dataset2 = load_dataset("json", data_files="synthetic_model_serve_data copy.json")
dataset3 = load_dataset("json", data_files="synthetic_workbench_data copy.json")

print(dataset1)
print(dataset2)
print(dataset3)

combined_dataset = concatenate_datasets([dataset1['train'], dataset2['train'], dataset3['train']])
shuffled_combined_dataset = combined_dataset.shuffle(seed=42)
print(shuffled_combined_dataset)

Generating train split: 1174 examples [00:00, 86117.51 examples/s]
Generating train split: 1158 examples [00:00, 364421.07 examples/s]
Generating train split: 1152 examples [00:00, 371880.11 examples/s]

DatasetDict({
    train: Dataset({
        features: ['answer', 'question'],
        num_rows: 1174
    })
})
DatasetDict({
    train: Dataset({
        features: ['answer', 'question'],
        num_rows: 1158
    })
})
DatasetDict({
    train: Dataset({
        features: ['answer', 'question'],
        num_rows: 1152
    })
})
Dataset({
    features: ['answer', 'question'],
    num_rows: 3484
})





In [13]:
# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_id, token='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')

# Define a function to tokenize both question and answer
def tokenize_function(batch):
    tokenized_batch = tokenizer(batch["question"], batch["answer"], padding="max_length", truncation=True, max_length=128)
    return tokenized_batch

# Tokenize both questions and answers
tokenized_dataset = shuffled_combined_dataset.map(tokenize_function, batched=True)

# Access tokenized data
print(tokenized_dataset[0])

Map: 100%|██████████| 3484/3484 [00:00<00:00, 12735.21 examples/s]

{'answer': 'The essential building blocks are workbenches, pipelines, model server, cluster storage, data connections, and permissions.', 'question': 'What are the essential building blocks of an OpenShift AI data science project?', 'input_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 1841, 708, 573, 8727, 4547, 13854, 576, 671, 6376, 34030, 16481, 1423, 8042, 3542, 235336, 2, 651, 8727, 4547, 13854, 708, 1160, 2352, 2127, 235269, 88364, 235269, 2091, 6934, 235269, 16952, 7387, 235269, 1423, 17922, 235269, 578, 36058, 235265], 'attention_mask': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0




In [14]:
def formatting_func(example):
    text = f"Question: {example['question'][0]}\nAnswer: {example['answer'][0]}<eos>"
    return [text]
formatting_func(tokenized_dataset)

['Question: What are the essential building blocks of an OpenShift AI data science project?\nAnswer: The essential building blocks are workbenches, pipelines, model server, cluster storage, data connections, and permissions.<eos>']

In [15]:
import transformers
from trl import SFTTrainer

trainer = SFTTrainer(
    model=model,
    train_dataset=tokenized_dataset,
    args=transformers.TrainingArguments(
        per_device_train_batch_size=1,
        gradient_accumulation_steps=4,
        warmup_steps=2,
        max_steps=50,
        # Copied from other hugging face tuning blog posts
        learning_rate=2e-4,
        fp16=True,
        # It makes training faster
        logging_steps=1,
        output_dir="outputs",
        optim="paged_adamw_8bit"
    ),
    peft_config=lora_config,
    formatting_func=formatting_func,
    packing=False
)
trainer.train()

Map: 100%|██████████| 3484/3484 [00:00<00:00, 369892.05 examples/s]


Step,Training Loss
1,3.1927
2,3.1927
3,3.1186
4,2.9514
5,2.7683
6,2.5655
7,2.3588
8,2.1868
9,2.0473
10,1.9244


TrainOutput(global_step=50, training_loss=0.8411115799844265, metrics={'train_runtime': 106.9895, 'train_samples_per_second': 1.869, 'train_steps_per_second': 0.467, 'total_flos': 111733822464000.0, 'train_loss': 0.8411115799844265, 'epoch': 50.0})

In [16]:
text = "Question: What is a workbench in Red Hat OpenShift AI?\nAnswer:"
device = "cuda:0"
inputs = tokenizer(text, return_tensors="pt").to(device)
outputs = model.generate(**inputs, max_new_tokens=50)

print(tokenizer.decode(outputs[0], skip_special_tokens=True))

Question: What is a workbench in Red Hat OpenShift AI?
Answer: A workbench in Red Hat OpenShift AI is an instance of an environment for development and experimentation, where users can choose a notebook image for data science projects.


In [17]:
text = "Question: In OpenShift AI, what are the options for serving models?\nAnswer:"
device = "cuda:0"
inputs = tokenizer(text, return_tensors="pt").to(device)
outputs = model.generate(**inputs, max_new_tokens=80)

print(tokenizer.decode(outputs[0], skip_special_tokens=True))

Question: In OpenShift AI, what are the options for serving models?
Answer: The options are single-model serving and multi-model serving.


In [18]:
text = "Question: What are the components of an OpenShift AI data science project?\nAnswer:"
device = "cuda:0"
inputs = tokenizer(text, return_tensors="pt").to(device)
outputs = model.generate(**inputs, max_new_tokens=80)

print(tokenizer.decode(outputs[0], skip_special_tokens=True))

Question: What are the components of an OpenShift AI data science project?
Answer: The components are workbenches, pipelines, model server, cluster storage, data connections, and permissions.
