# Install requirements

In [15]:
!pip install google-cloud-pipeline-components google-cloud-aiplatform kfp



# Import & compile the pipeline

In [9]:
from google_cloud_pipeline_components.preview.llm import rlhf_pipeline
from kfp import compiler
import math

In [7]:
# Local file path for the compiled pipeline package; used both as the
# compiler's output path and as the PipelineJob template path.
RLHF_PIPELINE_PKG_PATH = "rlhf_pipeline.yaml"

In [8]:
# Compile the imported RLHF pipeline function and write the resulting
# package to RLHF_PIPELINE_PKG_PATH.
compiler.Compiler().compile(
    package_path=RLHF_PIPELINE_PKG_PATH,
    pipeline_func=rlhf_pipeline,
)

# Define the Vertex AI pipeline job

In [10]:
# Reward-model training budget.
BATCH_SIZE = 64
REWARD_NUM_EPOCHS = 30
PREF_DATASET_SIZE = 3000

# A partial final batch still costs one step, hence the ceiling.
REWARD_STEPS_PER_EPOCH = math.ceil(PREF_DATASET_SIZE / BATCH_SIZE)

# Total steps = steps per epoch x number of epochs.
reward_model_train_steps = REWARD_NUM_EPOCHS * REWARD_STEPS_PER_EPOCH

In [11]:
# Reinforcement-learning training budget.
BATCH_SIZE = 64  # same value as used for the reward-model step count
RL_NUM_EPOCHS = 10
PROMPT_DATASET_SIZE = 2000

# A partial final batch still costs one step, hence the ceiling.
RL_STEPS_PER_EPOCH = math.ceil(PROMPT_DATASET_SIZE / BATCH_SIZE)

# Total steps = steps per epoch x number of epochs.
reinforcement_learning_train_steps = RL_NUM_EPOCHS * RL_STEPS_PER_EPOCH

In [13]:
# Arguments supplied to the compiled RLHF pipeline template.
parameter_values = {
    # Dataset locations (Cloud Storage glob patterns).
    "preference_dataset": (
        "gs://vertex-ai/generative-ai/rlhf/text_small/summarize_from_feedback_tfds/comparisons/train/*.jsonl"
    ),
    "prompt_dataset": (
        "gs://vertex-ai/generative-ai/rlhf/text_small/reddit_tfds/train/*.jsonl"
    ),
    "eval_dataset": (
        "gs://vertex-ai/generative-ai/rlhf/text_small/reddit_tfds/val/*.jsonl"
    ),
    # Model to tune.
    "large_model_reference": "llama-2-7b",
    # Step counts computed in the earlier cells.
    "reward_model_train_steps": reward_model_train_steps,
    "reinforcement_learning_train_steps": reinforcement_learning_train_steps,
    # Tuning hyperparameters.
    "reward_model_learning_rate_multiplier": 1.0,
    "reinforcement_learning_rate_multiplier": 1.0,
    "kl_coeff": 0.1,
    # Instruction text passed to the pipeline.
    "instruction": "Summarize in less than 50 words",
}

# Set up Google Cloud to run the Vertex AI pipeline

In [None]:
# Placeholder values: the original note says authentication is handled in
# `utils`. Supply real credentials, project ID and staging bucket here.
credentials = ''
PROJECT_ID = ''
STAGING_BUCKET = ''

# The RLHF pipeline is available in this region.
REGION = "europe-west4"

# Run the pipeline

In [17]:
import google.cloud.aiplatform as aiplatform

In [18]:
# Initialise the Vertex AI SDK with the project, region and credentials
# defined in the setup cell.
aiplatform.init(
    project=PROJECT_ID,
    location=REGION,
    credentials=credentials,
)

In [None]:
# Build the pipeline job from the compiled template and the parameter
# dictionary; STAGING_BUCKET is used as the pipeline root.
job = aiplatform.PipelineJob(
    display_name="tutorial-rlhf-tuning",
    template_path=RLHF_PIPELINE_PKG_PATH,
    pipeline_root=STAGING_BUCKET,
    parameter_values=parameter_values,
)

In [None]:
# Run the pipeline job created in the previous cell.
# NOTE(review): presumably this blocks until the run finishes — confirm
# against the PipelineJob.run documentation before relying on it.
job.run()