<a href="https://colab.research.google.com/github/AT2071/Samples/blob/main/Guided_Generation_Kopal_Garg.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Author: [Kopal Garg](https://www.linkedin.com/in/gargkopal/)

Date: June 4, 2024

# Setup

In [None]:
%%capture
!pip install google-cloud-aiplatform
!pip install google-auth
!pip install google-auth-oauthlib
!pip install google-auth-httplib2



# Authenticate

In [None]:
from google.colab import auth
auth.authenticate_user()

In [None]:
import json
import jsonschema
import vertexai
from vertexai.generative_models import GenerativeModel, Part, FinishReason
import vertexai.preview.generative_models as generative_models

# Configure the Vertex AI SDK for this project and region, then create the Gemini model client.
vertexai.init(project="cart-ppt-llm", location="us-central1")
model = GenerativeModel("gemini-1.0-pro-vision-001")


# Guided Generation with Regular Expressions

In [None]:
import base64
import re
import vertexai
from vertexai.generative_models import GenerativeModel, Part, FinishReason
import vertexai.preview.generative_models as generative_models


# Exactly six digits; re.ASCII keeps \d from matching non-ASCII Unicode digits.
number_pattern = re.compile(r"\d{6}", re.ASCII)


def validate_number(number_str):
    """Check whether ``number_str`` consists of exactly six ASCII digits.

    Args:
        number_str: Candidate text, e.g. the model's response.

    Returns:
        True when the whole string is six digits; otherwise prints the
        rejected value and a re-prompt notice, then returns False.
    """
    # fullmatch() must consume the entire string, so "123456\n" is rejected
    # (match() with a trailing "$" would accept it).
    if number_pattern.fullmatch(number_str):
        return True
    print(number_str)
    print("Invalid 6-digit format. Re-prompting...")
    return False


def generate(max_attempts=5):
    """Ask the model for a 6-digit number, re-prompting until one validates.

    Args:
        max_attempts: Maximum number of model calls before giving up. This
            bounds the retry loop so a model that keeps returning invalid
            text cannot keep the cell looping (and sending requests) forever.

    Returns:
        The first response that passes ``validate_number``, as a string.

    Raises:
        ValueError: If ``max_attempts`` is less than 1.
        RuntimeError: If no valid number is produced within ``max_attempts``.
    """
    if max_attempts < 1:
        raise ValueError("max_attempts must be at least 1")

    vertexai.init(project="cart-ppt-llm", location="us-central1")
    model = GenerativeModel(
        "gemini-1.0-pro-vision-001",
    )
    for _ in range(max_attempts):
        responses = model.generate_content(
            [text1],
            generation_config=generation_config,
            stream=True,
        )

        # The response is streamed; stitch the chunk texts back together.
        number = "".join(response.text.strip() for response in responses)

        if validate_number(number):
            return number

    raise RuntimeError(
        f"No valid 6-digit number after {max_attempts} attempts."
    )

# Prompt that generate() sends to the model.
text1 = """Generate a valid 6-digit number, ensuring it contains exactly 6 digits with no spaces or other characters. It should be in the format XXXXXX."""

# Generation settings that generate() passes to generate_content().
generation_config = {
    "max_output_tokens": 2048,
    "temperature": 0.4,
    "top_p": 0.4,
    "top_k": 32,
}

# generate() looks up text1 and generation_config when it runs, so both must exist before this call.
generated_number = generate()
print(generated_number)

ERROR:grpc._plugin_wrapping:AuthMetadataPluginCallback "<google.auth.transport.grpc.AuthMetadataPlugin object at 0x7a5acc138d60>" raised exception!
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/google/auth/compute_engine/credentials.py", line 128, in refresh
    self._retrieve_info(request)
  File "/usr/local/lib/python3.10/dist-packages/google/auth/compute_engine/credentials.py", line 101, in _retrieve_info
    info = _metadata.get_service_account_info(
  File "/usr/local/lib/python3.10/dist-packages/google/auth/compute_engine/_metadata.py", line 323, in get_service_account_info
    return get(request, path, params={"recursive": "true"})
  File "/usr/local/lib/python3.10/dist-packages/google/auth/compute_engine/_metadata.py", line 248, in get
    raise exceptions.TransportError(
google.auth.exceptions.TransportError: ("Failed to retrieve http://metadata.google.internal/computeMetadata/v1/instance/service-accounts/default/?recursive=true from the Go

ServiceUnavailable: 503 Getting metadata from plugin failed with error: ("Failed to retrieve http://metadata.google.internal/computeMetadata/v1/instance/service-accounts/default/?recursive=true from the Google Compute Engine metadata service. Status: 404 Response:\nb''", <google.auth.transport.requests._Response object at 0x7a5abde5c5e0>)

# Guided Generation with JSON Schemas

In [None]:
import json
import jsonschema
import vertexai
from vertexai.generative_models import GenerativeModel, Part, FinishReason
import vertexai.preview.generative_models as generative_models

# Configure the Vertex AI SDK for this project and region, then create the Gemini model client.
vertexai.init(project="cart-ppt-llm", location="us-central1")
model = GenerativeModel("gemini-1.0-pro-vision-001")

# JSON Schema the generated profile is checked against in validate_json().
# Note: the schema only requires age >= 0; the 20-40 range appears in the prompt alone.
schema = {
    "type": "object",
    "properties": {
        "name": {"type": "string"},
        "age": {"type": "integer", "minimum": 0},
        "email": {"type": "string", "format": "email"}
    },
    "required": ["name", "age", "email"]
}

# Prompt sent by generate_json(); it shows the model the expected object layout.
prompt = """
Generate a JSON object representing a user profile. The JSON object should have the following structure:
{
    "name": "a random first and last name",
    "age": an age between 20 and 40,
    "email": "a unique email address"
}
"""

def validate_json(instance, schema):
    """Validate a decoded JSON value against a JSON Schema.

    Args:
        instance: Decoded JSON value (e.g. the dict returned by ``json.loads``).
        schema: JSON Schema the instance must satisfy.

    Returns:
        True when ``instance`` conforms to ``schema``; otherwise prints the
        validation error and returns False.
    """
    try:
        # jsonschema does not enforce "format" keywords (such as "email")
        # unless a format checker is supplied, so pass one explicitly.
        jsonschema.validate(
            instance=instance,
            schema=schema,
            format_checker=jsonschema.FormatChecker(),
        )
    except jsonschema.exceptions.ValidationError as err:
        print("Invalid JSON format. Error:", err)
        return False
    return True

def generate_json():
    """Request a user profile from the model and return it as a validated dict.

    Streams the response for ``prompt``, removes an optional Markdown code
    fence, decodes the JSON and validates it against ``schema``.

    Returns:
        The decoded JSON object when it parses and passes ``validate_json``;
        otherwise None (the decode error is printed when parsing fails).
    """
    responses = model.generate_content(
        [prompt],
        generation_config=generation_config,
        safety_settings=safety_settings,
        stream=True,
    )

    # Concatenate the raw chunks and strip only once: stripping every chunk
    # deletes spaces that fall on a chunk boundary inside a string value
    # ("John " + "Smith" would become "JohnSmith").
    json_response = "".join(response.text for response in responses).strip()

    # Remove a surrounding Markdown fence, with or without a "json" tag.
    if json_response.startswith("```") and json_response.endswith("```"):
        json_response = json_response[3:-3]
        if json_response.startswith("json"):
            json_response = json_response[4:]
        json_response = json_response.strip()

    try:
        json_object = json.loads(json_response)
    except json.JSONDecodeError as e:
        print("Error decoding JSON:", e)
        return None

    if validate_json(json_object, schema):
        return json_object
    return None

# Generation settings for generate_content(); generate_json() reads this global when it is called.
generation_config = {
    "max_output_tokens": 2048,
    "temperature": 0.4,
    "top_p": 0.4,
    "top_k": 32,
}

# The same blocking threshold is applied to each of the four listed harm categories.
safety_settings = {
    generative_models.HarmCategory.HARM_CATEGORY_HATE_SPEECH: generative_models.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
    generative_models.HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: generative_models.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
    generative_models.HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: generative_models.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
    generative_models.HarmCategory.HARM_CATEGORY_HARASSMENT: generative_models.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
}

generated_profile = generate_json()
# generate_json() returns None on a decode or validation failure; print the text 'None' in that case.
print(f"{json.dumps(generated_profile, indent=2) if generated_profile else 'None'}")


{
  "name": "John Smith",
  "age": 32,
  "email": "john.smith@example.com"
}


# Guided Generation with Context-Free Grammars (CFGs)
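This section defines a context-free grammar with the NLTK library and embeds it in the prompt, so that the model generates a simple sentence that follows it. The grammar specifies the structure of a sentence (S), noun phrase (NP), and verb phrase (VP), along with the rules for the verb (V), object (Obj), determiner (Det), and noun (N).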

In [None]:
import random
from nltk import CFG
import vertexai
from vertexai.generative_models import GenerativeModel, Part, FinishReason
import vertexai.preview.generative_models as generative_models

# Grammar for subject-verb-object sentences built from a small fixed vocabulary.
grammar = CFG.fromstring("""
    S -> NP VP
    NP -> 'John' | 'Mary' | 'Alice' | 'Bob'
    VP -> V Obj
    V -> 'eats' | 'drinks' | 'sees' | 'likes'
    Obj -> Det N
    Det -> 'an' | 'a'
    N -> 'apple' | 'banana' | 'water' | 'book'
""")

# Configure the Vertex AI SDK for this project and region, then create the Gemini model client.
vertexai.init(project="cart-ppt-llm", location="us-central1")
model = GenerativeModel("gemini-1.0-pro-vision-001")

# The f-string interpolates the grammar's string form into the prompt text.
prompt = f"""
Generate a sentence based on the following context-free grammar:
{grammar}
Ensure there is a space between each word in the sentence.
"""

def generate_cfg_sentence():
    """Ask the model for a sentence that follows the grammar in ``prompt``.

    Returns:
        The full streamed response as one string, with leading and trailing
        whitespace removed.
    """
    responses = model.generate_content(
        [prompt],
        generation_config=generation_config,
        safety_settings=safety_settings,
        stream=True,
    )

    # Join the raw chunks and strip once at the end. Stripping each chunk
    # removes the space on a chunk boundary and glues adjacent words together.
    return "".join(response.text for response in responses).strip()

# Generation settings for generate_content(); generate_cfg_sentence() reads this global when it is called.
generation_config = {
    "max_output_tokens": 100,
    "temperature": 0.4,
    "top_p": 0.9,
}

# The same blocking threshold is applied to each of the four listed harm categories.
safety_settings = {
    generative_models.HarmCategory.HARM_CATEGORY_HATE_SPEECH: generative_models.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
    generative_models.HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: generative_models.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
    generative_models.HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: generative_models.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
    generative_models.HarmCategory.HARM_CATEGORY_HARASSMENT: generative_models.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
}


# Run the request and show the sentence the model produced.
generated_llm_sentence = generate_cfg_sentence()
print(f"{generated_llm_sentence}")


John eats an apple


# Template-based Generation:

In [None]:
import vertexai
from vertexai.generative_models import GenerativeModel, Part, FinishReason
import vertexai.preview.generative_models as generative_models

# Configure the Vertex AI SDK for this project and region, then create the Gemini model client.
vertexai.init(project="cart-ppt-llm", location="us-central1")
model = GenerativeModel("gemini-1.0-pro-vision-001")

# Plain (non-f) string: the {{name}}-style placeholders reach the model literally, braces included.
prompt = """
Create a user profile using the following template:
Name: {{name}}
Age: {{age}}
Email: {{email}}

Ensure the name is a first name and a last name, the age is a number between 20 and 40, and the email is in the format name@example.com.
"""

def generate_template_based_profile():
    """Ask the model to fill in the profile template given in ``prompt``.

    Returns:
        The full streamed response as one string, with leading and trailing
        whitespace removed. Line breaks and spaces inside the response are
        kept as the model produced them.
    """
    responses = model.generate_content(
        [prompt],
        generation_config=generation_config,
        safety_settings=safety_settings,
        stream=True,
    )

    # Join the raw chunks and strip once at the end. Stripping each chunk
    # drops whitespace on chunk boundaries (e.g. "Name:" + " John Smith"
    # would come out as "Name:John Smith").
    return "".join(response.text for response in responses).strip()

# Generation settings for generate_content(); generate_template_based_profile() reads this global when it is called.
generation_config = {
    "max_output_tokens": 100,
    "temperature": 0.4,
    "top_p": 0.9,
}

# The same blocking threshold is applied to each of the four listed harm categories.
safety_settings = {
    generative_models.HarmCategory.HARM_CATEGORY_HATE_SPEECH: generative_models.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
    generative_models.HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: generative_models.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
    generative_models.HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: generative_models.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
    generative_models.HarmCategory.HARM_CATEGORY_HARASSMENT: generative_models.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
}

# Run the request and show the filled-in template text.
generated_profile = generate_template_based_profile()
print(f"{generated_profile}")


Name:John Smith
Age: 35
Email: john.smith@example.com


# Entity-based Generation:

In [None]:
import vertexai
from vertexai.generative_models import GenerativeModel, Part, FinishReason
import vertexai.preview.generative_models as generative_models

# Configure the Vertex AI SDK for this project and region, then create the Gemini model client.
vertexai.init(project="cart-ppt-llm", location="us-central1")
model = GenerativeModel("gemini-1.0-pro-vision-001")

# Facts the generated paragraph is asked to mention.
entities = {
    "country": "France",
    "capital": "Paris",
    "famous_food": "croissant",
    "language": "French"
}

# The f-string substitutes the entity values into the instructions sent to the model.
prompt = f"""
Generate a 3-line paragraph about {entities['country']} that includes the following entities:
1. The capital city, {entities['capital']}
2. A famous food, {entities['famous_food']}
3. The official language, {entities['language']}
"""

def generate_entity_based_paragraph():
    """Ask the model for the paragraph described in ``prompt``.

    Returns:
        The full streamed response as one string, with leading and trailing
        whitespace removed. Line breaks and spaces inside the response are
        kept as the model produced them.
    """
    responses = model.generate_content(
        [prompt],
        generation_config=generation_config,
        safety_settings=safety_settings,
        stream=True,
    )

    # Join the raw chunks without altering them. Stripping each chunk and
    # re-joining with a space inserts a space in the middle of any word that
    # is split across two chunks and flattens the requested line breaks.
    return "".join(response.text for response in responses).strip()

# Generation settings for generate_content(); generate_entity_based_paragraph() reads this global when it is called.
generation_config = {
    "max_output_tokens": 200,
    "temperature": 0.4,
    "top_p": 0.9,
}

# The same blocking threshold is applied to each of the four listed harm categories.
safety_settings = {
    generative_models.HarmCategory.HARM_CATEGORY_HATE_SPEECH: generative_models.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
    generative_models.HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: generative_models.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
    generative_models.HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: generative_models.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
    generative_models.HarmCategory.HARM_CATEGORY_HARASSMENT: generative_models.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
}

# Run the request and show the generated paragraph.
generated_paragraph = generate_entity_based_paragraph()
print(f"{generated_paragraph}")


Nestled in the heart of Europe, France boasts the captivating capital of Paris, renowned for its iconic Eiffel Tower and the Louvre Museum. Indulge in the delectable aroma of freshly baked croissants, a culinary staple that embodies the nation's rich gastronomic heritage. The official language, French, echoes through the streets, adding a touch of elegance and sophistication to the vibrant atmosphere.


# Structured Data Generation

In [None]:
import pandas as pd
import vertexai
from vertexai.generative_models import GenerativeModel, Part, FinishReason
import vertexai.preview.generative_models as generative_models

# Configure the Vertex AI SDK for this project and region, then create the Gemini model client.
vertexai.init(project="cart-ppt-llm", location="us-central1")
model = GenerativeModel("gemini-1.0-pro-vision-001")

# Column names the model is asked to use for the table.
column_headers = ["Name", "Age", "Country", "Profession"]

# The f-string inserts the comma-separated column names into the request.
prompt = f"""
Generate a table with the following columns: {', '.join(column_headers)}.
Provide data for 5 rows.
Ensure that the table is properly formatted as CSV without any line breaks within rows.
"""

def generate_dataframe():
    """Ask the model for the CSV table described in ``prompt``.

    Despite its name, this function returns the raw CSV text, not a
    DataFrame; the caller is responsible for parsing it.

    Returns:
        The full streamed response as one string, with leading and trailing
        whitespace removed and the line breaks between rows intact.
    """
    responses = model.generate_content(
        [prompt],
        generation_config=generation_config,
        safety_settings=safety_settings,
        stream=True,
    )

    # Join the raw chunks and strip once at the end. Stripping each chunk
    # removes a newline that falls on a chunk boundary, which merges two CSV
    # rows into one line.
    return "".join(response.text for response in responses).strip()

# Generation settings for generate_content(); generate_dataframe() reads this global when it is called.
generation_config = {
    "max_output_tokens": 300,
    "temperature": 0.4,
    "top_p": 0.9,
}

# The same blocking threshold is applied to each of the four listed harm categories.
safety_settings = {
    generative_models.HarmCategory.HARM_CATEGORY_HATE_SPEECH: generative_models.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
    generative_models.HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: generative_models.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
    generative_models.HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: generative_models.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
    generative_models.HarmCategory.HARM_CATEGORY_HARASSMENT: generative_models.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
}

# io.StringIO is used below, but no import above in this notebook provides
# `io`; without this line a fresh kernel fails with NameError.
import io

generated_table = generate_dataframe()

# read_csv takes a path or file-like object, so wrap the CSV text in StringIO.
df = pd.read_csv(io.StringIO(generated_table))
df

Unnamed: 0,Name,Age,Country,Profession
0,John,30,USA,Software Engineer
1,Mary,25,Canada,Doctor
2,Bob,40,UK,Teacher
3,Alice,28,Australia,Lawyer
4,Tom,35,Germany,Architect
