In [1]:
# Import required libraries
from pathlib import Path
import openvino_genai as ov_genai
from llm_config import convert_and_compress_model
import time


In [2]:
# Predefined settings
# MODEL_ID, COMPRESSION_VARIANT and USE_PRECONVERTED are handed to
# convert_and_compress_model(); DEVICE is handed to the inference pipeline.
MODEL_ID = "gemma-2-9b-it"
COMPRESSION_VARIANT = "INT4"
DEVICE = "CPU"
USE_PRECONVERTED = True

# Greeting stored under "start_message" (genie persona).
_GENIE_GREETING = " ".join(
    (
        "Ah, you’ve summoned me, mighty one! I am your friendly, magical assistant, ready to grant you",
        "three wishes… er, well, actually, more than three answers! I’ll give you helpful, respectful,",
        "and honest answers, all wrapped up in a mystical cloud of wisdom. Fear not, for I shall keep my answers",
        "safe and positive, avoiding any dark magic, negativity, or harm. Ask away, and I’ll share knowledge fit for royalty!",
    )
)

# One user/model exchange. The history template is this same text with the
# model turn closed by a trailing <end_of_turn>.
# NOTE(review): Gemma's published chat format puts a newline after the role
# name; these templates omit it -- confirm that this is intended.
_CHAT_TURN = "<start_of_turn>user{user}<end_of_turn><start_of_turn>model{assistant}"

# Instructions and turn layout for retrieval-augmented prompts.
_RAG_INSTRUCTIONS = " ".join(
    (
        "You are a genie assistant, here to grant wishes in the form of knowledge! Use the following pieces of",
        "retrieved context to answer the question. If you don’t know the answer, just say that even a genie can’t",
        "know it all. Be concise and keep the magic alive, just like a good wish, keep it sweet and to the point.",
    )
)
_RAG_TURN = "<start_of_turn>user{input}<end_of_turn><start_of_turn>context{context} <end_of_turn><start_of_turn>model"

# Model description passed to convert_and_compress_model().
# The {user}, {assistant}, {input} and {context} placeholders are presumably
# filled in by whatever consumes these templates -- verify against that code.
MODEL_CONFIGURATION = {
    "model_id": "google/gemma-2-9b-it",
    "remote_code": False,
    "start_message": _GENIE_GREETING,
    "history_template": _CHAT_TURN + "<end_of_turn>",
    "current_message_template": _CHAT_TURN,
    "rag_prompt_template": _RAG_INSTRUCTIONS + " " + _RAG_TURN,
}


In [3]:
def load_model():
    """
    Build the text-generation pipeline from the notebook-level settings.

    Calls convert_and_compress_model() with MODEL_ID, MODEL_CONFIGURATION,
    COMPRESSION_VARIANT and USE_PRECONVERTED, then opens an
    ov_genai.LLMPipeline on the returned directory for DEVICE.

    Returns:
        The pipeline object, or None if any step raised an exception (the
        error message is printed instead of being propagated).
    """
    pipeline = None
    try:
        weights_location = str(
            convert_and_compress_model(
                MODEL_ID,
                MODEL_CONFIGURATION,
                COMPRESSION_VARIANT,
                USE_PRECONVERTED,
            )
        )
        pipeline = ov_genai.LLMPipeline(weights_location, DEVICE)
    except Exception as setup_error:
        # Best-effort: report the failure and fall through to return None.
        print(f"Error during model setup: {setup_error}")
    return pipeline

# Load the model
# `pipe` is None when load_model() caught an exception during setup.
pipe = load_model()


✅ INT4 gemma-2-9b-it model already converted and can be found in gemma\INT4_compressed_weights


In [4]:
# Example input interaction
# Read one wish from the user; surrounding whitespace is ignored.
input_prompt = input("What do you wish for today? ").strip()

if not input_prompt:
    print("Please enter a valid wish.")
elif pipe is None:
    # load_model() returns None when setup failed. Say so directly instead of
    # surfacing "'NoneType' object has no attribute 'generate'".
    print("Error: the model is not loaded, so no wish can be granted.")
else:
    print("Aladdin is granting your wish...")
    try:
        # Generation is the real work; no artificial delay is needed before it.
        output = pipe.generate(input_prompt, max_new_tokens=200)
    except Exception as e:
        print(f"Error: {e}")
    else:
        print("\nAbracadabra – here comes your wish!")
        print(output)


Aladdin is granting your wish...

Abracadabra – here comes your wish!


Morocco is located in **North Africa**. 

It is bordered by:

* **Algeria** to the east and south
* **Western Sahara** to the south
* **Spain** (Ceuta and Melilla) to the north 


Let me know if you have any other questions about Morocco!



In [5]:
# Manage history of interactions
# The list is reset each time this cell runs.
history = []

# Example of adding to history
if input_prompt and pipe is None:
    # load_model() returns None when setup failed; report that rather than an
    # AttributeError raised by calling generate() on None.
    print("Error while generating output: the model is not loaded.")
elif input_prompt:
    try:
        # NOTE(review): this runs inference a second time for the prompt that
        # the previous cell already answered -- confirm the repeat is wanted.
        output = pipe.generate(input_prompt, max_new_tokens=200)
    except Exception as e:
        print(f"Error while generating output: {e}")
    else:
        # Record the exchange only when generation succeeded.
        history.append({"input": input_prompt, "output": output})

# Display history
if history:
    print("\nWish History:")
    for wish_number, entry in enumerate(history, start=1):
        print(f"\nWish {wish_number}: {entry['input']}")
        print(f"Response: {entry['output']}")



Wish History:

Wish 1: Where is morraco?
Response: 

Morocco is located in **North Africa**. 

It is bordered by:

* **Algeria** to the east and south
* **Western Sahara** to the south
* **Spain** (Ceuta and Melilla) to the north 


Let me know if you have any other questions about Morocco!

