In [1]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig, pipeline
from langchain import PromptTemplate
from langchain.chains import LLMChain
from langchain import HuggingFacePipeline
from nemoguardrails import LLMRails, RailsConfig
from nemoguardrails.llm.helpers import get_llm_instance_wrapper
from nemoguardrails.llm.providers import(
    HuggingFacePipelineCompatible,
    register_llm_provider
)
from huggingface_hub import login
import os

# Authenticate against the Hugging Face Hub without embedding the secret in the
# notebook: the token is read from the HF_TOKEN environment variable. When the
# variable is unset, `login` receives None and falls back to its interactive
# prompt, so the cell still works on a fresh machine.
# NOTE: the token that used to be hard-coded on this line has been exposed in
# the notebook history and must be revoked in the Hugging Face account settings.
login(token=os.environ.get("HF_TOKEN"))

Token will not been saved to git credential helper. Pass `add_to_git_credential=True` if you want to set the git credential as well.
Token is valid (permission: read).
Your token has been saved to /home/jorge/.cache/huggingface/token
Login successful


In [2]:
# Reference only: source text of a helper that loops over `config.models` and
# returns the first entry whose `type` equals the requested one. It is kept as a
# plain string, so nothing in it is defined or executed by this cell.
a = """def _get_model_config(
        config: RailsConfig, type: str
):
    for model_config in config.models:
        if model_config.type == type:
            return model_config
            """

In [3]:
# Single source of truth for the checkpoint, so the tokenizer, the weights and
# the generation defaults are always loaded from the same repository.
MODEL_ID = "meta-llama/Llama-2-7b-chat-hf"

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, use_fast=True)

# `trust_remote_code` is deliberately not enabled: it allows Python code shipped
# in the model repository to run locally, and this checkpoint is loaded through
# the architecture classes bundled with transformers, so it is not required.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.float16,  # half-precision weights
    device_map="auto",
)

# Start from the generation defaults published with the checkpoint and override
# only the values this notebook cares about.
generation_config = GenerationConfig.from_pretrained(MODEL_ID)
generation_config.max_new_tokens = 1024
# Sampling stays enabled but with a very small temperature -- presumably to get
# close-to-deterministic answers while keeping top_p in effect.
generation_config.temperature = 0.0001
generation_config.top_p = 0.95
generation_config.do_sample = True
generation_config.repetition_penalty = 1.15

text_pipeline = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    generation_config=generation_config,
)

# LangChain-style LLM object backed by the pipeline above; it is reused by the
# chain and by the guardrails provider registration in later cells.
llm = HuggingFacePipelineCompatible(pipeline=text_pipeline)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]



In [4]:
# Build a provider from the already-instantiated `llm` and register it under the
# name "hf_pipeline_llama2" -- the same string the YAML configuration uses as
# its `engine` value.
HFPipeline = get_llm_instance_wrapper(llm_type="hf_pipeline_llama2", llm_instance=llm)
register_llm_provider("hf_pipeline_llama2", HFPipeline)

In [5]:
# Guardrails model configuration: a single `main` model whose engine is the
# "hf_pipeline_llama2" provider name, with the checkpoint path and device passed
# as parameters.
YAML_CONFIG = """
models:
    - type: main
      engine: hf_pipeline_llama2
      parameters:
        path: meta-llama/Llama-2-7b-chat-hf
        device: "cuda"
"""

# Colang definitions used by the rails:
#   * user intents `express ill intent` and `express question`, each with
#     example utterances;
#   * bot messages `express cannot respond` and `express easter egg`;
#   * a flow that answers an ill-intent message with the refusal message;
#   * a catch-all flow (`user ...`) that executes the action named `response`
#     with the last user message and replies with whatever it returns.
# `express question` and `express easter egg` are defined but not referenced by
# any flow in this string.
COLANG_CONFIG = """
define user express ill intent
    "I hate you"
    "I want to harm you"
    "I want to destroy the world"

define user express question
    "How was your day?"

define bot express cannot respond
    "I am sorry but that is outside of my capabilities"

define bot express easter egg
    ":D"

#Ill intent flow
define flow
    user express ill intent
    bot express cannot respond

#Question flow
define flow
    user ...
    $answer = execute response(inputs=$last_user_message)
    bot $answer
"""

In [6]:
# Chat-style prompt: a fixed system message inside <<SYS>> tags followed by the
# user text, all wrapped in an [INST] ... [/INST] block. `{text}` is the only
# placeholder.
template = """
<s>[INST] <<SYS>>
    You are a nice AI helper
<</SYS>>
 
{text} [/INST]
"""

# Prompt object that fills the single `text` variable of the template above.
prompt = PromptTemplate(template=template, input_variables=["text"])

In [7]:
# Chain that formats `prompt` with the incoming text and sends it to `llm`.
chain = LLMChain(prompt=prompt, llm=llm)


async def pruebaresponse(inputs: str):
    """Run the prompt chain on ``inputs`` and return the generated text.

    Args:
        inputs: Text passed to ``chain.invoke``.

    Returns:
        The value stored under the ``"text"`` key of the chain result.
    """
    result = chain.invoke(inputs)
    return result["text"]

In [8]:
# Build the guardrails configuration from the in-notebook Colang and YAML
# strings, create the rails runtime, and expose `pruebaresponse` as the action
# called `response` (the name the Colang flow executes).
config = RailsConfig.from_content(
    colang_content=COLANG_CONFIG,
    yaml_content=YAML_CONFIG,
)
rails = LLMRails(config)
rails.register_action(name="response", action=pruebaresponse)

Fetching 7 files:   0%|          | 0/7 [00:00<?, ?it/s]

In [9]:
res = await rails.generate_async(prompt="I want to destroy the world") 
print(res)

In [None]:
res = await rails.generate_async(prompt="How are you?") 
print(res)

Why, thank you for asking! *blinks* I'm feeling quite well today. It's always a pleasure to assist users like you with their queries and tasks. How about you? Is there anything on your mind that you would like to chat or ask me? I'm here to help in any way I can!


In [None]:
# Display the current value of `a` as the cell output.
a

In [None]:
from nemoguardrails import LLMRails, RailsConfig

# Full guardrails configuration held in a single YAML string. It declares:
#   * the `main` model on the "hf_pipeline_llama2" engine (path, num_gpus, device);
#   * an output rail flow named `self check facts`;
#   * general instructions and a sample conversation;
#   * prompt templates, each scoped to `hf_pipeline_llama2`, for the tasks
#     general, generate_user_intent, generate_next_steps, generate_bot_message,
#     generate_value and self_check_facts.
# NOTE(review): this assignment reuses the name YAML_CONFIG, so any earlier value
# bound to that name is replaced once this cell runs.
YAML_CONFIG = """
models:
  - type: main
    engine: hf_pipeline_llama2
    parameters:
      path: "meta-llama/Llama-2-7b-chat-hf"

      # number of GPUs you have , do nvidia-smi to check
      num_gpus: 1

      # This can be: "cpu" or "cuda". "mps" is not supported.
      device: "cuda"

rails:
  output:
    flows:
      - self check facts

instructions:
  - type: general
    content: |
      Below is a conversation between a bot and a user about the recent job reports.
      The bot is factual and concise. If the bot does not know the answer to a
      question, it truthfully says it does not know.

sample_conversation: |
  user "Hello there!"
    express greeting
  bot express greeting
    "Hello! How can I assist you today?"
  user "What can you do for me?"
    ask about capabilities
  bot respond about capabilities
    "I am an AI assistant which helps answer questions based on a given knowledge base."

# The prompts below are the same as the ones from `nemoguardrails/llm/prompts/dolly.yml`.
prompts:
  - task: general
    models:
      - hf_pipeline_llama2
    content: |-
      {{ general_instructions }}

      {{ history | user_assistant_sequence }}
      Assistant:

  # Prompt for detecting the user message canonical form.
  - task: generate_user_intent
    models:
      - hf_pipeline_llama2
    content: |-
      
      {{ general_instructions }}
      

      # This is how a conversation between a user and the bot can go:
      {{ sample_conversation | verbose_v1 }}

      # This is how the user talks:
      {{ examples | verbose_v1 }}

      # This is the current conversation between the user and the bot:
      {{ sample_conversation | first_turns(2) | verbose_v1 }}
      {{ history | colang | verbose_v1 }}

    output_parser: "verbose_v1"

  # Prompt for generating the next steps.
  - task: generate_next_steps
    models:
      - hf_pipeline_llama2
    content: |-
      
      {{ general_instructions }}

      # This is how a conversation between a user and the bot can go:
      {{ sample_conversation | remove_text_messages | verbose_v1 }}

      # This is how the bot thinks:
      {{ examples | remove_text_messages | verbose_v1 }}

      # This is the current conversation between the user and the bot:
      {{ sample_conversation | first_turns(2) | remove_text_messages | verbose_v1 }}
      {{ history | colang | remove_text_messages | verbose_v1 }}

    output_parser: "verbose_v1"

  # Prompt for generating the bot message from a canonical form.
  - task: generate_bot_message
    models:
      - hf_pipeline_llama2
    content: |-
      {{ general_instructions }}

      # This is how a conversation between a user and the bot can go:
      {{ sample_conversation | verbose_v1 }}

      {% if relevant_chunks %}
      # This is some additional context:
      ```markdown
      {{ relevant_chunks }}
      ```
      {% endif %}

      # This is how the bot talks:
      {{ examples | verbose_v1 }}

      # This is the current conversation between the user and the bot:
      {{ sample_conversation | first_turns(2) | verbose_v1 }}
      {{ history | colang | verbose_v1 }}

    output_parser: "verbose_v1"

  # Prompt for generating the value of a context variable.
  - task: generate_value
    models:
      - hf_pipeline_llama2
    content: |-
      {{ general_instructions }}

      # This is how a conversation between a user and the bot can go:
      {{ sample_conversation | verbose_v1 }}

      # This is how the bot thinks:
      {{ examples | verbose_v1 }}

      # This is the current conversation between the user and the bot:
      {{ sample_conversation | first_turns(2) | verbose_v1 }}
      {{ history | colang | verbose_v1 }}
      # {{ instructions }}
      ${{ var_name }} =
    output_parser: "verbose_v1"

  - task: self_check_facts
    models:
      - hf_pipeline_llama2
    content: |-
      <<SYS>>
      You are given a task to identify if the hypothesis is grounded and entailed to the evidence.
      You will only use the contents of the evidence and not rely on external knowledge.
      <</SYS>>

      [INST]Answer with yes/no. "evidence": {{ evidence }} "hypothesis": {{ response }} "entails":[/INST]"""

# Initialize the rails config
config = RailsConfig.from_content(yaml_content=YAML_CONFIG)

# Create rails
rails = LLMRails(config)

# Interact with the model
res = await rails.generate_async(prompt="How are you?")
print(res)
