In [1]:
from langchain.prompts import PromptTemplate
from nemoguardrails import LLMRails, RailsConfig
from nemoguardrails.llm.providers import register_llm_provider
from nemoguardrails.llm.helpers import get_llm_instance_wrapper
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
from langchain_community.llms.huggingface_pipeline import HuggingFacePipeline

In [2]:
# Local checkout of the GPTQ-quantized Mistral-7B-Instruct weights.
model_id = "../models/TheBloke/Mistral-7B-Instruct-v0.2-GPTQ/gptq-4bit-32g-actorder_True/"

# Tokenizer and causal-LM weights are both read from the same directory.
tokenizer = AutoTokenizer.from_pretrained(
    pretrained_model_name_or_path=model_id,
)
model = AutoModelForCausalLM.from_pretrained(
    pretrained_model_name_or_path=model_id,
)

Using `disable_exllama` is deprecated and will be removed in version 4.37. Use `use_exllama` instead and specify the version with `exllama_config`.The value of `use_exllama` will be overwritten by `disable_exllama` passed in `GPTQConfig` or stored in your config file.


In [3]:
# Build a text-generation pipeline on device 0 around the loaded model and
# tokenizer, capped at 512 newly generated tokens per call.
pipe = pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer,
    device=0,
    max_new_tokens=512,
)

# Expose the pipeline through LangChain's LLM interface.
llm = HuggingFacePipeline(pipeline=pipe)

In [4]:
# Wrap the LangChain LLM instance in a provider class and register it with
# NeMo Guardrails under the engine name "hf_pipeline_mistral_7b".
HFPipeline = get_llm_instance_wrapper(
    llm_instance=llm,
    llm_type="hf_pipeline_mistral_7b",
)
register_llm_provider(
    "hf_pipeline_mistral_7b",
    HFPipeline,
)

In [5]:
# Read the guardrails configuration from the notebook's own directory.
config = RailsConfig.from_path(
    config_path="./",
)

In [6]:
# Instantiate the rails runtime from the loaded config; verbose mode logs
# every event and action to the cell output.
llm_rails = LLMRails(
    config=config,
    verbose=True,
)



Entered verbose mode.


Fetching 7 files:   0%|          | 0/7 [00:00<?, ?it/s]

config.json:   0%|          | 0.00/650 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

.gitattributes:   0%|          | 0.00/1.52k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/712k [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/1.43k [00:00<?, ?B/s]

model.onnx:   0%|          | 0.00/90.4M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/695 [00:00<?, ?B/s]

Fetching 7 files:   0%|          | 0/7 [00:00<?, ?it/s]

Fetching 7 files:   0%|          | 0/7 [00:00<?, ?it/s]

In [7]:
# Question used for the guarded generation below.
question = "What is electroencephalography?"

# Instruction-style template with a single {question} placeholder.
template = "<s>[INST] {question} [/INST]"
prompt = PromptTemplate.from_template(template)

In [10]:
# Generate the LLM output with the guardrails applied
output = await llm_rails.generate_async(prompt='{"question": question}')

[36mEvent[0m [38;5;32mUtteranceUserActionFinished[0m {'final_transcript': '{"question": question}'}[0m
[36mEvent[0m [38;5;32mStartInternalSystemAction[0m {'uid': 'ffed11ce-db48-461d-a3f2-45f3da3edb8b', 'event_created_at': '2024-02-18T21:57:09.878608+00:00', 'source_uid': 'NeMoGuardrails', 'action_name': 'create_event', 'action_params': {'event': {'_type': 'UserMessage', 'text': '$user_message'}}, 'action_result_key': None, 'action_uid': 'd8030227-98a2-4da7-bd8f-a462d721eb83', 'is_system_action': True}[0m
[36mExecuting action[0m create_event[0m
[36mEvent[0m [38;5;32mUserMessage[0m {'uid': 'bf51b689-2340-41c6-8e8a-448ce45f99d3', 'event_created_at': '2024-02-18T21:57:09.879045+00:00', 'source_uid': 'NeMoGuardrails', 'text': '{"question": question}'}[0m
[36mEvent[0m [38;5;32mStartInternalSystemAction[0m {'uid': 'f5196990-330b-4181-906b-dc89c39b1a82', 'event_created_at': '2024-02-18T21:57:09.879484+00:00', 'source_uid': 'NeMoGuardrails', 'action_name': 'generate_user_i

In [9]:
# Display the response returned by the guarded generation call.
output

"I'm sorry, an internal error has occurred."

In [None]:
# NOTE(review): this cell fails as written — its output is
# "AttributeError: 'LLMRails' object has no attribute 'add_rail'".
# Presumably the topical and safety rails have to be declared in the rails
# configuration loaded via RailsConfig.from_path("./") rather than added on
# the LLMRails instance — TODO confirm against the NeMo Guardrails docs and
# move these rules there.
# Add a topical guardrail to prevent the LLM from talking about politics
llm_rails.add_rail("topical", topics=["politics"], action="reject")

# Add a safety guardrail to filter out unwanted language
llm_rails.add_rail("safety", action="filter")


AttributeError: 'LLMRails' object has no attribute 'add_rail'

In [None]:
res = await rails.generate_async(prompt="hello")
print(res)

[36mEvent[0m [38;5;32mUtteranceUserActionFinished[0m {'final_transcript': 'hello'}[0m
[36mEvent[0m [38;5;32mStartInternalSystemAction[0m {'uid': 'cab4dc85-ff98-4714-a6b9-7e4fb196799a', 'event_created_at': '2024-02-18T17:55:01.434812+00:00', 'source_uid': 'NeMoGuardrails', 'action_name': 'create_event', 'action_params': {'event': {'_type': 'UserMessage', 'text': '$user_message'}}, 'action_result_key': None, 'action_uid': '3bac8469-4657-44aa-9d9b-b8797875a715', 'is_system_action': True}[0m
[36mExecuting action[0m create_event[0m
[36mEvent[0m [38;5;32mUserMessage[0m {'uid': 'b2241932-bfd8-4123-bed9-8369d5fa864b', 'event_created_at': '2024-02-18T17:55:01.435141+00:00', 'source_uid': 'NeMoGuardrails', 'text': 'hello'}[0m
[36mEvent[0m [38;5;32mStartInternalSystemAction[0m {'uid': '27f728bb-a5ec-4544-b011-533a6770338e', 'event_created_at': '2024-02-18T17:55:01.435599+00:00', 'source_uid': 'NeMoGuardrails', 'action_name': 'generate_user_intent', 'action_params': {}, 'acti

In [None]:
# Display the response to the "hello" smoke test.
res

"I'm sorry, an internal error has occurred."

In [4]:
# Environment check: show the CUDA version string reported by torch.
# NOTE(review): this import lives outside the import cell at the top of the
# notebook, and the cell's execution count (4) is out of sequence — consider
# moving the check next to the other imports so a fresh run stays linear.
import torch
torch.version.cuda

'12.1'