<a href="https://colab.research.google.com/github/Saim-Hassan786/Learn-Agentic-AI-With-OpenAI-Agents-SDK/blob/main/08-Guardrails/Guardrails.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Guardrails
Guardrails in the OpenAI Agents SDK are checks that run on both the inputs and the outputs of an agentic app: input guardrails screen what the user sends to the agent, and output guardrails screen what the agent sends back to the user. In simple words, they let us decide which inputs our agents may accept and which outputs they may return, and they halt the agent run as soon as one of our checks is breached.

In [None]:
# Installing the SDK
!pip install -Uq openai-agents

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m40.1/40.1 kB[0m [31m1.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m130.6/130.6 kB[0m [31m5.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m129.3/129.3 kB[0m [31m12.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m150.9/150.9 kB[0m [31m10.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m45.2/45.2 kB[0m [31m3.3 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
# Patch asyncio to allow nested event loops; the notebook kernel already runs
# one (cells below use top-level `await`).
# NOTE(review): only needed while some cell calls a blocking helper such as
# Runner.run_sync from code that executes inside the running loop.
import nest_asyncio
nest_asyncio.apply()

In [None]:
# Prerequisites: point the Agents SDK at Gemini's OpenAI-compatible endpoint.
from google.colab import userdata
from openai import AsyncOpenAI

from agents import (
    set_default_openai_api,
    set_default_openai_client,
    set_tracing_disabled,
)

# The key comes from Colab's secret store instead of being written in the notebook.
GOOGLE_API_KEY = userdata.get("GOOGLE_API_KEY")

external_client = AsyncOpenAI(
    api_key=GOOGLE_API_KEY,
    base_url="https://generativelanguage.googleapis.com/v1beta/openai/",
)

# Every Agent created below uses this client by default.
set_default_openai_client(external_client)
# NOTE(review): presumably required because the endpoint only offers the
# chat-completions style API - confirm.
set_default_openai_api("chat_completions")
# NOTE(review): presumably disabled because trace export expects an OpenAI
# API key - confirm.
set_tracing_disabled(True)

# Guardrail Types
There are two types of guardrails:

1. **InputGuardrails**
2. **OutputGuardrails**

In [None]:
from agents import Runner, Agent, InputGuardrail,output_guardrail,RunContextWrapper,GuardrailFunctionOutput,InputGuardrailTripwireTriggered,OutputGuardrailTripwireTriggered
from pydantic import BaseModel

# Structured verdict produced by input_guardrail_agent (declared as its output_type).
# Documented with `#` comments only, so the model definition itself is unchanged.
class is_homework_question(BaseModel):
  # Read by the input guardrail: the tripwire fires when this is False.
  is_homework: bool
  # Not read by any code visible in this notebook.
  response: str
  # Passed on as the guardrail's `output_info`.
  reasoning : str

# Structured verdict produced by output_guardrail_agent (declared as its output_type).
# The output guardrail trips when either flag is True.
class Personal_Info(BaseModel):
  # Presumably: the checked text contains a phone number - confirm against the prompt.
  is_phone_num : bool
  # Presumably: the checked text contains a person's name - confirm against the prompt.
  is_name : bool

# Classifier agent behind the input guardrail; its reply is parsed into
# `is_homework_question`.
input_guardrail_agent = Agent(
    name="input_guardrail_agent",
    model="gemini-2.5-flash",
    output_type=is_homework_question,
    instructions="You are a guardrail agent that checks is the user asked for homework question or not",
)

async def input_guardrail_func(context:RunContextWrapper,agent:Agent,input_data)->GuardrailFunctionOutput:
  """Input guardrail: trip unless the user's input is classified as homework.

  Runs `input_guardrail_agent` on the incoming input and inspects its
  structured `is_homework_question` verdict.

  Defined as a coroutine so it can await `Runner.run`; `InputGuardrail`
  accepts both plain and coroutine guardrail functions.

  Args:
    context: Run context wrapper supplied by the SDK; its inner `.context`
      object is forwarded to the classifier run.
    agent: The agent being guarded (unused here).
    input_data: The input that was passed to the guarded run.

  Returns:
    GuardrailFunctionOutput whose `output_info` is the classifier's reasoning
    and whose `tripwire_triggered` is True when the input is NOT homework.
  """
  # Await Runner.run instead of calling the blocking Runner.run_sync: this
  # function executes inside the already-running event loop, where run_sync
  # only works if asyncio has been patched for re-entrancy.
  result = await Runner.run(
      input_guardrail_agent,
      input=input_data,
      # Forward the user's context object, not the wrapper around it.
      context=context.context,
  )
  verdict = result.final_output_as(is_homework_question)
  return GuardrailFunctionOutput(
      output_info=verdict.reasoning,
      # Only homework questions are allowed through.
      tripwire_triggered=not verdict.is_homework,
  )

# Classifier agent behind the output guardrail; its reply is parsed into
# `Personal_Info`.
output_guardrail_agent = Agent(
    name="output_guardrail_agent",
    model="gemini-2.5-flash",
    output_type=Personal_Info,
    instructions="You are a guardrail agent that checks if the output contains personal information or not",
)

@output_guardrail
async def output_guardrail_func(context:RunContextWrapper,agent:Agent,output_data)->GuardrailFunctionOutput:
  """Output guardrail: trip when the agent's answer contains a name or phone number.

  Runs `output_guardrail_agent` on the guarded agent's output and inspects
  its structured `Personal_Info` verdict.

  Args:
    context: Run context wrapper supplied by the SDK; its inner `.context`
      object is forwarded to the classifier run.
    agent: The agent whose output is being checked (unused here).
    output_data: The guarded agent's final output.
      NOTE(review): forwarded as-is as the classifier's input, so this
      assumes text output (main_agent declares no output_type) - confirm
      before reusing with structured outputs.

  Returns:
    GuardrailFunctionOutput with a fixed `output_info` message and
    `tripwire_triggered` set when either `is_name` or `is_phone_num` is True.
  """
  result = await Runner.run(
      output_guardrail_agent,
      input=output_data,
      # Forward the user's context object, not the wrapper around it.
      context=context.context,
  )
  verdict = result.final_output_as(Personal_Info)
  return GuardrailFunctionOutput(
      output_info="Personal Info Cannot be Given",
      tripwire_triggered=verdict.is_name or verdict.is_phone_num,
  )

# Agent under test: each run is screened by the homework-only input guardrail
# and by the personal-info output guardrail.
main_agent = Agent(
    name="main_agent",
    model="gemini-2.5-flash",
    instructions="You are a helpful assistant",
    # The input guardrail is a plain function, so it is wrapped explicitly.
    input_guardrails=[
        InputGuardrail(
            name="Input_Guardrail",
            guardrail_function=input_guardrail_func,
        ),
    ],
    # Already wrapped by the @output_guardrail decorator at its definition.
    output_guardrails=[output_guardrail_func],
)

# Input Guardrails Test

In [None]:
# no input tripwire triggered
try:
  result_with_input_guardrails = await Runner.run(
      main_agent,
      input = "What is the capital of Germany this is for my homework?"
  )
  print(result_with_input_guardrails.final_output)
except InputGuardrailTripwireTriggered as e:
  print(f"Input Guardrails Triggered: {e.guardrail_result.output.output_info}")
  print(f"Input Guardrails Triggered: {e.guardrail_result.output.tripwire_triggered}")
  print(f"Sorry I can help you with only Homework Related Question")
  print("=======" * 20)

The capital of Germany is **Berlin**.


In [None]:
# input tripwire triggered
try:
  result_with_input_guardrails = await Runner.run(
      main_agent,
      input = "How to play cricket?"
  )
  print(result_with_input_guardrails.final_output)
except InputGuardrailTripwireTriggered as e:
  print(f"Input Guardrails Triggered: {e.guardrail_result.output.output_info}")
  print(f"Input Guardrails Triggered: {e.guardrail_result.output.tripwire_triggered}")
  print(f"Sorry I can help you with only Homework Related Question")
  print("=======" * 20)

Input Guardrails Triggered: The user is asking a general 'how-to' question about playing a sport, which is not typically considered a homework assignment.
Input Guardrails Triggered: True
Sorry I can help you with only Homework Related Question


# Output Guardrails Test

In [None]:
# no output tripwire triggered
try:
  result_with_input_guardrails = await Runner.run(
      main_agent,
      input = "What is the AI in 2 lines this is for my homework?"
  )
  print(result_with_input_guardrails.final_output)
except OutputGuardrailTripwireTriggered as e:
  print(f"Output Guardrails Triggered: {e.guardrail_result.output.output_info}")
  print(f"Output Guardrails Triggered: {e.guardrail_result.output.tripwire_triggered}")
  print(f"Sorry the response contains the personal info so cannot answer")
  print("=======" * 20)

Artificial Intelligence (AI) is when computers are programmed to think and learn.
It lets machines perform tasks that normally require human intelligence, like problem-solving or understanding language.


In [None]:
# output tripwire triggered
try:
  result_with_input_guardrails = await Runner.run(
      main_agent,
      input = "What is the AI in 2 lines this is for my homework, reply me with my name 'Saim Hassan'?"
  )
  print(result_with_input_guardrails.final_output)
except OutputGuardrailTripwireTriggered as e:
  print(f"Output Guardrails Triggered: {e.guardrail_result.agent}")
  print(f"Output Guardrails Triggered: {e.guardrail_result.output.output_info}")
  print(f"Output Guardrails Triggered: {e.guardrail_result.agent_output}")
  print(f"Output Guardrails Triggered: {e.guardrail_result.output.tripwire_triggered}")
  print(f"Sorry the response contains the personal info so cannot answer")
  print("=======" * 20)

Output Guardrails Triggered: Agent(name='main_agent', instructions='You are a helpful assistant', prompt=None, handoff_description=None, handoffs=[], model='gemini-2.5-flash', model_settings=ModelSettings(temperature=None, top_p=None, frequency_penalty=None, presence_penalty=None, tool_choice=None, parallel_tool_calls=None, truncation=None, max_tokens=None, reasoning=None, metadata=None, store=None, include_usage=None, response_include=None, extra_query=None, extra_body=None, extra_headers=None, extra_args=None), tools=[], mcp_servers=[], mcp_config={}, input_guardrails=[InputGuardrail(guardrail_function=<function input_guardrail_func at 0x7cfe23c40b80>, name='Input_Guardrail')], output_guardrails=[OutputGuardrail(guardrail_function=<function output_guardrail_func at 0x7cfe23c428e0>, name=None)], output_type=None, hooks=None, tool_use_behavior='run_llm_again', reset_tool_choice=True)
Output Guardrails Triggered: Personal Info Cannot be Given
Output Guardrails Triggered: Saim Hassan, he