# Recovery

Recovery: Manages failures and exceptions gracefully in agent workflows.
This component implements retry logic, fallback processes, and error handling to ensure system resilience.

This notebook also uses the following Python libraries:
* Tenacity: a library for retrying functions.
* PyBreaker: a library implementing the Circuit Breaker pattern.

## Recovery with Groq and Gemini

In [1]:
from groq import Groq
from dotenv import load_dotenv
import os
from pydantic import BaseModel, Field
from typing import Optional
import json
from groq import RateLimitError, APIError, APIConnectionError
import time
import random
from httpx import Response, Request
load_dotenv()

True

In [2]:
# Shared Groq client; the API key is read from the GROQ_API_KEY environment variable.
client = Groq(
    api_key=os.getenv("GROQ_API_KEY"),
)

# Retry policy used by process_client_request below.
MAX_RETRIES = 4  # upper bound on attempts per request
INITIAL_BACKOFF_SECONDS = 1  # delay before the first retry
MAX_BACKOFF_SECONDS = 60  # cap applied to the growing delay

# Schema of the structured output requested from the model.
# Documented with comments rather than a class docstring so that the JSON schema
# produced by UserInfo.model_json_schema() stays exactly as it was.
class UserInfo(BaseModel):
    name: str = Field(description="Name of the user")
    # Optional field; defaults to None.
    age: Optional[int] = Field(default=None, description= "Age of the user")
    email: str = Field(description="Email of the user")

def process_client_request(
    prompt: str,
    model: str = "meta-llama/llama-4-scout-17b-16e-instruct",
    max_tokens: int = 50,
    simulate_failures: int = 3,
) -> Optional[str]:
    """Extract user information from ``prompt`` via Groq, retrying API failures.

    Up to ``MAX_RETRIES`` attempts are made. After a failed attempt that can be
    retried, the function sleeps for the current backoff, then roughly doubles
    the backoff (plus up to one second of random jitter), capped at
    ``MAX_BACKOFF_SECONDS``. No sleep happens after the final attempt, since
    there is nothing left to wait for.

    Args:
        prompt: Text from which the user information should be extracted.
        model: Groq model identifier.
        max_tokens: Completion token limit passed to the API.
        simulate_failures: Number of leading attempts that raise an artificial
            ``RateLimitError`` instead of calling the API (demo aid). Pass 0 to
            disable the simulation.

    Returns:
        The message content of the first choice (JSON text following the
        ``UserInfo`` schema), or ``None`` when every attempt failed or an
        unexpected, non-retryable error occurred.
    """
    backoff_seconds = INITIAL_BACKOFF_SECONDS

    for attempt in range(1, MAX_RETRIES + 1):
        try:
            print(f"Attempt:{attempt}/{MAX_RETRIES} to process request...")
            # --- START: ARTIFICIAL RATE LIMIT SIMULATION ---
            if attempt <= simulate_failures:
                print(f"SIMULATING: RateLimitError for attempt {attempt}...")

                # RateLimitError needs an httpx.Response, which in turn needs a Request.
                dummy_request = Request(method="POST", url="http://api.groq.com/dummy-endpoint")
                dummy_response = Response(
                    status_code=429,
                    request=dummy_request,
                    headers={"Retry-After": "60"},  # Often present for 429 errors
                )
                dummy_body = json.dumps({"error": {"message": "You are being rate limited.", "type": "rate_limit_error"}})
                raise RateLimitError("Simulated Rate Limit Exceeded", response=dummy_response, body=dummy_body)
            # --- END: ARTIFICIAL RATE LIMIT SIMULATION ---
            messages = [
                {
                    "role": "system",
                    "content": "Extract information from the text."
                },
                {
                    "role": "user",
                    "content": prompt
                }
            ]
            response = client.chat.completions.create(
                model=model,
                messages=messages,
                response_format={
                    "type": "json_schema",
                    "json_schema": {
                        "name": "user_info",
                        "schema": UserInfo.model_json_schema()
                    }
                },
                temperature=0.7,
                max_tokens=max_tokens
            )
            return response.choices[0].message.content
        except RateLimitError as e:
            # "Error detected" -> "Retry possible?" (Yes, for RateLimitError)
            # The exception name is unbound when the handler exits, so keep a reference.
            failure_label, failure = "RateLimitError", e
        except (APIConnectionError, APIError) as e:
            # "Error detected" -> "Retry possible?" (Yes, treated as transient)
            # NOTE(review): APIError looks like the SDK's base API exception, so this
            # presumably also retries non-transient failures (e.g. bad credentials);
            # consider narrowing it.
            failure_label, failure = "API Error or Connection Error", e
        except Exception as e:
            # "Error detected" -> "Retry possible?" (No, for unknown errors).
            # Return immediately so the "after N attempts" message below is not
            # printed for a request that was never retried.
            print(f"An unexpected error occurred: {e}. This might be a non-retryable error.")
            return None

        if attempt == MAX_RETRIES:
            # Out of attempts: report the error but skip the pointless sleep.
            print(f"{failure_label} detected: {failure}. No attempts left.")
            break

        print(f"{failure_label} detected: {failure}. Retrying in {backoff_seconds:.2f} seconds...")
        time.sleep(backoff_seconds)
        backoff_seconds = min(2 * backoff_seconds + random.uniform(0, 1), MAX_BACKOFF_SECONDS)

    print(f"Failed to get a successful response after {MAX_RETRIES} attempts.")
    return None

def get_answer_with_fallback(prompt: str)-> str:
    """Return the model's answer for ``prompt``, or a canned apology on failure.

    Delegates to ``process_client_request``; when that returns ``None`` the
    fallback message is printed and returned, so the caller always receives a
    string.

    Args:
        prompt: Text from which the user information should be extracted.

    Returns:
        The response from ``process_client_request``, or the fallback message.
    """
    response = process_client_request(prompt)
    if response is not None:
        print("--- Successful Response ---")
        return response

    # this is the "fallback" part
    print("--- Executing Fallback ---")
    fallback_message = "I am sorry, I couldn't process your request at this time. Please try again later or contact support."
    print(f"Fallback Message: {fallback_message}")
    # Hand the message back to the caller instead of implicitly returning None.
    return fallback_message
    
    
# Demo run of the manual retry loop; the prompt gives a name and an email but no age.
prompt1 = "Please extract information about the user from this text: Name: Jane Smith, Email: jane.s@web.org"
final_answer1 = get_answer_with_fallback(prompt1)
print(f"\nFinal Answer for Prompt 1:\n{final_answer1}")

Attempt:1/4 to process request...
SIMULATING: RateLimitError for attempt 1...
RateLimitError detected: Simulated Rate Limit Exceeded. Retrying in 1.00 seconds...
Attempt:2/4 to process request...
SIMULATING: RateLimitError for attempt 2...
RateLimitError detected: Simulated Rate Limit Exceeded. Retrying in 2.48 seconds...
Attempt:3/4 to process request...
SIMULATING: RateLimitError for attempt 3...
RateLimitError detected: Simulated Rate Limit Exceeded. Retrying in 5.15 seconds...
Attempt:4/4 to process request...
--- Successful Response ---

Final Answer for Prompt 1:
{
  "name": "Jane Smith",
  "email": "jane.s@web.org",
  "age": null
}


## Tenacity

In [3]:
from tenacity import (
    retry,
    stop_after_attempt,
    stop_after_delay,
    wait_exponential,
    retry_if_exception_type
)
# Stop conditions for the tenacity policy below (combined with `|`, so either one ends the retries).
MAX_ATTEMPTS = 5
MAX_DELAY_SECONDS = 60
# Bounds for the exponential wait. These rebind the names already defined for the
# manual retry loop earlier in the notebook, with the same values.
INITIAL_BACKOFF_SECONDS=1
MAX_BACKOFF_SECONDS=60
@retry(
    retry=retry_if_exception_type((RateLimitError, APIConnectionError, APIError)),
    wait=wait_exponential(
        multiplier=INITIAL_BACKOFF_SECONDS,
        min=INITIAL_BACKOFF_SECONDS,
        max=MAX_BACKOFF_SECONDS,
    ),
    stop=stop_after_attempt(MAX_ATTEMPTS) | stop_after_delay(MAX_DELAY_SECONDS),
)
def process_client_request_with_tenacity(prompt:str, model: str = "meta-llama/llama-4-scout-17b-16e-instruct", max_tokens = 50):
    """Extract user information from ``prompt`` via Groq; retries are driven by tenacity.

    The first three attempts of a call raise an artificial ``RateLimitError`` so
    the retry policy can be observed; the attempt number is read from the
    statistics tenacity attaches to the decorated function.

    Args:
        prompt: Text from which the user information should be extracted.
        model: Groq model identifier.
        max_tokens: Completion token limit passed to the API.

    Returns:
        The message content of the first choice returned by the API.
    """
    # A fresh client is built on every attempt, before anything is printed.
    groq_client = Groq(api_key=os.getenv("GROQ_API_KEY"))

    print("Attempt to process request...")
    retry_stats = process_client_request_with_tenacity.statistics
    print(retry_stats)
    attempt_no = retry_stats.get('attempt_number', 1)

    if attempt_no <= 3:  # Driven by tenacity's per-call statistics
        print(f"SIMULATING: RateLimitError for attempt {attempt_no}...")
        simulated_429 = Response(
            status_code=429,
            request=Request(method="POST", url="http://api.groq.com/dummy-endpoint"),
            headers={"Retry-After": "60"},
        )
        error_payload = json.dumps({"error": {"message": "You are being rate limited.", "type": "rate_limit_error"}})
        raise RateLimitError("Simulated Rate Limit Exceeded", response=simulated_429, body=error_payload)

    print(f"SIMULATION BYPASSED: Attempt {attempt_no}, proceeding with actual Groq call.")

    chat_messages = [
        {"role": "system", "content": "Extract information from the text."},
        {"role": "user", "content": prompt},
    ]
    # Ask the API for JSON that follows the UserInfo schema.
    output_format = {
        "type": "json_schema",
        "json_schema": {
            "name": "user_info",
            "schema": UserInfo.model_json_schema(),
        },
    }
    completion = groq_client.chat.completions.create(
        model=model,
        messages=chat_messages,
        response_format=output_format,
        temperature=0.7,
        max_tokens=max_tokens,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content

def get_answer_with_fallback_tenacity(prompt: str) -> str:
    """Return the tenacity-backed answer for ``prompt``, or a canned apology.

    Any exception escaping the decorated request function (including the one
    raised once tenacity stops retrying) switches to the fallback message.

    Args:
        prompt: Text from which the user information should be extracted.

    Returns:
        The model response on success, otherwise the fallback message.
    """
    try:
        # Retries happen inside the decorated call.
        answer = process_client_request_with_tenacity(prompt)
        print("--- Successful Response ---")
    except Exception as err:
        # Reached when the retries are exhausted or a non-retried error escapes.
        print(f"--- Executing Fallback due to error after retries: {err} ---")
        answer = "I am sorry, I couldn't process your request at this time. Please try again later or contact support."
        print(f"Fallback Message: {answer}")
    return answer


In [4]:
# Demo run of the tenacity-based retry; same input text as the first demo.
prompt2 = "Please extract information about the user from this text: Name: Jane Smith, Email: jane.s@web.org"
final_answer2 = get_answer_with_fallback_tenacity(prompt2)
# The label now names prompt 2 (it was copy-pasted as "Prompt 1").
print(f"\nFinal Answer for Prompt 2:\n{final_answer2}")

Attempt to process request...
{'start_time': 89291.25, 'attempt_number': 1, 'idle_for': 0}
SIMULATING: RateLimitError for attempt 1...
Attempt to process request...
{'start_time': 89291.25, 'attempt_number': 2, 'idle_for': 1.0, 'delay_since_first_attempt': 0.6399999999994179}
SIMULATING: RateLimitError for attempt 2...
Attempt to process request...
{'start_time': 89291.25, 'attempt_number': 3, 'idle_for': 3.0, 'delay_since_first_attempt': 2.375}
SIMULATING: RateLimitError for attempt 3...
Attempt to process request...
{'start_time': 89291.25, 'attempt_number': 4, 'idle_for': 7.0, 'delay_since_first_attempt': 5.0460000000020955}
SIMULATION BYPASSED: Attempt 4, proceeding with actual Groq call.
--- Successful Response ---

Final Answer for Prompt 1:
{
  "name": "Jane Smith",
  "email": "jane.s@web.org",
  "age": null
}


## Recovery with LangChain: RunnableRetry

In [9]:
from langchain.chat_models import init_chat_model
from langchain_core.runnables import RunnableLambda
from langchain_core.runnables.retry import RunnableRetry



# Module-level chat model for the Groq provider.
# NOTE(review): process_client_request_with_langchain below builds its own local `llm`,
# so this instance appears to be unused by the code in this section.
llm = init_chat_model(model="meta-llama/llama-4-scout-17b-16e-instruct", model_provider="groq")


def process_client_request_with_langchain(prompt: str, model: str = "meta-llama/llama-4-scout-17b-16e-instruct", max_tokens=50):
    """Extract user information from ``prompt`` with a LangChain chat model.

    The function performs a single attempt; retries are expected to come from
    the caller (e.g. a ``RunnableRetry`` wrapper). To demonstrate that, the
    first three attempts of a request raise an artificial ``RateLimitError``.
    Attempts are counted in a function attribute because nothing here exposes
    the caller's retry state; the counter is reset after a successful call so
    the next request starts counting from one again.

    Args:
        prompt: Text from which the user information should be extracted.
        model: Groq model identifier.
        max_tokens: Completion token limit forwarded to the chat model.

    Returns:
        The structured-output result of the model call, built from ``UserInfo``.
    """
    # Forward max_tokens so the parameter actually takes effect.
    llm = init_chat_model(model=model, model_provider="groq", max_tokens=max_tokens)

    print("Attempt to process request...")

    # Manual attempt counter stored on the function object itself.
    this_function = process_client_request_with_langchain
    current_attempt = getattr(this_function, "attempt_counter", 0) + 1
    this_function.attempt_counter = current_attempt

    print(f"Current attempt: {current_attempt}")

    if current_attempt <= 3:  # Simulate failures for first 3 attempts
        print(f"SIMULATING: RateLimitError for attempt {current_attempt}...")
        dummy_request = Request(method="POST", url="http://api.groq.com/dummy-endpoint")
        dummy_response = Response(
            status_code=429,
            request=dummy_request,
            headers={"Retry-After": "60"}
        )
        dummy_body = json.dumps({"error": {"message": "You are being rate limited.", "type": "rate_limit_error"}})
        raise RateLimitError("Simulated Rate Limit Exceeded", response=dummy_response, body=dummy_body)

    print(f"SIMULATION BYPASSED: Attempt {current_attempt}, proceeding with actual Groq call.")

    messages = [
        {
            "role": "system",
            "content": "Extract information from the text."
        },
        {
            "role": "user",
            "content": prompt
        }
    ]
    llm_with_structured_output = llm.with_structured_output(UserInfo)
    response = llm_with_structured_output.invoke(messages)

    # Success: reset the counter so a later request is simulated from attempt 1
    # instead of silently skipping the simulation for the rest of the process.
    this_function.attempt_counter = 0
    return response

def get_answer_with_fallback_langchain(prompt: str) -> str:
    """Run the LangChain request under ``RunnableRetry``, with a canned fallback.

    The request function is wrapped in a ``RunnableLambda`` and retried up to
    four times on Groq API errors, using exponential backoff with jitter.

    Args:
        prompt: Text from which the user information should be extracted.

    Returns:
        The result of the wrapped request on success (the structured-output
        object produced by ``process_client_request_with_langchain``, not a
        plain string), otherwise the fallback message.
    """
    runnable = RunnableLambda(process_client_request_with_langchain)

    runnable_with_retries = RunnableRetry(
        bound=runnable,
        retry_exception_types=(RateLimitError, APIConnectionError, APIError),
        max_attempt_number=4,
        wait_exponential_jitter=True,
        exponential_jitter_params={"initial": 2},
    )
    try:
        # Invoke the retry-wrapped runnable
        response = runnable_with_retries.invoke(prompt)
        print("--- Successful Response ---")
        return response
    except Exception as e:
        # Catch-all for the case where RunnableRetry gives up (or a non-retried error escapes)
        print(f"--- Executing Fallback due to error after retries: {e} ---")
        fallback_message = "I am sorry, I couldn't process your request at this time. Please try again later or contact support."
        print(f"Fallback Message: {fallback_message}")
        # Return the message so the caller does not receive None on failure.
        return fallback_message


In [10]:
# Demo run of the LangChain RunnableRetry variant; same input text as the earlier demos.
prompt3 = "Please extract information about the user from this text: Name: Jane Smith, Email: jane.s@web.org"
final_answer3 = get_answer_with_fallback_langchain(prompt3)
# The label now names prompt 3 (it was copy-pasted as "Prompt 1").
print(f"\nFinal Answer for Prompt 3:\n{final_answer3}")

Attempt to process request...
Current attempt: 1
SIMULATING: RateLimitError for attempt 1...
Attempt to process request...
Current attempt: 2
SIMULATING: RateLimitError for attempt 2...
Attempt to process request...
Current attempt: 3
SIMULATING: RateLimitError for attempt 3...
Attempt to process request...
Current attempt: 4
SIMULATION BYPASSED: Attempt 4, proceeding with actual Groq call.
--- Successful Response ---

Final Answer for Prompt 1:
name='Jane Smith' age=None email='jane.s@web.org'
