In [19]:
from importlib.metadata import version
from typing import Generic, Optional, TypeVar, Union

import dspy
from dspy import InputField, OutputField, Signature, TypedPredictor
from pydantic import BaseModel, Field, ValidationError

In [20]:
# Record the installed `dspy-ai` distribution version so the results below can be tied to it.
version("dspy-ai")

'2.4.13'

In [21]:
# NOTE(review): a distribution named `dspy` is installed alongside `dspy-ai` (see the recorded
# versions) — confirm which of the two actually provides the imported `dspy` package.
version("dspy")

'0.1.5'

In [22]:
# Record the installed pydantic version; the validation error further down links to its docs.
version("pydantic")

'2.8.2'

In [23]:
# Empty marker base class: the payload models in this notebook derive from it.
# NOTE(review): documented with `#` comments rather than a docstring because pydantic is
# expected to copy model docstrings into the generated JSON schema — confirm before adding one.
class Information(BaseModel):
    ...


# Type parameter used by the generic models; bound to `Information` subclasses.
T = TypeVar("T", bound=Information)

# Generic envelope: a typed payload plus a textual explanation. Both fields are required
# (annotation-only fields have no default, exactly like `= Field()` with no arguments).
class Answer(BaseModel, Generic[T]):
    # Payload; its concrete type is fixed when the model is parametrised, e.g. `Answer[Person]`.
    answer: T
    # Explanation that accompanies the payload, as a plain string.
    reasoning: str
    
# Payload type with two required fields: a string name and an integer age.
class Person(Information):
    name: str
    age: int
    
# Payload type with two required fields: a float amount and a string currency.
class Price(Information):
    amount: float
    currency: str
    

## Good case: answer field in the signature is mandatory

In [24]:
# Generic signature whose output field is a required `Answer[T]`.
# NOTE(review): deliberately left without a docstring — dspy presumably turns a Signature
# docstring into the instructions text, which would alter the recorded output; confirm.
class MySignature_1(Signature, Generic[T]):
    question: str = InputField()
    answer: Answer[T] = OutputField()

# Specialise the signature for `Person` and wrap it in a typed predictor.
module_1 = TypedPredictor(MySignature_1[Person])
# `_prepare_signature` is an underscore-prefixed (non-public) method. Per the recorded output it
# returns a signature whose `answer` field `desc` embeds a "JSON Schema: ..." hint.
signature_1 = module_1._prepare_signature()
# Bare expression so the notebook displays the prepared signature.
signature_1

StringSignature(question -> answer
    instructions='Given the fields `question`, produce the fields `answer`.'
    question = Field(annotation=str required=True json_schema_extra={'__dspy_field_type': 'input', 'prefix': 'Question:', 'desc': '${question}', 'format': <function TypedPredictor._prepare_signature.<locals>.<lambda> at 0x0000027BA4C149A0>})
    answer = Field(annotation=Answer[Person] required=True json_schema_extra={'__dspy_field_type': 'output', 'prefix': 'Answer:', 'desc': '${answer}. Respond with a single JSON object. JSON Schema: {"$defs": {"Person": {"properties": {"name": {"title": "Name", "type": "string"}, "age": {"title": "Age", "type": "integer"}}, "required": ["name", "age"], "title": "Person", "type": "object"}}, "properties": {"answer": {"$ref": "#/$defs/Person"}, "reasoning": {"title": "Reasoning", "type": "string"}}, "required": ["answer", "reasoning"], "title": "Answer[Person]", "type": "object"}', 'format': <function TypedPredictor._prepare_signature.<local

In [25]:
# Description text that TypedPredictor generated for the `answer` output field.
desc_1 = signature_1.fields.get('answer').json_schema_extra.get('desc')

# Locate the schema hint inside the description. `str.find` returns -1 when the marker is
# missing, and slicing with -1 would silently keep only the last character, so fail loudly
# instead of printing a misleading fragment.
schema_start_1 = desc_1.find("JSON Schema: ")
assert schema_start_1 != -1, "no 'JSON Schema: ' marker found in the generated field description"

# Keep everything from the marker to the end of the description and show it unquoted.
schema_1 = desc_1[schema_start_1:]
print(schema_1)

JSON Schema: {"$defs": {"Person": {"properties": {"name": {"title": "Name", "type": "string"}, "age": {"title": "Age", "type": "integer"}}, "required": ["name", "age"], "title": "Person", "type": "object"}}, "properties": {"answer": {"$ref": "#/$defs/Person"}, "reasoning": {"title": "Reasoning", "type": "string"}}, "required": ["answer", "reasoning"], "title": "Answer[Person]", "type": "object"}


### If the LM returns a JSON with this schema, it can be parsed to our Pydantic class "Answer"

In [26]:
# Hand-written sample reply shaped like the schema generated for the required `Answer[Person]`.
test_1 = '''{
  "answer": {
    "name": "Alice Johnson",
    "age": 28
  },
  "reasoning": "Alice is highly recommended due to her strong analytical skills and extensive experience in the industry."
}'''

# Fetch the parser stored in the field's extra metadata; if none is stored, fall back to the
# identity function so the raw text is returned unchanged.
field = signature_1.fields.get("answer")
parser = field.json_schema_extra.get("parser", lambda raw: raw)

# Bare expression so the notebook displays the parsed object.
parser(test_1)

Answer[Person](answer=Person(name='Alice Johnson', age=28), reasoning='Alice is highly recommended due to her strong analytical skills and extensive experience in the industry.')

## Bad case: answer field in the signature is Optional

In [27]:
# Generic signature whose output field is `Optional[Answer[T]]` — the case under investigation.
# NOTE(review): deliberately left without a docstring — dspy presumably turns a Signature
# docstring into the instructions text, which would alter the recorded output; confirm.
class MySignature_2(Signature, Generic[T]):
    question: str = InputField()
    answer: Optional[Answer[T]] = OutputField()

# Specialise the signature for `Person` and wrap it in a typed predictor.
module_2 = TypedPredictor(MySignature_2[Person])
# `_prepare_signature` is an underscore-prefixed (non-public) method. Per the recorded output the
# prepared `answer` field is annotated as `Output` and its `desc` embeds a "JSON Schema: ..." hint.
signature_2 = module_2._prepare_signature()
# Bare expression so the notebook displays the prepared signature.
signature_2

StringSignature(question -> answer
    instructions='Given the fields `question`, produce the fields `answer`.'
    question = Field(annotation=str required=True json_schema_extra={'__dspy_field_type': 'input', 'prefix': 'Question:', 'desc': '${question}', 'format': <function TypedPredictor._prepare_signature.<locals>.<lambda> at 0x0000027BA4C17D80>})
    answer = Field(annotation=Output required=True json_schema_extra={'__dspy_field_type': 'output', 'prefix': 'Answer:', 'desc': '${answer}. Respond with a single JSON object. JSON Schema: {"$defs": {"Answer_Person_": {"properties": {"answer": {"$ref": "#/$defs/Person"}, "reasoning": {"title": "Reasoning", "type": "string"}}, "required": ["answer", "reasoning"], "title": "Answer[Person]", "type": "object"}, "Person": {"properties": {"name": {"title": "Name", "type": "string"}, "age": {"title": "Age", "type": "integer"}}, "required": ["name", "age"], "title": "Person", "type": "object"}}, "properties": {"value": {"anyOf": [{"$ref": "#/$de

### In the JSON Schema, the top-level properties "answer" and "reasoning" are replaced by a single wrapper property "value" that holds the `Answer[Person]` object (or null).

In [28]:
# Description text that TypedPredictor generated for the `answer` output field.
desc_2 = signature_2.fields.get('answer').json_schema_extra.get('desc')

# Locate the schema hint inside the description. `str.find` returns -1 when the marker is
# missing, and slicing with -1 would silently keep only the last character, so fail loudly
# instead of printing a misleading fragment.
schema_start_2 = desc_2.find("JSON Schema: ")
assert schema_start_2 != -1, "no 'JSON Schema: ' marker found in the generated field description"

# Keep everything from the marker to the end of the description and show it unquoted.
schema_2 = desc_2[schema_start_2:]
print(schema_2)

JSON Schema: {"$defs": {"Answer_Person_": {"properties": {"answer": {"$ref": "#/$defs/Person"}, "reasoning": {"title": "Reasoning", "type": "string"}}, "required": ["answer", "reasoning"], "title": "Answer[Person]", "type": "object"}, "Person": {"properties": {"name": {"title": "Name", "type": "string"}, "age": {"title": "Age", "type": "integer"}}, "required": ["name", "age"], "title": "Person", "type": "object"}}, "properties": {"value": {"anyOf": [{"$ref": "#/$defs/Answer_Person_"}, {"type": "null"}]}}, "required": ["value"], "title": "Output", "type": "object"}


### If the LM returns a JSON with this schema, we get a `ValidationError` when parsing it to our Pydantic class "Answer"

In [29]:
# Hand-written sample reply shaped like the schema generated for `Optional[Answer[Person]]`:
# the `Answer` object is nested under the wrapper key "value".
test_2 = '''{
  "value": {
    "answer": {
      "name": "John Doe",
      "age": 30
    },
    "reasoning": "John is the best candidate due to his extensive experience."
  }
}'''


# Fetch the parser stored in the field's extra metadata; if none is stored, fall back to the
# identity function so the raw text is returned unchanged.
field = signature_2.fields.get('answer')
parser = field.json_schema_extra.get("parser", lambda x: x)

# The failure is the point of this cell. Catch it and print it (same "Type: message" form a
# traceback ends with) so that Restart Kernel -> Run All still runs to completion.
try:
    parsed_2 = parser(test_2)
except ValidationError as exc:
    print(f"{type(exc).__name__}: {exc}")
else:
    # Reached only if parsing succeeds, e.g. with a library version where the issue is gone.
    print(repr(parsed_2))

ValidationError: 1 validation error for Output
value
  Field required [type=missing, input_value={'answer': {'name': 'John... extensive experience.'}, input_type=dict]
    For further information visit https://errors.pydantic.dev/2.8/v/missing