In [None]:
! pip install dspy langchain_openai

Collecting dspy
  Downloading dspy-2.6.27-py3-none-any.whl.metadata (7.0 kB)
Collecting langchain_openai
  Downloading langchain_openai-0.3.27-py3-none-any.whl.metadata (2.3 kB)
Collecting backoff>=2.2 (from dspy)
  Downloading backoff-2.2.1-py3-none-any.whl.metadata (14 kB)
Collecting ujson>=5.8.0 (from dspy)
  Downloading ujson-5.10.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.3 kB)
Collecting datasets>=2.14.6 (from dspy)
  Downloading datasets-3.6.0-py3-none-any.whl.metadata (19 kB)
Collecting optuna>=3.4.0 (from dspy)
  Downloading optuna-4.4.0-py3-none-any.whl.metadata (17 kB)
Collecting magicattr>=0.1.6 (from dspy)
  Downloading magicattr-0.1.6-py2.py3-none-any.whl.metadata (3.2 kB)
Collecting litellm>=1.60.3 (from dspy)
  Downloading litellm-1.73.6-py3-none-any.whl.metadata (39 kB)
Collecting diskcache>=5.6.0 (from dspy)
  Downloading diskcache-5.6.3-py3-none-any.whl.metadata (20 kB)
Collecting json-repair>=0.30.0 (from dspy)
  Downloading json_repair

In [None]:
import os
from google.colab import userdata
# Fetch the key from Colab's `userdata` store and expose it as an environment variable,
# so the secret never appears in the notebook source.
# NOTE(review): presumably read by the OpenAI client behind dspy.LM, since no key is passed explicitly later — confirm.
os.environ['OPENAI_API_KEY'] = userdata.get('OPENAI_API_KEY')

In [None]:
import dspy

In [None]:
from dspy import Signature,Predict,settings

In [None]:
import dspy
# Create the language-model client ("openai/" provider prefix + model name) and register it
# as the LM that DSPy modules in this notebook use.
lm = dspy.LM("openai/gpt-4o-mini")
dspy.configure(lm=lm)

In [None]:
# Smoke test: call the LM directly with a raw prompt; the result is a list of completion strings.
lm("Hi, how are you?")

["Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?"]

In [None]:
# Build a predictor from the inline signature string 'question->answer'
# (one input field, one output field) and ask a single question.
model_pred=Predict('question->answer')
model_output=model_pred(question="what is the capital of india?")

In [None]:
# Display the returned Prediction; the generated text is in its `answer` field.
model_output

Prediction(
    answer='The capital of India is New Delhi.'
)

In [None]:
# Print the most recent LM call (system message, user message and response) to see the prompt DSPy built.
lm.inspect_history(n=1)





[34m[2025-07-02T09:40:35.725495][0m

[31mSystem message:[0m

Your input fields are:
1. `question` (str):
Your output fields are:
1. `answer` (str):
All interactions will be structured in the following way, with the appropriate values filled in.

[[ ## question ## ]]
{question}

[[ ## answer ## ]]
{answer}

[[ ## completed ## ]]
In adhering to this structure, your objective is: 
        Given the fields `question`, produce the fields `answer`.


[31mUser message:[0m

[[ ## question ## ]]
what is the capital of india?

Respond with the corresponding output fields, starting with the field `[[ ## answer ## ]]`, and then ending with the marker for `[[ ## completed ## ]]`.


[31mResponse:[0m

[32m[[ ## answer ## ]]
The capital of India is New Delhi.

[[ ## completed ## ]][0m







In [None]:
class SpamDetect(dspy.Signature):
    """Detect if an email is spam."""
    # The docstring above doubles as the task instruction given to the LM, so edit it deliberately.
    email: str = dspy.InputField()  # raw email text to classify
    is_spam: bool = dspy.OutputField()  # typed output; comes back as a Python bool
    confidence: float = dspy.OutputField()  # NOTE(review): presumably meant as a 0-1 score, but nothing constrains the range

# Run the typed signature on one sample email and report both output fields.
spam_detector = dspy.Predict(SpamDetect)
response = spam_detector(email="Congratulations! You've won a free vacation. Click here to claim!")
print(f"Is Spam: {response.is_spam}, Confidence: {response.confidence:.2f}")


Is Spam: True, Confidence: 0.95


In [None]:
from typing import Literal

class Router(Signature):
    """Classify the given query is used to determine whether the answer should be retrieved from an unstructured or structured dataset"""
    # NOTE(review): the docstring above is sent verbatim to the LM as the task objective; its wording
    # is ungrammatical ("Classify the given query is used to ...") and may be worth rephrasing.

    query: str = dspy.InputField()
    # The Literal annotation restricts the output to exactly one of the two labels.
    database: Literal["Unstructured", "Structured"] = dspy.OutputField()

# Route one sample query; the prediction carries the chosen label in `.database`.
classify = dspy.Predict(Router)
classify(query="impact of isreal iran war")

Prediction(
    database='Unstructured'
)

In [None]:
# Print the most recent LM call to check how the Literal constraint is rendered in the prompt.
lm.inspect_history(n=1)





[34m[2025-07-02T09:59:29.659296][0m

[31mSystem message:[0m

Your input fields are:
1. `query` (str):
Your output fields are:
1. `database` (Literal['Unstructured', 'Structured']):
All interactions will be structured in the following way, with the appropriate values filled in.

[[ ## query ## ]]
{query}

[[ ## database ## ]]
{database}        # note: the value you produce must exactly match (no extra characters) one of: Unstructured; Structured

[[ ## completed ## ]]
In adhering to this structure, your objective is: 
        Classify the given query is used to determine whether the answer should be retrieved from an unstructured or structured dataset


[31mUser message:[0m

[[ ## query ## ]]
impact of isreal iran war

Respond with the corresponding output fields, starting with the field `[[ ## database ## ]]` (must be formatted as a valid Python Literal['Unstructured', 'Structured']), and then ending with the marker for `[[ ## completed ## ]]`.


[31mResponse:[0m

[32m[[ #

In [None]:
class SQL(Signature):
    """Generate SQL code based on a given query and table schema."""
    # Text-to-SQL signature; the docstring above doubles as the instruction given to the LM.
    query: str = dspy.InputField()  # natural-language question
    table_schema: str = dspy.InputField()  # schema description, e.g. "employee(id,name,salary)"
    sql_query: str = dspy.OutputField()  # SQL text produced by the LM

In [None]:
# Run the SQL signature once and display only the generated SQL string.
sql=dspy.Predict(SQL)
sql(query="what is the name of second highest salary employee",table_schema="employee(id,name,salary)").sql_query

'SELECT name \nFROM employee \nWHERE salary = (SELECT DISTINCT salary \n                FROM employee \n                ORDER BY salary DESC \n                LIMIT 1 OFFSET 1);'

In [None]:
# Print the most recent LM call to see the prompt generated for the two-input SQL signature.
lm.inspect_history(n=1)





[34m[2025-07-02T10:32:36.608852][0m

[31mSystem message:[0m

Your input fields are:
1. `query` (str): 
2. `table_schema` (str):
Your output fields are:
1. `sql_query` (str):
All interactions will be structured in the following way, with the appropriate values filled in.

[[ ## query ## ]]
{query}

[[ ## table_schema ## ]]
{table_schema}

[[ ## sql_query ## ]]
{sql_query}

[[ ## completed ## ]]
In adhering to this structure, your objective is: 
        Generate SQL code based on a given query and table schema.


[31mUser message:[0m

[[ ## query ## ]]
what is the name of second highest salary employee

[[ ## table_schema ## ]]
employee(id,name,salary)

Respond with the corresponding output fields, starting with the field `[[ ## sql_query ## ]]`, and then ending with the marker for `[[ ## completed ## ]]`.


[31mResponse:[0m

[32m[[ ## sql_query ## ]]
SELECT name 
FROM employee 
WHERE salary = (SELECT DISTINCT salary 
                FROM employee 
                ORDER BY s

In [None]:
import dspy

class SQL(dspy.Signature):
    """Generate SQL code based on a given query and table schema."""
    # NOTE: the docstring above is sent to the LM as the task instruction, so it is part of the prompt.
    query: str = dspy.InputField()  # natural-language question
    table_schema: str = dspy.InputField()  # schema description, e.g. "employee(id, name, salary)"
    sql_query: str = dspy.OutputField()  # SQL text produced by the LM
class SQLGenerator(dspy.Module):
    """Module wrapper that runs a single `dspy.Predict` step over the `SQL` signature."""
    def __init__(self):
        super().__init__()
        # Keep the predictor as an attribute of the module.
        self.predict = dspy.Predict(SQL)

    def forward(self, query, table_schema):
        """Return the prediction for `query` against `table_schema`; the SQL is in `.sql_query`."""
        return self.predict(query=query, table_schema=table_schema)

In [None]:
  # Or use HFModel for local
# Step 1: Register `lm` (created earlier in the notebook) as the LM used by DSPy modules
dspy.settings.configure(lm=lm)

# Step 2: Initialize your SQL Generator
sql_generator = SQLGenerator()

# Step 3: Run a query
# Both keyword arguments are required by SQLGenerator.forward().
output = sql_generator(
    query="what is the name of second highest salary employee",
    table_schema="employee(id, name, salary)"
)

# Step 4: Print SQL Query
print("📄 Generated SQL:", output.sql_query)

📄 Generated SQL: SELECT name 
FROM employee 
ORDER BY salary DESC 
LIMIT 1 OFFSET 1;


In [None]:
# Every example targets the same single-table schema, so define it once and reuse it.
table_schema="employee(id,name,salary)"

# Labelled (question, schema) -> SQL examples for the optimizer.
# Both `query` and `table_schema` are marked as inputs on EVERY example: SQLGenerator.forward()
# requires both, and any key not listed in with_inputs() is treated as a label rather than
# passed to the program (the "average salary" example previously listed only "query").
trainset = [
    dspy.Example(
        query="What are the names of all employees?",
        table_schema=table_schema,
        sql_query="SELECT name FROM employee;",
    ).with_inputs("query", "table_schema"),
    dspy.Example(
        query="What is the salary of the employee with ID 101?",
        table_schema=table_schema,
        sql_query="SELECT salary FROM employee WHERE id = 101;",
    ).with_inputs("query", "table_schema"),
    dspy.Example(
        query="List all employees with a salary greater than 50,000.",
        table_schema=table_schema,
        sql_query="SELECT * FROM employee WHERE salary > 50000;",
    ).with_inputs("query", "table_schema"),
    dspy.Example(
        query=" How many employees are in the table?",
        table_schema=table_schema,
        sql_query="SELECT COUNT(*) FROM employee;",
    ).with_inputs("query", "table_schema"),
    dspy.Example(
        query=" What is the average salary of all employees?",
        table_schema=table_schema,
        sql_query="SELECT AVG(salary) AS average_salary FROM employee;",
    ).with_inputs("query", "table_schema"),
    dspy.Example(
        query="what is the name of second highest salary employee",
        table_schema=table_schema,
        sql_query="SELECT name FROM employee WHERE salary = (SELECT DISTINCT salary FROM employee ORDER BY salary DESC LIMIT 1 OFFSET 1)",
    ).with_inputs("query", "table_schema"),
]

# 4. Configure DSPy LM
lm = dspy.LM("openai/gpt-4o-mini") # Or use dspy.HFModel
dspy.settings.configure(lm=lm)

# 5. Compile using Teleprompter
from dspy.teleprompt import BootstrapFewShot


def sql_exact_match(example, pred, trace=None):
    """Return True when the predicted SQL equals the gold SQL after light normalisation.

    `dspy.evaluate.answer_exact_match` cannot be used with this signature: it reads
    `example.answer`, while the label/output field here is `sql_query`, which raises
    "'Example' object has no attribute 'answer'".

    Args:
        example: gold example exposing `.sql_query`.
        pred: program output exposing `.sql_query`.
        trace: accepted because the optimizer passes it to metrics; not used here.

    Returns:
        bool: whether the two statements match once runs of whitespace are collapsed
        and a trailing semicolon is dropped.
    """
    def _canon(sql):
        # Collapse newlines/indentation the LM tends to add and ignore a trailing ';'.
        return " ".join(str(sql).split()).rstrip(";").rstrip()

    return _canon(example.sql_query) == _canon(pred.sql_query)


# Keep the name `metric` bound, as it was when it came from the import it replaces.
metric = sql_exact_match

optimizer = BootstrapFewShot(
    metric=metric,
    max_bootstrapped_demos=5,
    max_labeled_demos=5,
    max_rounds=10,
)


_optimized = optimizer.compile(student=SQLGenerator(),trainset=trainset)



  0%|          | 0/6 [00:00<?, ?it/s]2025/07/02 14:59:13 ERROR dspy.teleprompt.bootstrap: Failed to run or to evaluate example Example({'query': 'What are the names of all employees?', 'table_schema': 'employee(id,name,salary)', 'sql_query': 'SELECT name FROM employee;'}) (input_keys={'query', 'table_schema'}) with <function answer_exact_match at 0x7c10fb77d800> due to 'Example' object has no attribute 'answer'.
2025/07/02 14:59:13 ERROR dspy.teleprompt.bootstrap: Failed to run or to evaluate example Example({'query': 'What are the names of all employees?', 'table_schema': 'employee(id,name,salary)', 'sql_query': 'SELECT name FROM employee;'}) (input_keys={'query', 'table_schema'}) with <function answer_exact_match at 0x7c10fb77d800> due to 'Example' object has no attribute 'answer'.
2025/07/02 14:59:13 ERROR dspy.teleprompt.bootstrap: Failed to run or to evaluate example Example({'query': 'What are the names of all employees?', 'table_schema': 'employee(id,name,salary)', 'sql_query':

AttributeError: 'Example' object has no attribute 'answer'

In [None]:
import dspy

class SQL(dspy.Signature):
    """Generate SQL code based on a given query and table schema."""
    # Text-to-SQL signature whose output field is named `answer`: the `answer_exact_match`
    # metric used below reads `.answer`, and failed with "'Example' object has no attribute
    # 'answer'" when the field was called `sql_query`.
    query: str = dspy.InputField()  # natural-language question
    table_schema: str = dspy.InputField()  # schema description, e.g. "employee(id,name,salary)"
    answer: str = dspy.OutputField()  # SQL text produced by the LM
class SQLGenerator(dspy.Module):
    """Module wrapper that runs a single `dspy.Predict` step over the `SQL` signature."""
    def __init__(self):
        super().__init__()
        # Keep the predictor as an attribute of the module.
        self.predict = dspy.Predict(SQL)

    def forward(self, query, table_schema):
        """Return the prediction for `query` against `table_schema`; the SQL is in `.answer`."""
        return self.predict(query=query, table_schema=table_schema)

# Single-table schema shared by every training example.
table_schema="employee(id,name,salary)"

# Build one Example per (question, gold SQL) pair; question text is kept exactly as authored,
# including leading spaces. `query` and `table_schema` are inputs, `answer` is the label.
trainset = [
    dspy.Example(query=question, table_schema=table_schema, answer=gold_sql).with_inputs("query","table_schema")
    for question, gold_sql in (
        ("What are the names of all employees?", "SELECT name FROM employee;"),
        ("What is the salary of the employee with ID 101?", "SELECT salary FROM employee WHERE id = 101;"),
        ("List all employees with a salary greater than 50,000.", "SELECT * FROM employee WHERE salary > 50000;"),
        (" How many employees are in the table?", "SELECT COUNT(*) FROM employee;"),
        (" What is the average salary of all employees?", "SELECT AVG(salary) AS average_salary FROM employee;"),
        ("what is the name of second highest salary employee", "SELECT name FROM employee WHERE salary = (SELECT DISTINCT salary FROM employee ORDER BY salary DESC LIMIT 1 OFFSET 1)"),
    )
]

# 4. Configure DSPy LM
lm = dspy.LM("openai/gpt-4o-mini") # Or use dspy.HFModel
dspy.settings.configure(lm=lm)

# 5. Compile using Teleprompter
# The metric must be a callable, and `compile` must be called on an optimizer instance.
# `answer_exact_match` works here because the label/output field is named `answer`.


from dspy.evaluate import answer_exact_match as metric
from dspy.teleprompt import BootstrapFewShot

# Demo limits are set to 6 to match the six examples in `trainset`.
optimizer = BootstrapFewShot(
    metric=metric,
    max_bootstrapped_demos=6,
    max_labeled_demos=6,
    max_rounds=10,
)


# NOTE(review): the compiled program is stored in `_optimized`, which no later visible cell uses.
_optimized = optimizer.compile(student=SQLGenerator(),trainset=trainset)

100%|██████████| 6/6 [00:17<00:00,  2.99s/it]

Bootstrapped 3 full traces after 5 examples for up to 10 rounds, amounting to 33 attempts.





In [None]:
# Print the two most recent LM calls to see the few-shot demos (user/assistant turns) used during bootstrapping.
lm.inspect_history(n=2)





[34m[2025-07-02T15:02:03.566636][0m

[31mSystem message:[0m

Your input fields are:
1. `query` (str): 
2. `table_schema` (str):
Your output fields are:
1. `answer` (str):
All interactions will be structured in the following way, with the appropriate values filled in.

[[ ## query ## ]]
{query}

[[ ## table_schema ## ]]
{table_schema}

[[ ## answer ## ]]
{answer}

[[ ## completed ## ]]
In adhering to this structure, your objective is: 
        Generate SQL code based on a given query and table schema.


[31mUser message:[0m

[[ ## query ## ]]
 How many employees are in the table?

[[ ## table_schema ## ]]
employee(id,name,salary)


[31mAssistant message:[0m

[[ ## answer ## ]]
SELECT COUNT(*) FROM employee;


[31mUser message:[0m

[[ ## query ## ]]
What are the names of all employees?

[[ ## table_schema ## ]]
employee(id,name,salary)


[31mAssistant message:[0m

[[ ## answer ## ]]
SELECT name FROM employee;


[31mUser message:[0m

[[ ## query ## ]]
What is the salary 

In [None]:
import dspy
from dspy.teleprompt import BootstrapFewShot
class SQL(dspy.Signature):
    """Generate SQL code based on a given query and table schema.table_schema='employee(id,name,salary)'"""
    # The docstring is the instruction sent to the LM. This variant bakes the table schema into
    # the instruction instead of exposing a `table_schema` input field.
    query: str = dspy.InputField()
    sql_query: str = dspy.OutputField()


class SQLGenerator(dspy.Module):
    """Module wrapper that runs a single `dspy.Predict` step over the schema-in-prompt `SQL` signature."""

    def __init__(self):
        super().__init__()
        self.predict = dspy.Predict(SQL)

    def forward(self, query, table_schema=None):
        """Generate SQL for `query`.

        Args:
            query: natural-language question.
            table_schema: accepted for compatibility with callers that pass a schema, but
                ignored, because the schema is hard-coded in the signature instruction.
                It is optional because the training examples only provide `query`, and a
                required parameter would make every optimizer call fail.

        Returns:
            The prediction from `self.predict`; the SQL text is in `.sql_query`.
        """
        return self.predict(query=query)


def sql_exact_match(example, pred, trace=None):
    """Return True when the predicted SQL equals the gold SQL after light normalisation.

    Args:
        example: gold example exposing `.sql_query`.
        pred: program output exposing `.sql_query`.
        trace: accepted because the optimizer passes it to metrics; not used here.

    Returns:
        bool: whether the two statements match once runs of whitespace are collapsed
        and a trailing semicolon is dropped.
    """
    def _canon(sql):
        # Collapse newlines/indentation the LM tends to add and ignore a trailing ';'.
        return " ".join(str(sql).split()).rstrip(";").rstrip()

    return _canon(example.sql_query) == _canon(pred.sql_query)


# 2. Training Examples
trainset = [
    dspy.Example(
        query="What are the names of all employees?",
        sql_query="SELECT name FROM employee;"
    ).with_inputs("query"),

    dspy.Example(
        query="What is the salary of the employee with ID 101?",
        sql_query="SELECT salary FROM employee WHERE id = 101;"
    ).with_inputs("query"),

    dspy.Example(
        query="List all employees with a salary greater than 50,000.",
        sql_query="SELECT * FROM employee WHERE salary > 50000;"
    ).with_inputs("query"),

    dspy.Example(
        query="How many employees are in the table?",
        sql_query="SELECT COUNT(*) FROM employee;"
    ).with_inputs("query"),

    dspy.Example(
        query="What is the average salary of all employees?",
        sql_query="SELECT AVG(salary) AS average_salary FROM employee;"
    ).with_inputs("query"),

    dspy.Example(
        query="What is the name of second highest salary employee?",
        sql_query="SELECT name FROM employee WHERE salary = (SELECT DISTINCT salary FROM employee ORDER BY salary DESC LIMIT 1 OFFSET 1);"
    ).with_inputs("query")
]

# 3. Configure LM
# The LM configured earlier in the notebook via dspy.configure(lm=lm) is reused here.

# 4. Use Teleprompter to optimize the prompt
# An optimizer has to be instantiated with a callable metric before compiling. Calling
# `.compile` on the class itself (e.g. `dspy.BetterTogether.compile(...)`) raises
# "TypeError: ... compile() missing 1 required positional argument: 'self'".
teleprompter = BootstrapFewShot(metric=sql_exact_match)
optimized_sql_predictor = teleprompter.compile(student=SQLGenerator(), trainset=trainset)

TypeError: BetterTogether.compile() missing 1 required positional argument: 'self'

In [None]:
# 5. Test the optimized predictor
# Requires `optimized_sql_predictor` to have been produced by the optimizer's compile step beforehand.
test_query = "What is the name of second highest salary employee?"
test_schema = "employee(id,name,salary)"

# NOTE(review): `table_schema` is passed here, but the SQLGenerator.forward() defined just above
# only forwards `query` to the predictor; the schema used is the one written in the signature docstring.
result = optimized_sql_predictor(query=test_query, table_schema=test_schema)
print("📄 Generated SQL:\n", result.sql_query)


In [None]:
# NOTE(review): dspy.ProgramOfThought shells out to a `deno` executable. After this pip install the
# ProgramOfThought cell still failed with "[Errno 13] Permission denied: 'deno'", so the binary may
# need to be installed another way (or made executable) — TODO confirm.
! pip install deno

Collecting deno
  Downloading deno-0.0.2-py3-none-manylinux_2_17_x86_64.whl.metadata (696 bytes)
Downloading deno-0.0.2-py3-none-manylinux_2_17_x86_64.whl (42.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m42.2/42.2 MB[0m [31m15.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: deno
Successfully installed deno-0.0.2


In [None]:
from dspy import ProgramOfThought

# ProgramOfThought asks the LM to write code and executes it, which requires a working `deno`
# executable on PATH (the recorded run failed with "Permission denied: 'deno'").
sql_code=ProgramOfThought(SQL)

# The module must be called with the signature's input fields; calling it with no arguments
# gives the LM nothing to work on.
# `table_schema` is supplied too so the call also covers the `SQL` variants that declare it.
# NOTE(review): confirm ProgramOfThought ignores keyword inputs the active signature does not declare.
result=sql_code(
    query="what is the name of second highest salary employee",
    table_schema="employee(id,name,salary)",
)

2025/07/02 10:29:56 ERROR dspy.predict.program_of_thought: Error in code execution: [Errno 13] Permission denied: 'deno'
2025/07/02 10:30:00 ERROR dspy.predict.program_of_thought: Error in code execution: [Errno 13] Permission denied: 'deno'
2025/07/02 10:30:04 ERROR dspy.predict.program_of_thought: Error in code execution: [Errno 13] Permission denied: 'deno'


RuntimeError: Max hops reached. Failed to run ProgramOfThought: [Errno 13] Permission denied: 'deno'

In [None]:
class SQLSignature(Signature):
    """Generate SQL code based on a given query and table schema."""
    # Fields must be declared as `name: type = dspy.InputField()`. Writing the field object as the
    # annotation (`query: dspy.InputField()`) makes pydantic try to build a schema for a FieldInfo
    # and fail with PydanticSchemaGenerationError.
    query: str = dspy.InputField()
    table_schema: str = dspy.InputField()
    sql_query: str = dspy.OutputField()


# Module wrapper around a single Predict step (this is not a ProgramOfThought module).
class SQLGeneration(dspy.Module):
    """Generate SQL for a question and table schema using `dspy.Predict(SQLSignature)`."""

    def __init__(self):
        super().__init__()
        self.plan = dspy.Predict(SQLSignature)

    def forward(self, query, table_schema):
        """Return the prediction for `query` against `table_schema`; the SQL is in `.sql_query`."""
        return self.plan(query=query, table_schema=table_schema)


PydanticSchemaGenerationError: Unable to generate pydantic-core schema for FieldInfo(annotation=NoneType, required=True, json_schema_extra={'__dspy_field_type': 'input'}). Set `arbitrary_types_allowed=True` in the model_config to ignore this error or implement `__get_pydantic_core_schema__` on your type to fully support it.

If you got this error by calling handler(<some type>) within `__get_pydantic_core_schema__` then you likely need to call `handler.generate_schema(<some type>)` since we do not call `__get_pydantic_core_schema__` on `<some type>` otherwise to avoid infinite recursion.

For further information visit https://errors.pydantic.dev/2.11/u/schema-for-unknown-type