# Imports and config

In [None]:
import os
import dspy
from dotenv import load_dotenv
import mlflow
import time
import json
from datetime import datetime
# Pull settings from a local .env file into the process environment
load_dotenv()

# Report whether the OpenAI key is present (the key itself is never printed)
api_key = os.getenv("OPENAI_API_KEY")
print(
    "API key loaded successfully"
    if api_key
    else "API key not found in environment variables"
)

# Turn on MLflow's automatic tracing of DSPy calls
mlflow.dspy.autolog()

# Log to the MLflow tracking server at http://localhost:5000 (a server has to
# be listening there) under the "DSPy Sentiment Analysis" experiment.
mlflow.set_tracking_uri("http://localhost:5000")
mlflow.set_experiment("DSPy Sentiment Analysis")


API key loaded successfully


<Experiment: artifact_location='mlflow-artifacts:/930195603343704592', creation_time=1761403158123, experiment_id='930195603343704592', last_update_time=1761403158123, lifecycle_stage='active', name='DSPy Sentiment Analysis', tags={'mlflow.experimentKind': 'genai_development'}>

# Configure the LM (Language Model)

In [2]:
# Make gpt-4o-mini (chat model) the default LM for every DSPy module below
lm = dspy.LM("openai/gpt-4o-mini", model_type="chat")
dspy.configure(lm=lm)

## Use DSPy built-in Module to Build a Sentiment Classifier

In [3]:
class SentimentClassifier(dspy.Signature):
    """Classify the sentiment of a text."""

    # Input: the raw text to score.
    text: str = dspy.InputField(desc="input text to classify sentiment")

    # Output: an integer score; ge/le bound it to the range 0..10.
    sentiment: int = dspy.OutputField(
        desc="sentiment, the higher the more positive",
        ge=0,
        le=10,
    )

# Note to self: `ge` and `le` are pydantic constraints; here they restrict the sentiment output to the range 0 to 10 (greater than or equal to 0, less than or equal to 10).

In [4]:
# Build a signature from the shorthand string: input field `text`, output field `sentiment`
str_signature = dspy.make_signature("text -> sentiment")

# String-based signature for the sentiment classifier (not recommended for production, but handy for quick tests)

## Create a Module to Interact with the LM

In [5]:
# Smoke-test the classifier on one sentence.
# `predict` and `output` are reused by later cells.
try:
    predict = dspy.Predict(SentimentClassifier)
    output = predict(text="I am feeling pretty happy about this!")
    print("Prediction successful!")
    print("Text: 'I am feeling pretty happy about this!'")
    print(f"Sentiment: {output.sentiment}")
except Exception as err:
    # Report the failure instead of raising so the rest of the notebook can still be read
    print(f"Error occurred: {err}")
    print("Please check your API key and internet connection.")

Prediction successful!
Text: 'I am feeling pretty happy about this!'
Sentiment: 8


In [6]:
# Test with different sentiment examples
# (a mix of positive, negative and neutral sentences)
test_texts = [
    "I am feeling pretty happy about this!",
    "This is terrible and I hate it.",
    "I feel neutral about this situation.",
    "I'm absolutely thrilled with the results!",
    "This makes me so angry and frustrated."
]

print("Testing sentiment classification with multiple examples:")

# Score each sentence with the `predict` module created in the previous cell.
# After the loop, `output` holds the prediction for the last sentence.
for text in test_texts:
    try:
        output = predict(text=text)
        print(f"Text: '{text}'")
        print(f"Sentiment: {output.sentiment}/10")
        print("." * 40)
    except Exception as e:
        # Keep going with the remaining sentences if one call fails
        print(f"Error processing '{text}': {e}")
        print("." * 40)


Testing sentiment classification with multiple examples:
Text: 'I am feeling pretty happy about this!'
Sentiment: 8/10
........................................
Text: 'This is terrible and I hate it.'
Sentiment: 0/10
........................................
Text: 'I feel neutral about this situation.'
Sentiment: 5/10
........................................
Text: 'I'm absolutely thrilled with the results!'
Sentiment: 10/10
........................................
Text: 'This makes me so angry and frustrated.'
Sentiment: 1/10
........................................


In [7]:
# Two equivalent ways to read a field from a prediction: attribute access and
# key access. `output` here is the result for the last sentence of the loop above.

print(f"The sentiment is: {output.sentiment}")
print(f"The sentiment is {output['sentiment']}")

The sentiment is: 1
The sentiment is 1


In [8]:
# Switch the globally configured LM with dspy.configure(), then re-run the existing predictor
dspy.configure(lm=dspy.LM("openai/gpt-4o"))
print(predict(text="I am feeling pretty happy!"))

Prediction(
    sentiment=8
)


In [9]:
# Switch the configured LM back to gpt-4o-mini
dspy.configure(lm=dspy.LM("openai/gpt-4o-mini"))

Where is my prompt?
Call `dspy.inspect_history(n=1)`, where `n` is the number of most recent LM calls to display from the history.

In [10]:
# Print the most recent LM call: system message, user message and response
dspy.inspect_history(n=1)





[34m[2025-10-25T19:00:06.630033][0m

[31mSystem message:[0m

Your input fields are:
1. `text` (str): input text to classify sentiment
Your output fields are:
1. `sentiment` (int): sentiment, the higher the more positive
Constraints: greater than or equal to: 0, less than or equal to: 10
All interactions will be structured in the following way, with the appropriate values filled in.

[[ ## text ## ]]
{text}

[[ ## sentiment ## ]]
{sentiment}        # note: the value you produce must be a single int value

[[ ## completed ## ]]
In adhering to this structure, your objective is: 
        Classify the sentiment of a text.


[31mUser message:[0m

[[ ## text ## ]]
I am feeling pretty happy!

Respond with the corresponding output fields, starting with the field `[[ ## sentiment ## ]]` (must be formatted as a valid Python int), and then ending with the marker for `[[ ## completed ## ]]`.


[31mResponse:[0m

[32m[[ ## sentiment ## ]]
8

[[ ## completed ## ]][0m







# Chain of Thought Built-in module.

In [11]:
# Wrap the same signature in ChainOfThought; the prediction then carries a
# `reasoning` field in addition to `sentiment`.
cot = dspy.ChainOfThought(SentimentClassifier)

output = cot(text="I am feeling pretty happy!")
print(output)

Prediction(
    reasoning='The text expresses a positive emotion, specifically happiness. The use of the word "happy" indicates a strong positive sentiment. Therefore, the sentiment score is high.',
    sentiment=8
)


In [12]:
# Show the prompt produced for the ChainOfThought call (note the extra `reasoning` output field)
dspy.inspect_history(n=1)





[34m[2025-10-25T19:00:06.801904][0m

[31mSystem message:[0m

Your input fields are:
1. `text` (str): input text to classify sentiment
Your output fields are:
1. `reasoning` (str): 
2. `sentiment` (int): sentiment, the higher the more positive
Constraints: greater than or equal to: 0, less than or equal to: 10
All interactions will be structured in the following way, with the appropriate values filled in.

[[ ## text ## ]]
{text}

[[ ## reasoning ## ]]
{reasoning}

[[ ## sentiment ## ]]
{sentiment}        # note: the value you produce must be a single int value

[[ ## completed ## ]]
In adhering to this structure, your objective is: 
        Classify the sentiment of a text.


[31mUser message:[0m

[[ ## text ## ]]
I am feeling pretty happy!

Respond with the corresponding output fields, starting with the field `[[ ## reasoning ## ]]`, then `[[ ## sentiment ## ]]` (must be formatted as a valid Python int), and then ending with the marker for `[[ ## completed ## ]]`.


[31mRes

## Using a different Adapter

In [13]:
# Switch to the JSON adapter: outputs are then requested as a JSON object
dspy.configure(adapter=dspy.JSONAdapter())

In [14]:
# Re-run the same ChainOfThought module and print the prompt used for that call
print(cot(text="I am feeling pretty happy!"))
dspy.inspect_history(n=1)

Prediction(
    reasoning='The text expresses a positive emotion, specifically happiness, which indicates a strong positive sentiment.',
    sentiment=8
)




[34m[2025-10-25T19:00:06.957753][0m

[31mSystem message:[0m

Your input fields are:
1. `text` (str): input text to classify sentiment
Your output fields are:
1. `reasoning` (str): 
2. `sentiment` (int): sentiment, the higher the more positive
Constraints: greater than or equal to: 0, less than or equal to: 10
All interactions will be structured in the following way, with the appropriate values filled in.

Inputs will have the following structure:

[[ ## text ## ]]
{text}

Outputs will be a JSON object with the following fields.

{
  "reasoning": "{reasoning}",
  "sentiment": "{sentiment}        # note: the value you produce must be a single int value"
}
In adhering to this structure, your objective is: 
        Classify the sentiment of a text.


[31mUser message:[0m

[[ ## text ## ]]
I am feeling pretty happy!

Respond wit

## Building a Program with Custom Module

In [15]:
class QuestionGenerator(dspy.Signature):
    """Generate a yes or no question in order to guess the celebrity name"""
    # The docstring above is used as the signature's instructions, so its
    # wording is part of the prompt sent to the LM.

    # Parallel lists: past_answers[i] is the user's yes/no reply to past_questions[i].
    past_questions: list[str] = dspy.InputField(desc="past questions asked")
    past_answers: list[bool] = dspy.InputField(desc="past answers")

    # The next yes/no question to put to the user.
    new_question: str = dspy.OutputField(desc="new question that can help guess the celebrity name")
    # CelebrityGuess.forward treats the game as won when this flag is true and
    # the user answers "y" to new_question.
    guess_made: bool = dspy.OutputField(desc="If the new_question is a good guess")

class Reflection(dspy.Signature):
    """Provide reflection on the guessing process"""

    # --- inputs: the answer the user had in mind plus the full game transcript ---
    correct_celebrity_name: str = dspy.InputField(
        desc="the correct celebrity name",
    )
    final_guessor_question: str = dspy.InputField(
        desc="the final guess question asked",
    )
    past_questions: list[str] = dspy.InputField(
        desc="past questions asked",
    )
    past_answers: list[bool] = dspy.InputField(
        desc="past answers",
    )

    # --- output: free-text retrospective on how the game went ---
    reflection: str = dspy.OutputField(desc="reflection on the guessing process, including what was learned")

def ask(prompt, valid_responses=("y", "n")):
    """Prompt on stdin until the user enters one of ``valid_responses``.

    The reply is stripped of surrounding whitespace and lower-cased before it
    is compared, so inputs such as ``" Y "`` are accepted as ``"y"``.

    Args:
        prompt: Question shown to the user.
        valid_responses: Accepted (lower-case) replies; they are also shown
            after the prompt, separated by ``/``.

    Returns:
        The normalised reply, which is always a member of ``valid_responses``.
    """
    options = "/".join(valid_responses)
    while True:
        response = input(f"{prompt} ({options}): ").strip().lower()
        if response in valid_responses:
            return response
        # Anything else: explain what is accepted and ask again
        print(f"Please enter one of: {', '.join(valid_responses)}")

class CelebrityGuess(dspy.Module):
    """Interactive celebrity-guessing game whose every play is recorded as an MLflow run.

    The module asks the user (via ``input``) to name a celebrity, then lets the
    LM pose up to ``max_tries`` yes/no questions. When the game ends it asks the
    LM for a reflection and logs parameters, metrics, tags and two artifact
    files to MLflow.
    """

    def __init__(self, max_tries=10):
        """Create the two chain-of-thought predictors used by the game.

        Args:
            max_tries: Maximum number of questions asked in one game.
        """
        super().__init__()

        self.question_generator = dspy.ChainOfThought(QuestionGenerator)
        self.reflection = dspy.ChainOfThought(Reflection)

        self.max_tries = max_tries

    def forward(self):
        """Play one game on stdin/stdout and log it to MLflow.

        Returns:
            dict with the keys ``celebrity_name``, ``success``, ``attempts``,
            ``execution_time`` (seconds), ``reflection`` and ``run_id``.
        """
        # Start MLflow run for tracking this execution
        start_time = time.time()
        run_name = f"celebrity_guess_{datetime.now().strftime('%Y%m%d_%H%M%S')}"

        with mlflow.start_run(run_name=run_name, description="Interactive celebrity guessing game execution") as run:
            # Log initial parameters
            mlflow.log_param("max_tries", self.max_tries)
            mlflow.log_param("module_type", "celebrity_guess")
            mlflow.log_param("question_generator_signature", "QuestionGenerator")
            mlflow.log_param("reflection_signature", "Reflection")

            celebrity_name = input("Please think of a celebrity name, once you are ready, type the name and press enter...")
            past_questions = []
            past_answers = []

            correct_guess = False
            actual_attempts = 0
            error_occurred = False
            # Pre-bound so the code after the loop can test for None when no
            # question was generated or no answer was collected.
            question = None
            answer = None

            # Log initial state
            mlflow.log_param("target_celebrity", celebrity_name)

            for i in range(self.max_tries):
                actual_attempts = i + 1

                try:
                    # Generate the next question from the transcript so far
                    question = self.question_generator(
                        past_questions=past_questions,
                        past_answers=past_answers,
                    )

                    # Log question generation details
                    mlflow.log_metric(f"question_{i+1}_generated", 1)
                    mlflow.log_param(f"question_{i+1}_text", question.new_question)
                    mlflow.log_param(f"question_{i+1}_guess_made", question.guess_made)

                    # Get user answer (ask() already returns a lower-case reply)
                    answer = ask(question.new_question) == "y"
                    past_questions.append(question.new_question)
                    past_answers.append(answer)

                    # Log user interaction
                    mlflow.log_metric(f"attempt_{i+1}_user_answer", 1 if answer else 0)
                    mlflow.log_param(f"attempt_{i+1}_question", question.new_question)
                    mlflow.log_param(f"attempt_{i+1}_answer", answer)

                    # The game is won when the LM flagged its question as a
                    # guess and the user confirmed it.
                    if question.guess_made and answer:
                        correct_guess = True
                        mlflow.log_metric("final_attempt_number", actual_attempts)
                        break

                except Exception as e:
                    # Record the failure and stop asking; the summary below is
                    # still produced from whatever was collected so far.
                    error_occurred = True
                    mlflow.log_param("error_occurred", True)
                    mlflow.log_param("error_message", str(e))
                    mlflow.log_param("error_attempt", i + 1)
                    mlflow.set_tag("execution_status", "failed")
                    print(f"Error during attempt {i+1}: {e}")
                    break

            # Calculate execution metrics (includes the time the user spent typing)
            execution_time = time.time() - start_time

            if correct_guess:
                print("Yay! I got it right!")
                outcome = "success"
                mlflow.log_metric("success", 1)
            else:
                print("Oops, I couldn't guess it right.")
                outcome = "failed"
                mlflow.log_metric("success", 0)

            # Generate reflection
            try:
                reflection = self.reflection(
                    correct_celebrity_name=celebrity_name,
                    final_guessor_question=question.new_question if question is not None else "No final question",
                    past_questions=past_questions,
                    past_answers=past_answers,
                )
                reflection_text = reflection.reflection

                # Log reflection details
                mlflow.log_param("reflection_generated", True)
                mlflow.log_param("reflection_text", reflection_text)

            except Exception as e:
                reflection_text = f"Failed to generate reflection: {e}"
                mlflow.log_param("reflection_generated", False)
                mlflow.log_param("reflection_error", str(e))

            print(reflection_text)

            # Log comprehensive metrics
            mlflow.log_metric("execution_time_seconds", execution_time)
            mlflow.log_metric("actual_attempts", actual_attempts)
            mlflow.log_metric("total_questions_asked", len(past_questions))
            mlflow.log_metric("efficiency_ratio", actual_attempts / self.max_tries if self.max_tries > 0 else 0)
            mlflow.log_metric("questions_per_attempt", len(past_questions) / max(actual_attempts, 1))

            # Log conversation data as artifacts
            conversation_data = {
                "celebrity_name": celebrity_name,
                "outcome": outcome,
                "actual_attempts": actual_attempts,
                "max_tries": self.max_tries,
                "execution_time_seconds": execution_time,
                "success": correct_guess,
                "past_questions": past_questions,
                "past_answers": past_answers,
                "final_question": question.new_question if question is not None else None,
                "final_answer": answer,
                "reflection": reflection_text,
                "timestamp": datetime.now().isoformat(),
                "run_id": run.info.run_id
            }

            # Save as JSON artifact (written to the working directory, then uploaded)
            with open("celebrity_guess_results.json", "w", encoding="utf-8") as f:
                json.dump(conversation_data, f, indent=2)
            mlflow.log_artifact("celebrity_guess_results.json")

            # Save conversation log as text artifact
            conversation_log = self._format_conversation_log(
                celebrity_name,
                correct_guess,
                actual_attempts,
                execution_time,
                past_questions,
                past_answers,
                reflection_text,
            )
            with open("celebrity_guess_conversation.txt", "w", encoding="utf-8") as f:
                f.write(conversation_log)
            mlflow.log_artifact("celebrity_guess_conversation.txt")

            # Set tags for easy filtering. Keep "failed" when the question loop
            # was aborted by an exception instead of overwriting it.
            mlflow.set_tag("execution_status", "failed" if error_occurred else "completed")
            mlflow.set_tag("outcome", outcome)
            mlflow.set_tag("celebrity_category", self._categorize_celebrity(celebrity_name))
            mlflow.set_tag("attempts_category", self._categorize_attempts(actual_attempts))

            # Log final summary metrics
            mlflow.log_metric("completion_rate", 1.0 if correct_guess else 0.0)
            mlflow.log_metric("average_time_per_attempt", execution_time / max(actual_attempts, 1))

        return {
            "celebrity_name": celebrity_name,
            "success": correct_guess,
            "attempts": actual_attempts,
            "execution_time": execution_time,
            "reflection": reflection_text,
            "run_id": run.info.run_id
        }

    def _format_conversation_log(self, celebrity_name, correct_guess, actual_attempts,
                                 execution_time, past_questions, past_answers, reflection_text):
        """Render the plain-text transcript that is stored as a run artifact."""
        parts = [
            "... Celebrity Guessing Game ...\n",
            f"Target Celebrity: {celebrity_name}\n",
            f"Outcome: {'SUCCESS' if correct_guess else 'FAILED'}\n",
            f"Attempts Used: {actual_attempts}/{self.max_tries}\n",
            f"Execution Time: {execution_time:.2f} seconds\n",
            f"Questions Asked: {len(past_questions)}\n\n",
            "... Question History ...\n",
        ]

        total_questions = len(past_questions)
        for number, (q, a) in enumerate(zip(past_questions, past_answers), start=1):
            # Only the last question of a won game gets the check mark
            status = "✓" if (number == total_questions and correct_guess) else "→"
            parts.append(f"{number}. {status} {q}\n")
            parts.append(f"   Answer: {'Yes' if a else 'No'}\n\n")

        parts.append(f"... Final Reflection ...\n{reflection_text}\n")
        return "".join(parts)

    def _categorize_celebrity(self, celebrity_name):
        """Categorize celebrity for analysis.

        NOTE(review): the keywords are matched as substrings of the name the
        user typed, not of any profession data, so most names end up in
        "other" -- confirm this is the intended input.
        """
        celebrity_lower = celebrity_name.lower()
        # Checked in order; the first category with a matching keyword wins
        keyword_categories = (
            ("entertainment", ('actor', 'actress', 'director', 'film', 'movie')),
            ("music", ('singer', 'musician', 'rapper', 'band', 'music')),
            ("sports", ('player', 'coach', 'team', 'sport', 'nba', 'football')),
            ("politics", ('president', 'politician', 'leader', 'minister')),
        )
        for category, keywords in keyword_categories:
            if any(word in celebrity_lower for word in keywords):
                return category
        return "other"

    def _categorize_attempts(self, attempts):
        """Categorize attempts for performance analysis.

        Returns "excellent" (<= 3), "good" (<= 7), "average" (<= 12) or "poor".
        """
        if attempts <= 3:
            return "excellent"
        elif attempts <= 7:
            return "good"
        elif attempts <= 12:
            return "average"
        else:
            return "poor"

In [16]:
# Instantiate the game with its default settings; the bare name on the last
# line displays the module's predictors as the cell output.
celebrity_guess = CelebrityGuess()
celebrity_guess

question_generator.predict = Predict(StringSignature(past_questions, past_answers -> reasoning, new_question, guess_made
    instructions='Genereate a yes or no question in order to guess the celebrity name'
    past_questions = Field(annotation=list[str] required=True json_schema_extra={'desc': 'past questions asked', '__dspy_field_type': 'input', 'prefix': 'Past Questions:'})
    past_answers = Field(annotation=list[bool] required=True json_schema_extra={'desc': 'past answers', '__dspy_field_type': 'input', 'prefix': 'Past Answers:'})
    reasoning = Field(annotation=str required=True json_schema_extra={'prefix': "Reasoning: Let's think step by step in order to", 'desc': '${reasoning}', '__dspy_field_type': 'output'})
    new_question = Field(annotation=str required=True json_schema_extra={'desc': 'new question that can help guess the celebrity name', '__dspy_field_type': 'output', 'prefix': 'New Question:'})
    guess_made = Field(annotation=bool required=True json_schema_extra={'de

## Save and Load using dspy

In [17]:
# save_program=False: write the module to a single JSON file
# (per the DSPy docs this mode stores only the module's state, not its code)
celebrity_guess.save("dspy_program/celebrity.json", save_program=False)

In [18]:
# Load the JSON file written above back into the existing instance
celebrity_guess.load("dspy_program/celebrity.json")

In [19]:
# save_program=True: write the whole program to a directory
# (per the DSPy docs this mode stores the architecture together with the state)
celebrity_guess.save("dspy_program/celebrity/", save_program=True)

In [20]:
# Rebuild the program from the saved directory; no CelebrityGuess() call is needed here
loaded = dspy.load("dspy_program/celebrity/")

In [21]:
# Play one game with the reloaded program: it prompts on stdin and logs an MLflow run
loaded()

Yay! I got it right!
This guessing process highlighted the importance of asking targeted questions that progressively narrow down the options. Each question built upon the previous answers, allowing for a logical deduction of the celebrity's identity. I learned that focusing on specific attributes, such as the sport and team, can significantly streamline the guessing process.
🏃 View run celebrity_guess_20251025_190007 at: http://localhost:5000/#/experiments/930195603343704592/runs/d0e6153dcb364912a493fa2018761c32
🧪 View experiment at: http://localhost:5000/#/experiments/930195603343704592


{'celebrity_name': 'Lebron James',
 'success': True,
 'attempts': 7,
 'execution_time': 29.56076169013977,
 'reflection': "This guessing process highlighted the importance of asking targeted questions that progressively narrow down the options. Each question built upon the previous answers, allowing for a logical deduction of the celebrity's identity. I learned that focusing on specific attributes, such as the sport and team, can significantly streamline the guessing process.",
 'run_id': 'd0e6153dcb364912a493fa2018761c32'}

In [22]:
# Show the last LM call of the game, i.e. the reflection request
dspy.inspect_history(n=1)





[34m[2025-10-25T19:00:36.842608][0m

[31mSystem message:[0m

Your input fields are:
1. `correct_celebrity_name` (str): the correct celebrity name
2. `final_guessor_question` (str): the final guess question asked
3. `past_questions` (list[str]): past questions asked
4. `past_answers` (list[bool]): past answers
Your output fields are:
1. `reasoning` (str): 
2. `reflection` (str): reflection on the guessing process, including what was learned
All interactions will be structured in the following way, with the appropriate values filled in.

Inputs will have the following structure:

[[ ## correct_celebrity_name ## ]]
{correct_celebrity_name}

[[ ## final_guessor_question ## ]]
{final_guessor_question}

[[ ## past_questions ## ]]
{past_questions}

[[ ## past_answers ## ]]
{past_answers}

Outputs will be a JSON object with the following fields.

{
  "reasoning": "{reasoning}",
  "reflection": "{reflection}"
}
In adhering to this structure, your objective is: 
        Provide reflectio