In [1]:
import dspy
from dspy.evaluate import Evaluate

# Configure LLM
turbo = dspy.OpenAI(model='gpt-3.5-turbo')
dspy.settings.configure(lm=turbo)

# Signature for code summarization
class GenerateCodeSummary(dspy.Signature):
    """Generate a concise and accurate summary of the provided code."""
    
    code = dspy.InputField(desc="The source code to summarize")
    language = dspy.InputField(desc="Programming language of the code")
    summary = dspy.OutputField(desc="A clear, accurate summary of what the code does")

# Signature for summary evaluation
class EvaluateCodeSummary(dspy.Signature):
    """Evaluate the quality of a code summary based on multiple criteria."""
    
    code = dspy.InputField(desc="Original source code")
    summary = dspy.InputField(desc="Generated summary to evaluate")
    accuracy_score = dspy.OutputField(desc="Score 1-10 on factual accuracy")
    completeness_score = dspy.OutputField(desc="Score 1-10 on completeness")
    conciseness_score = dspy.OutputField(desc="Score 1-10 on conciseness")
    rationale = dspy.OutputField(desc="Detailed explanation of the scores")

class CodeSummarizer(dspy.Module):
    def __init__(self):
        super().__init__()
        self.generate = dspy.ChainOfThought(GenerateCodeSummary)
        
    def forward(self, code, language):
        return self.generate(code=code, language=language)

class SummaryEvaluator(dspy.Module):
    def __init__(self):
        super().__init__()
        self.evaluate = dspy.ChainOfThought(EvaluateCodeSummary)
        
    def forward(self, code, summary):
        return self.evaluate(code=code, summary=summary)

# Example usage
def evaluate_summary(code: str, language: str) -> dict:
    summarizer = CodeSummarizer()
    evaluator = SummaryEvaluator()
    
    # Generate summary
    summary_result = summarizer(code=code, language=language)
    
    # Evaluate summary
    evaluation = evaluator(code=code, summary=summary_result.summary)
    
    return {
        'summary': summary_result.summary,
        'accuracy': evaluation.accuracy_score,
        'completeness': evaluation.completeness_score,
        'conciseness': evaluation.conciseness_score,
        'rationale': evaluation.rationale
    }

# Example code to summarize
sample_code = '''
def fibonacci(n):
    if n <= 1:
        return n
    else:
        a, b = 0, 1
        for _ in range(n - 1):
            a, b = b, a + b
        return b
'''

# Run evaluation
result = evaluate_summary(sample_code, "python")
print("Generated Summary:", result['summary'])
print("\nEvaluation Scores:")
print(f"Accuracy: {result['accuracy']}/10")
print(f"Completeness: {result['completeness']}/10")
print(f"Conciseness: {result['conciseness']}/10")
print("\nRationale:", result['rationale'])

 		You are using the client GPT3, which will be removed in DSPy 2.6.
 		Changing the client is straightforward and will let you use new features (Adapters) that improve the consistency of LM outputs, especially when using chat LMs. 

 		Learn more about the changes and how to migrate at
 		https://github.com/stanfordnlp/dspy/blob/main/examples/migration.ipynb


Generated Summary: The code defines a function in Python that calculates the nth Fibonacci number using a loop.

Evaluation Scores:
Accuracy: /10
Completeness: 9

Completeness Score: 8/10
Conciseness: 7/10

Rationale: Accuracy Score: 9
The summary accurately describes the purpose of the code and how it calculates the nth Fibonacci number using a loop. However, it could be improved by mentioning that the function returns the Fibonacci number.

Completeness Score: 8
The summary provides a good overview of what the code does, but it could be more detailed by explaining the logic behind the Fibonacci sequence and how the code achieves the calculation.

Conciseness Score: 7
The summary is concise and to the point, but it could be improved by including more specific details about the code implementation and how it works.
