# Produce Diffs for Semi-Structured Inputs
We want to find a way to structure Changes from unstructured text.
For this reason I asked ChatGPT to come up with JSON Schema. 
JSON Schema is instrumental here, as it can guide generation of LLMs later.

I later discovered that PyDantic can output JSON Schema directly, so we shall go with that

Chat History with ChatGPT https://chatgpt.com/share/2df4de1a-287d-4015-88fe-4e25c75a8efe

In [1]:
%pip install pydantic dspy-ai python-dotenv

Collecting dspy-ai
  Using cached dspy_ai-2.4.9-py3-none-any.whl.metadata (38 kB)
Collecting python-dotenv
  Using cached python_dotenv-1.0.1-py3-none-any.whl.metadata (23 kB)
Collecting joblib~=1.3.2 (from dspy-ai)
  Using cached joblib-1.3.2-py3-none-any.whl.metadata (5.4 kB)
Collecting openai<2.0.0,>=0.28.1 (from dspy-ai)
  Downloading openai-1.34.0-py3-none-any.whl.metadata (21 kB)
Collecting regex (from dspy-ai)
  Downloading regex-2024.5.15-cp312-cp312-macosx_11_0_arm64.whl.metadata (40 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m40.9/40.9 kB[0m [31m5.2 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting ujson (from dspy-ai)
  Downloading ujson-5.10.0-cp312-cp312-macosx_11_0_arm64.whl.metadata (9.3 kB)
Collecting datasets<3.0.0,~=2.14.6 (from dspy-ai)
  Using cached datasets-2.14.7-py3-none-any.whl.metadata (19 kB)
Collecting optuna (from dspy-ai)
  Using cached optuna-3.6.1-py3-none-any.whl.metadata (17 kB)
Collecting structlog (from dspy-ai)
  Using cached 

In [2]:
import re
from typing import List, Optional
from pydantic import BaseModel, Field, validator


class MethodDetails(BaseModel):
    # pattern=r'^(\w+\s)|(\*)*\([^()]*\)$|^\*$'
    method_signature: str = Field(
        ...,
        description="The signature of the deprecated method.",
        examples=["foo()", "bar(int a, int b)", "baz()", "*()", "*"],
    )
    replacement: Optional[str] = Field(
        None,
        description="The suggested replacement for the deprecated method. If possible use a method signature.",
    )


class ClassBreakingChange(BaseModel):
    class_name: str = Field(
        ..., description="The name of the class affected by the breaking change."
    )
    package_name: Optional[str] = Field(
        None, description="The name of the package where the class is defined."
    )
    deprecations: List[MethodDetails] = Field(
        [], description="List of deprecated methods in the class."
    )
    removals: List[MethodDetails] = Field(
        [], description="List of removed methods in the class."
    )
    notes: Optional[str] = Field(
        None, description="Additional notes about the breaking change."
    )


class PackageBreakingChange(BaseModel):
    package_name: Optional[str] = Field(
        None,
        description="The name of the package where the change happened. Use this only if we are in a non-class context (no OOP).",
    )
    deprecations: List[MethodDetails] = Field(
        [], description="List of deprecated methods in the package."
    )
    removals: List[str] = Field(
        [], description="List of removed methods in the package."
    )
    notes: Optional[str] = Field(
        None, description="Additional notes about the breaking change."
    )

In [3]:
class OOPBreakingChanges(BaseModel):
    class_breaking_changes: List[ClassBreakingChange] = Field(
        ...,
        description="List of breaking changes (deprecations and removals). Use this with Classes in an OOP context.",
    )
    version: str = Field(
        ...,
        description="The version of the API where the breaking changes were introduced.",
    )

In [4]:
class MixedBreakingChanges(BaseModel):
    class_breaking_changes: List[ClassBreakingChange] = Field(
        ...,
        description="List of breaking changes (deprecations and removals). Use this with Classes in an OOP context.",
    )
    package_breaking_changes: List[PackageBreakingChange] = Field(
        [],
        description="List of breaking changes (deprecations and removals). Use this in a procedural context.",
    )
    version: str = Field(
        ...,
        description="The version of the API where the breaking changes were introduced.",
    )

In [5]:
# Generate an example for OOPBreakingChanges with a single class breaking change
import json


example = OOPBreakingChanges(
    class_breaking_changes=[
        ClassBreakingChange(
            class_name="MyClass",
            package_name="my.package",
            deprecations=[
                MethodDetails(method_signature="foo()", replacement="bar()"),
                MethodDetails(method_signature="baz(int, str)"),
            ],
            removals=[
                MethodDetails(method_signature="qux(int, str)"),
            ],
            notes="This is a breaking change because of reasons.",
        )
    ],
    version="1.0.0",
)
example.model_dump_json(indent=4)

'{\n    "class_breaking_changes": [\n        {\n            "class_name": "MyClass",\n            "package_name": "my.package",\n            "deprecations": [\n                {\n                    "method_signature": "foo()",\n                    "replacement": "bar()"\n                },\n                {\n                    "method_signature": "baz(int, str)",\n                    "replacement": null\n                }\n            ],\n            "removals": [\n                {\n                    "method_signature": "qux(int, str)",\n                    "replacement": null\n                }\n            ],\n            "notes": "This is a breaking change because of reasons."\n        }\n    ],\n    "version": "1.0.0"\n}'

In [6]:
import json


main_model_schema = OOPBreakingChanges.model_json_schema()
print(json.dumps(main_model_schema, indent=2))

{
  "$defs": {
    "ClassBreakingChange": {
      "properties": {
        "class_name": {
          "description": "The name of the class affected by the breaking change.",
          "title": "Class Name",
          "type": "string"
        },
        "package_name": {
          "anyOf": [
            {
              "type": "string"
            },
            {
              "type": "null"
            }
          ],
          "default": null,
          "description": "The name of the package where the class is defined.",
          "title": "Package Name"
        },
        "deprecations": {
          "default": [],
          "description": "List of deprecated methods in the class.",
          "items": {
            "$ref": "#/$defs/MethodDetails"
          },
          "title": "Deprecations",
          "type": "array"
        },
        "removals": {
          "default": [],
          "description": "List of removed methods in the class.",
          "items": {
            "$ref": "#/

Ok now for the Litmus test, lets start experimenting with DSPy to check whether this is feasible.

In [7]:
import dspy, dotenv, os

dotenv.load_dotenv(".env")  # load OpenAI API key from .env file
lm = dspy.OpenAI(model="gpt-3.5-turbo", max_tokens=4000)
dspy.settings.configure(lm=lm)

# predictor = dspy.Predict("question -> answer")
# print(predictor(question="What is the capital of France?"))

  from .autonotebook import tqdm as notebook_tqdm


In [8]:
from collections import defaultdict
import os

training_data: defaultdict[str, str] = {}
# load all md files in guava.wiki/validate

for file in os.listdir("train/guava"):
    if file.endswith(".md"):
        with open(f"train/guava/{file}", "r") as f:
            with open(f"train/guava/{file.replace('.md', '.json')}", "r") as f2:
                json_content = f2.read()
                OOPBreakingChanges.model_validate_json(json_content)
                training_data[file] = {
                    "prompt": f.read(),
                    "result": json.loads(json_content),
                }

training_data

{'Release17.md': {'prompt': "# Guava Release 17.0: Release Notes\n\n## API Changes\n\n[Full JDiff Report](http://google.github.io/guava/releases/17.0/api/diffs/) of changes since release 16.0.\n\nTo build a combined report of the API changes between release 17.0 and any older release, check out our docs tree and run `jdiff/jdiff.sh` with the previous release number as argument (example: `jdiff.sh 5.0`).\n\n### Significant API additions and changes\n\n#### common.base\n\n`Verify` and `VerifyException`\n\n`Converter.from(Function<A, B>, Function<B, A>)`\n\n#### common.cache\n\n`CacheLoader.asyncReloading(CacheLoader<K, V>, Executor)`\n\n#### common.io\n\n`ByteStreams.newDataInput(ByteArrayInputStream)`\n\n`ByteStreams.newDataOutput(ByteArrayOutputStream)`\n\n`Closeables.closeQuietly(InputStream)`\n\n`Closeables.closeQuietly(Reader)`\n\n#### common.net\n\n`HostAndPort.fromHost(String)`\n\n#### common.util.concurrent\n\n`Futures.inCompletionOrder(Iterable<ListenableFuture<T>>)`\n\n`MoreExe

In [9]:
from dspy import InputField, OutputField, Signature
from dspy.functional import TypedPredictor

from enum import Enum

from pydantic import BaseModel, ValidationError


class FruitEnum(str, Enum):
    pear = "pear"
    banana = "banana"


# The signature is the main DSpy object. Note that we have types for the input and output fields,
# which was not possible beofore.
class BreakingChangesSignature(Signature):
    prompt: str = InputField()
    parsed_changes: OOPBreakingChanges = OutputField()


predictor = TypedPredictor(BreakingChangesSignature)
# prediction = predictor(
#     prompt=validation_data["Release20.md"]
# )

In [24]:
lm.inspect_history(n=3)




Given the fields `prompt`, produce the fields `parsed_changes`.

---

Follow the following format.

Prompt: ${prompt}
Parsed Changes: ${parsed_changes}. Respond with a single JSON object. JSON Schema: {"$defs": {"ClassBreakingChange": {"properties": {"class_name": {"description": "The name of the class affected by the breaking change.", "title": "Class Name", "type": "string"}, "package_name": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "The name of the package where the class is defined.", "title": "Package Name"}, "deprecations": {"default": [], "description": "List of deprecated methods in the class.", "items": {"$ref": "#/$defs/MethodDetails"}, "title": "Deprecations", "type": "array"}, "removals": {"default": [], "description": "List of removed methods in the class.", "items": {"$ref": "#/$defs/MethodDetails"}, "title": "Removals", "type": "array"}, "notes": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "

'\n\n\nGiven the fields `prompt`, produce the fields `parsed_changes`.\n\n---\n\nFollow the following format.\n\nPrompt: ${prompt}\nParsed Changes: ${parsed_changes}. Respond with a single JSON object. JSON Schema: {"$defs": {"ClassBreakingChange": {"properties": {"class_name": {"description": "The name of the class affected by the breaking change.", "title": "Class Name", "type": "string"}, "package_name": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "The name of the package where the class is defined.", "title": "Package Name"}, "deprecations": {"default": [], "description": "List of deprecated methods in the class.", "items": {"$ref": "#/$defs/MethodDetails"}, "title": "Deprecations", "type": "array"}, "removals": {"default": [], "description": "List of removed methods in the class.", "items": {"$ref": "#/$defs/MethodDetails"}, "title": "Removals", "type": "array"}, "notes": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "desc

In [11]:
from dspy import Example

devset = [
    Example(
        prompt=test["prompt"],
        solution=OOPBreakingChanges(**test["result"]),
    ).with_inputs("prompt", "test", "entry_point")
    for test in training_data.values()
]

In [12]:
split_count = int(len(devset) * 0.8)

trainset = devset[:split_count]
testset = devset[split_count:]

print(len(trainset), len(testset))

11 3


In [13]:
%pip install langchain rapidfuzz jinja2 pandas

Collecting langchain
  Downloading langchain-0.2.3-py3-none-any.whl.metadata (6.9 kB)
Collecting rapidfuzz
  Using cached rapidfuzz-3.9.3-cp312-cp312-macosx_11_0_arm64.whl.metadata (12 kB)
Collecting langchain-core<0.3.0,>=0.2.0 (from langchain)
  Downloading langchain_core-0.2.5-py3-none-any.whl.metadata (5.8 kB)
Collecting langchain-text-splitters<0.3.0,>=0.2.0 (from langchain)
  Downloading langchain_text_splitters-0.2.1-py3-none-any.whl.metadata (2.2 kB)
Collecting langsmith<0.2.0,>=0.1.17 (from langchain)
  Downloading langsmith-0.1.77-py3-none-any.whl.metadata (13 kB)
Collecting tenacity<9.0.0,>=8.1.0 (from langchain)
  Using cached tenacity-8.3.0-py3-none-any.whl.metadata (1.2 kB)
Collecting jsonpatch<2.0,>=1.33 (from langchain-core<0.3.0,>=0.2.0->langchain)
  Using cached jsonpatch-1.33-py2.py3-none-any.whl.metadata (3.0 kB)
Collecting packaging<24.0,>=23.2 (from langchain-core<0.3.0,>=0.2.0->langchain)
  Using cached packaging-23.2-py3-none-any.whl.metadata (3.2 kB)
Collecting

In [14]:
from langchain.evaluation.parsing.json_distance import JsonEditDistanceEvaluator

json_distance_evaluator = JsonEditDistanceEvaluator()
json_distance_evaluator.evaluate_strings(
    prediction='{"a": 1, "b": 2, "c": "4"}', reference='{"a": 1, "b": 3}'
)

{'score': 0.42857142857142855}

In [15]:
from dspy import Prediction


# Define the signature for automatic assessments.
class Assess(dspy.Signature):
    """Assess the quality of parsing breaking changes into JSON. Assess based on the JSON Schema you are given. Breaking Changes are deprecations and removals only. You have to judge the accuracy of the representation. If there are no breaking changes in the text, there should be none in the JSON. Metadata is not nescessary. We only care about capturing the impacted classes and methods."""

    model = dspy.InputField(
        desc="The model that was used to parse the breaking changes."
    )
    assessed_text = dspy.InputField()
    assessment_question = dspy.InputField()
    parsed_result = dspy.InputField()
    assessment_answer = dspy.OutputField(
        desc="Rate on a scale of 1-5, 5 being best. Dont justify, just output numeric value.",
        max_value=5,
        min_value=1,
    )
    # assessment_justification = dspy.OutputField(desc="Justify the assessment.")


def metric(example: Example, pred: Prediction, trace=None):

    # print(pred, example)
    if isinstance(pred, Prediction):
        predicted_changes = pred.parsed_changes.model_dump()
    elif isinstance(pred, Example):
        predicted_changes = pred.solution.model_dump()
    reference_changes = example.solution.model_dump()

    change_metric = json_distance_evaluator.evaluate_strings(
        prediction=predicted_changes, reference=reference_changes
    )

    if predicted_changes == reference_changes:
        return 1.0

    if change_metric == 1.0:
        return 1.0

    question = f"Are all breaking changes correctly transposed into the target format?"

    # print("Example:", example)
    # print("Prediction:", pred)
    # print("Change Metric:", change_metric)
    # print("Trace:", trace)

    correct = dspy.Predict(Assess)(
        model=json.dumps(OOPBreakingChanges.model_json_schema()),
        assessed_text=example.prompt,
        parsed_result=json.dumps(predicted_changes),
        assessment_question=question,
    )

    # print(correct)

    # print(lm.inspect_history(n=3))

    # check if if not metric.assessment_answer numeric

    try:
        correct = int(correct.assessment_answer)
        return correct / 5
    except ValueError:
        return 0.0


print("Score with the original model:")
print(100 * sum(metric(example, example) for example in testset) / len(testset))

Score with the original model:
100.0


In [16]:
from dspy.evaluate.evaluate import Evaluate

evaluator = Evaluate(
    devset=testset,
    num_threads=30,
    display_progress=True,
    display_table=5,
    max_errors=1,
)
res = evaluator(predictor, metric)
print(res)

Average Metric: 3.0 / 3  (100.0): 100%|██████████| 3/3 [00:19<00:00,  6.37s/it]


Unnamed: 0,prompt,solution,parsed_changes,metric
0,# Guava Release 14.0: Release Notes ## API Changes [Full JDiff Report](http://google.github.io/guava/releases/14.0.1/api/diffs/) of changes since release 13.0.1 To build a combined report of the API...,"class_breaking_changes=[ClassBreakingChange(class_name='Stopwatch', package_name='common.util.concurrent', deprecations=[MethodDetails(method_signature='elapsedMillis()', replacement='elapsed()'), MethodDetails(method_signature='elapsedTime()', replacement='elapsed()')], removals=[], notes=None)] version='14.0'","class_breaking_changes=[ClassBreakingChange(class_name='Equivalences', package_name=None, deprecations=[MethodDetails(method_signature='static methods moved into Equivalence', replacement=None)], removals=[], notes=None), ClassBreakingChange(class_name='DiscreteDomains', package_name=None, deprecations=[MethodDetails(method_signature='static methods moved into DiscreteDomain', replacement=None)], removals=[], notes=None), ClassBreakingChange(class_name='Stopwatch', package_name=None, deprecations=[MethodDetails(method_signature='elapsedMillis and elapsedTime...",✔️ [1.0]
1,# Guava Release 20.0: Release Notes ## API Changes [Full JDiff Report](http://google.github.io/guava/releases/20.0/api/diffs/) of changes since release 19.0. ### Significant API additions and changes #### New...,"class_breaking_changes=[ClassBreakingChange(class_name='Predicates', package_name='common.base', deprecations=[MethodDetails(method_signature='assignableFrom(Class)', replacement='subtypeOf(Class)')], removals=[], notes=None), ClassBreakingChange(class_name='Throwables', package_name='common.base', deprecations=[MethodDetails(method_signature='propagate()', replacement=None), MethodDetails(method_signature='propagateIfInstanceOf()', replacement=None), MethodDetails(method_signature='propagateIfPossible()', replacement=None)], removals=[], notes=None), ClassBreakingChange(class_name='ConcurrentHashMultiset', package_name='common.collect', deprecations=[MethodDetails(method_signature='create(MapMaker)', replacement='create(ConcurrentMap)')], removals=[], notes=None), ClassBreakingChange(class_name='Iterators', package_name='common.collect', deprecations=[],...","class_breaking_changes=[ClassBreakingChange(class_name='common.base.CharMatcher', package_name='common.base', deprecations=[MethodDetails(method_signature='constants deprecated in favor of static factory methods', replacement='Use static factory methods instead of constants')], removals=[], notes='Constants will be removed after a 2-year...",✔️ [1.0]
2,# Guava Release 10.0.1: Release Notes ## API Changes [Full JDiff Report](http://google.github.io/guava/releases/10.0.1/api/diffs/) of changes since release 9.0 To build a combined report of the API...,class_breaking_changes=[] version='10.0.1',class_breaking_changes=[] version='10.0.1',✔️ [1.0]


100.0


In [17]:
from dspy.teleprompt.bootstrap import BootstrapFewShot
from dspy.teleprompt.random_search import BootstrapFewShotWithRandomSearch

print("Compiling...")
# compiled = BootstrapFewShotWithRandomSearch(
compiled = BootstrapFewShot(
    metric=metric,
    # num_threads=30,
    # num_candidate_programs=5,
    # max_labeled_demos=8,
    max_errors=100,
).compile(
    predictor,
    trainset=trainset,
)

Compiling...


 36%|███▋      | 4/11 [00:16<00:29,  4.20s/it]


In [18]:
compiled.save("compiled-optimized")
compiled

TypedPredictor(BreakingChangesSignature(prompt -> parsed_changes
    instructions='Given the fields `prompt`, produce the fields `parsed_changes`.'
    prompt = Field(annotation=str required=True json_schema_extra={'__dspy_field_type': 'input', 'prefix': 'Prompt:', 'desc': '${prompt}'})
    parsed_changes = Field(annotation=OOPBreakingChanges required=True json_schema_extra={'__dspy_field_type': 'output', 'prefix': 'Parsed Changes:', 'desc': '${parsed_changes}'})
))

In [26]:
compiled(
    prompt="{'type':'japicmp-diff', 'version':'1.0', 'changes': {'setCompatibleFuture': ['METHOD_REMOVED'], 'setFuture': ['METHOD_REMOVED'], 'getContext': ['METHOD_REMOVED']}}"
)

Prediction(
    parsed_changes=OOPBreakingChanges(class_breaking_changes=[], version='N/A')
)