In [1]:
import ast

import dspy
import pandas as pd

# Create a dspy client for the "llama3" model through the OllamaLocal backend and
# register it in dspy's global settings so later dspy modules use it as their LM.
# NOTE(review): presumably requires an Ollama server with llama3 available locally - confirm.
llama3 = dspy.OllamaLocal(model="llama3")
dspy.settings.configure(lm=llama3)

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Game catalogue used to synthesize examples. Later cells read the "name" and
# "genres" columns of each row and sample from the first 100 rows.
df = pd.read_json("games.json")

In [3]:
import random


def generate_selection(games_df=None, pool_size=100):
    """Draw a random set of "played" games and random per-genre preference scores.

    Args:
        games_df: DataFrame to sample from. Every row must provide a "name"
            value and an iterable "genres" value. Defaults to the
            notebook-level ``df``.
        pool_size: Only the first ``pool_size`` rows are eligible for sampling.
            The value is capped at the number of rows actually available, so a
            catalogue with fewer rows no longer causes an out-of-bounds error.

    Returns:
        tuple: ``(titles, scores)`` where ``titles`` is the list of sampled
        game names and ``scores`` maps every genre seen among those games to
        the sum of one random -1/0/+1 draw per game carrying that genre.

    Raises:
        ValueError: If there is no row to sample from.
    """
    if games_df is None:
        games_df = df
    pool = min(pool_size, len(games_df))
    if pool < 1:
        raise ValueError("no games available to sample from")
    # Clamp the request so random.sample is never asked for more rows than exist.
    num_games = min(random.randint(5, 20), pool)
    selection_idx = random.sample(range(pool), num_games)
    games = games_df.iloc[selection_idx]
    titles = []
    scores = {}
    for name, genres in zip(games["name"], games["genres"]):
        titles.append(name)
        for genre in genres:
            # One independent draw per (game, genre) pair, accumulated per genre.
            scores[genre] = scores.get(genre, 0) + random.randint(-1, 1)
    return titles, scores


def build_examples(count):
    """Create ``count`` dspy examples, each from an independent random selection.

    The titles list and the scores dict are stored as their ``str()``
    representation, and both fields are marked as inputs.
    No random seed is set here, so the examples differ from run to run.
    """
    examples = []
    for _ in range(count):
        titles, scores = generate_selection()
        example = dspy.Example(titles=str(titles), scores=str(scores))
        examples.append(example.with_inputs("titles", "scores"))
    return examples


# Same sizes and the same generation order as before: 50 / 50 / 12.
dataset = build_examples(50)  # used as the trainset when compiling the program
testset = build_examples(50)  # used for the baseline evaluation
valset = build_examples(12)  # used by the optimizer to score candidates

In [4]:
class Game(dspy.Signature):
    """You are a game recommendation API that receives a list of games
    a user has played and a set of genres with scored preferences.
    You will respond in JSON.  The JSON schema should include
    {
      "recommendations": {
        "games": "list of 5 strings [Super Mario World, Call of Duty, ...]",
        "rationale": "string (reasoning for selecting the recommendedations)"
      }
    }
    """

    # NOTE: the docstring above is runtime text, not just documentation. It is
    # sent to the model verbatim as the instruction part of the prompt (see the
    # inspect_history output at the end of the notebook), so editing it changes
    # model behaviour.
    # NOTE(review): the prompt contains the typo "recommendedations" and asks
    # for 5 games, while the inspected completion lists only 3 - confirm
    # whether the prompt should be tightened.

    # Each `desc` is rendered in the prompt's "Follow the following format" section.
    # Inputs: both are passed in as strings by the cells that call this signature.
    titles = dspy.InputField(desc="games that have already been played")
    scores = dspy.InputField(desc="users scored genre preferences")
    # Single output: the metric functions parse it with json.loads and expect
    # a top-level "recommendations" object with "games" and "rationale".
    output = dspy.OutputField(desc="output in JSON")

In [5]:
import json


def test_validate_answer(example, pred, trace=None):
    try:
        data = json.loads(pred.output)["recommendations"]
    except:
        return 0, False
    if ("games" not in data.keys()) or ("rationale" not in data.keys()):
        return 0, False
    num_wrong = 0
    for game in example.titles:
        if game in data["games"]:
            num_wrong += 1
    acc = (len(data["games"]) - num_wrong) / len(data["games"])
    return acc, True


def validate_answer(example, pred, trace=None):
    try:
        data = json.loads(pred.output)["recommendations"]
    except:
        return 0
    if ("games" not in data.keys()) or ("rationale" not in data.keys()):
        return 0
    num_wrong = 0
    for game in example.titles:
        if game in data["games"]:
            num_wrong += 1
    acc = (len(data["games"]) - num_wrong) / len(data["games"])
    return acc

In [7]:
# Baseline: score the un-optimized predictor on the test set.
# The predictor is built once; it is the same module for every example.
baseline_predictor = dspy.Predict(Game)

avg = 0  # running sum of scores; divided by the set size when printed
repeats = 0  # well-formed responses that scored below 1

for d in testset:
    pred = baseline_predictor(titles=d.titles, scores=d.scores)
    acc, flag = test_validate_answer(d, pred)
    if flag and acc < 1:
        repeats += 1
    avg += acc
print(repeats)
print(avg / len(testset))

0
0.76


In [None]:
from dspy.teleprompt import BootstrapFewShotWithRandomSearch

# Set up the optimizer: "bootstrap" (i.e. self-generate) up to 4 demonstrations of the
# program's steps, with max_labeled_demos=4 as the cap on labeled demos.
# With num_candidate_programs=3 the optimizer bootstraps 3 candidate demo sets (the log
# also shows a few initial attempts) and tracks the best score on valset.
# NOTE(review): exact semantics of each option should be confirmed against the DSPy docs.
config = dict(
    max_bootstrapped_demos=4,
    max_labeled_demos=4,
    num_candidate_programs=3,
    num_threads=4,
)

# validate_answer is the metric; `dataset` is the training set and `valset` scores candidates.
teleprompter = BootstrapFewShotWithRandomSearch(metric=validate_answer, **config)
optimized_program = teleprompter.compile(
    dspy.Predict(Game), trainset=dataset, valset=valset
)

Going to sample between 1 and 4 traces per predictor.
Will attempt to bootstrap 3 candidate sets.


Average Metric: 10.0 / 12  (83.3): 100%|██████████| 12/12 [00:14<00:00,  1.20s/it]


Score: 83.33 for set: [0]
New best sscore: 83.33 for seed -3
Scores so far: [83.33]
Best score: 83.33


Average Metric: 7.0 / 12  (58.3): 100%|██████████| 12/12 [00:14<00:00,  1.21s/it]


Score: 58.33 for set: [4]
Scores so far: [83.33, 58.33]
Best score: 83.33


 12%|█▏        | 6/50 [00:24<02:56,  4.00s/it]


Bootstrapped 4 full traces after 7 examples in round 0.


Average Metric: 0 / 12  (0.0): 100%|██████████| 12/12 [00:16<00:00,  1.34s/it]


Score: 0.0 for set: [4]
Scores so far: [83.33, 58.33, 0.0]
Best score: 83.33
Average of max per entry across top 1 scores: 0.8333333333333334
Average of max per entry across top 2 scores: 0.9166666666666666
Average of max per entry across top 3 scores: 0.9166666666666666
Average of max per entry across top 5 scores: 0.9166666666666666
Average of max per entry across top 8 scores: 0.9166666666666666
Average of max per entry across top 9999 scores: 0.9166666666666666


 10%|█         | 5/50 [00:21<03:10,  4.24s/it]


Bootstrapped 4 full traces after 6 examples in round 0.


Average Metric: 6.0 / 12  (50.0): 100%|██████████| 12/12 [00:14<00:00,  1.23s/it]


Score: 50.0 for set: [4]
Scores so far: [83.33, 58.33, 0.0, 50.0]
Best score: 83.33
Average of max per entry across top 1 scores: 0.8333333333333334
Average of max per entry across top 2 scores: 0.9166666666666666
Average of max per entry across top 3 scores: 0.9166666666666666
Average of max per entry across top 5 scores: 0.9166666666666666
Average of max per entry across top 8 scores: 0.9166666666666666
Average of max per entry across top 9999 scores: 0.9166666666666666


  4%|▍         | 2/50 [00:07<03:05,  3.86s/it]


Bootstrapped 2 full traces after 3 examples in round 0.


Average Metric: 6.0 / 12  (50.0): 100%|██████████| 12/12 [00:17<00:00,  1.48s/it]


Score: 50.0 for set: [4]
Scores so far: [83.33, 58.33, 0.0, 50.0, 50.0]
Best score: 83.33
Average of max per entry across top 1 scores: 0.8333333333333334
Average of max per entry across top 2 scores: 0.9166666666666666
Average of max per entry across top 3 scores: 0.9166666666666666
Average of max per entry across top 5 scores: 0.9166666666666666
Average of max per entry across top 8 scores: 0.9166666666666666
Average of max per entry across top 9999 scores: 0.9166666666666666


  4%|▍         | 2/50 [00:08<03:16,  4.10s/it]


Bootstrapped 1 full traces after 3 examples in round 0.


Average Metric: 12.0 / 12  (100.0): 100%|██████████| 12/12 [00:15<00:00,  1.26s/it]


Score: 100.0 for set: [4]
New best sscore: 100.0 for seed 2
Scores so far: [83.33, 58.33, 0.0, 50.0, 50.0, 100.0]
Best score: 100.0
Average of max per entry across top 1 scores: 1.0
Average of max per entry across top 2 scores: 1.0
Average of max per entry across top 3 scores: 1.0
Average of max per entry across top 5 scores: 1.0
Average of max per entry across top 8 scores: 1.0
Average of max per entry across top 9999 scores: 1.0
6 candidate programs found.


In [None]:
# Draw 50 fresh random selections and score the optimized program on them.
# NOTE: this rebinds `dataset`, replacing the examples used for training.
dataset = []
while len(dataset) < 50:
    titles, scores = generate_selection()
    fresh_example = dspy.Example(titles=str(titles), scores=str(scores))
    dataset.append(fresh_example.with_inputs("titles", "scores"))

# `avg` accumulates the summed metric; the mean is computed when printing.
avg = 0
for d in dataset:
    pred = optimized_program(titles=d.titles, scores=d.scores)
    avg += validate_answer(d, pred)
print(avg / len(dataset))

0.98


In [None]:
# Persist the compiled program's state to params.json in the working directory.
optimized_program.save("params.json")

In [None]:
# Show the most recent prompt/completion exchanged with the LM (n=1) for a manual check.
llama3.inspect_history(n=1)




You are a game recommendation API that receives a list of games
a user has played and a set of genres with scored preferences.
You will respond in JSON.  The JSON schema should include
{
  "recommendations": {
    "games": "list of 5 strings [Super Mario World, Call of Duty, ...]",
    "rationale": "string (reasoning for selecting the recommendedations)"
  }
}

---

Follow the following format.

Titles: games that have already been played
Scores: users scored genre preferences
Output: output in JSON

---

Titles: ['Max Payne', 'Super Mario Odyssey', 'The Last of Us Part II', 'Super Mario Bros. 3', 'Doom', 'Minecraft', 'The Witcher 2: Assassins of Kings', 'Hollow Knight']
Scores: {'Shooter': -2, 'Platform': 0, 'Adventure': -2, 'Puzzle': -1, 'Simulator': 0, 'Role-playing (RPG)': -1, 'Indie': 1}
Output: { "recommendations": { "games": ["Castlevania", "Bloodborne", "Dark Souls"], "rationale": "Based on the user's preferences for Shooter and RPG genres with negative scores, we recommend 

'\n\n\nYou are a game recommendation API that receives a list of games\na user has played and a set of genres with scored preferences.\nYou will respond in JSON.  The JSON schema should include\n{\n  "recommendations": {\n    "games": "list of 5 strings [Super Mario World, Call of Duty, ...]",\n    "rationale": "string (reasoning for selecting the recommendedations)"\n  }\n}\n\n---\n\nFollow the following format.\n\nTitles: games that have already been played\nScores: users scored genre preferences\nOutput: output in JSON\n\n---\n\nTitles: [\'Max Payne\', \'Super Mario Odyssey\', \'The Last of Us Part II\', \'Super Mario Bros. 3\', \'Doom\', \'Minecraft\', \'The Witcher 2: Assassins of Kings\', \'Hollow Knight\']\nScores: {\'Shooter\': -2, \'Platform\': 0, \'Adventure\': -2, \'Puzzle\': -1, \'Simulator\': 0, \'Role-playing (RPG)\': -1, \'Indie\': 1}\nOutput: { "recommendations": { "games": ["Castlevania", "Bloodborne", "Dark Souls"], "rationale": "Based on the user\'s preferences for S