# Final Project: ReCOGS Parsing

In [174]:
__author__ = "Sam Portnow"
__version__ = "CS224u, Spring 2025"

In [465]:
import pandas as pd
import dspy
from dotenv import load_dotenv
import re
from compgen import recogs_exact_match
import numpy as np

In [200]:
import os
# keep the API keys in a `.env` file in the local root directory
load_dotenv()
openai_key = os.getenv('OPENAI_API_KEY')  # use the .env file as it is a good practice to keep keys outside of one's code


Read in the ReCOGS training data.

In [176]:
train_set = pd.read_csv('./data/RECOGStrain.tsv', sep = '\t', header = None)

In [177]:
train_set.head()

Unnamed: 0,0,1,2
0,A rose was helped by a dog .,rose ( 1 ) ; dog ( 36 ) ; help ( 40 ) AND them...,in_distribution
1,The sailor dusted a boy .,* sailor ( 0 ) ; boy ( 53 ) ; dust ( 4 ) AND a...,in_distribution
2,Emma rolled a teacher .,Emma ( 27 ) ; teacher ( 6 ) ; roll ( 26 ) AND ...,in_distribution
3,Evelyn rolled the girl .,Evelyn ( 54 ) ; * girl ( 14 ) ; roll ( 31 ) AN...,in_distribution
4,A cake was forwarded to Levi by Charlotte .,cake ( 49 ) ; Levi ( 7 ) ; Charlotte ( 15 ) ; ...,in_distribution


Define a function to read in the vocab files from Baroni and Lake (2023)

In [178]:
def read_file(filename):  
    with open(filename) as fid:
        words = fid.readlines()
    words = [w.rstrip('\n').strip() for w in words]
    words = [w for w in words if len(w)>0]
    return words

There are common nouns, proper nouns, and verbs

In [179]:

recogs_common = read_file('./COGS/vocab_common_nouns.txt')
recogs_proper = read_file('./COGS/vocab_proper_nouns.txt')
recogs_verb_dative =  read_file('./COGS/vocab_verbs_dative_input.txt')
recogs_verb_inf =  read_file('./COGS/vocab_verbs_infinitive.txt')

Below I make a dictionrary that maps words to their randomly shufffled versions. This is ther permutate step of Baroni and Lake (2023)

In [180]:
import random

symbols = {
    'common': [recogs_common, random.sample(recogs_common, len(recogs_common))],
    'proper': [recogs_proper, random.sample(recogs_proper, len(recogs_proper))],
    'verb_dative': [recogs_verb_dative, random.sample(recogs_verb_dative, len(recogs_verb_dative))],
    'verb_inf': [recogs_verb_inf, random.sample(recogs_verb_inf, len(recogs_verb_inf))],
}

In [181]:
symbols['common'].append(dict(zip(symbols['common'][1], symbols['common'][0])))
symbols['proper'].append(dict(zip(symbols['common'][1], symbols['proper'][0])))
symbols['verb_dative'].append(dict(zip(symbols['verb_dative'][1], symbols['verb_dative'][0])))
symbols['verb_inf'].append(dict(zip(symbols['verb_inf'][1], symbols['verb_inf'][0])))


In [182]:
train_set.columns = ['input', 'output', 'type']
examples = train_set.sample(100)

The loop below creates a new set of supporting examples. It loops through each row in teh dataset. For each out symbol in the output field, it looks for a match in the training set. 

In [183]:
supporting_examples = []
supporting_examples_input = []
for i, row in examples.iterrows():
    out_symbols = row['output'].split()
    se = []
    for out in out_symbols:
        for key in symbols.keys():
            if out in symbols[key][0]:
                if out in symbols[key][2].keys():
                    se = (train_set[train_set['output'].str.contains(out)]['output'].values[0])
                    sei = (train_set[train_set['output'].str.contains(out)]['input'].values[0])
                    
    supporting_examples.append(se)
    supporting_examples_input.append(sei)
examples['supporting_examples'] = supporting_examples
examples['supporting_examples_input'] = supporting_examples_input

The code below takes words in the output field and supporting example and replaces them with their permuted version.

A longer description of the code is below:

Loop through the example dataset;
For each example, loop through the output field;
For each word in the output field, check if it is in the permuted dictionary;
If it is, replace the word in the output field with the permuted version;
If it is not, leave the word as is;

In [None]:
"""
The code below transforms output sentences to the new vocabulary
"""

new_output = []
new_supporting_examples = []
for i, row in examples.iterrows():
    out_symbols = row['output'].split()
    output_sentence = []
    new_support = None
    for out in out_symbols:
        break_flag = False
        for key in symbols.keys():
            if out in symbols[key][0]:
                if out in symbols[key][2].keys():
                    output_sentence.append(symbols[key][2][out])
                    new_support = (row['supporting_examples'].replace(out, symbols[key][2][out]))
                    break_flag = True
        if break_flag == False:
                output_sentence.append(out)
    if output_sentence:
        new_output.append(' '.join(output_sentence))
    if not new_support:
        new_supporting_examples.append(row['supporting_examples'])
    else:
        new_supporting_examples.append(new_support)     



In [185]:
examples['new_output'] = new_output
examples['new_supporting_examples'] = new_supporting_examples

Look at the dataset to make sure everthing looks good

In [186]:
rdn = examples.sample(10)
for i, row in rdn.iterrows():
    print ('Input: ', row['input'])
    print('Type: ', row['type'])
    print('Original Output: ', row['output'])
    print('Transformed Output: ', row['new_output'])
    print('Original Supporting Examples: ', row['supporting_examples'])
    print('Transformed Supporting Examples: ', row['new_supporting_examples'])
    print('\n')


Input:  A girl poked the donut .
Type:  in_distribution
Original Output:  girl ( 29 ) ; * donut ( 41 ) ; poke ( 34 ) AND agent ( 34 , 29 ) AND theme ( 34 , 41 )
Transformed Output:  cupboard ( 29 ) ; * journalist ( 41 ) ; poke ( 34 ) AND agent ( 34 , 29 ) AND theme ( 34 , 41 )
Original Supporting Examples:  * donut ( 42 ) ; study ( 53 ) AND theme ( 53 , 42 )
Transformed Supporting Examples:  * journalist ( 42 ) ; study ( 53 ) AND theme ( 53 , 42 )


Input:  A drink was loved by the horse .
Type:  in_distribution
Original Output:  drink ( 19 ) ; * horse ( 53 ) ; love ( 26 ) AND theme ( 26 , 19 ) AND agent ( 26 , 53 )
Transformed Output:  cobra ( 19 ) ; * buyer ( 53 ) ; love ( 26 ) AND theme ( 26 , 19 ) AND agent ( 26 , 53 )
Original Supporting Examples:  horse ( 25 ) ; * cake ( 11 ) ; table ( 31 ) ; * mouse ( 34 ) ; give ( 6 ) AND agent ( 6 , 25 ) AND theme ( 6 , 11 ) AND recipient ( 6 , 34 ) AND nmod . beside ( 11 , 31 )
Transformed Supporting Examples:  buyer ( 25 ) ; * cake ( 11 ) ; 

In [187]:
examples.head()

Unnamed: 0,input,output,type,supporting_examples,supporting_examples_input,new_output,new_supporting_examples
5219,A girl was handed a donut on the rug by the boy .,girl ( 17 ) ; donut ( 35 ) ; * rug ( 2 ) ; * b...,in_distribution,* sailor ( 0 ) ; boy ( 53 ) ; dust ( 4 ) AND a...,The sailor dusted a boy .,cupboard ( 17 ) ; journalist ( 35 ) ; * valve ...,* sailor ( 0 ) ; pyramid ( 53 ) ; dust ( 4 ) A...
20184,The ball was eaten by a student .,* ball ( 6 ) ; student ( 48 ) ; eat ( 52 ) AND...,in_distribution,"* captain ( 4 ) ; eat ( 18 ) AND agent ( 18 , 4 )",The captain ate .,* needle ( 6 ) ; bible ( 48 ) ; dust ( 52 ) AN...,"* captain ( 4 ) ; dust ( 18 ) AND agent ( 18 ,..."
3436,Ava dreamed that a pizza was sent to a girl by...,Ava ( 0 ) ; pizza ( 9 ) ; girl ( 48 ) ; Liam (...,in_distribution,Evelyn ( 54 ) ; * girl ( 14 ) ; roll ( 31 ) AN...,Evelyn rolled the girl .,Ava ( 0 ) ; coffin ( 9 ) ; cupboard ( 48 ) ; L...,Evelyn ( 54 ) ; * cupboard ( 14 ) ; roll ( 31 ...
1524,Olivia forwarded the boy the cake in a garden .,Olivia ( 48 ) ; * boy ( 23 ) ; * cake ( 51 ) ;...,in_distribution,Olivia ( 16 ) ; cake ( 13 ) ; garden ( 36 ) ; ...,Olivia was handed a cake in a garden .,Olivia ( 48 ) ; * pyramid ( 23 ) ; * mother ( ...,Olivia ( 16 ) ; cake ( 13 ) ; table ( 36 ) ; h...
15733,A donut was valued by James .,donut ( 20 ) ; James ( 0 ) ; value ( 49 ) AND ...,in_distribution,"* donut ( 42 ) ; study ( 53 ) AND theme ( 53 ,...",The donut was studied .,journalist ( 20 ) ; James ( 0 ) ; value ( 49 )...,* journalist ( 42 ) ; study ( 53 ) AND theme (...


Function for the plan prompt

In [393]:
def get_plan_prompt(examples):

    meta_learning = examples.sample(1)
    more_examples = examples.sample(7)

    inputs = pd.concat([meta_learning, more_examples])['input'].to_list()
    outputs = pd.concat([meta_learning, more_examples])['new_output'].to_list()
    query_input = meta_learning['supporting_examples_input'].values[0]
    query_output = meta_learning['new_supporting_examples'].values[0]


    inputs = "\n".join(inputs)
    outputs = "\n".join(outputs)       

    plan_prompt = f"""

    Imagine you are an expert in semantic parsing of natural language.

    You must develop a plan to convert each sentence into its logical form.

    A series of inputs and outputs are given to you. For example:

    Input: "A bat was cooked ."
    Output: "bat ( 59 ) ; cook ( 2 ) AND theme ( 2 , 59 )"

    To ensure that you focus on the semantic meaning of the sentences, the words themselves will replaced with other, equivalent words.
    For example, "bat" might be replaced with "cat", and "cook" might be replaced with "eat".

    Below is a full example of the input and output sentences, with the words replaced with other, equivalent words:

    Input: {inputs}
    Output: {outputs}
    Query Input: {query_input}
    Query Output: {query_output}

    For every example, the possible roles that words can take are as follows:

    - Nouns (including proper nouns). Nouns are denoted with numbers in parentheses.
    - Verbs. Verbs are denoted with numbers in parentheses.
    - Agents (who do events). Agents are denoted by agent followed by the number(s) in parentheses referring to the noun and verb.
    - Themes (who undergo events). Themes are denoted by theme followed by the number(s) in parentheses referring to the noun and verb.
    - Recipients (who receive events). Recipients are denoted by recipient followed by the number(s) in parentheses referring to the noun and verb.
    - Nmod (noun modifiers). Nmod are denoted by nmod followed by the number(s) in parentheses referring to the part of the sentence that is modified.
    - Ccomp (comparative clauses). Ccomp are denoted by ccomp followed by the number(s) in parentheses referring to the part of the sentence that it is a complement of.


    Your output should be a plan for converting a query input into its logical form.



    """    

    return plan_prompt


Function for the vote prompt

In [394]:
def vote_prompt(plan, n = 10):
    upper = n - 1
    vote_prompt = f""""
    Imagine you are a group of five experts and you are given a list of plans to convert a sentence into its logical form.

    Your task to score each plan on a scale of 1 to 10, where 1 is "very bad" and 10 is "very good".

    After voting on each plan, select the plan that received the most votes.

    The plans are as follows: {plan}

    Remember for RECOGS that the possible roles that words can take are as follows:

    - Nouns (including proper nouns). Nouns are denoted with numbers in parentheses.
    - Verbs. Verbs are denoted with numbers in parentheses.
    - Agents (who do events). Agents are denoted by agent followed by the number(s) in parentheses referring to the noun and verb.
    - Themes (who undergo events). Themes are denoted by theme followed by the number(s) in parentheses referring to the noun and verb.
    - Recipients (who receive events). Recipients are denoted by recipient followed by the number(s) in parentheses referring to the noun and verb.
    - Nmod (noun modifiers). Nmod are denoted by nmod followed by the number(s) in parentheses referring to the part of the sentence that is modified.
    - Ccomp (comparative clauses). Ccomp are denoted by ccomp followed by the number(s) in parentheses referring to the part of the sentence that it is a complement of.


    The output should be a single number between 0 and {upper} to indicate which of the {n} plans you chose.  For example, if you chose the first plan, your output should be "0". If you chose the second plan, your output should be "1", and so on. 
    Do not return more than one number.

    """    
    return vote_prompt

Function for the prompt to apply the plan

In [None]:
def apply_plan(generated_plan, test_inputs):
    application_prompt = f"""

    You are given a plan to parse a sentence into its logical form. The plan is as follows:

    Plan: {generated_plan}

    Your output should be the logical form of the sentence of the input sentence.

    In your output, assume the words are not permuted. For example, if the input sentence is "A bat was cooked", your output should be "bat ( 59 ) ; cook ( 2 ) AND theme ( 2 , 59 )".

    For every example, the possible roles that words can take are as follows:

    - Nouns (including proper nouns). Nouns are denoted with numbers in parentheses.
    - Verbs. Verbs are denoted with numbers in parentheses.
    - Agents (who do events). Agents are denoted by agent followed by the number(s) in parentheses referring to the noun and verb.
    - Themes (who undergo events). Themes are denoted by theme followed by the number(s) in parentheses referring to the noun and verb.
    - Recipients (who receive events). Recipients are denoted by recipient followed by the number(s) in parentheses referring to the noun and verb.
    - Nmod (noun modifiers). Nmod are denoted by nmod followed by the number(s) in parentheses referring to the part of the sentence that is modified.
    - Ccomp (comparative clauses). Ccomp are denoted by ccomp followed by the number(s) in parentheses referring to the part of the sentence that it is a complement of.

    The input sentence is: {test_input} 

    """
    return application_prompt


Get the data we will test on and register our LLM

In [396]:
gen_set = pd.read_csv('./data/RECOGSgen.tsv', sep = '\t', header = None)
lm = dspy.LM(model='gpt-3.5-turbo', api_key=openai_key)
gen_set = gen_set.sample(100).reset_index(drop=True)


In [397]:
gen_set.head()

Unnamed: 0,0,1,2
0,Amelia gave Olivia the shark beside a bottle .,Amelia ( 12 ) ; Olivia ( 19 ) ; * shark ( 10 )...,prim_to_obj_common
1,Lucas gave Liam a hedgehog on a table .,Lucas ( 11 ) ; Liam ( 22 ) ; hedgehog ( 12 ) ;...,subj_to_obj_common
2,The wolf on a computer was handed a bat by the...,* wolf ( 7 ) ; computer ( 2 ) ; bat ( 29 ) ; *...,obj_pp_to_subj_pp
3,The girl thought that the shark liked a cake .,* girl ( 50 ) ; * shark ( 8 ) ; cake ( 7 ) ; t...,prim_to_subj_common
4,Emma gave a hedgehog to Joshua .,Emma ( 47 ) ; hedgehog ( 4 ) ; Joshua ( 36 ) ;...,subj_to_obj_common


Get a bunch of plans!

In [398]:
plans = []
for i in range(0, 10):
    plans.append(lm(get_plan_prompt(examples), temperature=0.9, max_tokens=1000)[0])
    

See which plan gets the most votes

In [399]:
n = len(plans)
plan = "\n\n".join(plans)
vote = lm(vote_prompt(plan, n = n), temperature=0.9, max_tokens=1000)[0]
matches = re.findall(r'\d+(?:\.\d+)?', vote)
if matches:
    vote = matches[-1]
print (f"Vote: {vote}")
final_plan = plans[int(vote)]

Vote: 3


Now apply the plan and see how we do!

In [400]:
predictions = []
for i, row in gen_set.iterrows():
    logical_form = lm(apply_plan(final_plan, row[0], examples), temperature=0.9, max_tokens=1000)
    predictions.append(logical_form[0])

edit_predictions = []
for string in predictions:
    new_string = string.replace("Output:", "").strip()
    edit_predictions.append(new_string)

In [405]:
accuracy = []

for i, row in gen_set.iterrows():
    accuracy.append(recogs_exact_match(row[1], edit_predictions[i]))


In [406]:
np.mean(accuracy)

0.0

Try the same but dont do meta learning

In [407]:
def get_plan_prompt_sans_meta(examples):

    meta_learning = examples.sample(1)
    more_examples = examples.sample(7)

    inputs = pd.concat([meta_learning, more_examples])['input'].to_list()
    outputs = pd.concat([meta_learning, more_examples])['output'].to_list()


    inputs = "\n".join(inputs)
    outputs = "\n".join(outputs)       

    plan_prompt = f"""

    Imagine you are an expert in semantic parsing of natural language.

    You must develop a plan to convert each sentence into its logical form.

    A series of inputs and outputs are given to you. For example:
    
    Input: {inputs}
    Output: {outputs}

    For every example, the possible roles that words can take are as follows:

    - Nouns (including proper nouns). Nouns are denoted with numbers in parentheses.
    - Verbs. Verbs are denoted with numbers in parentheses.
    - Agents (who do events). Agents are denoted by agent followed by the number(s) in parentheses referring to the noun and verb.
    - Themes (who undergo events). Themes are denoted by theme followed by the number(s) in parentheses referring to the noun and verb.
    - Recipients (who receive events). Recipients are denoted by recipient followed by the number(s) in parentheses referring to the noun and verb.
    - Nmod (noun modifiers). Nmod are denoted by nmod followed by the number(s) in parentheses referring to the part of the sentence that is modified.
    - Ccomp (comparative clauses). Ccomp are denoted by ccomp followed by the number(s) in parentheses referring to the part of the sentence that it is a complement of.


    Your output should be a plan for converting a query input into its logical form.



    """    

    return plan_prompt

In [408]:
plans = []
for i in range(0, 10):
    plans.append(lm(get_plan_prompt_sans_meta(examples), temperature=0.9, max_tokens=1000)[0])
    
n = len(plans)
plan = "\n\n".join(plans)
vote = lm(vote_prompt(plan, n = n), temperature=0.9, max_tokens=1000)[0]
matches = re.findall(r'\d+(?:\.\d+)?', vote)
if matches:
    vote = matches[-1]
print (f"Vote: {vote}")
final_plan = plans[int(vote)]    

predictions = []
for i, row in gen_set.iterrows():
    logical_form = lm(apply_plan(final_plan, row[0], examples), temperature=0.9, max_tokens=1000)
    predictions.append(logical_form[0])

edit_predictions = []
for string in predictions:
    new_string = string.replace("Output:", "").strip()
    edit_predictions.append(new_string)
    
accuracy = []

for i, row in gen_set.iterrows():
    accuracy.append(recogs_exact_match(row[1], edit_predictions[i]))    

np.mean(accuracy)    

Vote: 3


0.0

Next, I prepare the data for DsPy. First, I'll start with a Simple Meta Learning Signtature

In [412]:
dspy_recogs_train = [
    dspy.Example(
        sentence=row['input'], logical_form=row['output']
    ).with_inputs("sentence")
    for _, row in examples.iterrows()]

In [None]:
class SimpleMetaLearn(dspy.Signature):

    __doc__ = """
    Imagine you are an expert in semantic parsing of natural language.
    
    A series of inputs and outputs are given to you. For example:
    Input: "A bat was cooked ."
    Output: "bat ( 59 ) ; cook ( 2 ) AND theme ( 2 , 59 )"
    To ensure that you focus on the semantic meaning of the sentences, the words themselves will replaced with other, equivalent words.
    For example, "bat" might be replaced with "cat", and "cook" might be replaced with "eat".
    Below is a full example of the input and output sentences, with the words replaced with other, equivalent words:

    Examples: 

    Input: A rose was floated by Emma  --> Output: teapot ( 30 ) ; Emma ( 15 ) ; float ( 52 ) AND theme ( 52 , 30 ) AND agent ( 52 , 15 )
    Input: Isabella liked that a cake was eaten . -->  Output: Isabella ( 13 ) ; mother ( 14 ) ; like ( 39 ) AND agent ( 39 , 13 ) AND ccomp ( 39 , 10 ) AND dust ( 10 ) AND theme ( 10 , 14 )
    Input: A sandwich was grown . -->  Output: queen ( 11 ) ; grow ( 52 ) AND theme ( 52 , 11 )
    Input: James respected that a box was shortened by a horse . -->  Output: James ( 45 ) ; hat ( 6 ) ; buyer ( 43 ) ; respect ( 0 ) AND agent ( 0 , 45 ) AND ccomp ( 0 , 15 ) AND shorten ( 15 ) AND theme ( 15 , 6 ) AND agent ( 15 , 43 )
    Input: Ava liked a donut on the plate . -->  Output: Ava ( 9 ) ; journalist ( 23 ) ; * tenant ( 15 ) ; like ( 37 ) AND agent ( 37 , 9 ) AND theme ( 37 , 23 ) AND nmod . on ( 23 , 15 )
    Input: A bottle was enlarged by Olivia . -->  Output: coach ( 19 ) ; Olivia ( 51 ) ; enlarge ( 56 ) AND theme ( 56 , 19 ) AND agent ( 56 , 51 )
    
    Query: 
    Input: A rose was helped by a dog . --> Output: teapot ( 1 ) ; dog ( 36 ) ; help ( 40 ) AND theme ( 40 , 1 ) AND agent ( 40 , 36 )
    
    For every example, the possible roles that words can take are as follows:
    
     - Nouns (including proper nouns). Nouns are denoted with numbers in parentheses.
     - Verbs. Verbs are denoted with numbers in parentheses.
     - Agents (who do events). Agents are denoted by agent followed by the number(s) in parentheses referring to the noun and verb.
     - Themes (who undergo events). Themes are denoted by theme followed by the number(s) in parentheses referring to the noun and verb.
     - Recipients (who receive events). Recipients are denoted by recipient followed by the number(s) in parentheses referring to the noun and verb.
     - Nmod (noun modifiers). Nmod are denoted by nmod followed by the number(s) in parentheses referring to the part of the sentence that is modified.
     - Ccomp (comparative clauses). Ccomp are denoted by ccomp followed by the number(s) in parentheses referring to the part of the sentence that it is a complement of.

    """

    sentence = dspy.InputField(desc="A sentence to parse")
    logical_form = dspy.OutputField(desc="The logical form of the sentence")

class BasicMetaLearn(dspy.Module):
    def __init__(self):
        super().__init__()
        self.chain= dspy.ChainOfThought(SimpleMetaLearn)

    def forward(self, sentence):
        return self.chain(sentence = sentence).logical_form
        

    

I have to make a recogs_exact_match_alt function because I continue to have problems with the flag

In [472]:
from collections import defaultdict
from itertools import product
import re


def recogs_exact_match_alt(gold, pred, flag="000000"):
    flag="000000"
    gold = normalize_formula(gold.logical_form)
    pred = normalize_formula(pred)
    gold_conj_set = get_conj_set(gold)
    # Loop over all viable mappings from pred_vars to gold_vars:
    for this_map in _candidate_variable_maps(gold, pred):
        phi = pred
        for sourcevar, targetvar in this_map.items():
            # The flag makes sure we don't accidentally do a chain
            # of replacements via successive changes in situations
            # where the domain and range of `this_map` share vars.
            phi = variable_change(phi, sourcevar, targetvar, flag=flag)
        phi = phi.replace(flag, "")
        phi_conj_set = get_conj_set(phi)
        # This step assumes that we have no conjuncts that are
        # tautologies, contradictions, or equality predications. If
        # such are introduced, they need to be identified ahead of
        # time and treated separately -- tautologies would be removed,
        # contradictions would reduce to comparisons of only those
        # conjuncts, and equality statements would call for special
        # handling related to variables mapping.
        if phi_conj_set == gold_conj_set:
            return True
    return False


def normalize_formula(phi):
    return phi.replace(" ", "").replace("AND" , " AND ")


binary_pred_re = re.compile(r"""
    (\w+)
    \s*
    \(
    \s*
    (\d+)
    \s*
    ,
    \s*
    (\d+)
    \s*
    \)""", re.VERBOSE)


unary_pred_re = re.compile(r"""
    (\w+)
    \s*
    \(
    \s*
    (\d+)
    \s*
    \)""", re.VERBOSE)


def _candidate_variable_maps(gold, pred):
    # This creates a mapping from tuples of predicates into their
    # associated variables. These serve as equivalence classes over
    # variables that could possibly be translations of each other.
    gold_map = _map_get_preds_to_vars(gold)
    pred_map = _map_get_preds_to_vars(pred)

    # For each prediction variable, get the set of potential
    # translations for it:
    pred2gold = defaultdict(list)
    for preds, pvars in pred_map.items():
        gvars = gold_map[preds]
        for pvar in pvars:
            pred2gold[pvar] = gold_map[preds]

    # Variable sets:
    gold_vars = set(get_variables(gold))
    pred_vars = set(get_variables(pred))

    # Now generate potentially viable mappings:
    for vals in list(product(*list(pred2gold.values()))):
        d = dict(zip(pred2gold.keys(), vals))
        if set(d.keys()) == pred_vars and set(d.values()) == gold_vars:
            yield d


def _map_get_preds_to_vars(phi):
    var2pred = defaultdict(list)
    for pred, var in unary_pred_re.findall(phi):
        var2pred[var].append(pred)
    # We could do somewhat less search by specializing to first and
    # second position for these predicates, but I think it's fine
    # as-is.
    for pred, var1, var2 in binary_pred_re.findall(phi):
        var2pred[var1].append(pred)
        var2pred[var2].append(pred)
    pred2var = defaultdict(list)
    for var, preds in var2pred.items():
        pred2var[tuple(sorted(preds))].append(var)
    return pred2var


def get_variables(phi):
    variable_re = re.compile(r"(\d+)")
    return variable_re.findall(phi)


def get_conj_set(phi):
    conj_splitter_re  = re.compile(r"\s*(?:AND|;)\s*")
    return set(conj_splitter_re.split(phi))


def variable_change(phi, sourcevar, targetvar, flag="000000"):
    replace_re = re.compile(rf"\b{sourcevar}\b")
    return replace_re.sub(f"{flag}{targetvar}", phi)


Optimize! 

In [None]:
from dspy.teleprompt import BootstrapFewShot, LabeledFewShot, BootstrapFewShotWithRandomSearch
lm = dspy.LM(model='gpt-3.5-turbo', api_key=openai_key)
dspy.settings.configure(lm=lm)
fewshot_optimizer = BootstrapFewShot(metric=recogs_exact_match_alt)
compiled = fewshot_optimizer.compile(student = BasicMetaLearn(), trainset=dspy_recogs_train)

  0%|          | 0/100 [00:00<?, ?it/s]

  8%|▊         | 8/100 [00:06<01:17,  1.19it/s]

Bootstrapped 4 full traces after 8 examples for up to 1 rounds, amounting to 8 attempts.





In [463]:
gen_set['prediction'] = gen_set[0].apply(
    lambda x: compiled(sentence=x))     

In [466]:
gen_set['correct'] = gen_set.apply(
    lambda row: recogs_exact_match(row[1], row['prediction']), axis=1)  
gen_set['correct'].sum() / gen_set.shape[0]     

0.29

In [467]:
gen_set.groupby([2])['correct'].mean()

2
active_to_passive                                         1.000000
cp_recursion                                              0.000000
do_dative_to_pp_dative                                    0.000000
obj_omitted_transitive_to_transitive                      0.200000
obj_pp_to_subj_pp                                         0.222222
obj_to_subj_common                                        0.200000
obj_to_subj_proper                                        0.285714
only_seen_as_transitive_subj_as_unacc_subj                0.400000
only_seen_as_unacc_subj_as_obj_omitted_transitive_subj    0.000000
only_seen_as_unacc_subj_as_unerg_subj                     0.000000
passive_to_active                                         0.833333
pp_dative_to_do_dative                                    0.500000
pp_recursion                                              0.000000
prim_to_inf_arg                                           0.000000
prim_to_obj_common                                        0.

Function to make a new prompt

In [468]:
def get_plan_prompt(examples):

    examples = examples.sample(8)

    inputs = pd.concat([meta_learning, more_examples])['input'].to_list()
    outputs = pd.concat([meta_learning, more_examples])['output'].to_list()

    input_output = []

    for i, row in examples.iterrows():
        input_output.append(f"Input: {row['input']} --> Output: {row['output']}")
    input_output = "\n".join(input_output)

    plan_prompt = f"""
    Imagine you are an expert in semantic parsing of natural language.
    A series of inputs and outputs are given to you. 

    Examples: {input_output}

    For every example, the possible roles that words can take are as follows:
    
     - Nouns (including proper nouns). Nouns are denoted with numbers in parentheses.
     - Verbs. Verbs are denoted with numbers in parentheses.
     - Agents (who do events). Agents are denoted by agent followed by the number(s) in parentheses referring to the noun and verb.
     - Themes (who undergo events). Themes are denoted by theme followed by the number(s) in parentheses referring to the noun and verb.
     - Recipients (who receive events). Recipients are denoted by recipient followed by the number(s) in parentheses referring to the noun and verb.
     - Nmod (noun modifiers). Nmod are denoted by nmod followed by the number(s) in parentheses referring to the part of the sentence that is modified.
     - Ccomp (comparative clauses). Ccomp are denoted by ccomp followed by the number(s) in parentheses referring to the part of the sentence that it is a complement of.
     
    """    

    return plan_prompt


In [469]:
get_plan_prompt(examples)

'\n    Imagine you are an expert in semantic parsing of natural language.\n    A series of inputs and outputs are given to you. \n\n    Examples: Input: A cookie was missed . --> Output: cookie ( 48 ) ; miss ( 10 ) AND theme ( 10 , 48 )\nInput: The mandarin was doubled . --> Output: * mandarin ( 15 ) ; double ( 39 ) AND theme ( 39 , 15 )\nInput: Ava liked that Liam nursed . --> Output: Ava ( 17 ) ; Liam ( 34 ) ; like ( 44 ) AND agent ( 44 , 17 ) AND ccomp ( 44 , 22 ) AND nurse ( 22 ) AND agent ( 22 , 34 )\nInput: The cookie was given to Emma by a visitor . --> Output: * cookie ( 31 ) ; Emma ( 34 ) ; visitor ( 13 ) ; give ( 44 ) AND theme ( 44 , 31 ) AND recipient ( 44 , 34 ) AND agent ( 44 , 13 )\nInput: A puppy helped a donut . --> Output: puppy ( 38 ) ; donut ( 14 ) ; help ( 27 ) AND agent ( 27 , 38 ) AND theme ( 27 , 14 )\nInput: The pumpkin was liked by Liam . --> Output: * pumpkin ( 38 ) ; Liam ( 36 ) ; like ( 59 ) AND theme ( 59 , 38 ) AND agent ( 59 , 36 )\nInput: A pizza was cl

Make the simple learner -- no meta learning involved! 

In [470]:
class SimpleLearn(dspy.Signature):

    __doc__ = """
    'Imagine you are an expert in semantic parsing of natural language.
    A series of inputs and outputs are given to you.
    Examples: 
    Input: A cookie was missed . --> Output: cookie ( 48 ) ; miss ( 10 ) AND theme ( 10 , 48 )
    Input: The mandarin was doubled . --> Output: * mandarin ( 15 ) ; double ( 39 ) AND theme ( 39 , 15 )
    Input: Ava liked that Liam nursed . --> Output: Ava ( 17 ) ; Liam ( 34 ) ; like ( 44 ) AND agent ( 44 , 17 ) AND ccomp ( 44 , 22 ) AND nurse ( 22 ) AND agent ( 22 , 34 )
    Input: The cookie was given to Emma by a visitor . --> Output: * cookie ( 31 ) ; Emma ( 34 ) ; visitor ( 13 ) ; give ( 44 ) AND theme ( 44 , 31 ) AND recipient ( 44 , 34 ) AND agent ( 44 , 13 )
    Input: A puppy helped a donut . --> Output: puppy ( 38 ) ; donut ( 14 ) ; help ( 27 ) AND agent ( 27 , 38 ) AND theme ( 27 , 14 )
    Input: The pumpkin was liked by Liam . --> Output: * pumpkin ( 38 ) ; Liam ( 36 ) ; like ( 59 ) AND theme ( 59 , 38 ) AND agent ( 59 , 36 )
    Input: A pizza was cleaned by Emma . --> Output: pizza ( 2 ) ; Emma ( 20 ) ; clean ( 27 ) AND theme ( 27 , 2 ) AND agent ( 27 , 20 )
    Input: A sandwich was grown . --> Output: sandwich ( 11 ) ; grow ( 52 ) AND theme ( 52 , 11 )
    For every example, the possible roles that words can take are as follows: 
    - Nouns (including proper nouns). Nouns are denoted with numbers in parentheses.
    - Verbs. Verbs are denoted with numbers in parentheses.
    - Agents (who do events). Agents are denoted by agent followed by the number(s) in parentheses referring to the noun and verb.
    - Themes (who undergo events). Themes are denoted by theme followed by the number(s) in parentheses referring to the noun and verb.
    - Recipients (who receive events). Recipients are denoted by recipient followed by the number(s) in parentheses referring to the noun and verb.
    - Nmod (noun modifiers). Nmod are denoted by nmod followed by the number(s) in parentheses referring to the part of the sentence that is modified.
    - Ccomp (comparative clauses). Ccomp are denoted by ccomp followed by the number(s) in parentheses referring to the part of the sentence that it is a complement of.'
    """
    sentence = dspy.InputField(desc="A sentence to parse")
    logical_form = dspy.OutputField(desc="The logical form of the sentence")

class BasicLearn(dspy.Module):
    def __init__(self):
        super().__init__()
        self.chain= dspy.ChainOfThought(SimpleLearn)

    def forward(self, sentence):
        return self.chain(sentence = sentence).logical_form    

In [473]:
from dspy.teleprompt import BootstrapFewShot, LabeledFewShot, BootstrapFewShotWithRandomSearch
lm = dspy.LM(model='gpt-3.5-turbo', api_key=openai_key)
dspy.settings.configure(lm=lm)
fewshot_optimizer = BootstrapFewShot(metric=recogs_exact_match_alt)
compiled = fewshot_optimizer.compile(student = BasicLearn(), trainset=dspy_recogs_train)

  8%|▊         | 8/100 [00:08<01:33,  1.01s/it]

Bootstrapped 4 full traces after 8 examples for up to 1 rounds, amounting to 8 attempts.





In [474]:
gen_set['prediction'] = gen_set[0].apply(
    lambda x: compiled(sentence=x))     
gen_set['correct'] = gen_set.apply(
    lambda row: recogs_exact_match(row[1], row['prediction']), axis=1)  
gen_set['correct'].sum() / gen_set.shape[0]     

0.27

In [475]:
gen_set.groupby([2])['correct'].mean()

2
active_to_passive                                         0.500000
cp_recursion                                              0.000000
do_dative_to_pp_dative                                    0.000000
obj_omitted_transitive_to_transitive                      0.200000
obj_pp_to_subj_pp                                         0.111111
obj_to_subj_common                                        0.200000
obj_to_subj_proper                                        0.285714
only_seen_as_transitive_subj_as_unacc_subj                0.400000
only_seen_as_unacc_subj_as_obj_omitted_transitive_subj    0.200000
only_seen_as_unacc_subj_as_unerg_subj                     0.000000
passive_to_active                                         0.666667
pp_dative_to_do_dative                                    0.000000
pp_recursion                                              0.000000
prim_to_inf_arg                                           0.000000
prim_to_obj_common                                        0.

And here we try out tree of thought

In [478]:
class Plans(dspy.Signature):


    __doc__ = """
    Imagine you are an expert in semantic parsing of natural language.
    You must develop a plan to convert each sentence into its logical form.

    A series of inputs and outputs are given to you. For example:
    Input: "A bat was cooked ."
    Output: "bat ( 59 ) ; cook ( 2 ) AND theme ( 2 , 59 )"

    To ensure that you focus on the semantic meaning of the sentences, the words themselves will replaced with other, equivalent words.
    For example, "bat" might be replaced with "cat", and "cook" might be replaced with "eat".
    Below is a full example of the input and output sentences, with the words replaced with other, equivalent words:

    Examples: 

    Input: A rose was floated by Emma  --> Output: teapot ( 30 ) ; Emma ( 15 ) ; float ( 52 ) AND theme ( 52 , 30 ) AND agent ( 52 , 15 )
    Input: Isabella liked that a cake was eaten . -->  Output: Isabella ( 13 ) ; mother ( 14 ) ; like ( 39 ) AND agent ( 39 , 13 ) AND ccomp ( 39 , 10 ) AND dust ( 10 ) AND theme ( 10 , 14 )
    Input: A sandwich was grown . -->  Output: queen ( 11 ) ; grow ( 52 ) AND theme ( 52 , 11 )
    Input: James respected that a box was shortened by a horse . -->  Output: James ( 45 ) ; hat ( 6 ) ; buyer ( 43 ) ; respect ( 0 ) AND agent ( 0 , 45 ) AND ccomp ( 0 , 15 ) AND shorten ( 15 ) AND theme ( 15 , 6 ) AND agent ( 15 , 43 )
    Input: Ava liked a donut on the plate . -->  Output: Ava ( 9 ) ; journalist ( 23 ) ; * tenant ( 15 ) ; like ( 37 ) AND agent ( 37 , 9 ) AND theme ( 37 , 23 ) AND nmod . on ( 23 , 15 )
    Input: A bottle was enlarged by Olivia . -->  Output: coach ( 19 ) ; Olivia ( 51 ) ; enlarge ( 56 ) AND theme ( 56 , 19 ) AND agent ( 56 , 51 )
    
    Query: 
    Input: A rose was helped by a dog . --> Output: teapot ( 1 ) ; dog ( 36 ) ; help ( 40 ) AND theme ( 40 , 1 ) AND agent ( 40 , 36 )
    
    For every example, the possible roles that words can take are as follows:
    
     - Nouns (including proper nouns). Nouns are denoted with numbers in parentheses.
     - Verbs. Verbs are denoted with numbers in parentheses.
     - Agents (who do events). Agents are denoted by agent followed by the number(s) in parentheses referring to the noun and verb.
     - Themes (who undergo events). Themes are denoted by theme followed by the number(s) in parentheses referring to the noun and verb.
     - Recipients (who receive events). Recipients are denoted by recipient followed by the number(s) in parentheses referring to the noun and verb.
     - Nmod (noun modifiers). Nmod are denoted by nmod followed by the number(s) in parentheses referring to the part of the sentence that is modified.
     - Ccomp (comparative clauses). Ccomp are denoted by ccomp followed by the number(s) in parentheses referring to the part of the sentence that it is a complement of.
    
     Your output should be a plan for converting a query input into its logical form.
    """

    sentence = dspy.InputField(desc="a sentence to parse")
    plan = dspy.OutputField(desc="a plan to parse the sentence")

class Vote(dspy.Signature):

    __doc__ = """Imagine you are a group of five experts and you are given a list of plans to convert a sentence into its logical form.

    Your task to score each plan on a scale of 1 to 10, where 1 is "very bad" and 10 is "very good".

    After voting on each plan, select the plan that received the most votes."""

    
    plan = dspy.InputField(desc="a list of plans to parse the sentence")
    vote: int = dspy.OutputField(desc="an index for the plan that received the most votes")

class ApplyPlan(dspy.Signature):
    __doc__ = """
    In your output, assume the words are not permuted. For example, if the input sentence is "A bat was cooked", your output should be "bat ( 59 ) ; cook ( 2 ) AND theme ( 2 , 59 )".

    For every example, the possible roles that words can take are as follows:

    - Nouns (including proper nouns). Nouns are denoted with numbers in parentheses.
    - Verbs. Verbs are denoted with numbers in parentheses.
    - Agents (who do events). Agents are denoted by agent followed by the number(s) in parentheses referring to the noun and verb.
    - Themes (who undergo events). Themes are denoted by theme followed by the number(s) in parentheses referring to the noun and verb.
    - Recipients (who receive events). Recipients are denoted by recipient followed by the number(s) in parentheses referring to the noun and verb.
    - Nmod (noun modifiers). Nmod are denoted by nmod followed by the number(s) in parentheses referring to the part of the sentence that is modified.
    - Ccomp (comparative clauses). Ccomp are denoted by ccomp followed by the number(s) in parentheses referring to the part of the sentence that it is a complement of.
    """
    test_input = dspy.InputField(desc="a sentence to parse")
    plan = dspy.InputField(desc="a plan to parse the sentence")
    logical_form = dspy.OutputField(desc="the logical form of the sentence")

In [479]:
class MetaLearn(dspy.Module):
    def __init__(self):
        super().__init__()
        self.chain= dspy.ChainOfThought(Plans)
        self.vote = dspy.ChainOfThought(Vote)
        self.apply_plan = dspy.ChainOfThought(ApplyPlan)

    def forward(self, sentence):

        plans = []
        for i in range(0, 5):
            plans.append(self.chain(sentence = sentence).plan)
        all_plans = "\n\n".join(plans)
        vote = self.vote(plan = all_plans).vote
        final_plan = plans[int(vote)]
        logical_form = self.apply_plan(test_input = sentence, plan = final_plan).logical_form
        return logical_form

In [480]:
from dspy.teleprompt import BootstrapFewShot, LabeledFewShot, BootstrapFewShotWithRandomSearch
lm = dspy.LM(model='gpt-3.5-turbo', api_key=openai_key)
dspy.settings.configure(lm=lm)
fewshot_optimizer = BootstrapFewShot(metric=recogs_exact_match_alt)
compiled = fewshot_optimizer.compile(student = MetaLearn(), trainset=dspy_recogs_train)

 14%|█▍        | 14/100 [00:50<04:25,  3.09s/it]2025/05/09 19:20:00 ERROR dspy.teleprompt.bootstrap: Failed to run or to evaluate example Example({'sentence': 'A mouse was passed a bag .', 'logical_form': 'mouse ( 36 ) ; bag ( 56 ) ; pass ( 21 ) AND recipient ( 21 , 36 ) AND theme ( 21 , 56 )'}) (input_keys={'sentence'}) with <function recogs_exact_match_alt at 0x3116179d0> due to list index out of range.
 24%|██▍       | 24/100 [01:24<04:28,  3.53s/it]

Bootstrapped 4 full traces after 24 examples for up to 1 rounds, amounting to 24 attempts.





In [481]:
gen_set['prediction'] = gen_set[0].apply(
    lambda x: compiled(sentence=x))     
gen_set['correct'] = gen_set.apply(
    lambda row: recogs_exact_match(row[1], row['prediction']), axis=1)  
gen_set['correct'].sum() / gen_set.shape[0]    

0.2

In [482]:
gen_set.groupby([2])['correct'].mean()

2
active_to_passive                                         0.500000
cp_recursion                                              0.000000
do_dative_to_pp_dative                                    0.000000
obj_omitted_transitive_to_transitive                      0.200000
obj_pp_to_subj_pp                                         0.000000
obj_to_subj_common                                        0.200000
obj_to_subj_proper                                        0.285714
only_seen_as_transitive_subj_as_unacc_subj                0.200000
only_seen_as_unacc_subj_as_obj_omitted_transitive_subj    0.000000
only_seen_as_unacc_subj_as_unerg_subj                     0.000000
passive_to_active                                         0.333333
pp_dative_to_do_dative                                    0.000000
pp_recursion                                              0.000000
prim_to_inf_arg                                           0.000000
prim_to_obj_common                                        0.