# Baseline Model for Project

In [79]:
from dotenv import load_dotenv

In [80]:
import pandas as pd
from compgen import recogs_exact_match
import dspy
import numpy as np

In [81]:
import os
# API keys live in a `.env` file in the local root directory so that they stay
# out of the notebook source; `load_dotenv()` exposes them as environment variables.
load_dotenv()
openai_key = os.environ.get('OPENAI_API_KEY')  # None when the variable is not set


In [82]:
# The ReCOGS splits are header-less, tab-separated files; `read_table` uses a
# tab delimiter by default, so columns are addressed by integer position below.
train_set = pd.read_table('./data/RECOGStrain.tsv', header=None)
gen_set = pd.read_table('./data/RECOGSgen.tsv', header=None)

In [83]:
# Task instruction text for the baseline LM.
prompt = "Translate each of the following sentences into ReCOGS logical format."

In [84]:
# Language-model handle used by the baseline loop and later registered with DSPy;
# the API key is the one read from the environment into `openai_key`.
lm = dspy.LM(model='gpt-3.5-turbo', api_key=openai_key)

In [85]:
def make_prompt(sentences: list[str]) -> str:
    """Build one instruction prompt that lists ``sentences`` as a numbered list.

    Args:
        sentences: The English sentences to include, in order.

    Returns:
        The instruction header followed by one ``"<n>. <sentence>"`` line per
        input (numbering starts at 1, each line newline-terminated). With no
        sentences, only the header is returned.
    """
    header = "Translate each of the following sentences into ReCOGS logical format:\n\n"
    numbered_lines = [
        f"{number}. {text}\n" for number, text in enumerate(sentences, start=1)
    ]
    return header + "".join(numbered_lines)

In [86]:
# Zero-shot baseline: send each of the first five generalization sentences to the
# raw LM, one prompt per sentence, and collect the replies in input order.
output_predictions = []

# Only the sentence column (0) is needed, so iterate over it directly; `.iloc`
# makes the "first five rows" selection explicitly positional.
for sentence in gen_set[0].iloc[0:5]:
    # Use a distinct name so the notebook-level `prompt` defined in an earlier
    # cell is not silently overwritten by this loop.
    baseline_prompt = make_prompt([sentence])
    # NOTE(review): temperature=0.9 makes reruns non-deterministic; the value is
    # left unchanged so the reported baseline stays comparable.
    # Index 0 selects the first completion returned by the LM call.
    prediction = lm(baseline_prompt, temperature=0.9, max_tokens=1000)[0]
    output_predictions.append(prediction)

In [87]:
# Score the baseline predictions against the gold logical forms (column 1).
accuracy = []

# Pair gold and prediction by POSITION. Indexing `output_predictions` with the
# DataFrame index label only works while the rows happen to be labelled 0..4,
# and breaks (or silently misaligns) for a shuffled or sampled frame.
for gold_logical_form, predicted in zip(gen_set[1].iloc[0:5], output_predictions):
    print(predicted)
    accuracy.append(recogs_exact_match(gold_logical_form, predicted))



There is a person, Paula. Paula did an action, painted. The thing Paula painted is a cake. The location where Paula painted the cake is a closet.
belief(Zoe, clean(hippo))
(teleport princess cookie goose)
∃x∃y∃z(cockroach(x) ∧ pretzel(y) ∧ person(z) ∧ name(z, "Olivia") ∧ gave(x, y, z))
Hopped(hippo,paint,Girl)


In [88]:
# Mean of the per-example `recogs_exact_match` results collected in `accuracy`.
np.mean(accuracy)

0.0

In [89]:
# Register `lm` in DSPy's global settings so the DSPy modules defined below use it.
dspy.settings.configure(lm=lm)

In [90]:
class RecogsSignature(dspy.Signature):
    """Translate English sentences into logical form."""

    # DSPy reads a signature's docstring as the task instructions for the LM, so
    # the text above is runtime prompt content: edit it only to change the prompt.
    sentence = dspy.InputField()  # input: the English sentence
    logical_form = dspy.OutputField()  # output: the predicted logical form

In [91]:
class RecogsDspy(dspy.Module):
    """DSPy module that maps a sentence to a logical form via `RecogsSignature`."""

    def __init__(self):
        super().__init__()
        # Single predictor over the signature: sentence -> logical_form.
        self.predict = dspy.Predict(RecogsSignature)

    def forward(self, sentence):
        """Run the predictor created in ``__init__`` on ``sentence``.

        Returns the predictor's result, a `dspy.Prediction` whose
        `logical_form` attribute holds the model output.
        """
        prediction = self.predict(sentence=sentence)
        return prediction



In [92]:
# Fresh, uncompiled instance of the module; it is passed to the few-shot compiler below.
recogs_dspy_model = RecogsDspy()

In [93]:
from dspy.teleprompt import LabeledFewShot

def fewshot_recogs_dspy(model, trainset=None, k=10):
    """Compile ``model`` with DSPy's `LabeledFewShot` teleprompter.

    Args:
        model: The DSPy module to compile (passed as ``student``).
        trainset: Labeled `dspy.Example`s to draw demonstrations from. When
            omitted, falls back to the notebook-level ``dspy_recogs_train``,
            which keeps existing one-argument calls working.
        k: Number of demonstrations handed to `LabeledFewShot`; defaults to
            the previously hard-coded 10.

    Returns:
        The compiled module returned by ``teleprompter.compile``.
    """
    if trainset is None:
        # Looked up at call time, so the global only has to exist before the
        # first call, not before this definition is executed.
        trainset = dspy_recogs_train
    teleprompter = LabeledFewShot(k=k)
    return teleprompter.compile(student=model, trainset=trainset)

In [94]:
# Wrap every training row as a DSPy example: column 0 is the input sentence,
# column 1 the gold logical form; only `sentence` is marked as an input field.
dspy_recogs_train = [
    dspy.Example(sentence=source_text, logical_form=gold_lf).with_inputs("sentence")
    for source_text, gold_lf in zip(train_set[0], train_set[1])
]

In [95]:
# Few-shot version of the module, compiled by `fewshot_recogs_dspy`.
compiled_recogs_dspy_model = fewshot_recogs_dspy(recogs_dspy_model)

In [None]:
# Evaluate on a random subset of 100 generalization rows; `random_state` pins the draw.
gen_set_sample = gen_set.sample(100, random_state=42)

In [99]:
# Run the compiled model on each sampled sentence (column 0) and store the
# predicted logical form alongside it; one model call per row.
gen_set_sample['prediction'] = [
    compiled_recogs_dspy_model(sentence=source_text).logical_form
    for source_text in gen_set_sample[0]
]
  

In [100]:
# Plain-list copies of the predicted and gold (column 1) logical forms.
preds = gen_set_sample['prediction'].to_list()
gold = gen_set_sample[1].to_list()

In [102]:
# Per-row exact-match flag: gold logical form (column 1) vs. the model prediction.
gen_set_sample['correct'] = [
    recogs_exact_match(gold_lf, predicted_lf)
    for gold_lf, predicted_lf in zip(gen_set_sample[1], gen_set_sample['prediction'])
]

In [106]:
# Mean of the per-row `correct` flags, i.e. exact-match accuracy on the sample.
gen_set_sample['correct'].mean()

0.2

In [78]:
# Peek at the first five gold logical forms of the sample (positional slice).
gen_set_sample[1].iloc[0:5].to_list()

['Noah ( 9 ) ; Ava ( 33 ) ; * ball ( 48 ) ; ship ( 14 ) AND agent ( 14 , 9 ) AND recipient ( 14 , 33 ) AND theme ( 14 , 48 )',
 '* bear ( 40 ) ; crystal ( 7 ) ; squeeze ( 15 ) AND agent ( 15 , 40 ) AND theme ( 15 , 7 )',
 '* donut ( 47 ) ; bed ( 48 ) ; * girl ( 42 ) ; boat ( 46 ) ; nmod . beside ( 47 , 48 ) AND sketch ( 0 ) AND theme ( 0 , 47 ) AND agent ( 0 , 42 ) AND nmod . beside ( 42 , 46 )',
 'Emma ( 18 ) ; * boy ( 30 ) ; cake ( 24 ) ; * house ( 34 ) ; ship ( 5 ) AND agent ( 5 , 18 ) AND recipient ( 5 , 30 ) AND theme ( 5 , 24 ) AND nmod . in ( 24 , 34 )',
 '* mouse ( 46 ) ; girl ( 5 ) ; Charlotte ( 29 ) ; Lucas ( 37 ) ; Emma ( 25 ) ; dog ( 4 ) ; Wyatt ( 11 ) ; Evelyn ( 35 ) ; Noah ( 15 ) ; Thomas ( 42 ) ; William ( 10 ) ; Savannah ( 28 ) ; boy ( 36 ) ; like ( 17 ) AND agent ( 17 , 46 ) AND ccomp ( 17 , 24 ) AND hope ( 24 ) AND agent ( 24 , 5 ) AND ccomp ( 24 , 50 ) AND hope ( 50 ) AND agent ( 50 , 29 ) AND ccomp ( 50 , 18 ) AND confess ( 18 ) AND agent ( 18 , 37 ) AND ccomp ( 18 