# LLMs as Symbolic Pattern Machines

### Imports

In [1]:
import os
import random
from transformers import GPT2Tokenizer
import pandas as pd
import json
import openai
import time

### Constants

In [2]:
JSONS_FOLDER = "mistake_jsons_more_context"
ALL_MISTAKE_JSONS = os.listdir(JSONS_FOLDER)

CORRECT_JSON_FOLDER = os.path.join("mistake_jsons_split", "correct")
CORRECT_JSON_FILES = os.listdir(CORRECT_JSON_FOLDER)
MISTAKE_JSON_FOLDER = os.path.join("mistake_jsons_split", "mistake")
MISTAKE_JSON_FILES = os.listdir(MISTAKE_JSON_FOLDER)

# USABLE_OPENAI_API_KEY = ""
# ANTONINO_OPENAI_API_KEY = ""
# openai.api_key = ANTONINO_OPENAI_API_KEY  # USABLE_OPENAI_API_KEY

tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
model = "text-davinci-003"
token_limit = 4096

### Response function

In [3]:
def LLM(prompt, stop=None, max_tokens=256, temperature=0):
    responses = openai.Completion.create(
        engine=model,
        prompt=prompt,
        max_tokens=max_tokens,
        temperature=temperature,
        stop=stop,
    )
    print(type(responses))
    text = [response["text"] for response in responses["choices"]]
    return text

### Esempio di utilizzo

In [4]:
selected_json = ALL_MISTAKE_JSONS[random.randint(0, len(ALL_MISTAKE_JSONS))]

with open(os.path.join(JSONS_FOLDER, selected_json), "r") as f:
    curr_dict = json.load(f)
input_for_LLM = curr_dict["context_str"] + curr_dict["input_str"]
predicted = LLM(input_for_LLM)
print(input_for_LLM)
print("GT:\n", curr_dict["output_str"])
print("Predicted:\n", predicted[0])

<class 'openai.openai_object.OpenAIObject'>
Input Sequence:
 138, 125, 57, 60, 107, 111, 16, 20
Next Symbol:
 82
---
Input Sequence:
 143, 125, 107, 57, 74, 16
Next Symbol:
 20
---
Input Sequence:
 57, 125, 60, 107, 111, 82
Next Symbol:
 138
---
Input Sequence:
 57, 125, 60, 107, 111, 82
Next Symbol:
 138
---
Input Sequence:
 125, 57, 107, 82, 60
Next Symbol:
 138
---
Input Sequence:
 125, 57, 107, 111, 138, 77, 82, 20, 12
Next Symbol:

GT:
  16

Predicted:
  60


### Evaluation
For the time being, we cannot freely use the `openai API`, we are instead constrined on their rate limits:
- 3 PROMPTS/MIN
- 200 PROMPTS/DAY

This is why we use `time.sleep(60)` and break the evaluation at the 50® step.

#### Evaluate on all procedures

In [5]:
tot = 0
correct = 0

for n, json_file in enumerate(ALL_MISTAKE_JSONS[:150]):
    tot += 1
    with open(os.path.join(JSONS_FOLDER, json_file), "r") as f:
        curr_dict = json.load(f)
    input_for_LLM = curr_dict["context_str"] + curr_dict["input_str"]
    predicted = LLM(input_for_LLM)
    predicted = predicted[0].strip()
    gt = curr_dict["output_str"].strip()
    print(
        "Procedure Label: {}\nGT: {}\nPred:{}\nCorrect: {}\n".format(
            curr_dict["procedure_label"], gt, predicted, gt == predicted
        )
    )
    if predicted == curr_dict["output_str"].strip():
        correct += 1

ratio = correct / tot
print("Ratio:", ratio, f"{correct}/{tot}")

<class 'openai.openai_object.OpenAIObject'>
Procedure Label: correct
GT: 107
Pred:107
Correct: True

<class 'openai.openai_object.OpenAIObject'>
Procedure Label: correct
GT: 111
Pred:57
Correct: False

<class 'openai.openai_object.OpenAIObject'>
Procedure Label: mistake
GT: 46
Pred:21
Correct: False



#### Evaluate on correct procedures

In [18]:
tot = 0
correct = 0

for n, json_file in enumerate(CORRECT_JSON_FILES):
    tot += 1
    print(json_file)
    with open(os.path.join(CORRECT_JSON_FOLDER, json_file), "r") as f:
        curr_dict = json.load(f)
    input_for_LLM = curr_dict["context_str"] + curr_dict["input_str"]
    predicted = LLM(input_for_LLM)
    predicted = predicted[0].strip()
    gt = curr_dict["output_str"].strip()
    print(
        "Procedure Label: {}\nGT: {}\nPred:{}\nCorrect: {}\n".format(
            curr_dict["procedure_label"], gt, predicted, gt == predicted
        )
    )
    if predicted == curr_dict["output_str"].strip():
        correct += 1

ratio = correct / tot
print("Ratio:", ratio, f"{correct}/{tot}")

nusar-2021_action_both_9011-b06b_9011_user_id_2021-02-01_154253.json
<class 'openai.openai_object.OpenAIObject'>
Procedure Label: correct
GT: 107
Pred:107
Correct: True

nusar-2021_action_both_9011-b08c_9011_user_id_2021-02-01_154736.json
<class 'openai.openai_object.OpenAIObject'>
Procedure Label: correct
GT: 111
Pred:57
Correct: False

nusar-2021_action_both_9011-c03f_9011_user_id_2021-02-01_160239.json
<class 'openai.openai_object.OpenAIObject'>
Procedure Label: correct
GT: 99
Pred:99
Correct: True

nusar-2021_action_both_9012-b06d_9012_user_id_2021-02-01_163713.json
<class 'openai.openai_object.OpenAIObject'>
Procedure Label: correct
GT: 143
Pred:143
Correct: True

nusar-2021_action_both_9012-c07c_9012_user_id_2021-02-01_164345.json
<class 'openai.openai_object.OpenAIObject'>
Procedure Label: correct
GT: 111
Pred:111
Correct: True

nusar-2021_action_both_9013-c03b_9013_user_id_2021-02-24_113410.json
<class 'openai.openai_object.OpenAIObject'>
Procedure Label: correct
GT: 101
Pred:1

#### Evaluate on mistaken procedures

In [19]:
tot = 0
correct = 0

for n, json_file in enumerate(MISTAKE_JSON_FILES):
    tot += 1
    with open(os.path.join(MISTAKE_JSON_FOLDER, json_file), "r") as f:
        curr_dict = json.load(f)
    input_for_LLM = curr_dict["context_str"] + curr_dict["input_str"]
    predicted = LLM(input_for_LLM)
    predicted = predicted[0].strip()
    gt = curr_dict["output_str"].strip()
    print(
        "Procedure Label: {}\nGT: {}\nPred:{}\nCorrect: {}\n".format(
            curr_dict["procedure_label"], gt, predicted, gt == predicted
        )
    )
    if predicted == curr_dict["output_str"].strip():
        correct += 1

ratio = correct / tot
print("Ratio:", ratio, f"{correct}/{tot}")

<class 'openai.openai_object.OpenAIObject'>
Procedure Label: mistake
GT: 46
Pred:21
Correct: False

<class 'openai.openai_object.OpenAIObject'>
Procedure Label: mistake
GT: 136
Pred:16
Correct: False

<class 'openai.openai_object.OpenAIObject'>
Procedure Label: mistake
GT: 288
Pred:111
Correct: False

<class 'openai.openai_object.OpenAIObject'>
Procedure Label: mistake
GT: 36
Pred:142
Correct: False

<class 'openai.openai_object.OpenAIObject'>
Procedure Label: mistake
GT: 111
Pred:48
Correct: False

<class 'openai.openai_object.OpenAIObject'>
Procedure Label: mistake
GT: 209
Pred:68
Correct: False

<class 'openai.openai_object.OpenAIObject'>
Procedure Label: mistake
GT: 245
Pred:138
Correct: False

<class 'openai.openai_object.OpenAIObject'>
Procedure Label: mistake
GT: 225
Pred:58
Correct: False

<class 'openai.openai_object.OpenAIObject'>
Procedure Label: mistake
GT: 202
Pred:57
Correct: False

<class 'openai.openai_object.OpenAIObject'>
Procedure Label: mistake
GT: 36
Pred:125
Corre

## Split procedures

In [5]:
def truncated_strings(json_fn):
    with open(os.path.join(JSONS_FOLDER, json_fn), "r") as f:
        curr_dict = json.load(f)
    all_truncated_prompts = []
    all_gts = []
    input_str = curr_dict["input_str"]
    context = curr_dict["context_str"]
    output_str = curr_dict["output_str"]
    input_prompt, sequence_, output_prompt, _ = curr_dict["input_str"].split("\n")
    sequence = sequence_.split(",")
    for i in range(len(sequence)):
        curr_str = (
            context
            + input_prompt
            + "\n"
            + ",".join(sequence[:i])
            + "\n"
            + output_prompt
            + "\n"
        )
        curr_res = sequence[i]
        all_truncated_prompts.append(curr_str)
        all_gts.append(curr_res)
    all_truncated_prompts.append(context + input_str)
    all_gts.append(output_str)
    return all_truncated_prompts, all_gts

#### Evaluate on correct procedures

In [7]:
tot = 0
correct = 0

for n, json_file in enumerate(CORRECT_JSON_FILES):
    print(json_file)
    sequences, gts = truncated_strings(json_file)
    for input_str, gt in zip(sequences, gts):
        tot += 1
        # print("====\n", input_str)
        # print("GT:", gt)
        predicted = LLM(input_str)
        predicted = predicted[0].strip()
        gt = gt.strip()
        print("GT: {}\nPred:{}\nCorrect: {}\n".format(gt, predicted, gt == predicted))
        if predicted == gt:
            correct += 1

ratio = correct / tot
print("Ratio:", ratio, f"{correct}/{tot}")

nusar-2021_action_both_9011-b06b_9011_user_id_2021-02-01_154253.json
<class 'openai.openai_object.OpenAIObject'>
GT: 125
Pred:21
Correct: False

<class 'openai.openai_object.OpenAIObject'>
GT: 143
Pred:143
Correct: True

<class 'openai.openai_object.OpenAIObject'>
GT: 29
Pred:21
Correct: False

<class 'openai.openai_object.OpenAIObject'>
GT: 21
Pred:107
Correct: False

<class 'openai.openai_object.OpenAIObject'>
GT: 141
Pred:107
Correct: False

<class 'openai.openai_object.OpenAIObject'>
GT: 107
Pred:107
Correct: True

nusar-2021_action_both_9011-b08c_9011_user_id_2021-02-01_154736.json
<class 'openai.openai_object.OpenAIObject'>
GT: 36
Pred:Error: No input sequence provided.
Correct: False

<class 'openai.openai_object.OpenAIObject'>
GT: 143
Pred:115
Correct: False

<class 'openai.openai_object.OpenAIObject'>
GT: 125
Pred:57
Correct: False

<class 'openai.openai_object.OpenAIObject'>
GT: 17
Pred:57
Correct: False

<class 'openai.openai_object.OpenAIObject'>
GT: 44
Pred:57
Correct: Fal

In [8]:
tot = 0
correct = 0
performance_dict_per_step = {}
for n, json_file in enumerate(MISTAKE_JSON_FILES):
    print(json_file)
    sequences, gts = truncated_strings(json_file)
    for input_str, gt in zip(sequences, gts):
        tot += 1
        # print("====\n", input_str)
        # print("GT:", gt)
        predicted = LLM(input_str)
        predicted = predicted[0].strip()
        gt = gt.strip()
        print("GT: {}\nPred:{}\nCorrect: {}\n".format(gt, predicted, gt == predicted))
        if predicted == gt:
            correct += 1

ratio = correct / tot
print("Ratio:", ratio, f"{correct}/{tot}")

nusar-2021_action_both_9011-c01c_9011_user_id_2021-02-01_155620.json
<class 'openai.openai_object.OpenAIObject'>
GT: 143
Pred:21
Correct: False

<class 'openai.openai_object.OpenAIObject'>
GT: 125
Pred:21
Correct: False

<class 'openai.openai_object.OpenAIObject'>
GT: 107
Pred:21
Correct: False

<class 'openai.openai_object.OpenAIObject'>
GT: 89
Pred:21
Correct: False

<class 'openai.openai_object.OpenAIObject'>
GT: 36
Pred:21
Correct: False

<class 'openai.openai_object.OpenAIObject'>
GT: 46
Pred:21
Correct: False

nusar-2021_action_both_9011-c13b_9011_user_id_2021-02-01_160915.json
<class 'openai.openai_object.OpenAIObject'>
GT: 143
Pred:16
Correct: False

<class 'openai.openai_object.OpenAIObject'>
GT: 136
Pred:16
Correct: False

nusar-2021_action_both_9012-a16_9012_user_id_2021-02-01_162904.json
<class 'openai.openai_object.OpenAIObject'>
GT: 143
Pred:143
Correct: True

<class 'openai.openai_object.OpenAIObject'>
GT: 107
Pred:57
Correct: False

<class 'openai.openai_object.OpenAIOb