In [1]:
from dotenv import load_dotenv
load_dotenv()

import os 
import os.path as osp
import json

import tiktoken
import ollama
from openai import AzureOpenAI
from transformers import AutoTokenizer
import pprint

There was a problem when trying to write in your cache folder (/DATA1/HuggingFace/hub). You should set the environment variable TRANSFORMERS_CACHE to a writable directory.
None of PyTorch, TensorFlow >= 2.0, or Flax have been found. Models won't be available and only tokenizers, configuration and file/data utilities can be used.


In [2]:
# Azure OpenAI credentials are read from the environment (load_dotenv() is
# called in the imports cell), so no secret is stored in the notebook itself.
# Both values are None when the corresponding variable is not set.
azure_openai_api_key = os.getenv("AZURE_OPENAI_API_KEY")
azure_openai_endpoint = os.getenv("AZURE_OPENAI_ENDPOINT")
# Passed as `model=` to the Azure chat-completions call further down.
deployment_name = "gpt-4"
# Repository root. Set LOGLM_PROJECT_PATH (e.g. in .env) to run the notebook
# from another checkout; the original author's location remains the default.
project_path = os.getenv('LOGLM_PROJECT_PATH', '/home/mou/Projects/COLING-LogicLLM/LogLM')
# Per-dataset folders holding the `<split>.json` files.
data_path = osp.join(project_path, 'data')
# Folder holding one `<dataset>.txt` prompt template per dataset.
demonstration_path = osp.join(project_path, 'src/logicllm/prompts/baseline')
datasets = ['FOLIO', 'AR-LSAT', 'LogicalDeduction', 'ProntoQA', 'ProofWriter']

# Prompt example

In [3]:
# Which dataset/split the single-example demo below operates on.
dataset_name = 'FOLIO'
split = 'dev'
mode = 'Direct'
# Delimiter between sections of the prompt; also passed as the `stop`
# sequence of the Azure chat-completions request.
stop_words = "------"
# Only referenced by the commented-out answer-parsing code further down.
label_phrase = 'The correct option is:'


# Both files are decoded explicitly as UTF-8: the prompt template contains
# non-ASCII logic symbols (∧, ∨, ⊕, ¬, →, ↔, ∀, ∃) that would be mis-decoded
# or raise on platforms whose default text encoding is not UTF-8.
with open(osp.join(data_path, dataset_name, f'{split}.json'), encoding='utf-8') as f:
    raw_dataset = json.load(f)

with open(osp.join(demonstration_path, f'{dataset_name}.txt'), encoding='utf-8') as f:
    prompt_template = f.read()
    
def prompt_folio(prompt_template, test_example):
    """Fill a FOLIO prompt template with one example.

    Args:
        prompt_template: Template text containing the placeholders
            ``[[PROBLEM]]`` and ``[[QUESTION]]``.
        test_example: Mapping with a ``'context'`` entry (inserted verbatim
            for ``[[PROBLEM]]``) and a ``'question'`` entry (inserted with
            surrounding whitespace stripped for ``[[QUESTION]]``).

    Returns:
        The template with every placeholder occurrence substituted.

    Raises:
        KeyError: If ``test_example`` lacks ``'context'`` or ``'question'``.
    """
    import re  # local import so the function does not depend on the imports cell

    substitutions = {
        '[[PROBLEM]]': test_example['context'],
        '[[QUESTION]]': test_example['question'].strip(),
    }
    placeholder_pattern = re.compile('|'.join(re.escape(key) for key in substitutions))
    # Substitute in a single pass: chained str.replace() calls would re-scan
    # text that was just inserted, so a context that itself contains
    # "[[QUESTION]]" would be corrupted. A callable replacement also keeps
    # backslashes in the inserted text literal.
    return placeholder_pattern.sub(lambda match: substitutions[match.group(0)], prompt_template)

# Registry mapping a dataset name to the function that renders its prompt.
prompt_creators = {'FOLIO': prompt_folio}
prompt_creator = prompt_creators[dataset_name]

# Render the prompt for the first example of the loaded split and show it.
example = raw_dataset[0]
question = example['question']
answer = example['answer']
full_prompt = prompt_creator(prompt_template, example)

print(full_prompt)

# batch_size = 10
# dataset_chunks = [raw_dataset[i:i + batch_size] for i in range(0, len(raw_dataset), batch_size)]

Given a problem description and a question. The task is to parse the problem and the question into first-order logic formulars.
The grammar of the first-order logic formular is defined as follows:
1) logical conjunction of expr1 and expr2: expr1 ∧ expr2
2) logical disjunction of expr1 and expr2: expr1 ∨ expr2
3) logical exclusive disjunction of expr1 and expr2: expr1 ⊕ expr2
4) logical negation of expr1: ¬expr1
5) expr1 implies expr2: expr1 → expr2
6) expr1 if and only if expr2: expr1 ↔ expr2
7) logical universal quantification: ∀x
8) logical existential quantification: ∃x
------
Problem:
All people who regularly drink coffee are dependent on caffeine. People either regularly drink coffee or joke about being addicted to caffeine. No one who jokes about being addicted to caffeine is unaware that caffeine is a drug. Rina is either a student and unaware that caffeine is a drug, or neither a student nor unaware that caffeine is a drug. If Rina is not a person dependent on caffeine and a st

# OpenAI

In [4]:
def num_tokens_from_string(string: str, encoding_name: str) -> int:
    """Count the tokens tiktoken produces for ``string``.

    Args:
        string: Text to tokenize.
        encoding_name: Either an OpenAI model name (e.g. ``"gpt-4"``), which
            is resolved with ``tiktoken.encoding_for_model``, or a tiktoken
            encoding name (e.g. ``"cl100k_base"``), which is resolved with
            ``tiktoken.get_encoding`` when no model of that name is known.

    Returns:
        The number of tokens in the encoded text.

    Raises:
        ValueError: If ``encoding_name`` is neither a known model name nor a
            known encoding name (raised by ``tiktoken.get_encoding``).
    """
    try:
        encoding = tiktoken.encoding_for_model(encoding_name)
    except KeyError:
        # Not a model name: honor the parameter's name and treat the value
        # as an encoding name instead of failing.
        encoding = tiktoken.get_encoding(encoding_name)
    return len(encoding.encode(string))
# Report the prompt size before sending it.
prompt_token_count = num_tokens_from_string(full_prompt, "gpt-4")
print(f'Number of tokens in prompt GPT-4: {prompt_token_count}')

# Client for the Azure OpenAI resource configured through the environment.
client = AzureOpenAI(
    api_key=azure_openai_api_key,
    api_version="2024-02-01",
    azure_endpoint=azure_openai_endpoint,
)

# A generic system message followed by the rendered prompt as the user turn.
gpt4_messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": full_prompt},
]

# temperature 0.0 with top_p 1.0; generation is cut at the prompt's section
# delimiter, which is passed as the stop sequence.
response = client.chat.completions.create(
    model=deployment_name,
    messages=gpt4_messages,
    temperature=0.0,
    top_p=1.0,
    stop=stop_words,
)

print(response.choices[0].message.content)

# generated_content = response.choices[0].message.content.strip()
# generated_answer = generated_content.split(label_phrase)[-1].strip()
# generated_reasoning = generated_content.split(label_phrase)[0].strip()

# output_json = {'id': example['id'], 
#             'question': question, 
#             'answer': answer, 
#             'predicted_reasoning': generated_reasoning,
#             'predicted_answer': generated_answer}
# print(f'id : {example["id"]}')
# print(f'question : {question}')
# print(f'answer : {answer}')
# # print(f'predicted reasoning : {generated_reasoning}')
# print(f'predicted answer : {generated_answer}')

Number of tokens in prompt GPT-4: 1506
Predicates:
Perform(x) ::: x performs in school talent shows often.
Attend(x) ::: x attends and is very engaged with school events.
Inactive(x) ::: x is an inactive and disinterested member of their community.
Chaperone(x) ::: x chaperones high school dances.
Student(x) ::: x is a student who attends the school.
Young(x) ::: x is a young child or teenager who wishes to further their academic careers and educational opportunities.
Premises:
∀x (Perform(x) → Attend(x)) ::: If people perform in school talent shows often, then they attend and are very engaged with school events.
∀x (Perform(x) ⊕ Inactive(x)) ::: People either perform in school talent shows often or are inactive and disinterested members of their community.
∀x (Chaperone(x) → ¬Student(x)) ::: If people chaperone high school dances, then they are not students who attend the school.
∀x (Inactive(x) → Chaperone(x)) ::: All people who are inactive and disinterested members of their communi

# Llama 3.1

In [5]:
# llama31_tokenizer = AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3.1-8B")
# tokens = llama31_tokenizer.encode(full_prompt)
# print(f'Number of tokens in prompt Llama 3.1-8B: {len(tokens)}')

# Same system/user exchange as the GPT-4 cell, sent to the Llama 3.1 model
# through ollama.
llama_messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": full_prompt},
]
# temperature 0.0 and a fixed seed are passed as sampling options.
llama_options = {'temperature': 0.0, 'seed': 47}

# NOTE: despite its name, `stream` is requested with stream=False and is
# indexed directly as a single reply below, not iterated chunk by chunk.
stream = ollama.chat(
    model='llama3.1',
    messages=llama_messages,
    stream=False,
    options=llama_options,
)
print(stream['message']['content'])

I'll parse the problem and question into first-order logic formulas for each of the three problems.

**Problem 1**

Predicates:

* Dependent(x) ::: x is a person dependent on caffeine
* Drinks(x) ::: x regularly drinks coffee
* Jokes(x) ::: x jokes about being addicted to caffeine
* Unaware(x) ::: x is unaware that caffeine is a drug
* Student(x) ::: x is a student

Premises:

1. ∀x (Drinks(x) → Dependent(x)) ::: All people who regularly drink coffee are dependent on caffeine.
2. ∀x (Drinks(x) ⊕ Jokes(x)) ::: People either regularly drink coffee or joke about being addicted to caffeine.
3. ∀x (Jokes(x) → ¬Unaware(x)) ::: No one who jokes about being addicted to caffeine is unaware that caffeine is a drug.
4. (Student(rina) ∧ Unaware(rina)) ⊕ ¬(Student(rina) ∨ Unaware(rina)) ::: Rina is either a student and unaware that caffeine is a drug, or neither a student nor unaware that caffeine is a drug.
5. ¬(Dependent(rina) ∧ Student(rina)) → (Dependent(rina) ∧ Student(rina)) ⊕ ¬(Dependent(rin