In [13]:
from dotenv import load_dotenv
load_dotenv()

import os 
import os.path as osp
import json
import tiktoken
import ollama
from openai import AzureOpenAI
from transformers import AutoTokenizer
import pprint

In [14]:
# Azure OpenAI credentials are read from the environment (load_dotenv() was
# called in the imports cell). Either value is None when its variable is unset.
azure_openai_api_key = os.getenv("AZURE_OPENAI_API_KEY")
azure_openai_endpoint = os.getenv("AZURE_OPENAI_ENDPOINT")
# Passed as `model=` to the Azure chat-completions call.
deployment_name = "gpt-4"

# Dataset and demonstration-prompt locations. The defaults are the original
# hard-coded local paths; set LOGLM_DATA_PATH / LOGLM_DEMONSTRATION_PATH
# (e.g. in .env) to run the notebook from a different checkout.
data_path = os.getenv(
    "LOGLM_DATA_PATH",
    '/home/mou/Projects/COLING-LogicLLM/LOGLM/data',
)
demonstration_path = os.getenv(
    "LOGLM_DEMONSTRATION_PATH",
    '/home/mou/Projects/COLING-LogicLLM/LOGLM/src/logicllm/prompts/baseline',
)
# Dataset names; each one is used as a sub-directory of `data_path` and as the
# prefix of a demonstration file name.
datasets = ['FOLIO', 'AR-LSAT', 'LogicalDeduction', 'ProntoQA', 'ProofWriter']

# Prompt example

In [15]:
# Settings for the single-example walkthrough in this notebook.
# `dataset_name` and `split` select the data file `<data_path>/<dataset_name>/<split>.json`.
dataset_name = 'FOLIO'
split = 'dev'
# `mode` selects the demonstration file `<dataset_name>_<mode>.txt`.
mode = 'Direct'
# Passed as the `stop` sequence to the chat-completions request.
stop_words = "------"
# Marker used to split a completion into reasoning (before) and answer (after).
label_phrase = 'The correct option is:'

def prompt_creator(in_context_example, test_example):
    """Fill the prompt template's placeholders with one test example.

    Args:
        in_context_example: Template text containing the ``[[CONTEXT]]``,
            ``[[QUESTION]]`` and ``[[OPTIONS]]`` placeholders.
        test_example: Mapping with ``'context'`` and ``'question'`` strings and
            an ``'options'`` iterable of strings.

    Returns:
        The template with every placeholder occurrence substituted. Context and
        question are whitespace-stripped; options are stripped individually and
        joined with newlines.
    """
    context_text = test_example['context'].strip()
    question_text = test_example['question'].strip()
    option_lines = '\n'.join(choice.strip() for choice in test_example['options'])

    # Substitutions are applied one after another, in this order.
    substitutions = (
        ('[[CONTEXT]]', context_text),
        ('[[QUESTION]]', question_text),
        ('[[OPTIONS]]', option_lines),
    )
    prompt = in_context_example
    for placeholder, value in substitutions:
        prompt = prompt.replace(placeholder, value)
    return prompt

# Load the evaluation split. The encoding is given explicitly so that decoding
# does not depend on the platform's default locale.
with open(os.path.join(data_path, dataset_name, f'{split}.json'), encoding='utf-8') as f:
    raw_dataset = json.load(f)

# Load the demonstration template for the chosen dataset and mode.
with open(os.path.join(demonstration_path, f'{dataset_name}_{mode}.txt'), encoding='utf-8') as f:
    in_context_examples = f.read()

# Build and show the prompt for the first example of the split.
example = raw_dataset[0]
question, answer = example['question'], example['answer']
full_prompt = prompt_creator(in_context_examples, example)

print(full_prompt)

# batch_size = 10
# dataset_chunks = [raw_dataset[i:i + batch_size] for i in range(0, len(raw_dataset), batch_size)]

Given a problem statement as contexts, the task is to answer a logical reasoning question. 
------
Context:
All people who regularly drink coffee are dependent on caffeine. People either regularly drink coffee or joke about being addicted to caffeine. No one who jokes about being addicted to caffeine is unaware that caffeine is a drug. Rina is either a student and unaware that caffeine is a drug, or neither a student nor unaware that caffeine is a drug. If Rina is not a person dependent on caffeine and a student, then Rina is either a person dependent on caffeine and a student, or neither a person dependent on caffeine nor a student.

Question: Based on the above information, is the following statement true, false, or uncertain? Rina is a person who jokes about being addicted to caffeine or unaware that caffeine is a drug.

Options:
A) True
B) False
C) Uncertain

The correct option is: A
------
Context:
William Dickinson was a British politician who sat in the House of Commons William 

# OpenAI

In [16]:
def num_tokens_from_string(string: str, encoding_name: str) -> int:
    """Count the tokens `string` encodes to under a model's tiktoken encoding.

    Args:
        string: Text to tokenize.
        encoding_name: Despite the name, this value is handed to
            ``tiktoken.encoding_for_model``, so it is used as a model name
            (the notebook calls it with ``"gpt-4"``).

    Returns:
        Number of tokens in the encoded text.
    """
    model_encoding = tiktoken.encoding_for_model(encoding_name)
    return len(model_encoding.encode(string))
print(f'Number of tokens in prompt GPT-4: {num_tokens_from_string(full_prompt, "gpt-4")}')

# Send the prompt to the Azure OpenAI deployment with deterministic decoding
# settings; generation is cut at the example separator.
client = AzureOpenAI(
    api_key=azure_openai_api_key,
    api_version="2024-02-01",
    azure_endpoint=azure_openai_endpoint,
)
response = client.chat.completions.create(
    model=deployment_name,
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": full_prompt},
    ],
    temperature=0.0,
    top_p=1.0,
    stop=stop_words,
)

# `message.content` may be None (for example when the completion is filtered),
# so fall back to an empty string instead of failing on `.strip()`.
generated_content = (response.choices[0].message.content or "").strip()
print(generated_content)

# Answer: text after the last occurrence of the label phrase, or the whole
# completion when the phrase is absent.
generated_answer = generated_content.rpartition(label_phrase)[2].strip()
# Reasoning: text before the first occurrence of the label phrase. When the
# phrase is absent there is no separate reasoning, so leave it empty rather
# than duplicating the answer text.
reasoning_part, label_found, _ = generated_content.partition(label_phrase)
generated_reasoning = reasoning_part.strip() if label_found else ''

output_json = {
    'id': example['id'],
    'question': question,
    'answer': answer,
    'predicted_reasoning': generated_reasoning,
    'predicted_answer': generated_answer,
}
print(f'id : {example["id"]}')
print(f'question : {question}')
print(f'answer : {answer}')
# print(f'predicted reasoning : {generated_reasoning}')
print(f'predicted answer : {generated_answer}')




Number of tokens in prompt GPT-4: 503
A) True
id : FOLIO_dev_0
question : Based on the above information, is the following statement true, false, or uncertain? Bonnie performs in school talent shows often.
answer : C
predicted answer : A) True


# Llama 3.1

In [17]:
# Report the prompt length under the Llama 3.1 tokenizer.
llama31_tokenizer = AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3.1-8B")
tokens = llama31_tokenizer.encode(full_prompt)
print(f'Number of tokens in prompt Llama 3.1-8B: {len(tokens)}')

# Non-streaming request, so despite its name `stream` holds the complete
# response. Decoding options mirror the Azure OpenAI call (temperature 0,
# top_p 1, same stop sequence) so both models are queried under the same
# settings.
stream = ollama.chat(
    model='llama3.1',
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": full_prompt}
    ],
    stream=False,
    options={
        'temperature': 0.0,
        'top_p': 1.0,
        'seed': 47,
        'stop': [stop_words],
    }
)
print(stream['message']['content'])

Number of tokens in prompt Llama 3.1-8B: 504
It seems like you're providing a series of context and questions. I'll answer each question based on the given information.

For the first question:

Context:
All people who regularly drink coffee are dependent on caffeine. People either regularly drink coffee or joke about being addicted to caffeine. No one who jokes about being addicted to caffeine is unaware that caffeine is a drug. Rina is either a student and unaware that caffeine is a drug, or neither a student nor unaware that caffeine is a drug. If Rina is not a person dependent on caffeine and a student, then Rina is either a person dependent on caffeine and a student, or neither a person dependent on caffeine nor a student.

Question: Based on the above information, is the following statement true, false, or uncertain? Rina is a person who jokes about being addicted to caffeine or unaware that caffeine is a drug.

Options:
A) True
B) False
C) Uncertain

Answer: A) True

Explanation