In [37]:
import anthropic
import os
from dotenv import load_dotenv
from pydantic import BaseModel, Field


# Load variables from a local .env file (if any) into the process environment.
load_dotenv()

# Key as found in the environment; None when the variable is not set.
ANTHROPIC_API_KEY = os.environ.get("ANTHROPIC_API_KEY")


# Target schema for the name/age extraction examples: the class is handed to
# getJsonOutput and the model's reply is validated against it.
# NOTE: documented with comments rather than a docstring on purpose -- pydantic
# copies a class docstring into the generated JSON schema ("description"),
# which would change the prompt text built from model_json_schema().
class UserDetail(BaseModel):
    name: str
    age: int


# Anthropic SDK client, constructed with no explicit arguments.
client = anthropic.Anthropic()

# Candidate model ids; the request helpers in this cell index into this list
# and currently use the first entry (model[0]).
model = [
    "claude-3-sonnet-20240229",
    "claude-3-opus-20240229",
]
def call_model(text: str, system: str="You are a helpful assistant."):
    """Send one user message to the first configured model and return the reply.

    Args:
        text: Text of the single user turn.
        system: System prompt sent with the request.

    Returns:
        Whatever ``client.messages.create`` returns for the request.
    """
    # A single user turn holding one text content block.
    user_turn = {
        "role": "user",
        "content": [{"type": "text", "text": text}],
    }
    request = dict(
        model=model[0],
        max_tokens=4000,
        temperature=0,
        system=system,
        messages=[user_turn],
    )
    return client.messages.create(**request)

# Candidate system prompts for getJsonOutput (the first is the one used below):
#   1. Here's a JSON schema to follow: {schema} Output a valid JSON object but do not repeat the schema.
#   2. You are an expert in outputting JSON. ONLY output json following the following pydantic schema {pydantic_model.model_json_schema()}
def getJsonOutput(text: str, pydantic_model: "type[BaseModel]"):
    """Ask the model for a JSON object that follows ``pydantic_model``'s schema.

    Args:
        text: Text of the single user turn.
        pydantic_model: A pydantic model *class* (not an instance). Its JSON
            schema is embedded in the system prompt.

    Returns:
        Whatever ``client.messages.create`` returns for the request; callers in
        this notebook read the reply text from ``message.content[0].text``.
    """
    # Local import: at module level this notebook only imports json in a later cell.
    import json

    # Serialise the schema as real JSON. Interpolating the dict directly would
    # put a Python repr (single quotes, True/False/None) into a prompt that
    # announces a "JSON schema".
    schema = json.dumps(pydantic_model.model_json_schema())

    message = client.messages.create(
        model=model[0],
        max_tokens=1000,
        temperature=0,
        system=f"Here's a JSON schema to follow: {schema} Output a valid JSON object but do not repeat the schema.",
        messages=[
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": text
                    }
                ]
            }
        ]
    )
    return message

In [38]:
# Sample sentences, each stating a person's name and age.
data = [
    "Brandon is 30",
    "Jason is 22",
    "Duyen is 25",
    "John is 99",
]

In [39]:
# For each sample sentence: request JSON for UserDetail, show the raw reply,
# then show the validated model.
for d in data:
    text = f"Extract the user detail from the following text: {d}"
    message = getJsonOutput(text, UserDetail)
    try:
        raw_reply = message.content[0].text
        print(raw_reply)
        print(UserDetail.model_validate_json(raw_reply))
    except Exception as e:
        # Best-effort loop: keep going, but report *why* this item failed
        # instead of printing the raw reply a second time.
        print(f"Failed to parse reply for {d!r}: {e}")

{
  "name": "Brandon",
  "age": 30
}
name='Brandon' age=30
{
  "name": "Jason",
  "age": 22
}
name='Jason' age=22
{
  "name": "Duyen",
  "age": 25
}
name='Duyen' age=25
{
  "name": "John",
  "age": 99
}
name='John' age=99


In [40]:
# Schema with name, age and occupation fields. Each Field carries a
# human-readable description of what the field holds.
class UserDetailOccupation(BaseModel):
    name: str = Field(description="The name of the user")
    age: int = Field(description="The age of the user")
    occupation: str = Field(description="The occupation of the user")

# Sample sentences that state a name, an age and an occupation.
data = [
    "Brandon is 30 and he is a data scientist",
    "Jason is 22 and he is a data engineer",
    "Duyen is 25 and she is a GOAT",
    "John is 99 and is a CEO",
]


# Count how many replies validate against UserDetailOccupation.
success = 0
for d in data:
    text = f"Extract user detail from the following text: {d}"
    # The schema class is spelled UserDetailOccupation (two c's); the previous
    # spelling with one "c" is undefined and raises NameError on a fresh kernel.
    message = getJsonOutput(text, UserDetailOccupation)
    raw_reply = None
    try:
        raw_reply = message.content[0].text
        print(UserDetailOccupation.model_validate_json(raw_reply))
        success += 1
    except Exception as e:
        # Best-effort: report the error and the raw reply, then carry on.
        print(e)
        print(raw_reply)
        print("Failed")

print(f"{success}/{len(data)} correct")

name='Brandon' age=30 occupation='data scientist'
name='Jason' age=22 occupation='data engineer'
name='Duyen' age=25 occupation='GOAT'
name='John' age=99 occupation='CEO'
4/4 correct


In [41]:
import json

# Load the boolean-expression eval set. The context manager closes the file
# handle once parsing is done; the encoding is pinned so the result does not
# depend on the platform's default locale.
with open("boolean-expressions.json", encoding="utf-8") as file:
    bool_evals = json.load(file)
print(bool_evals['examples'])

[{'input': 'not ( True ) and ( True ) is', 'target': 'False'}, {'input': 'True and not not ( not False ) is', 'target': 'True'}, {'input': 'not True or False or ( False ) is', 'target': 'False'}, {'input': 'False or not ( True ) and False is', 'target': 'False'}, {'input': 'True or not False and True and False is', 'target': 'True'}, {'input': 'False or not not not False and True is', 'target': 'True'}, {'input': 'not True and ( False or True ) is', 'target': 'False'}, {'input': 'True and not False or ( True ) is', 'target': 'True'}, {'input': 'not True or ( False and True ) is', 'target': 'False'}, {'input': 'not True or ( True or False ) is', 'target': 'True'}, {'input': 'False or ( False ) and not False is', 'target': 'False'}, {'input': 'not False or True and False and False is', 'target': 'True'}, {'input': 'not True or False or not not True is', 'target': 'True'}, {'input': 'True and True and False and not True is', 'target': 'False'}, {'input': 'not not True and not True or True

In [42]:
# Reply schema for the boolean-expression eval; replies are validated against
# it with model_validate_json.
class BooleanEval(BaseModel):
    reasoning: str  # explanation text, printed as "Reasoning: ..."
    answer: bool  # final truth value; str(answer) is compared with the example's 'target'

In [43]:
# Run the boolean-expression eval on the first 10 examples.
#   success      -> replies that validated against BooleanEval
#   correct_bool -> validated replies whose answer matched the target
success = 0
correct_bool = 0

# Slice rather than break at a fixed index so the summary denominator is the
# number of examples actually attempted (still 10 when at least 10 exist).
bool_subset = bool_evals['examples'][:10]

for b in bool_subset:
    text = f"Evaluate the expression. Make sure to output the reasoning and answer correctly into json. Expression: {b['input']}"
    message = getJsonOutput(text, BooleanEval)
    try:
        parsed_output = BooleanEval.model_validate_json(message.content[0].text)
        print(f"Reasoning: {parsed_output.reasoning}")
        print(f"Answer: {parsed_output.answer}")
        print(f"Target: {b['target']}")
        # Targets are the strings 'True' / 'False', hence the str() on the bool.
        if str(parsed_output.answer) == b['target']:
            print("Correct")
            correct_bool += 1
        else:
            print("Incorrect")
        success += 1
    except Exception as e:
        # Best-effort: keep evaluating, but surface the reason for the failure.
        print(f"parse error: {b['input']}")
        print(f"  {type(e).__name__}: {e}")
print(f"{success}/{len(bool_subset)} successfully parsed")
print(f"{correct_bool}/{len(bool_subset)} correct reasoning")

Reasoning: The expression 'not (True) and (True)' can be evaluated as follows:

1. 'not (True)' evaluates to False because the logical NOT of True is False.
2. '(True)' evaluates to True.
3. 'False and True' evaluates to False because in logical AND, if one operand is False, the entire expression is False regardless of the other operand.
Answer: False
Target: False
Correct
Reasoning: The expression 'True and not not (not False)' can be simplified as follows:

not False = True
not not True = True
True and True = True
Answer: True
Target: True
Correct
Reasoning: The expression 'not True or False or (False)' can be evaluated as follows:

1. 'not True' is False
2. 'False or False' is False
3. 'False or (False)' is False
4. 'False or False or False' is False
Answer: False
Target: False
Correct
Reasoning: The expression 'False or not (True) and False' can be evaluated as follows:

1. 'not True' evaluates to False
2. 'False and False' evaluates to False
3. 'False or False' evaluates to False


In [24]:
# Recap of the boolean-expression run: parsed replies first, then matches.
print(
    f"{success}/10 correct parsing",
    f"{correct_bool}/10 correct reasoning",
    sep="\n",
)

10/10 correct parsing
9/10 correct reasoning


In [44]:
# Reply schema for the causal-judgement eval; replies are validated against it
# with model_validate_json.
class CausalJudgement(BaseModel):
    answer_reasoning: str  # explanation text, printed as "Reasoning: ..."
    answer: str  # final answer; compared with the example's 'target' string

# Load the causal-judgement eval set. The context manager closes the file
# handle once parsing is done; the encoding is pinned so the result does not
# depend on the platform's default locale.
with open("causal_judgement.json", encoding="utf-8") as c_file:
    causal_judgements = json.load(c_file)
print(causal_judgements['examples'])

#   success       -> replies that validated against CausalJudgement
#   correct_cause -> validated replies whose answer matched the target
success = 0
correct_cause = 0

# Slice rather than break at a fixed index so the summary denominator is the
# number of examples actually attempted (still 10 when at least 10 exist).
causal_subset = causal_judgements['examples'][:10]

for b in causal_subset:
    text = f"Output the answer to the following question into json. Question: {b['input']}"
    message = getJsonOutput(text, CausalJudgement)
    try:
        parsed_output = CausalJudgement.model_validate_json(message.content[0].text)
        print(f"Reasoning: {parsed_output.answer_reasoning}")
        print(f"Answer: {parsed_output.answer}")
        print(f"Target: {b['target']}")
        # The answer is free-form model text, so ignore surrounding whitespace
        # and letter case; otherwise "no" or " No" is scored as a miss for "No".
        if parsed_output.answer.strip().casefold() == b['target'].strip().casefold():
            print("Correct")
            correct_cause += 1
        else:
            print("Incorrect")
        success += 1
    except Exception as e:
        # Best-effort: keep evaluating, but surface the reason for the failure.
        print(f"parse error: {b['input']}")
        print(f"  {type(e).__name__}: {e}")
print(f"{success}/{len(causal_subset)} successfully parsed")
print(f"{correct_cause}/{len(causal_subset)} correct reasoning")

[{'input': 'How would a typical person answer each of the following questions about causation?\nA machine is set up in such a way that it will short circuit if both the black wire and the red wire touch the battery at the same time. The machine will not short circuit if just one of these wires touches the battery. The black wire is designated as the one that is supposed to touch the battery, while the red wire is supposed to remain in some other part of the machine. One day, the black wire and the red wire both end up touching the battery at the same time. There is a short circuit. Did the black wire cause the short circuit?\nOptions:\n- Yes\n- No', 'target': 'No'}, {'input': "How would a typical person answer each of the following questions about causation?\nLong ago, when John was only 17 years old, he got a job working for a large manufacturing company. He started out working on an assembly line for minimum wage, but after a few years at the company, he was given a choice between tw

In [36]:
# Reply schema for the date-understanding eval; replies are validated against
# it with model_validate_json.
class DateUnderstanding(BaseModel):
    answer_reasoning: str  # explanation text, printed as "Reasoning: ..."
    answer: str  # final answer; scored by checking that the example's 'target' occurs in it

# Load the date-understanding eval set. The context manager closes the file
# handle once parsing is done; the encoding is pinned so the result does not
# depend on the platform's default locale.
with open("date_understanding.json", encoding="utf-8") as d_file:
    date_understand = json.load(d_file)
print(date_understand['examples'])

#   success      -> replies that validated against DateUnderstanding
#   correct_date -> validated replies whose answer contained the target
success = 0
correct_date = 0

# Slice rather than break at a fixed index so the summary denominator is the
# number of examples actually attempted (still 10 when at least 10 exist).
date_subset = date_understand['examples'][:10]

for b in date_subset:
    text = f"Output the answer to the following question into json. Question: {b['input']}"
    message = getJsonOutput(text, DateUnderstanding)
    try:
        parsed_output = DateUnderstanding.model_validate_json(message.content[0].text)
        print(f"Reasoning: {parsed_output.answer_reasoning}")
        print(f"Answer: {parsed_output.answer}")
        print(f"Target: {b['target']}")
        # Substring match: the target only has to appear somewhere in the answer.
        if b['target'] in parsed_output.answer:
            print("Correct")
            correct_date += 1
        else:
            print("Incorrect")
        success += 1
    except Exception as e:
        # Best-effort: keep evaluating, but surface the reason for the failure.
        print(f"parse error: {b['input']}")
        print(f"  {type(e).__name__}: {e}")
print(f"{success}/{len(date_subset)} successfully parsed")
print(f"{correct_date}/{len(date_subset)} correct reasoning")

[{'input': 'Today is Christmas Eve of 1937. What is the date tomorrow in MM/DD/YYYY?\nOptions:\n(A) 12/11/1937\n(B) 12/25/1937\n(C) 01/04/1938\n(D) 12/04/1937\n(E) 12/25/2006\n(F) 07/25/1937', 'target': '(B)'}, {'input': 'In the UK, people usually put the day before the month when formatting the date. Therefore, today is 02/01/1987 to them. What is the date a month ago in MM/DD/YYYY?\nOptions:\n(A) 12/02/1986\n(B) 12/01/1986\n(C) 03/02/1986\n(D) 12/02/2032\n(E) 12/02/2062\n(F) 02/06/1987', 'target': '(A)'}, {'input': 'Jane and John married on Jan 2, 1958. It is their 5-year anniversary today. What is the date one week from today in MM/DD/YYYY?\nOptions:\n(A) 01/16/1961\n(B) 01/09/1961\n(C) 01/09/1963\n(D) 03/09/1960\n(E) 07/09/1960\n(F) 02/28/1961', 'target': '(B)'}, {'input': 'Jane scheduled 3 apointments with 5 poeple for tomorrow (Tue, 7/9/1972). What is the date one week ago from today in MM/DD/YYYY?\nOptions:\n(A) 07/09/1972\n(B) 12/01/1972\n(C) 07/01/2008\n(D) 05/13/1972\n(E) 07/