In [71]:
from kor.extraction import create_extraction_chain
from kor.nodes import Object, Text, Number, Bool
from langchain.chat_models import ChatOpenAI

import os
import pandas as pd

# Setup API Keys
# Every non-empty line of config/config.txt is read as `NAME,value` and
# exported into this process's environment.
with open('config/config.txt') as f:
    for line in f:
        line = line.strip()
        if not line:
            # Tolerate blank lines (e.g. an empty last line in the file).
            continue
        # Split on the first comma only, so a value that itself contains a
        # comma is kept whole.
        key, sep, value = line.partition(',')
        if not sep:
            raise ValueError(f"Malformed config line (expected 'NAME,value'): {line!r}")
        # Stripping matters: iterating a file yields lines that still end in
        # '\n', and that newline must not become part of the stored value.
        os.environ[key.strip()] = value.strip()

# Chat model shared by the extraction chains in this notebook
# (gpt-3.5-turbo, temperature 0, completions capped at 2000 tokens).
llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0, max_tokens=2000)

# Review data; the first CSV column becomes the DataFrame index.
reviews = pd.read_csv(filepath_or_buffer='data/review_sample.csv', index_col=0)

In [89]:
# Defines the schema for extracting the relevant ideas from a Yelp review.
# With many=True a single review can yield several {"idea": ...} records;
# the chain's result lists them under the schema id ("Ideas").
idea_schema = Object(
    id="Ideas",
    description="All of the relevant ideas expressed by a customer writing a review for a business.",
    examples=[
        # One worked example: (review text, ideas expected from it).
        (
            "As far as diners go, you just don't get much better than this. Worth the wait, great staff behind the counter. A regular spot for us to visit when in the Big Easy.",
            [
                {"idea": "one of the best diners"},
                {"idea": "worth the wait"},
                {"idea": "great staff"},
                {"idea": "visits regularly when in the area"},
            ],
        )
    ],
    attributes=[
        Text(
            id="idea",
            description="A short sentence describing an idea expressed in the text. Make sure the idea reflects what the customer is conveying not what they literally say. No more than 10 words.",
        )
    ],
    many=True,
)

# Initialize the chain with the schema.
extraction_chain = create_extraction_chain(llm, idea_schema)


# Work on the reviews at row positions 20 through 24 and display them.
sample_rows = slice(20, 25)
review_sample = reviews.iloc[sample_rows]
review_sample

Unnamed: 0,review_id,text
20,Tmg23-TKzaI8RrGz4pVmAA,"The staff is attentive and a little abrupt, bu..."
21,hpc9hWgqWl-Mx5Pwb6On1A,"I never understood what ""light and fluffy"" egg..."
22,peC35r9sMcjX0dvCpgQv-g,Great diner experience. The french fry omelett...
23,tMESSwaI19TSjN-9N5NBkg,Just went there for the first time in 40 years...
24,GqoV3TutjHp5gwyIOl7JBQ,I live a few blocks away from this place... if...


In [91]:
# Run the idea-extraction chain over every sampled review and collect one
# (review_id, idea) record per extracted idea.
idea_records = []

for review_id, review_text in zip(review_sample['review_id'], review_sample['text']):
    result = extraction_chain.invoke(review_text)['text']
    # If the model's reply could not be parsed, the "Ideas" key may be missing
    # from `data`. Skip that review rather than abort the loop and lose the
    # records already collected from earlier (paid) API calls.
    ideas = result['data'].get('Ideas', [])
    if not ideas:
        print(f"No ideas extracted for review {review_id}; errors: {result.get('errors')}")
        continue
    for idea in ideas:
        idea_records.append({'review_id': review_id, 'idea': idea['idea']})

# Explicit columns keep the (review_id, idea) schema even when nothing was
# extracted and the record list is empty.
data = pd.DataFrame(idea_records, columns=['review_id', 'idea'])

In [92]:
# Display the extracted (review_id, idea) pairs.
data

Unnamed: 0,review_id,idea
0,Tmg23-TKzaI8RrGz4pVmAA,attentive and abrupt staff
1,Tmg23-TKzaI8RrGz4pVmAA,unique lingo among cooks
2,Tmg23-TKzaI8RrGz4pVmAA,delicious burgers
3,Tmg23-TKzaI8RrGz4pVmAA,best burgers in the area
4,Tmg23-TKzaI8RrGz4pVmAA,reasonably priced
5,Tmg23-TKzaI8RrGz4pVmAA,will return next time in New Orleans
6,hpc9hWgqWl-Mx5Pwb6On1A,light and fluffy eggs
7,hpc9hWgqWl-Mx5Pwb6On1A,best breakfast
8,hpc9hWgqWl-Mx5Pwb6On1A,omelettes are exceptional
9,hpc9hWgqWl-Mx5Pwb6On1A,chefs special is a bargain


In [93]:
# Defines the schema for merging two ideas: the model returns one merged
# sentence plus a flag saying whether the two inputs mean the same thing.
# The chain input is formatted as "idea 1: ... \nidea 2: ...", matching the
# examples below.
merge_schema = Object(
    id="Merge",
    description="Merges the two ideas presented into one sentence and decides whether they express the same meaning or not using a boolean.",
    examples=[
        ("idea 1: atmosphere sets the place apart \nidea 2: good atmosphere", {'idea':'positive atmosphere', 'equivalent':True}),
        ("idea 1: burgers are great \nidea 2: terrific hamburgers", {'idea':'great hamburgers', 'equivalent':True}),
        # Non-equivalent case: the merged idea must still reflect both inputs
        # (the earlier value here was copied from the first example).
        ("idea 1: will return \nidea 2: true original gem", {'idea':'will return to a true original gem', 'equivalent':False})
    ],
    attributes=[
        Text(
            id="idea",
            description="A short sentence that captures the meaning of both ideas presented.",
        ),
        Bool(
            id="equivalent",
            description="Returns true if the ideas presented express the same core idea returns false otherwise."
        )
    ],
    many=False,
)

# Initialize a chain with the schema.
merge_chain = create_extraction_chain(llm, merge_schema)

In [96]:
# Pick two extracted ideas (row positions 2 and 25 of `data`) and format them
# the way the merge schema's examples are written.
a = data['idea'].iloc[2]
b = data['idea'].iloc[25]

chain_input = f'idea 1: {a} \nidea 2: {b}'

# Show the selected pair.
(a, b)

('delicious burgers', 'good burgers')

In [97]:
# Ask the model to merge the two ideas and judge whether they are equivalent.
# NOTE(review): in the recorded output, `equivalent` comes back as the string
# 'True' rather than a bool — confirm before using it in a condition.
merge_chain.invoke(chain_input)

{'text': {'data': {'Merge': [{'idea': 'tasty burgers', 'equivalent': 'True'}]},
  'raw': 'idea|equivalent\ntasty burgers|True',
  'errors': [],
  'validated_data': {}}}