In [53]:
import os
import json
from json import JSONDecodeError
import pandas as pd
import google.generativeai as palm

### Credentials

In [3]:
# The key comes from the PALM_API_KEY environment variable so it never appears
# in the notebook; `api_key` is None when that variable is not set.
api_key = os.environ.get("PALM_API_KEY")
palm.configure(api_key=api_key)

### Importing Data
We will be going through a list of reviews and extracting meaningful information from them.

In [27]:
# Load the watch-review CSV into a DataFrame. The path is relative, so it is
# resolved against the current working directory.
# NOTE(review): the file name is spelled "reviws" - presumably this matches the
# file on disk; verify before renaming anything.
data = pd.read_csv("data/amazon_reviws_Watches.csv")

In [6]:
# Show the first two rows of the loaded frame.
data.head(2)

Unnamed: 0,text
0,Joe Orlow\nExcellent\nReviewed in the United S...
1,Tin Can Sailor\nVINE VOICE\nExcellent watch\nR...


In [46]:
# Use the review stored under index label 1 as the worked example, then echo
# its full text as the cell output.
review = data["text"][1]
review

'Tin Can Sailor\nVINE VOICE\nExcellent watch\nReviewed in the United States on September 15, 2023\nColor: Green Slip-ThruVerified Purchase\nI am a tinkerer, and my wife knows I love watches. I told her that I loved them when we met 24 years ago, and when in doubt about gifts, she defaults to a watch. I have Rolexes, Movados, Patek Philippe, a Breitling, an Omega, and (the last gift, for my 60th birthday), a Chopard. She also bought me a Seiko a few years ago. The reason for the background isn\'t to brag on my watch collection... In fact, when I told her I liked watches, I meant that I like to wear them, but a Seiko or Timex is more my speed. So, I wore the Seiko when doing yard work, rebuilding engines, tinkering under the hood of my antique cars. My son was helping me one day in the garage and he said, "Dad, why are you wearing a $2500 watch while banging around under the hood of your truck?" I almost fell over. My last Seiko was about $100... I had no idea it was anywhere near that e

### Execution

#### Prompts and Context

In [47]:
# Framing text passed as the `context` argument of every palm.chat call below.
# The leading and trailing newlines are part of the value.
context = (
    "\n"
    "Imagine you are going through a review that has been left on an ecommerce site"
    "\n"
)

In [48]:
# Single-review extraction prompt. This is an f-string, so the current value of
# `review` is baked in when this cell runs; re-run the cell after changing
# `review`. A trailing backslash joins a line with the next one, so the
# instruction sentences reach the model as one line; the review itself is
# delimited by triple backticks.
prompt = f"""
Your task will be to extract the person's name and their topic of inquiry \
you will place this in a json object with the key's customer_name and topic \
Respond only with the json object requested\

The review is provided below
```
{review}
```
"""

#### API Call

In [49]:
# Ask the PaLM chat API for a reply, passing the framing text as `context`
# and the extraction prompt as the message.
completion = palm.chat(context=context, messages=prompt)


In [50]:

# Keep the most recent reply from the chat response.
# NOTE(review): `last` may be None when the reply was blocked/filtered - confirm
# against the google.generativeai docs before relying on it being a string.
response = completion.last

#### Formatting the Response

In [26]:

# The model often wraps its answer in prose and/or ``` fences, so isolate the
# outermost {...} span before parsing instead of handing the raw reply to
# json.loads (which would raise on anything but bare JSON). The raw `response`
# is left untouched so this cell can be re-run safely.
json_start = response.find("{")
json_end = response.rfind("}")
if json_start == -1 or json_end < json_start:
    raise ValueError(f"No JSON object found in model response: {response!r}")

# Convert the JSON text into a Python dictionary (json.loads already ignores
# newlines and spaces between tokens, so no manual clean-up is needed).
response_json = json.loads(response[json_start:json_end + 1])

# `response_json` now holds the extracted fields as a plain dict
print(response_json)


{'customer_name': 'Joe Orlow', 'topic': 'Timex Scout 40 watch review'}


#### Appending Fields
Once we have extracted the meaningful information using the LLM, we then attach it to each review. As we are using a dataframe we will introduce new columns for that.

In [32]:
# Iterating a dict yields its keys; these are the names of the columns to add.
list(response_json)

['customer_name', 'topic']

### Putting it All Together
We will now repeat this for the entire dataframe.

#### Helper Functions

In [105]:
import json
from tqdm import tqdm

In [95]:
def string_to_json(response):
    """Parse the JSON object contained in a model reply.

    The reply may be bare JSON or JSON surrounded by explanatory prose and/or
    Markdown code fences (```...``` or ```json...```). Only the span from the
    first "{" to the last "}" is parsed.

    Args:
        response: The raw reply text. Non-string values (for example None)
            are treated as unparseable.

    Returns:
        The decoded Python object (a dict for a JSON object), or None when the
        reply is not a string or does not contain valid JSON. A short message
        is printed in the failure case.
    """
    if not isinstance(response, str):
        print(f"Error parsing JSON: expected a string, got {type(response).__name__}")
        return None

    candidate = response.strip()

    # Drop any preamble / code fences around the object. If no braces are
    # found, fall through and let json.loads report the problem.
    start = candidate.find("{")
    end = candidate.rfind("}")
    if start != -1 and end > start:
        candidate = candidate[start:end + 1]

    try:
        # Parse exactly once. strict=False tolerates raw newlines inside string
        # values, which the previous newline-stripping step was there to survive.
        return json.loads(candidate, strict=False)
    except JSONDecodeError as e:
        print(f"Error parsing JSON: {e}")
        return None

#### Full Process

In [104]:
# Collect one raw model reply per review, in row order.
new_fields = []

# Series.iteritems() was removed in pandas 2.0; items() yields the same
# (index, value) pairs. Passing `total` lets tqdm draw a real progress bar
# instead of a bare iteration counter.
for index, review in tqdm(data["text"].items(), total=len(data)):

    # Same extraction task as the single-review prompt, plus explicit
    # instructions for reviews where the name or topic cannot be found.
    prompt = f"""
    Your task will be to extract the person's name and their topic of inquiry \
    you will place this in a json object with the key's customer_name and topic \
    If you do not find the information asked for, do the following \
        - Return a dictionary with customer_name and topic keys \
        - Indicate None as the value for the key with missing information

    The review is provided below
    ```
    {review}
    ```

    Remember, respond only with the json object requested, nothing more\
    """

    completion = palm.chat(context=context, messages=prompt)

    # Store the reply verbatim; parsing happens separately.
    response = completion.last
    new_fields.append(response)

# Echo the collected replies as the cell output.
new_fields

4it [00:11,  2.82s/it]


['Sure, here is the JSON object you requested:\n\n```json\n{\n  "customer_name": "Joe Orlow",\n  "topic": "Timex Scout 40 watch"\n}\n```',
 'The customer\'s name is Tin Can Sailor. The topic of inquiry is the durability of the watch.\n\nThe JSON object is:\n\n```\n{\n  "customer_name": "Tin Can Sailor",\n  "topic": "durability"\n}\n```',
 '```\n{\n  "customer_name": "mz johansen",\n  "topic": "Timex watch"\n}\n```',
 '```\n{\n  "customer_name": "Cameron E",\n  "topic": "Easy to read and use, simple style"\n}\n```']

In [100]:
# Attach the raw model replies as a new column; `new_fields` holds one entry
# per review, appended in the same order the rows were iterated.
data['llm_response'] = new_fields

In [101]:
# Display the frame, now including the llm_response column.
data

Unnamed: 0,text,llm_response
0,Joe Orlow\nExcellent\nReviewed in the United S...,"Sure, here is the JSON object you requested:\n..."
1,Tin Can Sailor\nVINE VOICE\nExcellent watch\nR...,"```\n{\n ""customer_name"": ""Tin Can Sailor"",\n..."
2,mz johansen\nVINE VOICE\nAnother Excellent Tim...,"```\n{\n ""customer_name"": ""mz johansen"",\n ""..."
3,"Cameron E\nVINE VOICE\nEasy to read and use, s...","```\n{\n ""customer_name"": ""Cameron E"",\n ""to..."
