In [15]:
import os
import json
import anthropic
from time import sleep
# Never hard-code the key in the notebook. Keep a key that is already exported
# in the environment and only prompt for one (without echoing it) when it is
# missing, so a valid key is not overwritten with an empty string.
if not os.environ.get("ANTHROPIC_API_KEY"):
    from getpass import getpass

    os.environ["ANTHROPIC_API_KEY"] = getpass("Anthropic API key: ")

# No api_key argument is passed: the client is expected to pick the key up from
# the ANTHROPIC_API_KEY environment variable set/checked above.
client = anthropic.Anthropic()

In [2]:
# Load the reviews. The encoding is pinned to UTF-8 so non-ASCII characters in
# the review text decode the same way on every platform instead of depending on
# the locale default (the results are also written back as UTF-8).
with open("reviews.json", "r", encoding="utf-8") as f:
    reviews = json.load(f)
print(len(reviews))

2346


In [None]:
# Number of reviews packed into one prompt; kept small due to API limit
# restrictions.
BATCH_SIZE = 30

reviewTexts = []  # one prompt string per batch of reviews
batch_lines = []  # formatted reviews of the batch currently being filled

# Each element of `reviews` is looked up as review["review_<n>"]["Review"],
# where <n> is the 1-based position of the element in the list.
for counter, review in enumerate(reviews, start=1):
    review_id = f"review_{counter}"
    batch_lines.append(f"{review_id}: {review[review_id]['Review']} (END OF REVIEW)\n")

    # A full batch is flushed as a single string.
    if counter % BATCH_SIZE == 0:
        reviewTexts.append("".join(batch_lines))
        batch_lines = []

# Flush the last, partially filled batch (if any).
if batch_lines:
    reviewTexts.append("".join(batch_lines))

print(f"Total batches created: {len(reviewTexts)}")
print()
# Show the first batch as a sanity check; skipped when there are no reviews so
# an empty input does not raise IndexError.
if reviewTexts:
    print(reviewTexts[0])


Total batches created: 79

review_1: For the price not worth going back. Thing more the add than that! (END OF REVIEW)
review_2: Entertaining some guests from out of town, they remarked how this was the best meal they had in sf.Wagyu skewers, beef noodle soup, hamachi (best value!), and the a5 dumpling was our favorite. (END OF REVIEW)
review_3: Food was outstanding, service was relaxed but good. Their beef noodle soup, wagyu, and uni were the highlights, but everything else we tried was excellent too.There was an event taking place that evening. Made it a bit livelier but didn’t interfere with our dinner. (END OF REVIEW)
review_4: waited for 15 minutes in an empty restaurant and other than water no-one showed up. Food choices seemed very limited. waited 10 more minutes and walked out. No apologies, no words of how can we do better. I understand these are tough times, but having a restaurant in an expensive neighborhood, least you can do is have some service (END OF REVIEW)
review_5: 1

In [26]:
def generateMsgs(client, text, *, model="claude-3-5-sonnet-20241022", max_tokens=5000, temperature=0):
    """Send one batch of reviews to the model and return the raw response.

    The system prompt asks the model to emit, for every review in ``text``,
    a ``{ ... },`` block with the fields ``Review``, ``Sentiment``,
    ``Comment about food`` and ``Comment about staff/service``.

    Args:
        client: Object exposing ``messages.create(...)``; in this notebook an
            ``anthropic.Anthropic`` instance.
        text: The batch of reviews, each terminated by ``(END OF REVIEW)``.
        model: Model identifier forwarded to ``messages.create``.
        max_tokens: Output token limit forwarded to ``messages.create``.
        temperature: Sampling temperature forwarded to ``messages.create``.

    Returns:
        Whatever ``client.messages.create`` returns. The caller in this
        notebook reads the generated text from ``.content[0].text``.

    Raises:
        Any exception raised by ``client.messages.create`` is propagated.
    """
    # NOTE: the system prompt is sent to the model verbatim (including its
    # indentation), so it is deliberately left untouched.
    return client.messages.create(
        model=model,
        max_tokens=max_tokens,
        temperature=temperature,
        system="""You are a review analyst for my food restaurant.
                  You will stricly follow the following guidelines given in tripple ticks.
                  '''
                  1. You will do a sentiment analysis of the review. You will strictly stick to displaying either positive, negative or neutral for the output of sentiment analysis.
                  2. You will extract the comment(s) about the food.
                  3. You will extract the comment(s) about staff or service.
                  4. You are not allowed to add or subtract any information given the above points.
                  5. If the review has no information about anything mentioned above just write neutral in sentiment and write no information given in other output fields (STRICTLY FOLLOW THIS).
                  6. If there is some personal information given in the review replace it like following
                        1. For example if someone has given their phone number then write it as <phone-number>
                        2. If someone has given their name, write it as <name>
                        3. If someone has given their address, write it as <address>
                        4. Follow similar pattern for any other time of personal details/information  

                  7. Note that each review ends with (END OF REVIEW)\n
                  8. The review number should be same as the review number in the review.
                  9. You will process all reviews in the given text.
                  '''
                  The output should be formatted in the following order given in curly brackets. Do not add anything else to the output other than what is mentioned.
                  {
                    Review: (number of review. follow 1, 2, 3, 4... counting)
                    Sentiment: (positive,negative or neutral based on your analysis)
                    Comment about food: (extract the comment about food from the review)
                    Comment about staff/service: (extract the comment about staff/service from the review)                  
                  },
        """,
        messages=[
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": text
                    }
                ]
            }
        ]
    )

# Pause after every successful call, back off longer after a failure, and give
# up on a batch after a bounded number of attempts.
REQUEST_PAUSE_SECONDS = 5
RETRY_PAUSE_SECONDS = 30
MAX_ATTEMPTS = 3

responses = []  # generated text, one entry per successfully processed batch
failed_batches = []  # 1-based numbers of the batches that never succeeded

for batch_number, batch in enumerate(reviewTexts, start=1):
    for attempt in range(1, MAX_ATTEMPTS + 1):
        try:
            response = generateMsgs(client, batch)
            responses.append(response.content[0].text)
        except Exception as exc:
            # `Exception` rather than a bare `except:` so that interrupting the
            # kernel (KeyboardInterrupt) still stops the loop.
            print(f"Batch {batch_number}, attempt {attempt}/{MAX_ATTEMPTS} failed: {exc!r}")
            if attempt < MAX_ATTEMPTS:
                print(f"Retrying in {RETRY_PAUSE_SECONDS} seconds...")
                sleep(RETRY_PAUSE_SECONDS)
        else:
            sleep(REQUEST_PAUSE_SECONDS)
            break
    else:
        # Every attempt failed: keep going (best effort) but record the gap so
        # it is not silently lost.
        failed_batches.append(batch_number)

print(len(responses))
if failed_batches:
    print(f"Batches that failed after {MAX_ATTEMPTS} attempts: {failed_batches}")


79


In [None]:
# Number of reviews that were packed into each prompt. Must match the batch
# size used when the prompts were built (30 in this notebook).
REVIEWS_PER_BATCH = 30

parsed_reviews = []  # list of dicts, one per parsed review

for batch_index, response in enumerate(responses):
    # Numbering restarts at the first review of this batch. The model sometimes
    # stops before the end of a batch; with a single running counter every
    # review of all later batches would then get a shifted number.
    # Assumes responses[i] is the answer to batch i (no batch was skipped).
    review_counter = batch_index * REVIEWS_PER_BATCH + 1
    current_review = None  # None while we are outside a "{ ... }" block

    for line in response.splitlines():
        line = line.strip()  # remove leading and trailing spacing
        if not line:
            continue

        if line.startswith("{"):
            # opening bracket: a new review starts
            current_review = {}
        elif line.startswith("}"):
            # closing bracket: store the review unless it is empty
            if current_review:
                # our own number replaces the one reported by the model
                current_review["Review"] = review_counter
                parsed_reviews.append(current_review)
                review_counter += 1
            # Leave the block so stray lines that follow cannot modify the
            # review that was just stored.
            current_review = None
        elif current_review is None:
            # text outside any review block (e.g. a note added by the model)
            print(f"Skipping malformed line: {line}")
        else:
            # inside a block every line is expected to be "key: value"
            try:
                key, value = line.split(":", 1)  # split on the first colon only
            except ValueError:
                print(f"Skipping malformed line: {line}")
                continue
            key = key.strip()
            if key != "Review":  # the model's own numbering is ignored
                current_review[key] = value.strip()

# save data into json
with open("analysis.json", "w", encoding="utf-8") as json_file:
    json.dump(parsed_reviews, json_file, indent=4)

# check how many reviews were saved
print(f"Successfully processed {len(parsed_reviews)} reviews into a JSON file.")

Skipping malformed line: [Continued in next response due to length limit...]
Skipping malformed line: [Continued response in next part due to length limit]
Skipping malformed line: [Continued in next response due to length limit...]
Skipping malformed line: [Due to length constraints, I'll stop here but would continue with the same format for all remaining reviews]
Skipping malformed line: [Continue with the remaining reviews? Let me know if you want me to process more.]
Skipping malformed line: [Due to length constraints, I'll stop here but would continue with the same format for the remaining reviews]
Skipping malformed line: [Due to length constraints, I'll continue with the next reviews if needed]
Skipping malformed line: [Due to length constraints, I'll continue with the remaining reviews if needed]
Skipping malformed line: [Continued response in next part due to length limit]
Skipping malformed line: [Due to length constraints, I'll continue with a few more reviews. Would you lik