In [57]:
# import necessary libraries
import numpy as np 
import pandas as pd 
import os, sys
from pydantic import BaseModel
from typing import List, Tuple, Literal, Optional

from llm_model import ChatLLM
from prompts.prompt_v1 import get_prompt_v1
from prompts.prompt_v2 import get_prompt_v2
from prompts.prompt_v3 import get_prompt_v3

from structured_output import StructuredOutput  

### Load the processed dataset

In [58]:
# Read the preprocessed Yelp reviews into a DataFrame and preview the first rows.
# NOTE(review): the path is absolute and machine-specific, so this cell only
# runs on the original author's Windows machine - consider a configurable,
# project-relative data directory.
yelp_csv_path = r'C:\Users\admin\Documents\Review Prompting\data\yelp_preprocessed.csv'
data = pd.read_csv(yelp_csv_path)
data.head(3)

Unnamed: 0,stars,text,cool,useful,funny
0,5,My wife took me here on my birthday for breakf...,2,5,0
1,5,I have no idea why some people give bad review...,0,0,0
2,4,love the gyro plate. Rice is so good and I als...,0,1,0


### Split the data into features and target variable

In [59]:
# Work on a small slice of the dataset.
# `.loc` slices by label and includes the end label, so with the default
# integer index produced by `read_csv` this keeps labels 0 through 101
# (102 rows). NOTE(review): confirm that 102 rows, not 100, is intended.
subset_data = data.loc[:101]

# Target: the star rating. Features: every remaining column.
y_train = subset_data['stars']
x_train = subset_data.drop(columns=['stars'])

In [60]:
# Load the chat model that all three prompt variants are evaluated against.
model = ChatLLM(provider='groq', model_name='llama-3.1-8b-instant').model

# Build the three prompt templates (v1, v2, v3).
prompt1, prompt2, prompt3 = get_prompt_v1(), get_prompt_v2(), get_prompt_v3()

# Pipe each prompt into its own structured-output wrapper of the model, so
# every chain returns a `StructuredOutput` instance instead of free text.
chain1, chain2, chain3 = (
    prompt | model.with_structured_output(StructuredOutput)
    for prompt in (prompt1, prompt2, prompt3)
)

### Evaluation of multiple prompts on a subset of the processed dataset

In [61]:
# Display the composed chain's repr to inspect the prompt template and the
# model binding it is piped into.
chain1

PromptTemplate(input_variables=['reactions', 'review_text'], input_types={}, partial_variables={}, template='\n    You are an expert sentiment analysis system. Your task is to predict the star rating (1-5) for a given review.\n\n    Analyze the following review properties:\n    Review Text: {review_text}\n    Reactions: {reactions}\n\n    Output Instructions:\n    - You MUST return a valid JSON object strictly following the schema below.\n    - Do NOT return any preamble, explanation text, or markdown code blocks (like ```json).\n    - Just return the raw JSON.\n    ')
| RunnableBinding(bound=ChatGroq(profile={'max_input_tokens': 131072, 'max_output_tokens': 8192, 'image_inputs': False, 'audio_inputs': False, 'video_inputs': False, 'image_outputs': False, 'audio_outputs': False, 'video_outputs': False, 'reasoning_output': False, 'tool_calling': True}, client=<groq.resources.chat.completions.Completions object at 0x00000163E44E1550>, async_client=<groq.resources.chat.completions.AsyncCo

In [62]:
# Smoke-test chain 1 on a single hand-written review before the batch run.
chain1_sample = {
    'review_text': 'The movie wal good and funny',
    'reactions': {'cool': '1', 'useful': '2', 'funny': '3'},
}
chain1.invoke(chain1_sample)

StructuredOutput(predicted_stars=4, explanation="The review contains positive words like 'good' and 'funny', which suggests a high star rating. The reactions also support this, with 'funny' being a strong indicator of a positive review.")

In [63]:
# Smoke-test chains 2 and 3 on the same hand-written review.
sample_text = 'Movie was good'
sample_reactions = {'cool': '1', 'useful': '2', 'funny': '3'}

# Only the value of a cell's last expression is rendered, so this chain 2
# result is computed but not displayed.
chain2.invoke({'review_text': sample_text, 'reactions': sample_reactions})

chain3.invoke({'review_text': sample_text, 'reactions': sample_reactions})

StructuredOutput(predicted_stars=3, explanation="The reviewer states a neutral opinion about the movie, neither praising nor criticizing it. The 'useful' reaction count is low, indicating the review may not be helpful in making a purchasing decision. Based on the neutral sentiment and lack of specific features or flaws, the predicted rating is 3, indicating a mediocre experience.")

In [65]:
# Run chain 2 on its own so that its prediction is the displayed cell output.
chain2_sample = {
    'review_text': 'Movie was good',
    'reactions': {'cool': '1', 'useful': '2', 'funny': '3'},
}
chain2.invoke(chain2_sample)

StructuredOutput(predicted_stars=4, explanation="Based on the review content 'Movie was good', the overall sentiment is positive. Although the tone is neutral, the presence of positive keywords like 'good' hints at a high rating. Considering the reactions, many people found the review 'useful', further supporting a high rating.")

In [77]:
import os
import time
import json
import pandas as pd

# Wall-clock start of the evaluation run; read again when the run finishes.
start = time.time()

# Checkpoint files live under ./results, which is created on first use.
os.makedirs("results", exist_ok=True)
outputs_path = "results/outputs.json"
preds_path = "results/predictions.csv"

# Resume from an earlier run when checkpoints exist, otherwise start empty.
# `outputs` maps review text -> per-prompt structured results.
outputs = {}
if os.path.exists(outputs_path):
    with open(outputs_path, "r") as f:
        outputs = json.load(f)

# `predictions` holds one row per scored review: the three predicted star
# ratings followed by the actual rating.
predictions = (
    pd.read_csv(preds_path)
    if os.path.exists(preds_path)
    else pd.DataFrame(columns=["prompt1", "prompt2", "prompt3", "actual"])
)

def to_dict(result):
    """Normalise a chain result into a plain ``dict``.

    Args:
        result: The value returned by a chain. Handled cases, in order:
            an object exposing ``model_dump()``, a ``dict``, a ``str``
            (parsed as JSON), or anything else.

    Returns:
        dict: Always a dictionary, so callers can safely use ``.get(...)``.
            - objects with ``model_dump`` -> the result of ``model_dump()``
            - ``dict`` -> returned unchanged
            - ``str`` holding a JSON object -> the parsed object
            - any other ``str`` (invalid JSON, or valid JSON that is not an
              object, e.g. ``"5"`` or ``"[1, 2]"``) -> ``{"raw": <string>}``
            - anything else -> ``{"raw": str(result)}``
    """
    if hasattr(result, "model_dump"):
        return result.model_dump()
    if isinstance(result, dict):
        return result
    if isinstance(result, str):
        try:
            parsed = json.loads(result)
        except json.JSONDecodeError:
            return {"raw": result}
        # Valid JSON is not necessarily an object: numbers, lists, strings,
        # booleans and null all parse successfully. Wrap those so the return
        # value is always a dict and the caller's `.get(...)` cannot fail.
        return parsed if isinstance(parsed, dict) else {"raw": result}
    return {"raw": str(result)}

# Score every review in the subset with all three chains. Results are written
# to disk after each review so an interrupted run can be resumed without
# repeating LLM calls that already finished.
chains_by_prompt = {"prompt1": chain1, "prompt2": chain2, "prompt3": chain3}

for i in range(x_train.shape[0]):
    print(f"processing review: {i}")
    review_row = x_train.iloc[i]
    review_text = review_row["text"]

    # The review text doubles as the cache key: anything already present in
    # `outputs` was scored earlier and is not sent to the model again.
    if review_text not in outputs:
        reactions = {
            name: review_row[name] for name in ("cool", "useful", "funny")
        }

        # Invoke all three chains first, then normalise their results.
        raw_results = {
            prompt_name: chain.invoke(
                {"review_text": review_text, "reactions": reactions}
            )
            for prompt_name, chain in chains_by_prompt.items()
        }
        parsed_results = {
            prompt_name: to_dict(raw)
            for prompt_name, raw in raw_results.items()
        }

        # One predictions row: three predicted ratings, then the true rating.
        predicted = [
            parsed_results[prompt_name].get("predicted_stars")
            for prompt_name in chains_by_prompt
        ]
        predictions.loc[len(predictions)] = predicted + [y_train.iloc[i]]

        outputs[review_text] = parsed_results

        # ---- save after each iteration ----
        with open(outputs_path, "w") as f:
            json.dump(outputs, f, indent=2)
        predictions.to_csv(preds_path, index=False)

print("Time taken: ", time.time() - start)


processing review: 0
processing review: 1
processing review: 2
processing review: 3
processing review: 4
processing review: 5
processing review: 6
processing review: 7
processing review: 8
processing review: 9
processing review: 10
processing review: 11
processing review: 12
processing review: 13
processing review: 14
processing review: 15
processing review: 16
processing review: 17
processing review: 18
processing review: 19
processing review: 20
processing review: 21
processing review: 22
processing review: 23
processing review: 24
processing review: 25
processing review: 26
processing review: 27
processing review: 28
processing review: 29
processing review: 30
processing review: 31
processing review: 32
processing review: 33
processing review: 34
processing review: 35
processing review: 36
processing review: 37
processing review: 38
processing review: 39
processing review: 40
processing review: 41
processing review: 42
processing review: 43
processing review: 44
processing review: 4