In [147]:
from langchain_ollama import OllamaLLM
from langchain_core.prompts import PromptTemplate, ChatPromptTemplate
from langchain.agents import create_agent
from typing import List
from pydantic import BaseModel, Field
from langchain_core.output_parsers import PydanticOutputParser
# from langchain.agents.structured_output import 
import pandas as pd
import numpy as np
# Show every column when a DataFrame is displayed (None removes the column cap).
pd.set_option("display.max_columns", None)

In [148]:
class Review(BaseModel):
    # Structured result of one generated review; used as the target model of
    # the PydanticOutputParser built later in the notebook.
    #
    # Documented with `#` comments on purpose: pydantic copies a class
    # docstring into the JSON schema, which would change the format
    # instructions injected into the prompt.

    # Free-text body of the review.
    review: str = Field(description="Text of the review")
    # Star rating; the prompt asks the model for a value from 1 to 5, but no
    # range is enforced here.
    stars: int = Field(description="The quantity of stars on the rating")


In [149]:
# Settings for the local Ollama model, gathered in one mapping so they can be
# inspected or tweaked without touching the constructor call.
llm_settings = {
    "model": "gemma3:12b",
    "temperature": 0.5,
    "seed": 1234,                     # fixed sampling seed
    "num_ctx": 65536,                 # context window size requested from Ollama
    "reasoning": False,
    "validate_model_on_init": True,
}

llm = OllamaLLM(**llm_settings)

In [150]:
# Parser that turns the model's text into a validated `Review` and also
# supplies the JSON format instructions embedded in the prompt.
parser = PydanticOutputParser(pydantic_object=Review)

# System prompt for the review generator.
# Placeholders filled on every call: product_name, product_category,
# review_size, type, content.
# {format_instructions} is bound once, below, from the parser.
#
# The only output format described here is the parser's own. A second,
# plain-text "Review: / Stars:" layout used to precede it; a model that
# followed that layout produced text the parser cannot read.
str_prompt = """
You are a product review generator.

Your task is to create a customer-style product review based on the information provided below.

Product name: {product_name}
Category: {product_category}
Review size: {review_size}
Review type: {type}
Aspect being reviewed: {content}

Guidelines:

1. The review must sound natural, like a real customer experience.
2. The tone must strictly follow the review type:
   - If the type is positive, the overall sentiment must be positive.
   - If the type is negative, the overall sentiment must be negative.
3. Reviews may be controversial:
   - The text may include mixed opinions.
   - The star rating may be slightly unexpected.
   - Even so, the final sentiment must always respect the defined review type.
4. The review must focus mainly on the specified aspect:
   - Delivery
   - Product appearance
   - Functionality
   - Price
5. The product name may be mentioned in the review, but prefer to not mention.
6. Follow the review size rules:
   - Small: exactly one sentence.
   - Medium: at least two sentences.
   - Long: one paragraph with at least 5 lines.
7. Assign a star rating from 1 to 5 that matches the overall sentiment:
   - Negative reviews → 1 or 2 stars (controversial cases may slightly vary)
   - Neutral or mixed but positive → 3 stars
   - Clearly positive → 4 or 5 stars
Output format (strictly follow this format):

{format_instructions}
"""

# Each message must be a (role, template) tuple. The previous
# `(["system", str_prompt])` was only a parenthesised list of two bare
# strings, which LangChain treats as two separate human messages: the literal
# text "system" followed by the prompt.
prompt = ChatPromptTemplate(
    [("system", str_prompt)],
    partial_variables={"format_instructions": parser.get_format_instructions()},
)

In [151]:
# Load the product catalogue and keep a plain array view of its rows.
# The generation loop reads column 0 as the product name and column 1 as the
# product category.
df = pd.read_csv("data/products.csv")
products = df.to_numpy()

In [152]:
# Value pools the generation loop samples from.

# Length classes understood by the prompt's "review size" rules.
list_review_size = ["Small", "Medium", "Long"]

# Overall sentiment requested from the model.
list_type = ["Positive", "Negative"]

# Aspects a review can focus on (mirrors guideline 4 of the prompt).
list_content = [
    "Delivery",
    "Product appearance",
    "Functionality",
    "Price",
]

# Every valid row position in `products`.
# The previous hard-coded range(1, 100) never selected the first product
# (row 0) and raised IndexError whenever the CSV held fewer than 100 rows.
# (The misspelled name is kept because the generation loop refers to it.)
list_index_produtcs = list(range(len(products)))
 

In [153]:
# Generation pipeline: render the prompt, send it to the model, then parse the
# model's text with `parser`.
chain = prompt.pipe(llm, parser)

In [154]:
# qt_reviews: bound used by the generation loop's `while` condition.
# i: progress counter the generation loop advances after each stored review.
qt_reviews, i = 1_000, 0

In [None]:
# Generate `qt_reviews` synthetic reviews and persist them to reviews.csv.
#
# Each iteration draws a random size, sentiment, aspect set and product, asks
# the chain for a parsed review, and appends one flat record to
# `list_of_reviews`. Progress is checkpointed every 100 reviews and saved once
# more when the loop ends, whether it finished, failed or was interrupted.
list_of_reviews = []
# Reset the counter together with the list. It was previously initialised only
# in an earlier cell, so re-running this cell after a failure emptied the list
# while `i` kept its old value, and the next save replaced reviews.csv with a
# partial set.
i = 0

try:
    # `<` rather than `<=`: produce exactly qt_reviews rows, not one extra.
    while i < qt_reviews:
        review_size = str(np.random.choice(list_review_size))
        # Named review_type so the builtin `type` is not shadowed for every
        # later cell of the notebook.
        review_type = str(np.random.choice(list_type))
        # Between 1 and len(list_content) - 1 distinct aspects (randint's upper
        # bound is exclusive).
        aspects = np.random.choice(
            list_content,
            size=np.random.randint(1, len(list_content)),
            replace=False,
        )
        # Plain comma-separated text for both the prompt and the CSV, instead
        # of the ndarray repr ("['Delivery' 'Price']").
        content = ", ".join(aspects)
        index = np.random.choice(list_index_produtcs)
        product = products[index]

        inputs = {
            "product_name": product[0],
            "product_category": product[1],
            "review_size": review_size,
            "type": review_type,
            "content": content,
        }
        output = chain.invoke(inputs)

        list_of_reviews.append(
            {**inputs, "review": output.review, "stars": output.stars}
        )
        i += 1

        # Periodic checkpoint so a crash loses at most 100 reviews.
        if i % 100 == 0:
            pd.DataFrame(list_of_reviews).to_csv("reviews.csv", index=False)
finally:
    # Runs on normal completion, on any error and on a kernel interrupt; the
    # exception, if any, propagates afterwards with its original traceback.
    # Skipped while nothing has been generated so an earlier reviews.csv is
    # not replaced by an empty file.
    if list_of_reviews:
        pd.DataFrame(list_of_reviews).to_csv("reviews.csv", index=False)
        
