In [12]:
import pandas as pd

# Directory that holds the input CSV files; later cells reuse it for output.
data_path = "/Users/deveshsurve/UNIVERSITY/INFO/7374/Final-Project-SmartCommerce/data_pipeline/data_files/"

# Load the four source tables.
df1 = pd.read_csv(f"{data_path}top_1000_product_review_summaries.csv")
df2 = pd.read_csv(f"{data_path}olist_order_items_dataset.csv")
df3 = pd.read_csv(f"{data_path}olist_products_dataset.csv")
df4 = pd.read_csv(f"{data_path}product_category_name_translation.csv")

# Build df5: every row of df1 enriched with price, physical attributes and the
# category translation. All joins are left joins, so rows of df1 are never
# dropped; a df1 row is repeated once per matching row in df2, which is why
# exact duplicate rows are removed at the end.
df5 = (
    df1
    .merge(df2[["product_id", "price"]], on="product_id", how="left")
    .merge(
        df3[[
            "product_id",
            "product_weight_g",
            "product_length_cm",
            "product_height_cm",
            "product_width_cm",
            "product_category_name",
        ]],
        on="product_id",
        how="left",
    )
    .merge(df4, on="product_category_name", how="left")
    .drop_duplicates()
)

In [13]:
# Collapse df5 to one row per distinct combination of the key columns below,
# with `price` replaced by the mean of the prices in that group.
#
# dropna=False: by default groupby discards every row whose key contains NaN,
# which would silently remove products that lack a category, a category
# translation, or a weight/dimension value. Keeping them means every product
# in df5 still appears in df5_avg (with NaN in the missing key columns).
df5_avg = (
    df5.groupby(
        [
            "summary",
            "product_id",
            "product_weight_g",
            "product_length_cm",
            "product_height_cm",
            "product_width_cm",
            "product_category_name",
            "product_category_name_english",
        ],
        dropna=False,
    )["price"]
    .mean()
    .reset_index()
)


In [14]:
# Write the per-product average prices into the same directory the inputs
# were read from; the DataFrame index is not written.
avg_price_csv = data_path + "product_avg_price.csv"
df5_avg.to_csv(avg_price_csv, index=False)

In [19]:
# Preview the generation prompt for the first products in df5_avg (at most 10).
# The count is capped at len(df5_avg) so that .iloc does not raise IndexError
# when the frame has fewer than 10 rows.
preview_count = min(10, len(df5_avg))
for i in range(preview_count):
    product = df5_avg.iloc[i]  # fetch the row once instead of once per field
    prompt = f"""
        Given the following product details:
        - Price: {product['price']}
        - Weight: {product['product_weight_g']} grams
        - Dimensions: {product['product_length_cm']} cm (L) x {product['product_width_cm']} cm (W) x {product['product_height_cm']} cm (H)
        - Category: {product['product_category_name_english']}
        - Review Summary: {product['summary']}

        Please generate:
        1. A product name.
        2. A description including:
        - **Description:** A brief overview of the product.
        - **Specifications:** Key details and material.
        - **Package Includes:** What is included in the package.

        Format the output clearly with headings for each section.
        """
    print(prompt)
    print("-----------------------------------")
    print("\n\n")



        Given the following product details:
        - Price: 173.10833333333335
        - Weight: 950.0 grams
        - Dimensions: 53.0 cm (L) x 13.0 cm (W) x 5.0 cm (H)
        - Category: housewares
        - Review Summary: **Customers say...** 

- The product met expectations and was received on time, with one customer expressing satisfaction with the purchase.
- Many customers appreciated the timely delivery and mentioned that they received the correct items.
- Overall, there is a strong sense of trust in the seller, with several stating they would buy again without hesitation.
- A few customers reported issues with not receiving certain items, specifically the rotating skewer.
- Good pricing was also highlighted, as customers noted that the prices were lower than those on the official site. 
- One customer expressed a desire for a customer card to obtain more benefits.

        Please generate:
        1. A product name.
        2. A description including:
        - **Descript

In [24]:
import pandas as pd
import os
from openai import OpenAI
from abc import ABC, abstractmethod
import json

class LLMCaller(ABC):
    """Abstract base class for objects that send a user prompt to an LLM.

    The base class only stores the system prompt; concrete subclasses must
    implement ``call_llm``.
    """

    def __init__(self, system_prompt):
        """Keep ``system_prompt`` on the instance for use by subclasses."""
        self.system_prompt = system_prompt

    @abstractmethod
    def call_llm(self, user_prompt):
        """Send ``user_prompt`` to the model; the subclass defines the result."""
        ...

class OpenAICaller(LLMCaller):
    """LLMCaller implementation that talks to the OpenAI chat-completions API.

    The client is created with the API key read from the ``OPENAI_API_KEY``
    environment variable.
    """

    def __init__(self, system_prompt):
        """Store the system prompt and build the OpenAI client."""
        super().__init__(system_prompt)
        self.client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

    @staticmethod
    def _parse_json(text):
        """Parse ``text`` as JSON, tolerating wrapping around a JSON object.

        A strict ``json.loads`` is tried first, so any reply that is already
        valid JSON is returned exactly as before. Only when that fails is the
        span from the first ``{`` to the last ``}`` parsed instead, which
        covers replies wrapped in a Markdown code fence or surrounded by
        extra prose.

        Raises:
            json.JSONDecodeError: if neither attempt yields valid JSON.
        """
        try:
            return json.loads(text)
        except json.JSONDecodeError:
            start = text.find("{")
            end = text.rfind("}")
            if start == -1 or end < start:
                raise  # nothing that looks like a JSON object to fall back to
            return json.loads(text[start:end + 1])

    def call_llm(self, user_prompt):
        """Send the system prompt plus ``user_prompt`` and return parsed JSON.

        The reply is streamed from ``gpt-4o-mini``, concatenated, printed,
        and then parsed.

        Args:
            user_prompt: Text sent as the ``user`` message.

        Returns:
            The decoded JSON value of the model's reply.

        Raises:
            json.JSONDecodeError: if the reply cannot be parsed as JSON.
        """
        messages = [
            {"role": "system", "content": self.system_prompt},
            {"role": "user", "content": user_prompt}
        ]
        response = self.client.chat.completions.create(
            model="gpt-4o-mini",
            messages=messages,
            stream=True
        )
        # Join the response chunks. A chunk with an empty `choices` list would
        # make `choices[0]` raise IndexError, so such chunks are skipped.
        pieces = []
        for chunk in response:
            if not chunk.choices:
                continue
            pieces.append(chunk.choices[0].delta.content or "")
        response_text = ''.join(pieces)
        print(response_text)
        return self._parse_json(response_text)

# Initialize OpenAICaller with a system prompt
system_prompt = "Generate a JSON response with 'product_name':<str> and 'description':<str>. Ensure that you only include the json in response and nothing else like ```json"
llm_caller = OpenAICaller(system_prompt)

# Read your product data
df = df5_avg  # Adjust this to read your entire dataset

# Load existing results if available, so a re-run resumes where the previous
# run stopped instead of calling the LLM again for finished products.
results_file = 'product_idf_descriptions.csv'
if os.path.exists(results_file):
    results_df = pd.read_csv(results_file)
    processed_product_ids = results_df['product_id'].tolist()
else:
    results_df = pd.DataFrame(columns=['product_id', 'product_name', 'product_description'])
    processed_product_ids = []

# Set mirror of processed_product_ids: constant-time "already done?" checks
# in the loop instead of scanning the list for every row.
processed_lookup = set(processed_product_ids)

# Number of new results to collect before results_file is rewritten.
SAVE_BATCH_SIZE = 5  # Adjust the batch size as needed

# New results that have not been written to disk yet
results = []


def _build_user_prompt(row):
    """Render the user prompt for one product row of ``df``."""
    return f"""
    Given the following product details:
    - Price: {row['price']}
    - Weight: {row['product_weight_g']} grams
    - Dimensions: {row['product_length_cm']} cm (L) x {row['product_width_cm']} cm (W) x {row['product_height_cm']} cm (H)
    - Category: {row['product_category_name_english']}
    - Review Summary: {row['summary']}
    
    Please generate a JSON object with:
    - "product_name": The name of the product, add a fake company name too.
    - "description": A detailed description including Product Details what is it, Specifications:, and 'Package Includes: as a combined string. Do not include any other information like Customers faced issues etc.'
    """


def _save_batch(saved_df, batch):
    """Append ``batch`` to ``saved_df``, rewrite results_file, mark ids as done.

    Returns the combined DataFrame. The caller rebinds ``results_df`` only
    after the file write succeeded, so a failed write cannot leave the batch
    appended twice on the next attempt.
    """
    combined = pd.concat([saved_df, pd.DataFrame(batch)], ignore_index=True)
    combined.to_csv(results_file, index=False)
    batch_ids = [entry['product_id'] for entry in batch]
    processed_product_ids.extend(batch_ids)
    processed_lookup.update(batch_ids)
    return combined


# Generate product names and descriptions
try:
    for i, row in df.iterrows():
        if row['product_id'] in processed_lookup:
            continue  # Skip if already processed

        user_prompt = _build_user_prompt(row)

        try:
            json_response = llm_caller.call_llm(user_prompt)
            product_name = json_response.get('product_name', 'Unknown')
            product_description = json_response.get('description', '')

            results.append({
                'product_id': row['product_id'],
                'product_name': product_name,
                'product_description': product_description
            })

            # Save results periodically
            if len(results) >= SAVE_BATCH_SIZE:
                results_df = _save_batch(results_df, results)
                results = []

        except Exception as e:
            # Best effort: report the failure and continue with the next product.
            print(f"Error processing product ID {row['product_id']}: {e}")
finally:
    # Save any remaining results. Running this in `finally` also keeps the
    # results collected so far when the run is interrupted part-way through.
    if results:
        results_df = _save_batch(results_df, results)


{
  "product_name": "ChefMaster Rotating Skewer Set by KitchenPro",
  "description": "The ChefMaster Rotating Skewer Set by KitchenPro is a must-have addition to your housewares collection. This versatile kitchen tool is perfect for grilling, barbecuing, and roasting your favorite meats and vegetables with ease. Specifications: Price: $173.11, Weight: 950.0 grams, Dimensions: 53.0 cm (L) x 13.0 cm (W) x 5.0 cm (H). Package Includes: 1 rotating skewer, 4 skewers, and a durable carrying case."
}
{
  "product_name": "Stark Home Essentials Storage Bin",
  "description": "The Stark Home Essentials Storage Bin is a stylish and functional storage solution designed for organizing your home. Specifications: Price: $94.68, Weight: 750.0 grams, Dimensions: 35.0 cm (L) x 35.0 cm (W) x 15.0 cm (H). Package Includes: 1 Storage Bin."
}
{
  "product_name": "Targaryen Arduino Starter Kit by Stark Tech",
  "description": "The Targaryen Arduino Starter Kit is designed for enthusiasts and professionals al

In [25]:
# Display the table of product ids with their generated names and descriptions.
results_df

Unnamed: 0,product_id,product_name,product_description
0,d696750e550fd0f733979dd7e5dff921,ChefMaster Rotating Skewer Set by KitchenPro,The ChefMaster Rotating Skewer Set by KitchenP...
1,16ed6a6e3fce23b741650437fe58d65b,Stark Home Essentials Storage Bin,The Stark Home Essentials Storage Bin is a sty...
2,87cc2aef12eddf0b170234138b0946e0,Targaryen Arduino Starter Kit by Stark Tech,The Targaryen Arduino Starter Kit is designed ...
3,2b939dc9b176d7fa21594d588815d4a4,Elite Travel Companion - Luggage Organizer by ...,"Introducing the Elite Travel Companion, a prem..."
4,8b50a72d52d7a91fb19d19fbe069e2f2,Deluxe Kitchen Organizer by HomeEssentials,The Deluxe Kitchen Organizer is the perfect so...


In [None]:
# Display the table of product ids with their generated names and descriptions.
results_df