In [13]:
!pip install groq



env: GROQ_API_KEY=<REDACTED - never commit API keys in notebook output; revoke and rotate this key>


In [14]:
pip install nest_asyncio




In [None]:
import asyncio
import os
import re

import nest_asyncio
import pandas as pd
from groq import AsyncGroq
from tqdm import tqdm  # Use tqdm instead of tqdm_asyncio for better control

# Patch asyncio so that nested event-loop usage is allowed
nest_asyncio.apply()

# Async Groq client; the API key is read from the GROQ_API_KEY environment variable
client = AsyncGroq(api_key=os.getenv("GROQ_API_KEY"))

# Input tables (adjust these paths to match your environment)
product_df = pd.read_csv('product_asin.csv')
reviews_sample_df = pd.read_csv('reviews_supplements.csv')

# First three reviews that have both a title and a text; they serve as the
# worked examples embedded in the system message
sample_reviews = reviews_sample_df.loc[:, ['title', 'text']].dropna().iloc[:3]

# Assemble the system prompt: instructions, three worked examples taken from
# `sample_reviews`, then the task statement.
# NOTE(review): the example ratings (5, 4, 3) are fixed constants rather than
# values read from the review data - confirm that this is intended.
system_content = (
    "\nYou are a helpful assistant capable of generating high-quality synthetic product reviews. "
    "Here are some examples of how to write product reviews, with a rating included at the end:\n\n"
    + "\n".join(
        f"Product: {sample_reviews.iloc[position]['title']}\n"
        f"Review: {sample_reviews.iloc[position]['text']}\n"
        f"Rating: {example_rating}\n"
        for position, example_rating in enumerate((5, 4, 3))
    )
    + "\nUse these examples to create a concise review (1-2 sentences) and provide a rating between 1 to 5 that reflects the quality of the product.\n"
)

# Function to parse review and rating
def parse_review_and_rating(response_text):
    """Split a model response into its review text and an integer rating.

    The response is cut at the first occurrence of ``"Rating:"``. The text
    before the marker, stripped of surrounding whitespace, is the review. The
    rating is the run of digits that starts the text after the marker; anything
    following those digits (``"/5"``, ``" stars"``, a fractional part, an
    explanatory note) is ignored, so ``"Rating: 4/5"`` yields ``4``.

    Args:
        response_text: Raw text returned by the model.

    Returns:
        tuple: ``(review, rating)``. ``rating`` is ``0`` when the response has
        no ``"Rating:"`` marker or when no digits directly follow the marker.
        Without a marker, ``review`` is ``response_text`` returned unchanged.
    """
    if "Rating:" not in response_text:
        return response_text, 0

    review, rating_part = response_text.split("Rating:", 1)
    # Requiring the *whole* remainder to be digits would discard ratings such
    # as "4/5" or "5 stars"; only the leading digits are significant.
    leading_digits = re.match(r"\s*(\d+)", rating_part)
    rating = int(leading_digits.group(1)) if leading_digits else 0
    return review.strip(), rating

# Asynchronous function to generate a synthetic review and rating
async def generate_synthetic_review(product_name, system_content,
                                    model="llama3-8b-8192", temperature=0.5,
                                    max_tokens=200):
    """Ask the chat model for a synthetic review of one product.

    Sends ``system_content`` as the system message and a short user message
    naming the product through the module-level ``client``, then parses the
    reply with ``parse_review_and_rating``.

    Args:
        product_name: Product name interpolated into the user message.
        system_content: Text sent as the system message.
        model: Model identifier passed to the chat completion request.
        temperature: Sampling temperature passed to the request.
        max_tokens: Token limit passed to the request.

    Returns:
        tuple: ``(review_text, rating)`` as produced by
        ``parse_review_and_rating``. If anything raises while requesting or
        parsing, the result is ``(str(error), 0)`` instead.
    """
    user_message = f"Product: {product_name}\nReview and Rating:"
    try:
        chat_completion = await client.chat.completions.create(
            messages=[
                {"role": "system", "content": system_content},
                {"role": "user", "content": user_message}
            ],
            model=model,
            temperature=temperature,
            max_tokens=max_tokens,
            top_p=1,
            stream=False
        )
        generated_response = chat_completion.choices[0].message.content.strip()
        return parse_review_and_rating(generated_response)
    except Exception as e:
        # Deliberately best-effort: one failed request must not abort the whole
        # batch. Note that the error text is returned in the review slot, so
        # callers see it as a review with rating 0.
        return str(e), 0

# Asynchronous function to generate synthetic reviews
async def generate_reviews_with_ratings(product_title, rating, system_message):
    """Generate a synthetic review for one product and package it as a record.

    Args:
        product_title: Product title forwarded to ``generate_synthetic_review``.
        rating: Rating supplied by the caller; stored unchanged under
            ``'review_rating'``.
        system_message: System prompt forwarded to ``generate_synthetic_review``.

    Returns:
        dict: ``'title'``, ``'synthetic_review'`` and ``'review_rating'`` as
        before, plus ``'llm_rating'``, the rating parsed from the model's own
        reply (``0`` when none could be parsed). This value was previously
        computed and then discarded.
    """
    review, generated_rating = await generate_synthetic_review(product_title, system_message)
    return {
        'title': product_title,
        'synthetic_review': review,
        'review_rating': rating,
        'llm_rating': generated_rating,
    }

# Main function to run the synthetic review generation
async def main(max_concurrency=16):
    """Generate one synthetic review per product and save them all to a CSV file.

    For every title in ``product_df`` a rating is drawn at random (with
    replacement) from ``reviews_sample_df['rating']`` and passed, together with
    the module-level ``system_content``, to ``generate_reviews_with_ratings``.
    Records are collected in completion order, which is not necessarily the
    input order, and written to
    ``balanced_synthetic_reviews_with_llm_ratings.csv``.

    Args:
        max_concurrency: Maximum number of review requests allowed in flight at
            the same time. Without a bound every request is started at once;
            tune this to what the API account tolerates.

    Raises:
        ValueError: If ``max_concurrency`` is smaller than 1.
    """
    if max_concurrency < 1:
        raise ValueError("max_concurrency must be at least 1")

    product_titles = product_df['title'].tolist()
    # One vectorised draw replaces a DataFrame.sample(1) call per product row.
    sampled_ratings = (
        reviews_sample_df['rating']
        .sample(n=len(product_titles), replace=True)
        .tolist()
    )

    # Created inside the coroutine so it belongs to the running event loop.
    request_slots = asyncio.Semaphore(max_concurrency)

    async def _generate_bounded(product_title, original_rating):
        # Wait for a free slot before issuing the request.
        async with request_slots:
            return await generate_reviews_with_ratings(
                product_title, rating=original_rating, system_message=system_content
            )

    tasks = [
        _generate_bounded(product_title, original_rating)
        for product_title, original_rating in zip(product_titles, sampled_ratings)
    ]

    # Run tasks with tqdm for progress tracking
    balanced_reviews_list = []
    for completed_task in tqdm(asyncio.as_completed(tasks), total=len(tasks), desc="Generating Synthetic Reviews"):
        balanced_reviews_list.append(await completed_task)

    # Convert the list of synthetic reviews to a DataFrame and save it as CSV
    balanced_reviews_df = pd.DataFrame(balanced_reviews_list)
    output_file = "balanced_synthetic_reviews_with_llm_ratings.csv"
    balanced_reviews_df.to_csv(output_file, index=False)
    print(f"Synthetic reviews and ratings have been successfully generated and saved to {output_file}")

# Run the main function using asyncio.run().
# NOTE(review): asyncio.run() normally refuses to start when an event loop is
# already running; presumably this call depends on the nest_asyncio.apply()
# made earlier in this cell - confirm before removing that call.
asyncio.run(main())


Generating Synthetic Reviews:   0%|          | 0/483354 [00:00<?, ?it/s]