# In [1]:
import psycopg2
from faker import Faker
import random
import csv


# Module-level Faker instance, created once at import time.
# NOTE(review): nothing in the visible part of this file reads `fake`;
# confirm it is used elsewhere before removing it.
fake = Faker()

# In [2]:
def get_connection(host=None, database=None, user=None, password=None):
    """Open a new psycopg2 connection to the project database.

    Each setting is resolved in this order: the explicit argument, then the
    matching libpq-style environment variable (``PGHOST``, ``PGDATABASE``,
    ``PGUSER``, ``PGPASSWORD``), then the original built-in default. Calling
    with no arguments and none of those variables set connects exactly as
    the previous hard-coded version did.

    Args:
        host: Database server host; falls back to ``"localhost"``.
        database: Database name; falls back to ``"5310 project"``.
        user: Login role; falls back to ``"postgres"``.
        password: Login password; falls back to ``"123"``.

    Returns:
        The connection object returned by ``psycopg2.connect``. The caller
        is responsible for closing it.
    """
    # Local import keeps this block self-contained (the file header does not
    # import os).
    import os

    def _resolve(explicit, env_name, fallback):
        # `is not None` (rather than truthiness) so an explicit empty string
        # is respected instead of silently replaced.
        if explicit is not None:
            return explicit
        return os.environ.get(env_name, fallback)

    return psycopg2.connect(
        host=_resolve(host, "PGHOST", "localhost"),
        database=_resolve(database, "PGDATABASE", "5310 project"),
        user=_resolve(user, "PGUSER", "postgres"),
        password=_resolve(password, "PGPASSWORD", "123"),
    )


# In [3]:
# Canned feedback sentences. generate_feedback() concatenates two of these
# pools (chosen by the rating) and picks one sentence at random.

# Favourable comments about delivery speed, packaging and the courier.
positive_delivery_feedback = [
    "The delivery was quick and the packaging was secure.",
    "Excellent service! The package arrived on time and in perfect condition.",
    "The delivery was faster than expected, and the package was well protected.",
    "I was impressed with the speedy delivery and careful packaging.",
    "The courier was friendly, and the delivery arrived promptly."
]

# Complaints about late, misdelivered or damaged deliveries.
negative_delivery_feedback = [
    "Delivery was delayed, but the product arrived in good condition.",
    "The courier left the package at the wrong address.",
    "The product arrived damaged due to poor packaging.",
    "The delivery was late, and the package was slightly damaged.",
    "There was a mix-up with the delivery, and it took longer than expected."
]

# Favourable comments about the product's taste, freshness, quality and
# expiration date.
positive_product_feedback = [
    "The taste of the product exceeded my expectations; very fresh and delicious.",
    "Very satisfied with the purchase; the quality is outstanding and the product is very fresh.",
    "The product tastes great and the freshness is as described.",
    "Great value for the price; the quality and freshness are excellent.",
    "I am very pleased with the product's taste and freshness; it is of high quality.",
    "The expiration date was far out, and the product tasted incredibly fresh.",
    "The freshness and taste were top-notch, and the expiration date was well in the future.",
    "I loved the taste; it was fresh and had a long expiration date.",
    "The product was fresh and delicious, with a satisfactory expiration date.",
    "The quality, taste, and freshness of the product were impressive."
]

# Complaints about the product's taste, freshness, quality and expiration
# date.
negative_product_feedback = [
    "The product is not as described; the taste was very disappointing and not fresh.",
    "Good value for the money, but the freshness could be better.",
    "The product broke after a few uses; not worth the price, and it wasn't fresh.",
    "Quality is below average; I'm not satisfied with the purchase, and it wasn't fresh.",
    "The product arrived with defects and did not work properly; the taste was bad and not fresh.",
    "The taste was awful, and the product was not fresh at all.",
    "The expiration date was too close, and the product tasted stale.",
    "Very disappointed with the taste and freshness of the product; the expiration date was near.",
    "The product tasted bad and wasn't fresh, with an expiration date too soon.",
    "Poor taste and quality; the product wasn't fresh and had a short expiration date."
]

def generate_feedback(rating):
    """Return one random canned feedback sentence that fits ``rating``.

    Pool selection:
        * ``rating >= 4``: positive delivery + positive product sentences.
        * ``rating == 3``: positive product + negative delivery sentences
          (a mixed pool for a middling score).
        * anything else: negative delivery + negative product sentences.

    Args:
        rating: Numeric score for the order.

    Returns:
        A single sentence drawn uniformly from the selected pool.
    """
    if rating >= 4:
        pool = positive_delivery_feedback + positive_product_feedback
    elif rating == 3:
        pool = positive_product_feedback + negative_delivery_feedback
    else:
        pool = negative_delivery_feedback + negative_product_feedback

    return random.choice(pool)

def generate_ratings(n=5000, csv_path='C:/Users/Yihua/Desktop/5310 project/ratings.csv'):
    """Create ``n`` random ratings, insert them into ``ratings``, and export a CSV.

    ``n`` distinct orders are picked at random from ``customer_order``. Each
    one gets a rating drawn uniformly from 1 to 5 and a matching canned
    feedback sentence from ``generate_feedback``. All rows are inserted in a
    single transaction; after it commits, the same rows are written to
    ``csv_path``.

    If ``customer_order`` holds fewer than ``n`` distinct order ids, an error
    is printed and nothing is inserted or written.

    Args:
        n: Number of ratings to generate (one per distinct order).
        csv_path: Destination of the CSV export. Defaults to the original
            hard-coded location.

    Returns:
        None.
    """
    conn = get_connection()
    try:
        # psycopg2's `with conn` scopes a transaction (commit on success,
        # rollback on exception) but does NOT close the connection, hence
        # the surrounding try/finally. `with cursor` closes the cursor.
        with conn, conn.cursor() as cursor:
            # One round trip for every order -> customer pair, instead of a
            # separate SELECT per chosen order.
            cursor.execute("SELECT order_id, customer_id FROM customer_order")
            customer_by_order = {}
            for order_id, customer_id in cursor.fetchall():
                # Keep the first customer seen for an order_id, mirroring the
                # previous per-order fetchone() lookup.
                customer_by_order.setdefault(order_id, customer_id)

            if len(customer_by_order) < n:
                print(f"Error: Not enough unique order_ids. Need at least {n} unique order_ids.")
                return

            # Sampling without replacement guarantees each order is rated at
            # most once. max() keeps a negative n a harmless no-op, as the
            # old range(n) loop was.
            chosen_orders = random.sample(list(customer_by_order), max(n, 0))

            ratings = []
            for order_id in chosen_orders:
                rating = random.randint(1, 5)  # inclusive on both ends
                feedback = generate_feedback(rating)
                ratings.append((customer_by_order[order_id], order_id, rating, feedback))

            cursor.executemany(
                "INSERT INTO ratings (customer_id, order_id, rating, feedback) VALUES (%s, %s, %s, %s)",
                ratings,
            )
    finally:
        conn.close()

    # Export only after the transaction has committed, so the CSV never
    # describes rows that failed to reach the database.
    with open(csv_path, 'w', newline='') as csvfile:
        csv_writer = csv.writer(csvfile)
        csv_writer.writerow(['customer_id', 'order_id', 'rating', 'feedback'])
        csv_writer.writerows(ratings)

# Script entry point: generate ratings with the default arguments when this
# file is executed directly; merely importing the module does not trigger it.
if __name__ == "__main__":
    generate_ratings()