In [1]:
import csv
import os
import random

import psycopg2
from faker import Faker

In [2]:
# Shared Faker instance; generate_customers() draws first and last names from it.
# NOTE(review): no seed is set in this cell, so the generated names presumably
# differ from run to run — seed it if reproducible data is needed.
fake = Faker()

In [3]:
def get_connection():
    """Open a new psycopg2 connection to the project database.

    Connection settings are read from the standard libpq environment
    variables (PGHOST, PGDATABASE, PGUSER, PGPASSWORD) so that credentials do
    not have to be stored in the notebook. Each setting falls back to the
    value that used to be hard-coded here, so an existing local setup keeps
    working without any environment configuration.

    Returns:
        The connection object returned by ``psycopg2.connect``. The caller is
        responsible for closing it.
    """
    return psycopg2.connect(
        host=os.environ.get("PGHOST", "localhost"),
        database=os.environ.get("PGDATABASE", "5310 project"),
        user=os.environ.get("PGUSER", "postgres"),
        # Fallback kept only for backward compatibility with the local dev
        # database; set PGPASSWORD instead of relying on it.
        password=os.environ.get("PGPASSWORD", "123"),
    )


In [4]:
def _fetch_customer_address_ids(cursor):
    """Return the address_ids in `addresses` that no row in `warehouses` uses."""
    cursor.execute("SELECT address_id FROM addresses")
    all_addresses = {row[0] for row in cursor.fetchall()}

    cursor.execute("SELECT address_id FROM warehouses")
    warehouse_addresses = {row[0] for row in cursor.fetchall()}

    return list(all_addresses - warehouse_addresses)


def _make_unique_customer(existing_emails, domains):
    """Build (user_name, email, first_name, last_name) with an email not yet in existing_emails.

    The chosen email is added to `existing_emails` before returning.
    """
    first_name = fake.first_name().title()
    last_name = fake.last_name().title()
    domain = random.choice(domains)
    email = f"{first_name.lower()}.{last_name.lower()}@{domain}"

    # On a collision, draw a new name but keep the same domain.
    while email in existing_emails:
        first_name = fake.first_name().title()
        last_name = fake.last_name().title()
        email = f"{first_name.lower()}.{last_name.lower()}@{domain}"

    existing_emails.add(email)
    return f"{first_name} {last_name}", email, first_name, last_name


def generate_customers(n=2000, csv_path='C:/Users/Yihua/Desktop/5310 project/customers.csv'):
    """Generate and insert customer data into the customers table.

    For each of the `n` customers a fake first/last name is drawn, an email
    unique within this run is derived from it, and a random address_id is
    picked from the addresses that are not used by a warehouse (several
    customers may share one address). All rows are inserted in a single
    transaction and then mirrored to a CSV file.

    Args:
        n: Number of customers to generate.
        csv_path: Where the generated rows are written as CSV (with a header
            row). Defaults to the path that was previously hard-coded.

    Returns:
        None. Progress and errors are reported with ``print``; exceptions are
        caught here rather than propagated to the caller.

    Side effects:
        Inserts rows into `customers`, commits, and overwrites `csv_path`.
        On any error the transaction is rolled back. The cursor and the
        connection are always closed, including on failure.
    """
    conn = None
    cursor = None
    try:
        conn = get_connection()
        cursor = conn.cursor()

        # Customers may only live at addresses that are not warehouse addresses.
        available_customer_addresses = _fetch_customer_address_ids(cursor)
        if not available_customer_addresses:
            print("Error: Not enough unique addresses available for customers.")
            return

        domains = ['gmail.com', 'qq.com', 'yahoo.com', 'hotmail.com']
        customers = []
        existing_emails = set()

        for _ in range(n):
            user_name, email, first_name, last_name = _make_unique_customer(existing_emails, domains)

            # Allow multiple customers to have the same address_id
            address_id = random.choice(available_customer_addresses)

            # The CSV copy stores address_id as a string; the INSERT uses the raw value.
            customers.append((user_name, email, first_name, last_name, str(address_id)))
            cursor.execute(
                "INSERT INTO customers (user_name, email, first_name, last_name, address_id) VALUES (%s, %s, %s, %s, %s)",
                (user_name, email, first_name, last_name, address_id)
            )

        conn.commit()

        # Write the generated customer data to a CSV file
        with open(csv_path, 'w', newline='') as csvfile:
            csv_writer = csv.writer(csvfile)
            csv_writer.writerow(['user_name', 'email', 'first_name', 'last_name', 'address_id'])
            csv_writer.writerows(customers)

        print(f"Generated {n} customers successfully.")

    except Exception as e:
        print(f"An error occurred: {e}")
        # Discard any partially inserted rows; after a successful commit this
        # has nothing left to undo.
        if conn is not None:
            try:
                conn.rollback()
            except Exception as rollback_error:
                print(f"Rollback failed: {rollback_error}")

    finally:
        # Release database resources on every path (success, early return, error).
        if cursor is not None:
            cursor.close()
        if conn is not None:
            conn.close()
        
# Entry point: generate customers with the default arguments when this code is
# executed as the main module.
if __name__ == "__main__":
    generate_customers()

Generated 2000 customers successfully.
