# In [1]:
import psycopg2
from faker import Faker
import random
import csv


fake = Faker()

# In [2]:
def get_connection():
    """Open and return a new psycopg2 connection to the project database.

    The connection settings (host, database name, user and password) are
    fixed inside this function; the caller is responsible for closing the
    returned connection.
    """
    settings = {
        "host": "localhost",
        "database": "5310 project",
        "user": "postgres",
        "password": "123",
    }
    return psycopg2.connect(**settings)


# In [3]:
# Domains from which a generated supplier email address picks its host part.
email_domains = [
    "gmail.com",
    "outlook.com",
    "yahoo.com",
    "hotmail.com",
    "mail.com",
]

def generate_phone_number():
    """Return a random phone number string shaped like ``(AAA)EEE-SSSS``.

    ``AAA`` (area code) and ``EEE`` (exchange code) are each drawn from
    100-999 and ``SSSS`` (subscriber number) from 1000-9999, so every part
    has a fixed width and never starts with a zero.
    """
    area = random.randint(100, 999)
    exchange = random.randint(100, 999)
    subscriber = random.randint(1000, 9999)
    return "({:03d}){:03d}-{:04d}".format(area, exchange, subscriber)

def _fetch_available_address_ids(cursor):
    """Return the address_ids in ``addresses`` not used by customers or warehouses."""
    def fetch_ids(query):
        cursor.execute(query)
        return {row[0] for row in cursor.fetchall()}

    taken_ids = fetch_ids("SELECT address_id FROM customers")
    taken_ids |= fetch_ids("SELECT address_id FROM warehouses")
    all_ids = fetch_ids("SELECT address_id FROM addresses")
    return list(all_ids - taken_ids)


def generate_suppliers(n=50, csv_path='C:/Users/Yihua/Desktop/5310 project/suppliers.csv'):
    """Generate ``n`` fake suppliers, insert them into ``suppliers`` and dump them to CSV.

    Each supplier gets a Faker company name, an email derived from that name,
    a random phone number and a distinct ``address_id`` taken from the
    addresses that no customer or warehouse references.

    Args:
        n: Number of suppliers to create.
        csv_path: Where the generated rows are written as CSV (with a header
            row). Defaults to the path this script has always used.

    Returns:
        None. If fewer than ``n`` addresses are available, an error message
        is printed and nothing is inserted or written.
    """
    conn = get_connection()
    try:
        with conn.cursor() as cursor:
            # NOTE(review): addresses already referenced by existing rows in
            # `suppliers` are not excluded here, so running this twice may
            # reuse them - confirm whether that is acceptable.
            available_address_ids = _fetch_available_address_ids(cursor)

            if len(available_address_ids) < n:
                print(f"Error: Not enough available addresses. Need at least {n} unique addresses.")
                return

            # Sample WITHOUT replacement so no two suppliers share an address;
            # random.choice per row could pick the same address_id repeatedly.
            # max(n, 0) keeps a non-positive n a harmless no-op instead of an
            # error from random.sample.
            chosen_address_ids = random.sample(available_address_ids, max(n, 0))

            suppliers_data = []
            for address_id in chosen_address_ids:
                company_name = fake.company()

                # Email local part: lower-cased company name, spaces turned
                # into dots and commas dropped.
                email_username = company_name.lower().replace(" ", ".").replace(",", "")
                domain = random.choice(email_domains)
                email = f"{email_username}@{domain}"

                phone_number = generate_phone_number()

                suppliers_data.append((company_name, email, phone_number, address_id))

            # Insert every generated supplier row into the database.
            cursor.executemany(
                "INSERT INTO suppliers (company_name, email, phone_number, address_id) VALUES (%s, %s, %s, %s)",
                suppliers_data,
            )

        conn.commit()
    finally:
        # Always release the connection, including on the early return above
        # and when a query or insert raises (uncommitted work is discarded).
        conn.close()

    # Write the generated suppliers data to a CSV file
    with open(csv_path, 'w', newline='', encoding='utf-8') as csvfile:
        csv_writer = csv.writer(csvfile)
        csv_writer.writerow(['company_name', 'email', 'phone_number', 'address_id'])
        csv_writer.writerows(suppliers_data)

# When executed as a script, generate suppliers using the function's defaults.
if __name__ == "__main__":
    generate_suppliers()