## Database Build Script v3
## Anthony Ung, Sean Jerzewski, Gideon Kipkorir

Any new modifications to the database will happen in this Notebook.

Modifications to Chelsea Cantone's Code from Week 2
1. Changed from using Postgres to SQLite3.
2. Created classes of functions to delineate separate modules.
3. Created classes of products

Inspiration from Harini's Code from Week 4
1. Table Schemas

In [1]:
import sqlite3 as lite
import csv
from datetime import datetime, date, timedelta
from decimal import Decimal
import random

'''
    If you are building the grocery database, 
        you should only touch
        ARGS, TABLE_DEFINITIONS, and params.

    If you get an error message, 
        set every value in ARGS to True
        and then re-run the script.

    It took about 10 minutes
        on Anthony Ung's Thinkpad P14s
        with Ryzen 7 8840HS, 32GB RAM, and 4TB 990 Pro

    The third arg was useful in a later HW when we did some profiling
'''

# Switches for the two build stages; each key is looked up by the stage it names.
ARGS = {
    'Create Products Table': True,
    'Populate Sales Transactions Data': True
}

# CREATE TABLE statements, keyed by table name.
#
# The transactions_customers statement previously lacked the comma between
# `num_items INT` and `total FLOAT`, which made SQLite read it as a single
# column definition and left the table without a `total` column.
#
# NOTE(review): simulate.write_customer_total inserts three positional values
# (date, customer_number, running_total) into transactions_customers; confirm
# that insert against this four-column layout before creating the table from
# this definition.
TABLE_DEFINITIONS = {
    'products': (
        'CREATE TABLE products('
        'sku INT,'
        'product_name TEXT, '
        'product_type TEXT, '
        'manufacturer TEXT, '
        'base_price REAL)'
    ),
    'transactions_sales': (
        'CREATE TABLE transactions_sales('
        'date TEXT, '
        'customer_number INT, '
        'sku INT, '
        'salesPrice REAL, '
        'items_left INT, '
        'cases_ordered INT)'
    ),
    'transactions_customers': (
        'CREATE TABLE transactions_customers('
        'date VARCHAR(8), '
        'customer_number INT, '
        'num_items INT, '
        'total FLOAT)'
    ),
}


class params:
    """Tunable constants for the build, grouped into nested namespaces."""

    class group:
        """Pricing and customer-volume settings."""
        # Factor applied to each product's base price.
        price_multiplier = 1.2
        # Inclusive bounds for the number of customers drawn per day.
        customers_low = 1020
        customers_high = 1060
        # Added to both customer bounds on weekend days.
        weekend_increase = 75
        # Upper bound for the item count drawn for a single customer.
        maximum_items = 90

    class simulation:
        """First and last calendar day to simulate (both inclusive)."""
        start_date = date(year=2024, month=1, day=1)
        end_date = date(year=2024, month=12, day=31)

    class debug:
        """Progress-reporting settings."""
        # A progress line is printed every this-many simulated days.
        display_daily_commits = 14

    class max_stock_levels:
        """
        Initial Stock in Cases
        Numbers are based on profiling in HW 3
        If less than 2 cases are computed,
            I instead set the max stock level to 2.
        """
        milk = 16
        cereal = 2
        baby_food = 2
        diapers = 2
        bread = 3
        peanut_butter = 5
        jelly_jam = 10
        other = 7
        
        
class db:
    """Shared SQLite connection used by the build.

    All state is kept on the class itself and every method is called on the
    class (``db.connect()``, ``db.execute_sql(...)``); no instance is created.
    """

    con = None  # sqlite3 connection, set by connect()
    cur = None  # cursor on `con`, set by connect()
    commit_pending = 0  # rows written since the last commit(); callers increment it

    @staticmethod
    def connect():
        """Open ``store.db`` and keep the connection and a cursor on the class."""
        db.con = lite.connect(r'store.db')
        db.cur = db.con.cursor()
        print('Database Successfully Connected To')

    @staticmethod
    def execute_sql(sql):
        """Execute one SQL statement that takes no bound values.

        Raises AssertionError when `sql` is not a string.
        """
        # The message used to embed print(...), which printed the type and
        # then rendered "Got None instead"; report the type directly.
        assert isinstance(sql, str), \
            f"Error! This function expected a string. Got {type(sql)} instead"
        db.cur.execute(sql)

    @staticmethod
    def execute_sql_values(sql, values):
        """Execute one SQL statement with `values` bound to its ``?`` placeholders.

        Raises AssertionError when `sql` is not a string or `values` is not
        a tuple.
        """
        assert isinstance(sql, str), \
            f"Error! This function expected a string. Got {type(sql)} instead"
        assert isinstance(values, tuple), \
            f"Error! This function expected a tuple. Got {type(values)} instead"
        db.cur.execute(sql, values)

    @staticmethod
    def commit():
        """Commit the open transaction and reset the pending-row counter."""
        db.con.commit()
        db.commit_pending = 0

    @staticmethod
    def close():
        """Commit any outstanding work, then close the connection."""
        db.con.commit()
        db.con.close()
        print('Database Connection Closed')


class db_debug():
    """Ad-hoc query helpers that print every result row.

    In my testing, the db_debug class does not play nicely with the db class
    because even though I invoke db.connect(), I still get error messages
    saying that the database is closed. Each invocation therefore creates its
    own database connection, since these methods are meant to be used very
    rarely.
    """

    @staticmethod
    def execute_sql(sql):
        """Run `sql` against ``store.db`` and print each resulting row.

        Raises AssertionError when `sql` is not a string.
        """
        assert isinstance(sql, str), \
            f"Error! This function expected a string. Got {type(sql)} instead"

        con = lite.connect(r'store.db')
        try:
            for row in con.cursor().execute(sql).fetchall():
                print(row)
        finally:
            # Close even when the query fails so the connection is not leaked.
            con.close()

    @staticmethod
    def execute_sql_values(sql, values):
        """Run `sql` with `values` bound to its placeholders and print each row.

        Raises AssertionError when `sql` is not a string or `values` is not
        a tuple.
        """
        # The messages used to embed print(...), which rendered as
        # "Got None instead"; report the offending type directly.
        assert isinstance(sql, str), \
            f"Error! This function expected a string. Got {type(sql)} instead"
        assert isinstance(values, tuple), \
            f"Error! This function expected a tuple. Got {type(values)} instead"

        con = lite.connect(r'store.db')
        try:
            for row in con.cursor().execute(sql, values):
                print(row)
        finally:
            con.close()
        

def create_products_table():
    """Rebuild the products table from Products1.txt, then refresh Inventory.

    Does nothing except print a notice when the 'Create Products Table'
    switch in ARGS is off. Otherwise the table is dropped and recreated, one
    row is inserted per line of the pipe-delimited file (committing every
    10000 rows and once more at the end), and Inventory.populate_lists() is
    called.
    """
    if not ARGS['Create Products Table']:
        print("You don't want to create the Products table")
        return

    db.execute_sql('DROP TABLE IF EXISTS products')
    db.execute_sql(TABLE_DEFINITIONS['products'])
    db.commit()

    # Pipe-delimited input with no quoting; registered under the name 'piper'.
    csv.register_dialect('piper', delimiter='|', quoting=csv.QUOTE_NONE)

    insert_statement = 'insert into products values (?, ?, ?, ?, ?)'
    # File headers, listed in the order of the table's columns.
    source_fields = ('SKU', 'Product Name', 'itemType', 'Manufacturer', 'BasePrice')

    with open('Products1.txt', 'r') as product_file:
        inserted = 0
        reader = csv.DictReader(product_file, dialect='piper')

        for inserted, record in enumerate(reader, start=1):
            db.execute_sql_values(
                sql=insert_statement,
                values=tuple(record.get(field) for field in source_fields),
            )
            if inserted % 10000 == 0:
                db.commit()
                print(f"Committed {inserted} products")

        db.commit()
        print(f"Committed {inserted} products")

    Inventory.populate_lists()

class Product:
    """A single product plus its running stock and case-order counters."""

    # itemType label -> attribute of params.max_stock_levels holding the
    # maximum stock (in cases); labels not listed here use 'other'.
    _LEVEL_ATTR_BY_TYPE = {
        'Milk': 'milk',
        'Cereal': 'cereal',
        'Baby Food': 'baby_food',
        'Diapers': 'diapers',
        'Bread': 'bread',
        'Peanut Butter': 'peanut_butter',
        'Jelly/Jam': 'jelly_jam',
    }

    def __init__(self, p_name, p_type, sku, price):
        """Store the product's identity and start both counters at zero."""
        self.p_name, self.p_type = p_name, p_type
        self.sku, self.price = sku, price
        self.stock = 0
        self.total_cases_ordered = 0

    def __str__(self):
        """Return 'name - type - sku - price'."""
        parts = (self.p_name, self.p_type, self.sku, self.price)
        return ' - '.join(str(part) for part in parts)

    def restock(self):
        """Order whole cases of 12 until stock reaches the type's maximum.

        The maximum is 12 items times the case count configured for this
        product type in params.max_stock_levels.
        """
        level_attr = self._LEVEL_ATTR_BY_TYPE.get(self.p_type, 'other')
        max_items = 12 * getattr(params.max_stock_levels, level_attr)

        shortfall = max_items - self.stock
        # Ceiling division: a partly needed case is still a full case ordered.
        cases = -(-shortfall // 12)

        self.total_cases_ordered += cases
        self.stock += 12 * cases


class Inventory:
    """In-memory product lists, one per category, used by the simulation.

    Everything is kept on the class and every method is called on the class
    (``Inventory.select(...)``); no instance is created.
    """

    # Imported in the class body so that Enum is in scope for TYPE below.
    from enum import Enum
    import random

    # Category keys, in the order the lists are created and later shuffled.
    _CATEGORY_KEYS = (
        'milk',
        'cereal',
        'baby food',
        'diapers',
        'bread',
        'peanut butter',
        'jelly jam',
        'other',
    )

    # itemType text in Products1.txt -> category key; anything else is 'other'.
    _ITEM_TYPE_TO_KEY = {
        'Milk': 'milk',
        'Cereal': 'cereal',
        'Baby Food': 'baby food',
        'Diapers': 'diapers',
        'Bread': 'bread',
        'Peanut Butter': 'peanut butter',
        'Jelly/Jam': 'jelly jam',
    }

    products = {key: [] for key in _CATEGORY_KEYS}

    class TYPE(Enum):
        """Product categories; each value is a key of Inventory.products."""
        OTHER = 'other'
        MILK = 'milk'
        CEREAL = 'cereal'
        BABY_FOOD = 'baby food'
        DIAPERS = 'diapers'
        BREAD = 'bread'
        PEANUT_BUTTER = 'peanut butter'
        JELLY_JAM = 'jelly jam'

    @staticmethod
    def select(p_type):
        """Take one item of category `p_type` off the shelf.

        Starts at a random product of that category and walks forward
        (wrapping around) to the first product that is in stock, decrements
        its stock and returns it.

        Returns None when the category is empty or everything in it is out
        of stock, leaving the caller to deal with it.
        Raises AssertionError when `p_type` is not an Inventory.TYPE.
        """
        assert isinstance(p_type, Inventory.TYPE), f'Incorrect type for Inventory.select(). {type(p_type)} received.'
        candidates = Inventory.products[p_type.value]
        count = len(candidates)
        if count == 0:
            # random.randint(0, -1) would raise; an empty category has no stock.
            return None

        start = random.randint(0, count - 1)
        # Visit every product exactly once. The earlier sentinel-index scan
        # gave up on reaching the last candidate without checking its stock,
        # and always returned None for a category with a single product.
        for offset in range(count):
            product = candidates[(start + offset) % count]
            if product.stock > 0:
                product.stock -= 1
                return product
        return None

    @staticmethod
    def populate_lists():
        """Reload every category list from Products1.txt and shuffle each one.

        Does nothing except print a notice when the 'Create Products Table'
        switch in ARGS is off. Each product's price is its base price (with
        the '$' stripped) times params.group.price_multiplier, rounded to two
        decimals.
        """
        if not ARGS['Create Products Table']:
            print("You don't want to create the Products table")
            return

        # Jupyter makes lists persist in memory after I run each cell.
        # I delete the existing lists in order to not have the same product appear multiple times.
        Inventory.products = {key: [] for key in Inventory._CATEGORY_KEYS}

        with open('Products1.txt', 'r', newline='') as csvfile:
            # The format is given inline (pipe-delimited, no quoting) so this
            # method does not rely on a dialect registered by other code.
            reader = csv.DictReader(csvfile, delimiter='|', quoting=csv.QUOTE_NONE)

            for row in reader:
                product_type = row.get('itemType')

                price = float(Decimal(row.get('BasePrice').strip('$')))
                price = round(price * params.group.price_multiplier, 2)

                current_product = Product(
                    p_name=row.get('Product Name'),
                    p_type=product_type,
                    sku=row.get('SKU'),
                    price=price,
                )

                category = Inventory._ITEM_TYPE_TO_KEY.get(product_type, 'other')
                Inventory.products[category].append(current_product)

        # We want the customer to randomly select another item of the same
        # type if the item is out of stock. The select() method moves on to
        # the next index, and this relies on the products being in random order.
        for product_list in Inventory.products.values():
            random.shuffle(product_list)
        print('Products in memory successfully populated.')

    @staticmethod
    def restock_milk():
        """Restock every product in the milk category."""
        for milk_product in Inventory.products['milk']:
            milk_product.restock()

    @staticmethod
    def restock_all():
        """Restock every product in every category."""
        for product_list in Inventory.products.values():
            for product in product_list:
                product.restock()
        

class simulate:
    """Day-by-day sales simulation that writes the transaction rows.

    Everything is kept on the class and every method is called on the class
    (``simulate.run()``); no instance is created.
    """

    num_days = 0  # days simulated so far; drives the periodic progress line
    start_date = params.simulation.start_date
    end_date = params.simulation.end_date

    class _Customer:
        """Shopping state of one customer visit."""

        def __init__(self, date_str, customer_number):
            self.date = date_str
            self.customer_number = customer_number
            self.num_items = 0
            # Number of items this customer will buy at most.
            self.max_items = random.randint(1, params.group.maximum_items)
            self.running_total = 0

    @staticmethod
    def _chance(percent):
        """Return True with a probability of `percent` in 100."""
        return random.randint(1, 100) <= percent

    @staticmethod
    def _pick_and_buy(customer, p_type):
        """Select one product of `p_type` and record its purchase."""
        simulate.buy(customer, Inventory.select(p_type))

    @staticmethod
    def run():
        """Recreate transactions_sales and simulate every day in the range.

        Does nothing except print a notice when the 'Populate Sales
        Transactions Data' switch in ARGS is off.
        """
        if not ARGS['Populate Sales Transactions Data']:
            print("You don't want to populate the Sales Transactions table")
            return

        db.execute_sql('DROP TABLE IF EXISTS transactions_sales')
        db.execute_sql(TABLE_DEFINITIONS['transactions_sales'])
        db.commit()

        current_date = simulate.start_date
        while current_date <= simulate.end_date:
            simulate.simulate_one_day(current_date)
            current_date += timedelta(1)

    @staticmethod
    def simulate_one_day(current_date):
        """Restock, simulate all of one day's customers, then commit.

        Raises AssertionError when `current_date` is not a date.
        """
        assert isinstance(current_date, date), \
            f"""Error! This function expected a date. 
                Got {type(current_date)} instead"""

        if current_date == simulate.start_date:
            Inventory.restock_all()

        # Milk is restocked all 7 days of the week.
        # Everything else is restocked on Tuesday, Thursday, and Saturday.
        # weekday() is 0 for Monday, so even values are Mon/Wed/Fri/Sun
        # (milk only) and odd values are Tue/Thu/Sat (everything, milk included).
        if current_date.weekday() % 2 == 0:
            Inventory.restock_milk()
        else:
            Inventory.restock_all()

        simulate.num_days += 1

        # weekday() >= 5 is Saturday or Sunday.
        increase = params.group.weekend_increase if current_date.weekday() >= 5 else 0

        date_str = current_date.strftime('%Y-%m-%d')

        daily_customers = random.randint(params.group.customers_low + increase, params.group.customers_high + increase)

        for customer_number in range(daily_customers):
            simulate.simulate_one_customer(date_str, customer_number + 1)

        if (simulate.num_days % params.debug.display_daily_commits == 0) \
            or (current_date == simulate.start_date) \
            or (current_date == simulate.end_date):

            print(f'{datetime.now()} - {date_str} - {db.commit_pending} records created and committing')
        db.commit()

    @staticmethod
    def simulate_one_customer(date_str, customer_number):
        """Simulate one customer's basket and write its rows.

        The customer first makes the probability-driven category purchases
        below, then buys 'other' products until max_items is reached, and
        finally the customer total is written.

        Raises AssertionError when `date_str` is not a str or
        `customer_number` is not an int.
        """
        # These messages used to reference an undefined name (current_date),
        # so a failed check raised NameError instead of AssertionError.
        assert isinstance(date_str, str), \
            f"Error! This function expected a str. Got {type(date_str)} instead"
        assert isinstance(customer_number, int), \
            f"Error! This function expected an int. Got {type(customer_number)} instead"

        customer_data = simulate._Customer(date_str, customer_number)
        kind = Inventory.TYPE

        # Milk 70%; cereal 50% after milk, otherwise 5%.
        if simulate._chance(70):
            simulate._pick_and_buy(customer_data, kind.MILK)
            if simulate._chance(50):
                simulate._pick_and_buy(customer_data, kind.CEREAL)
        elif simulate._chance(5):
            simulate._pick_and_buy(customer_data, kind.CEREAL)

        # Baby food 20%; diapers 80% after baby food, otherwise 1%.
        if simulate._chance(20):
            simulate._pick_and_buy(customer_data, kind.BABY_FOOD)
            if simulate._chance(80):
                simulate._pick_and_buy(customer_data, kind.DIAPERS)
        elif simulate._chance(1):
            simulate._pick_and_buy(customer_data, kind.DIAPERS)

        # Bread 50%.
        if simulate._chance(50):
            simulate._pick_and_buy(customer_data, kind.BREAD)

        # Peanut butter 10%; jelly/jam 90% after peanut butter, otherwise 5%.
        if simulate._chance(10):
            simulate._pick_and_buy(customer_data, kind.PEANUT_BUTTER)
            if simulate._chance(90):
                simulate._pick_and_buy(customer_data, kind.JELLY_JAM)
        elif simulate._chance(5):
            simulate._pick_and_buy(customer_data, kind.JELLY_JAM)

        # Fill the rest of the basket with 'other' products. buy() lowers
        # max_items to num_items when nothing is left, which ends this loop.
        while customer_data.num_items < customer_data.max_items:
            simulate._pick_and_buy(customer_data, kind.OTHER)

        simulate.write_customer_total(customer_data)

    @staticmethod
    def buy(customer, product):
        """Add `product` to the customer's basket and write one sales row.

        A `product` of None means nothing was left to select; the customer
        then stops buying. A failed insert is reported and otherwise ignored.
        """
        if product is None:
            # In the rare case that there is no inventory left for a
            # particular product, the current customer stops buying.
            customer.max_items = customer.num_items
            return

        customer.num_items += 1
        # This was `!=`, a comparison whose result was discarded, so the
        # total never moved off zero.
        customer.running_total += product.price
        db.commit_pending += 1
        try:
            db.execute_sql_values('insert into transactions_sales values (?, ?, ?, ?, ?, ?)',
                                        (customer.date,customer.customer_number,product.sku,product.price, product.stock, product.total_cases_ordered))

        except Exception as err:
            print("Error writing transactions_sales database table", err)

    @staticmethod
    def write_customer_total(customer):
        """Write the customer's basket total to transactions_customers.

        A failed insert is reported and otherwise ignored.
        """
        # NOTE(review): nothing in this class creates transactions_customers
        # (run() only rebuilds transactions_sales); the table is presumably
        # expected to exist already with three columns - confirm.
        db.commit_pending += 1
        # Prices carry two decimals; rounding removes float summation noise.
        total = round(customer.running_total, 2)
        try:
            db.execute_sql_values('INSERT INTO transactions_customers VALUES (?, ?, ?)',
                                     (customer.date, customer.customer_number, total))

        except Exception as err:
            print("Error writing transactions_customers database table", err)

        
def run():
    """Build the database: connect, load products, simulate sales, close.

    The connection is closed even when a build step raises, so a failed run
    does not leave store.db open. Note that db.close() commits before closing.
    """
    db.connect()
    try:
        create_products_table()
        simulate.run()
    finally:
        db.close()

run()

Database Successfully Connected To
Committed 2075 products
Products in memory successfully populated.
2025-02-24 15:42:29.609031 - 2024-01-01 - 50828 records created and committing
2025-02-24 15:42:32.047980 - 2024-01-14 - 50889 records created and committing
2025-02-24 15:42:34.983975 - 2024-01-28 - 51821 records created and committing
2025-02-24 15:42:38.453155 - 2024-02-11 - 51274 records created and committing
2025-02-24 15:42:42.067997 - 2024-02-25 - 51509 records created and committing
2025-02-24 15:42:45.711015 - 2024-03-10 - 50583 records created and committing
2025-02-24 15:42:49.117948 - 2024-03-24 - 51407 records created and committing
2025-02-24 15:42:52.587757 - 2024-04-07 - 51315 records created and committing
2025-02-24 15:42:56.087862 - 2024-04-21 - 53151 records created and committing
2025-02-24 15:42:59.567901 - 2024-05-05 - 52052 records created and committing
2025-02-24 15:43:03.085255 - 2024-05-19 - 50392 records created and committing
2025-02-24 15:43:06.564709 - 

NameError: name 'customers_transactions_table' is not defined

## Testbed
Inspect anything you want.

In [2]:
# Testbed query: print up to 100 sales rows whose recorded items_left is negative.
# db_debug.execute_sql('SELECT * from transactions_sales LIMIT 100')
db_debug.execute_sql('SELECT * from transactions_sales WHERE items_left < 0 LIMIT 100')

In [3]:
# Testbed query: print every transactions_customers row dated 2024-12-31.
db_debug.execute_sql("SELECT * from transactions_customers WHERE date = '2024-12-31'")

('2024-12-31', 1, 279.22)
('2024-12-31', 2, 297.82)
('2024-12-31', 3, 330.8)
('2024-12-31', 4, 69.91)
('2024-12-31', 5, 267.35)
('2024-12-31', 6, 231.97)
('2024-12-31', 7, 273.92)
('2024-12-31', 8, 185.5)
('2024-12-31', 9, 151.51)
('2024-12-31', 10, 246.12)
('2024-12-31', 11, 303.76)
('2024-12-31', 12, 51.76)
('2024-12-31', 13, 103.6)
('2024-12-31', 14, 226.27)
('2024-12-31', 15, 31.96)
('2024-12-31', 16, 285.91)
('2024-12-31', 17, 249.26)
('2024-12-31', 18, 138.95)
('2024-12-31', 19, 211.96)
('2024-12-31', 20, 244.48)
('2024-12-31', 21, 230.76)
('2024-12-31', 22, 264.31)
('2024-12-31', 23, 102.54)
('2024-12-31', 24, 99.66)
('2024-12-31', 25, 130.68)
('2024-12-31', 26, 155.51)
('2024-12-31', 27, 212.17)
('2024-12-31', 28, 120.6)
('2024-12-31', 29, 270.49)
('2024-12-31', 30, 124.72)
('2024-12-31', 31, 222.22)
('2024-12-31', 32, 175.74)
('2024-12-31', 33, 147.26)
('2024-12-31', 34, 89.99)
('2024-12-31', 35, 7.06)
('2024-12-31', 36, 98.8)
('2024-12-31', 37, 27.52)
('2024-12-31', 38, 237.4