## Data Warehousing HW 2 - Grocery Store Simulation Part 1
## Anthony Ung, Sean Jerzewski, Gideon Kipkorir

## Populate Table

We are using Chelsea Cantone's code that was provided for us in the Week 2 ZIP  

Chelsea's code connected to a Postgres server; we refactored it to connect to a local SQLite database file (`store.db`) instead.  
We also created a dimension table to support lookups of product names and SKUs.

In [2]:
import sqlite3 as lite
import csv
from datetime import datetime


def GroceryDatabaseAccess():
    """Holder for the database state shared by the functions in this cell.

    The function object is used as a simple namespace: the connection,
    cursor and commit bookkeeping are stored as attributes on it (see the
    assignments right below). Calling it only touches those attributes and
    returns None.
    """
    GroceryDatabaseAccess.groceryDBConnection
    GroceryDatabaseAccess.groceryDBCursor
    GroceryDatabaseAccess.salesTransactionsToCommitCount
    GroceryDatabaseAccess.maxTransactionsBeforeCommit


# Shared state, kept as attributes on the function object.
GroceryDatabaseAccess.groceryDBConnection = None  # assigned in connectToDatabase()
GroceryDatabaseAccess.groceryDBCursor = None  # assigned in connectToDatabase()
GroceryDatabaseAccess.salesTransactionsToCommitCount = 0  # inserts since the last commit
GroceryDatabaseAccess.maxTransactionsBeforeCommit = 10000  # commit threshold used by writeSalesTransaction()

def build_table():
    """Recreate the two tables and load the product dimension from disk.

    Drops and recreates ``sales_transactions`` and ``products`` through the
    shared cursor, then reads the pipe-delimited ``Products1.txt`` and
    inserts one ``(sku, product_name)`` row per record. Commits every
    10000 rows and once more after the last row.
    """
    cursor = GroceryDatabaseAccess.groceryDBCursor

    # Fact table: one row per item sold.
    cursor.execute('DROP TABLE IF EXISTS sales_transactions')
    cursor.execute('CREATE TABLE sales_transactions(date TEXT, customerNumber INT, sku INT, salesPrice REAL)')
    print('Sales Transactions table successfully created')

    # Dimension table: SKU -> product name lookups.
    cursor.execute('DROP TABLE IF EXISTS products')
    cursor.execute('CREATE TABLE products(sku INT, product_name TEXT)')
    print('Products table successfully created')

    csv.register_dialect('piper', delimiter='|', quoting=csv.QUOTE_NONE)

    with open('Products1.txt', 'r') as product_file:
        loaded = 0  # stays 0 when the file holds no data rows

        reader = csv.DictReader(product_file, dialect='piper')
        for loaded, record in enumerate(reader, start=1):
            # NOTE(review): the simulation cell reads this column as
            # 'ProductName' (no space) - confirm which header the file uses;
            # a wrong key makes .get() return None silently.
            values = (record.get('SKU'), record.get('Product Name'))
            cursor.execute('insert into products values (?, ?)', values)
            if loaded % 10000 == 0:
                GroceryDatabaseAccess.groceryDBConnection.commit()
                print(f"Committed row {loaded}")

        # Final commit picks up the rows after the last full batch.
        GroceryDatabaseAccess.groceryDBConnection.commit()
        print(f"Committed row {loaded}")


def connectToDatabase():
    """Open ``store.db``, publish the connection/cursor, and rebuild the tables.

    The connection and a cursor are stored on ``GroceryDatabaseAccess`` so
    the other helpers can reach them, then ``build_table()`` recreates the
    schema and loads the products.

    Returns:
        True when the database is ready; False when opening the database or
        building the tables failed (the error is printed, and any connection
        that was opened is closed again).
    """
    print("Connecting to the grocerydb database")
    try:
        con = lite.connect(r'store.db')
    except lite.Error as err:
        print("Unable to make the grocery database", err)
        return False
    print('Database successfully created')

    GroceryDatabaseAccess.groceryDBConnection = con
    GroceryDatabaseAccess.groceryDBCursor = con.cursor()

    try:
        build_table()
    except (lite.Error, OSError) as err:
        # OSError covers a missing or unreadable Products1.txt.
        print("Unable to make the grocery database", err)
        # Do not leave a half-initialised connection behind.
        con.close()
        GroceryDatabaseAccess.groceryDBConnection = None
        GroceryDatabaseAccess.groceryDBCursor = None
        return False
    return True
       
def closeDatabaseConnection():
    """Commit whatever is still pending, then close the shared connection."""
    connection = GroceryDatabaseAccess.groceryDBConnection

    # Rows written since the last batch commit are flushed here.
    print("Committing any uncommited transactions.")
    connection.commit()

    print("Closing the grocerydb database connection")
    connection.close()
   
def writeSalesTransaction(date: datetime, customerNumber: int, sku: int, salesPrice: float):
    """Insert one row into ``sales_transactions`` and commit in batches.

    Args:
        date: Day of the sale. The simulation in this file passes a
            ``'YYYY-MM-DD'`` string rather than a ``datetime``.
        customerNumber: Customer number, as supplied by the caller.
        sku: Product SKU. The simulation passes the value as read from the
            products file, i.e. a string.
        salesPrice: Price charged for the item.

    The pending-row counter is advanced only after a successful insert, and
    a commit is issued once it reaches
    ``GroceryDatabaseAccess.maxTransactionsBeforeCommit``. Any error is
    printed and swallowed so that one bad row does not stop the simulation.
    """
    db = GroceryDatabaseAccess
    try:
        db.groceryDBCursor.execute(
            'insert into sales_transactions values (?, ?, ?, ?)',
            (date, customerNumber, sku, salesPrice))
        db.salesTransactionsToCommitCount += 1

        # ">=" rather than "==": still commits if the threshold is lowered
        # while rows are pending.
        if db.salesTransactionsToCommitCount >= db.maxTransactionsBeforeCommit:
            db.groceryDBConnection.commit()
            db.salesTransactionsToCommitCount = 0

    except Exception as err:
        print("Error writing sales_transactions database table", err)

# Switch for building the database directly from this cell; off by default.
RUN = False


def run():
    """Connect to the database (which also rebuilds the tables) when RUN is set."""
    if not RUN:
        return
    connectToDatabase()


run()


In [3]:
"""
Note: These were Chelsea's original comments.

Code from Leonchuck.py code

- Modified to not write to CSV
- Fixed bug with writing price and sku
- Updated Grocery Store parameters to match Team 3 parameters
- Modified to fix fenceposts regarding the simulation start and end date

"""

import csv
import sys
from decimal import Decimal
import random
from datetime import date, datetime, timedelta
from collections import Counter

# Products1.txt is pipe-delimited and unquoted.
csv.register_dialect('piper', delimiter='|', quoting=csv.QUOTE_NONE)

# Group 8 parameters
price_multiplier = 1.2
customers_low, customers_high = 1020, 1060
weekend_increase = 75
maximum_items = 90

# The main loop advances the date before simulating a day, so the start
# date is the day *before* the first simulated day.
simulation_start_date = date(2023, 12, 31)
simulation_end_date = date(2024, 12, 31)

# Running totals
total_items_bought = 0
customer_count = 0

# One entry per product, in file order (parallel lists)
manufacturer_list, product_name_list = [], []
size_list, item_type_list = [], []
sku_list, base_price_list = [], []

# Price/SKU pairs for each primary product type (parallel lists)
milk_price, milk_sku = [], []
cereal_price, cereal_sku = [], []
baby_food_price, baby_food_sku = [], []
diapers_price, diapers_sku = [], []
bread_price, bread_sku = [], []
peanut_butter_price, peanut_butter_sku = [], []
jelly_jam_price, jelly_jam_sku = [], []

all_items = []

# Read the product file once: every product goes into the general lists,
# and products of a primary type are also filed under that type's
# price/SKU lists. Prices are stored already scaled by price_multiplier.
_primary_buckets = {
    'Milk': (milk_price, milk_sku),
    'Cereal': (cereal_price, cereal_sku),
    'Baby Food': (baby_food_price, baby_food_sku),
    'Diapers': (diapers_price, diapers_sku),
    'Bread': (bread_price, bread_sku),
    'Peanut Butter': (peanut_butter_price, peanut_butter_sku),
    'Jelly/Jam': (jelly_jam_price, jelly_jam_sku),
}

with open('Products1.txt', 'r') as csvfile:
    for row in csv.DictReader(csvfile, dialect='piper'):
        # Drop the "$" sign, then apply the group's price multiplier.
        adjusted_price = float(Decimal(row['BasePrice'].strip('$'))) * price_multiplier

        manufacturer_list.append(row.get('Manufacturer'))
        # NOTE(review): the database cell reads this column as
        # 'Product Name' (with a space) - confirm which header is right.
        product_name_list.append(row.get('ProductName'))
        size_list.append(row.get('Size'))
        item_type_list.append(row.get('itemType'))
        sku_list.append(row.get('SKU'))
        base_price_list.append(adjusted_price)

        # Products outside the primary types only live in the general lists.
        bucket = _primary_buckets.get(row['itemType'])
        if bucket is not None:
            type_prices, type_skus = bucket
            type_prices.append(adjusted_price)
            type_skus.append(row['SKU'])


def get_milk_sku_and_price():
    """Return the (sku, price) pair of one randomly chosen milk product."""
    pick = random.randrange(len(milk_sku))
    chosen_sku = milk_sku[pick]
    chosen_price = milk_price[pick]
    return chosen_sku, chosen_price


def cereal_sku_and_price():
    """Return the (sku, price) pair of one randomly chosen cereal product."""
    pick = random.randrange(len(cereal_sku))
    chosen_sku = cereal_sku[pick]
    chosen_price = cereal_price[pick]
    return chosen_sku, chosen_price


def baby_food_sku_and_price():
    """Return the (sku, price) pair of one randomly chosen baby food product."""
    pick = random.randrange(len(baby_food_sku))
    chosen_sku = baby_food_sku[pick]
    chosen_price = baby_food_price[pick]
    return chosen_sku, chosen_price


def diapers_sku_and_price():
    """Return the (sku, price) pair of one randomly chosen diapers product."""
    pick = random.randrange(len(diapers_sku))
    chosen_sku = diapers_sku[pick]
    chosen_price = diapers_price[pick]
    return chosen_sku, chosen_price


def bread_sku_and_price():
    """Return the (sku, price) pair of one randomly chosen bread product."""
    pick = random.randrange(len(bread_sku))
    chosen_sku = bread_sku[pick]
    chosen_price = bread_price[pick]
    return chosen_sku, chosen_price


def peanut_butter_sku_and_price():
    """Return the (sku, price) pair of one randomly chosen peanut butter product."""
    pick = random.randrange(len(peanut_butter_sku))
    chosen_sku = peanut_butter_sku[pick]
    chosen_price = peanut_butter_price[pick]
    return chosen_sku, chosen_price


def jelly_jam_sku_and_price():
    """Return the (sku, price) pair of one randomly chosen jelly/jam product."""
    pick = random.randrange(len(jelly_jam_sku))
    chosen_sku = jelly_jam_sku[pick]
    chosen_price = jelly_jam_price[pick]
    return chosen_sku, chosen_price


def get_random_item_sku_and_price():
    """Return the (sku, price) pair of one product drawn from the whole catalogue."""
    pick = random.randrange(len(sku_list))
    chosen_sku = sku_list[pick]
    chosen_price = base_price_list[pick]
    return chosen_sku, chosen_price


def _chance(percent):
    """Return True when an integer roll of 1-100 is at most *percent*."""
    return random.randint(1, 100) <= percent


def _record_purchase(pick_item, date_str, customer_number):
    """Pick one product with *pick_item*, write it as a sale, and return 1."""
    sku, price = pick_item()
    writeSalesTransaction(date_str, customer_number, sku, price)
    return 1


def _simulate_customer(date_str, customer_number, target_items):
    """Write all purchases of one customer visit.

    The correlated staples are rolled first (milk/cereal, baby food/diapers,
    bread, peanut butter/jelly); the basket is then filled with random
    catalogue items until it holds *target_items* items. Staples already
    bought are kept even when they exceed *target_items*.
    """
    bought = 0

    # Milk (70%); cereal is far more likely alongside milk (50% vs 5%).
    if _chance(70):
        bought += _record_purchase(get_milk_sku_and_price, date_str, customer_number)
        if _chance(50):
            bought += _record_purchase(cereal_sku_and_price, date_str, customer_number)
    elif _chance(5):
        bought += _record_purchase(cereal_sku_and_price, date_str, customer_number)

    # Baby food (20%); diapers mostly go with baby food (80% vs 1%).
    if _chance(20):
        bought += _record_purchase(baby_food_sku_and_price, date_str, customer_number)
        if _chance(80):
            bought += _record_purchase(diapers_sku_and_price, date_str, customer_number)
    elif _chance(1):
        bought += _record_purchase(diapers_sku_and_price, date_str, customer_number)

    # Bread (50%), independent of everything else.
    if _chance(50):
        bought += _record_purchase(bread_sku_and_price, date_str, customer_number)

    # Peanut butter (10%); jelly/jam mostly goes with it (90% vs 5%).
    if _chance(10):
        bought += _record_purchase(peanut_butter_sku_and_price, date_str, customer_number)
        if _chance(90):
            bought += _record_purchase(jelly_jam_sku_and_price, date_str, customer_number)
    elif _chance(5):
        bought += _record_purchase(jelly_jam_sku_and_price, date_str, customer_number)

    # Fill the rest of the basket with random items from the whole catalogue.
    while bought < target_items:
        bought += _record_purchase(get_random_item_sku_and_price, date_str, customer_number)


current_date = simulation_start_date
daily_customers = 0
my_items = 0
customer_number = 1

hasConnected = connectToDatabase()
if not hasConnected:
    print("Exiting Code ...")
    sys.exit(1)

try:
    # The date is advanced *before* each day is simulated, so the first
    # simulated day is the day after simulation_start_date and the last is
    # simulation_end_date itself. Driving the loop from the end date (rather
    # than a fixed 365 iterations) covers all 366 days of leap-year 2024.
    while current_date < simulation_end_date:
        current_date += timedelta(1)

        # weekday() >= 5 is Saturday or Sunday: more customers on weekends.
        increase = weekend_increase if current_date.weekday() >= 5 else 0

        date_str = current_date.strftime('%Y-%m-%d')
        print(f"Day {date_str}")

        daily_customers = random.randint(customers_low + increase, customers_high + increase)

        # Customer numbers restart at 1 every day.
        for customer_number in range(1, daily_customers + 1):
            customer_count += 1
            my_items = random.randint(1, maximum_items)
            _simulate_customer(date_str, customer_number, my_items)
finally:
    # Commit pending rows and close the connection even if the run fails.
    closeDatabaseConnection()


Connecting to the grocerydb database
Database successfully created
Sales Transactions table successfully created
Products table successfully created
Committed row 2075
Day 2024-01-01
Day 2024-01-02
Day 2024-01-03
Day 2024-01-04
Day 2024-01-05
Day 2024-01-06
Day 2024-01-07
Day 2024-01-08
Day 2024-01-09
Day 2024-01-10
Day 2024-01-11
Day 2024-01-12
Day 2024-01-13
Day 2024-01-14
Day 2024-01-15
Day 2024-01-16
Day 2024-01-17
Day 2024-01-18
Day 2024-01-19
Day 2024-01-20
Day 2024-01-21
Day 2024-01-22
Day 2024-01-23
Day 2024-01-24
Day 2024-01-25
Day 2024-01-26
Day 2024-01-27
Day 2024-01-28
Day 2024-01-29
Day 2024-01-30
Day 2024-01-31
Day 2024-02-01
Day 2024-02-02
Day 2024-02-03
Day 2024-02-04
Day 2024-02-05
Day 2024-02-06
Day 2024-02-07
Day 2024-02-08
Day 2024-02-09
Day 2024-02-10
Day 2024-02-11
Day 2024-02-12
Day 2024-02-13
Day 2024-02-14
Day 2024-02-15
Day 2024-02-16
Day 2024-02-17
Day 2024-02-18
Day 2024-02-19
Day 2024-02-20
Day 2024-02-21
Day 2024-02-22
Day 2024-02-23
Day 2024-02-24
Day 202