## Build Data Mart
#### Contributors:
##### Team 8: Anthony Ung, Sean Jerzewski, Gideon Kipkorir
##### Team 9: Rohith, Sneha Dasarla
##### Team 10: Anmol Brahmbhatt, Nikita Brahmbhatt, Satya

## 0. Dependencies and Global Variables

In [1]:
import os
from enum import Enum
import csv
import sqlite3 as lite
from decimal import Decimal
from datetime import date, timedelta

In [2]:
# Registry of database wrapper objects, keyed by a human-readable label.
# It starts empty; a later cell registers the data mart handle in it.
DB_HANDLES = {}

#

## 1. Gather the file paths
  
  
## IMPORTANT: 
#### Most of these files are untracked on GitHub.
####   &emsp; &emsp; It is each team member's individual responsibility
####   &emsp; &emsp; to build the Database and CSV files for themselves using the other Jupyter notebooks.

In [3]:
# Files this notebook expects to find, keyed by a short label. The paths are
# relative, so they resolve against the notebook's working directory. Every
# entry is checked for existence before any table is built.
FILE_PATHS = {
    'DB_TEAM_8' : './../0_SD_Team_8/store_team_8.db',
    'DB_TEAM_9' : './../0_SD_Team_9/grocery_store.db',
    'DB_TEAM_10' : './../0_SD_Team_10/grocery_team_10_v2.db',
    'PRODUCTS_CSV' : './../2_Product_Mapping/PRODUCTS_MAPPED.csv'
}

# Location of the SQLite data mart file that this notebook deletes and rebuilds.
DATA_MART_PATH = './Region_C_Data_Mart.db'

In [4]:
# Check every required input up front and report on each one, so that a
# missing file stops the notebook before any database work begins.
ALL_FILES_OK = True

for label, path in FILE_PATHS.items():
    if os.path.isfile(path):
        status = 'OK'
    else:
        status = 'MISSING'
        ALL_FILES_OK = False
    print(f"{status} - {label} - '{path}'")

if not ALL_FILES_OK:
    raise SystemExit(
        "\nERROR!\nYou are missing files!\nRead and Follow the Cell instructions provided."
    )

OK - DB_TEAM_8 - './../0_SD_Team_8/store_team_8.db'
OK - DB_TEAM_9 - './../0_SD_Team_9/grocery_store.db'
OK - DB_TEAM_10 - './../0_SD_Team_10/grocery_team_10_v2.db'
OK - PRODUCTS_CSV - './../2_Product_Mapping/PRODUCTS_MAPPED.csv'


#

## 2. Compile the table definitions

In [5]:
# TABLE_DEFINITIONS is a dict as follows:
#     Key   - the name of the table in the database
#     Value - the CREATE TABLE statement for that table
# The table created by each statement must have the same name as its key,
# because the key is also used to DROP the table before it is rebuilt.
# Several definitions are not used yet; they are kept for a later HW.
TABLE_DEFINITIONS = {
    'date': (
        'CREATE TABLE date('
            'DateKey INT, '
            'PrettyDate TEXT, '
            'DayNumberInMonth INT, '
            'DayNumberInYear INT, '
            'WeekNumberInYear INT, '
            'MonthNum INT, '
            'MonthTxt TEXT, '
            'Quarter INT, '
            'Year INT,'
            'FiscalYear INT, '
            'isHoliday INT, '
            'isWeekend INT, '
            'Season TEXT' ')'
    ),

    'product': (
        'CREATE TABLE product('
            'ProductKey INT,'
            'sku INT,'
            'product_name TEXT, '
            'product_class_id INT, '
            'subcategory TEXT, '
            'category TEXT, '
            'department TEXT, '
            'product_family TEXT, '
            'size TEXT, '
            'case_count INT, '
            'BrandName TEXT, '
            'Manufacturer TEXT, '
            'Supplier TEXT, '
            'CostToStore REAL)'
    ),

    'product_metadata': (
        'CREATE TABLE product_metadata('
            'ProductKey INT,'
            'sku INT,'
            'old_type TEXT, '
            'meta_code INT,'
            'meta_mapped_by TEXT, '
            'meta_reason TEXT)'
    ),

    'store': (
        'CREATE TABLE store('
            'StoreKey INT, '
            'StoreManager TEXT, '
            'StoreStreetAddr TEXT, '
            'StoreTown TEXT, '
            'StoreZipCode TEXT, '
            'StorePhoneNumber TEXT, '
            'StoreState TEXT' ')'
    ),

    'sales_transactions': (
        'CREATE TABLE sales_transactions('
            'DateKey INT, '
            'DailyCustomerNumber INT, '
            'ProductKey INT, '
            'StoreKey INT, '
            'QuantitySold INT, '
            'TotalDollarSales REAL, '
            'TotalCostToStore REAL, '
            'GrossProfit REAL)'
    ),

    # Fixed: this statement used to create "sales_transactions", so building
    # 'sales_daily' either created the wrong table or failed because
    # sales_transactions already existed.
    'sales_daily': (
        'CREATE TABLE sales_daily('
            'DateKey INT, '
            'DailyCustomerNumber INT, '
            'ProductKey INT, '
            'StoreKey INT, '
            'QuantitySoldToday INT, '
            'CostOfItemsSold REAL, '
            'SalesTotal REAL, '
            'GrossProfit REAL)'
    ),

    'inventory_daily': (
        'CREATE TABLE inventory_daily('
            'DateKey INT, '
            'ProductKey INT, '
            'StoreKey INT, '
            'NumAvailable INT, '
            'CostToStoreItem FLOAT, '
            'CostToStore FLOAT, '
            'NumCasesPurchasedToDate INT)'
    ),

    'inventory_quarterly': (
        'CREATE TABLE inventory_quarterly('
            'ProductKey INT, '
            'StoreKey INT, '
            'Quarter INT, '
            'Year INT, '
            'CasesPurchasedToDate INT, '
            'CasesPurchasedThisQuarter INT, '
            'CasesOnHand INT, '
            'TotalCostToStoreThisQuarter FLOAT, '
            'TotalSoldByStoreThisQuarter FLOAT, '
            'TotalCostToStoreThisYTD FLOAT, '
            'TotalSoldByStoreThisYTD FLOAT)'
    ),
}


#

## 3. Initialize the Database File and the Database API
#### Note: The first cell in this block is destructive.
#### If you need to see multiple versions of the database side-by-side, rename the db file before rerunning this notebook.

In [6]:
# Destructive: delete any existing data mart file so that the cells below
# rebuild their tables into a fresh database file.
if os.path.isfile(DATA_MART_PATH):
    os.remove(DATA_MART_PATH)

In [7]:
class db_options(Enum):
    """Result-handling modes accepted by ``db.execute_sql``.

    The values are tested as bit flags (``options.value & MODE.value``).
    """
    DEFAULT = 0          # run the statement, do not fetch rows
    RETURN_RESULTS = 1   # fetch every row and return the list
    PRINT_RESULTS = 2    # fetch every row and print each one


class db:
    """One common point of interaction with the team's SQLite database.

    Everything that writes to the database goes through this API.
    Creating an instance only records the file name; call ``connect()``
    before any other method, and ``close()`` when finished.
    """

    def __init__(self, name):
        """Remember the database file name; no connection is opened here."""
        self.name = rf"{name}"

    def connect(self):
        """Open a connection to ``self.name`` and create a cursor on it."""
        self.con = lite.connect(self.name)
        self.cur = self.con.cursor()

    def build_table(self, name):
        """Drop table ``name`` if it exists and recreate it from TABLE_DEFINITIONS.

        Raises:
            KeyError: if ``name`` has no entry in TABLE_DEFINITIONS. The
                lookup happens before the DROP, so an unknown name no longer
                destroys an existing table before failing.
        """
        definition = TABLE_DEFINITIONS[name]
        self.execute_sql(f'DROP TABLE IF EXISTS {name}')
        self.execute_sql(definition)

    def execute_sql(self, sql, options=db_options.DEFAULT):
        """Execute one SQL statement on the current cursor.

        Args:
            sql: the statement text.
            options: a ``db_options`` member selecting what to do with rows.

        Returns:
            The list of fetched rows when RETURN_RESULTS is set, else None.
            RETURN_RESULTS takes precedence over PRINT_RESULTS.
        """
        cursor = self.cur.execute(sql)
        if options.value & db_options.RETURN_RESULTS.value:
            return cursor.fetchall()
        if options.value & db_options.PRINT_RESULTS.value:
            for row in cursor.fetchall():
                print(row)
        return None

    def execute_sql_values(self, sql, values):
        """Execute a parameterized statement, binding ``values`` to its placeholders."""
        self.cur.execute(sql, values)

    def commit(self):
        """Commit the current transaction."""
        self.con.commit()

    def close(self):
        """Commit any pending work, then close the connection."""
        self.con.commit()
        self.con.close()

In [8]:
# Register the handle for the data mart file. Constructing the handle only
# records the path; no connection is opened until connect() is called on it.
DB_HANDLES['DATA MART'] = db(DATA_MART_PATH)

#

## 4. Build the Dimension Tables

#### Product Dimension
The presence of the CSV generated by the script is checked earlier.

In [9]:
def build_product_table():
    """Rebuild the ``product`` and ``product_metadata`` tables from the products CSV.

    Reads ``FILE_PATHS['PRODUCTS_CSV']`` and writes one ``product`` row and one
    ``product_metadata`` row per CSV row into the 'DATA MART' database handle.
    Both tables are dropped and recreated first. If any row fails, the pending
    inserts are rolled back, the connection is closed, and the error propagates.
    """
    db_handle = DB_HANDLES['DATA MART']

    # Every product is recorded with the same case count.
    # NOTE(review): hard-coded value; confirm that 12 applies to all products.
    case_count = 12

    # newline='' is how the csv module expects files to be opened, so that
    # newlines embedded in quoted fields are parsed correctly.
    with open(FILE_PATHS['PRODUCTS_CSV'], 'r', newline='') as csvfile:
        db_handle.connect()
        try:
            db_handle.build_table('product')
            db_handle.build_table('product_metadata')

            for row in csv.DictReader(csvfile):
                product_key = row['product_id']
                sku = row['SKU']
                product_name = row['Product Name']
                product_class_id = row['product_class_id']
                product_subcategory = row['product_subcategory']
                product_category = row['product_category']
                product_department = row['product_department']
                product_family = row['product_family']
                size = row['Size']
                # NOTE(review): BrandName is filled from the subcategory column.
                # This looks like a placeholder or copy/paste slip; confirm
                # whether the CSV has a real brand column to use instead.
                brand_name = row['product_subcategory']
                manufacturer = row['Manufacturer']
                supplier = row['Supplier']
                # BasePrice may carry '$' characters at either end; strip them
                # and store the price as a float rounded to cents.
                cost_to_store = round(float(Decimal(row['BasePrice'].strip('$'))), 2)

                old_type = row['itemType']
                meta_code = row['meta_code']
                meta_mapped_by = row['meta_mapped_by']
                meta_reason = row['meta_reason']

                db_handle.execute_sql_values(sql='insert into product values \
                                    (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)',
                                             values=(product_key, sku, product_name,
                                                     product_class_id, product_subcategory,
                                                     product_category, product_department,
                                                     product_family, size, case_count,
                                                     brand_name, manufacturer, supplier,
                                                     cost_to_store))

                db_handle.execute_sql_values(sql='insert into product_metadata values \
                                    (?, ?, ?, ?, ?, ?)',
                                             values=(product_key, sku, old_type, meta_code,
                                                     meta_mapped_by, meta_reason))

            db_handle.commit()
            print('Product and Product Metadata Tables Populated')
        except Exception:
            # Discard the partial inserts; close() below would otherwise commit them.
            db_handle.con.rollback()
            raise
        finally:
            db_handle.close()

build_product_table()

Product and Product Metadata Tables Populated


#### Store Dimension
Code originally written by Gideon Kipkorir

In [10]:
# Hard-coded attributes for the three stores, one dict per store.
# build_store_dimension() below reads these keys by name when inserting rows.
# The phone key is "StorePhone#" while the store table's column is named
# StorePhoneNumber; the insert is positional, so the names need not match.
data = [
    {
        "StoreKey": 8,
        "StoreManager": "Anthony-Sean-Gideon",
        "StoreStreetAddr": "1180 Seven Seas Dr",
        "StoreTown": "Orlando",
        "StoreZipCode": "32836",
        "StorePhone#": "(407) 824-4500",
        "StoreState": "FL"
    },
    {
        "StoreKey": 9,
        "StoreManager": "Rohith-Sneha",
        "StoreStreetAddr": "201 Mullica Hill Road",
        "StoreTown": "Glassboro",
        "StoreZipCode": "08028",
        "StorePhone#": "(856) 424-2222 x2500",
        "StoreState": "NJ"
    },
    {
        "StoreKey": 10,
        "StoreManager": "Anmol-Nikita-Satya",
        "StoreStreetAddr": "620 Anthony Ung Drive",
        "StoreTown": "Miami",
        "StoreZipCode": "33130",
        "StorePhone#": "(856) 663-8006",
        "StoreState": "FL"
    }
]

def build_store_dimension():
    """Rebuild the ``store`` table from the module-level ``data`` list.

    Drops and recreates the table in the 'DATA MART' database handle, then
    inserts one row per store dict. If an insert fails, the pending rows are
    rolled back, the connection is closed, and the error propagates.
    """
    db_handle = DB_HANDLES['DATA MART']
    db_handle.connect()
    try:
        db_handle.build_table('store')

        for store in data:
            # Values are bound positionally, in the store table's column order.
            db_handle.execute_sql_values(sql='insert into store values \
                                    (?, ?, ?, ?, ?, ?, ?)',
                                         values=(store['StoreKey'],
                                                 store['StoreManager'],
                                                 store['StoreStreetAddr'],
                                                 store['StoreTown'],
                                                 store['StoreZipCode'],
                                                 store['StorePhone#'],
                                                 store['StoreState']))

        db_handle.commit()
    except Exception:
        # Discard the partial inserts; close() below would otherwise commit them.
        db_handle.con.rollback()
        raise
    finally:
        db_handle.close()
    print('Store Dimension Successfully Built')


build_store_dimension()

Store Dimension Successfully Built


#### Date Dimension
Logic originally written by Sean Jerzewski

In [11]:
def build_date_dimension():
    """Rebuild the ``date`` table with one row per calendar day of 2024.

    Drops and recreates the table in the 'DATA MART' database handle, then
    inserts a row for every day from 2024-01-01 through 2024-12-31.
    ``DateKey`` and ``DayNumberInYear`` are both the 1-based day counter.
    If an insert fails, the pending rows are rolled back, the connection is
    closed, and the error propagates.
    """
    db_handle = DB_HANDLES['DATA MART']

    start_date = date(2024, 1, 1)
    end_date = date(2024, 12, 31)

    # Dates flagged as holidays, in the same ISO format used for the lookup.
    # NOTE(review): "2024-06-21" may have been meant as Juneteenth
    # (2024-06-19); confirm the intended holiday list.
    holidays = {"2024-01-01",
                "2024-01-15",
                "2024-02-19",
                "2024-03-29",
                "2024-05-27",
                "2024-06-21",
                "2024-07-04",
                "2024-09-02",
                "2024-10-14",
                "2024-11-05",
                "2024-11-11",
                "2024-11-28",
                "2024-12-25"}

    # Month number -> quarter number. Quarter 1 starts in August, which lines
    # up with the FiscalYear switch at month 8 below.
    # NOTE(review): confirm a fiscal (not calendar) quarter is intended.
    quarterDict = {1: 2,
                   2: 3, 3: 3, 4: 3,
                   5: 4, 6: 4, 7: 4,
                   8: 1, 9: 1, 10: 1,
                   11: 2, 12: 2}

    # First day of each season; anything outside spring..winter is Winter.
    spring = date(2024, 3, 21)
    summer = date(2024, 6, 21)
    fall = date(2024, 9, 21)
    winter = date(2024, 12, 21)

    db_handle.connect()
    try:
        db_handle.build_table('date')

        current_date = start_date
        day_number = 1

        while current_date <= end_date:
            iso_date = current_date.strftime('%Y-%m-%d')

            DateKey = day_number
            # Fixed: the format used to be '%Y-%M-%D'. %M is the minute and %D
            # is a non-portable mm/dd/yy shorthand, so the stored text was not
            # a year-month-day date.
            PrettyDate = iso_date
            DayNumberInMonth = current_date.strftime('%d')
            DayNumberInYear = day_number
            WeekNumberInYear = current_date.strftime('%W')
            MonthNum = current_date.strftime('%m')
            MonthTxt = current_date.strftime('%B')
            Quarter = quarterDict[current_date.month]
            Year = current_date.year
            FiscalYear = 2023 if current_date.month < 8 else 2024

            # NOTE(review): these flags are written as the strings 'True' and
            # 'False', although the date table declares both columns as INT.
            # Left unchanged in case downstream queries compare against the
            # strings; consider storing 1/0 instead.
            isHoliday = 'True' if iso_date in holidays else 'False'
            # weekday() is 0-4 for Monday-Friday and 5-6 for Saturday-Sunday.
            isWeekend = 'False' if current_date.weekday() < 5 else 'True'

            if spring <= current_date < summer:
                season = "Spring"
            elif summer <= current_date < fall:
                season = "Summer"
            elif fall <= current_date < winter:
                season = "Fall"
            else:
                season = "Winter"

            db_handle.execute_sql_values(sql='insert into date values \
                                    (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)',
                                         values=(DateKey,
                                                 PrettyDate,
                                                 DayNumberInMonth,
                                                 DayNumberInYear,
                                                 WeekNumberInYear,
                                                 MonthNum,
                                                 MonthTxt,
                                                 Quarter,
                                                 Year,
                                                 FiscalYear,
                                                 isHoliday,
                                                 isWeekend,
                                                 season))

            day_number += 1
            current_date += timedelta(days=1)

        db_handle.commit()
    except Exception:
        # Discard the partial inserts; close() below would otherwise commit them.
        db_handle.con.rollback()
        raise
    finally:
        db_handle.close()
    print('Date Dimension Successfully Built')

build_date_dimension()

OperationalError: no such table: date