# The Wedge


In [1]:
import os
import shutil
import re
import datetime 

import pandas as pd
import numpy as np
import pandas_gbq
import janitor

# Do our imports for the code
from google.cloud import bigquery
from google.oauth2 import service_account

## Define Global Variables

In [2]:
# Name of the zip archive to process. Exactly one assignment should be active;
# the alternatives are kept commented out so they can be switched quickly.

# Small sample archive
# zip_file_name = "WedgeZipOfZips_Small.zip"

# Full data set
# zip_file_name = "WedgeZipOfZips.zip"

# Clean data set (currently active)
zip_file_name = "WedgeFiles_Clean.zip"

# Directory the archive is extracted into. Other cells build file paths by
# plain string concatenation (working_directory + file_name), so keep the
# trailing slash. The final cleanup cell deletes this whole directory tree.
working_directory = "/media/psf/Home/Repos/BMKT670.V60-72020-Fall2022-Wedge-Project/eggs/"

## Define Functions

In [13]:
def display_zip_contents(zipped_files):
    """Print the name and on-disk size of every regular entry in ``zipped_files``.

    Entries whose name starts with ``__`` (e.g. ``__MACOSX`` metadata) or ends
    with ``/`` (folders) are skipped. The size is read from the extracted copy
    under ``working_directory``, so the entry must already exist on disk.
    """
    for entry in zipped_files:
        # Hidden __MACOSX entries and folder entries are not data files.
        if entry.startswith('__') or entry.endswith('/'):
            continue

        extracted_path = working_directory + entry
        print("File: ", entry, " Size:", os.path.getsize(extracted_path))

def display_file_contents(files):
    """Print ``File:  <name>  Size: <bytes>`` for each data file in ``files``.

    Names starting with ``__`` (hidden ``__MACOSX`` entries) and names ending
    with ``/`` (folders) are ignored. Each remaining name is resolved against
    ``working_directory`` to look up its size on disk.
    """
    data_entries = (
        name for name in files
        if not (name.startswith('__') or name.endswith('/'))
    )
    for name in data_entries:
        size_in_bytes = os.path.getsize(working_directory + name)
        print("File: ", name, " Size:", size_in_bytes)

def upload_data(data):
    """Walk ``data`` one calendar month at a time.

    The frame is grouped with ``pd.Grouper(freq='M')``; with no ``key`` given
    the grouper works on the frame's index, so the index is expected to be
    datetime-like.

    NOTE: the BigQuery upload itself is currently disabled. For every monthly
    group the function only prints a confirmation message.
    """
    # Grouping approach: https://stackoverflow.com/a/24083253
    monthly_groups = data.groupby(pd.Grouper(freq='M'))
    for _month_end, _month_rows in monthly_groups:
        # Intended per-month upload, disabled for now:
        #   table_name = "dram_items_" + reformat_date(_month_end.strftime('%Y-%m-%d'))
        #   table_id = ".".join([gbq_proj_id, dataset_id, table_name])
        #   pandas_gbq.to_gbq(item_lu, table_id, project_id=gbq_proj_id, if_exists="replace")
        print("Data Uploaded!")
        
def cleanup_data(data):
    """Return ``data`` with its column names cleaned by ``janitor.clean_names``.

    Only the name clean-up is active. The other cleaning steps listed below
    are disabled and kept as a reminder of what used to run here.
    """
    # Disabled steps:
    #   - strip "$" and "," from gross_sales / discounts / net_sales / tax and
    #     cast them to float
    #   - cast modifiers_applied to str
    #   - replace sku with a column of empty strings
    #   - index the frame by pd.to_datetime(data['date'])
    cleaned = janitor.clean_names(data)
    return cleaned
        
def process_file(file_name):
    """Load one extracted CSV, clean it, and pass it to the upload step.

    Args:
        file_name: Path of the CSV relative to ``working_directory``.
    """
    csv_path = working_directory + file_name
    # low_memory=False: parse the file in one pass instead of in internal
    # chunks, which avoids mixed-type inference within a column.
    raw_frame = pd.read_csv(csv_path, low_memory=False)

    # Same cleaning as Part 1, then the (per-month) upload step.
    upload_data(cleanup_data(raw_frame))


def data_columns():
    """Return the 50 Wedge transaction column names, in file order.

    The list is used to label the transaction CSVs, which are read without a
    header row. A new list is built, printed, and returned on every call.
    Capitalisation is irregular on purpose (e.g. ``Upc``, ``Scale``).
    """
    # https://www.geeksforgeeks.org/add-column-names-to-dataframe-in-pandas/
    columns = [
        # 1. Timestamp of the transaction-row creation.
        'datetime',
        # 2. Register for the transaction.
        'register_no',
        # 3. Employee number for the cashier.
        'emp_no',
        # 4. Transaction number. Counts up by day and is only unique when
        #    combined with date, register and employee.
        'trans_no',
        # 5. Universal Product Code for the item. 0 for non-items.
        'Upc',
        # 6. Product description. Includes things like Tax, Tender type, etc.
        'description',
        # 7. One of five values, corresponding to these transaction types:
        #      D: Departmental rings, when the cashier just selects a
        #         department for the item.
        #      G: Green patch donations, made for shoppers who bring their
        #         own bag.
        #      A: Tax
        #      T: Tender, the payment row.
        #      I: Items, but also includes discounts.
        'trans_type',
        # 8. There are a lot of these. Key ones include methods of payment
        #    (CK for Check, CA for Cash, CP for coupon, EF for EBT Food
        #    Stamps, WC for WIC). Often blank for other trans_type values.
        'trans_subtype',
        # 9. An important field that tells us more about the type of
        #    transaction. Possible values:
        #      Blank: The typical value.
        #      M: Member discounts.
        #      V: Voids
        #      C: Coupons
        #      0: Probably meant to be blank; blanks and 0s switched at some
        #         point in February 2010.
        #      R: Returns.
        #      J: Juice club cards
        'trans_status',
        # 10. The number of the department (see the department lookup table).
        'department',
        # 11. The purchased quantity. Beware: some items such as flowers and
        #     bulk vegetables are priced per cent and then sold in very large
        #     quantities (like 1000 for a $10 bouquet).
        'quantity',
        # 12. The reading on the scale. The capital letter is not a typo.
        'Scale',
        # 13. The per-unit cost of an item to the Wedge. Not uniformly
        #     populated.
        'cost',
        # 14. The per-unit cost of an item to an owner. Negative for things
        #     like returns and discounts.
        'unitPrice',
        # 15. Price times quantity: the cost of the line item. Can be negative
        #     because unitPrice can be negative.
        'total',
        # 16. The regular price of an item. May differ from unitPrice, but
        #     unitPrice plus discount should be regPrice.
        'regPrice',
        # 17. (no description available)
        'altPrice',
        # 18. Indicator of whether or not the item is taxable.
        'tax',
        # 19. Mostly zero.
        'taxexempt',
        # 20. Can the item be purchased with food stamps?
        'foodstamp',
        # 21. Can the item be purchased with WIC?
        'wicable',
        # 22. A marker of any discounts.
        'discount',
        # 23. The member discounts on items.
        'memDiscount',
        # 24. Meaning unknown.
        'discountable',
        # 25. Probably informative, but not decoded.
        'discounttype',
        # 26. Apparently used if an item is a void, or was rung up and
        #     subsequently voided.
        'voided',
        # 27. Not used.
        'percentDiscount',
        # 28. Meaning unknown.
        'ItemQtty',
        # 29. Meaning unknown.
        'volDiscType',
        # 30. Meaning unknown.
        'volume',
        # 31. Meaning unknown.
        'VolSpecial',
        # 32. Meaning unknown.
        'mixMatch',
        # 33. Meaning unknown.
        'matched',
        # 34. Mostly NULL or 1; significance unclear (maybe institutional
        #     memberships?).
        'memType',
        # 35. Possibly indicative of staff transactions.
        'staff',
        # 36. A complicated bitflag that encodes a bunch of other information;
        #     not critical for our purposes.
        'numflag',
        # 37. Meaning unknown.
        'Itemstatus',
        # 38. Meaning unknown.
        'tenderstatus',
        # 39. Meaning unknown.
        'charflag',
        # 40. Meaning unknown.
        'varflag',
        # 41. Meaning unknown.
        'batchHeaderID',
        # 42. Is the item local?
        'local',
        # 43. Is the item organic?
        'organic',
        # 44. Meaning unknown.
        'display',
        # 45. Meaning unknown.
        'receipt',
        # 46. Important: the masked owner number for the transaction (an
        #     integer). A value of 3 means a non-owner. Some owners (like
        #     11572) have a huge number of transactions; these are likely
        #     other co-ops, whose members can receive discounts at the Wedge.
        'card_no',
        # 47. 1 for the main store and 512 for catering.
        'store',
        # 48. 0 for the main store and 3 for the Wedge Table, a grab-and-go
        #     bodega opened in January 2015.
        'branch',
        # 49. Meaning unknown.
        'match_id',
        # 50. A counter that increments the line items of a receipt.
        'trans_id',
    ]
    print(columns)
    return columns


def dtype_columns():
    """Return a ``{column name: 'string'}`` mapping for all 50 Wedge columns.

    Every column is mapped to the pandas ``'string'`` dtype. Keys are inserted
    in file order, and a new dict is built on every call.
    """
    # https://www.geeksforgeeks.org/add-column-names-to-dataframe-in-pandas/
    column_names = (
        'datetime',         # 1. timestamp of the transaction-row creation
        'register_no',      # 2. register for the transaction
        'emp_no',           # 3. employee number for the cashier
        'trans_no',         # 4. counts up by day; unique only with date, register and employee
        'Upc',              # 5. Universal Product Code; 0 for non-items
        'description',      # 6. product description (also Tax, Tender type, etc.)
        'trans_type',       # 7. D (departmental ring), G (green patch donation), A (tax), T (tender), I (items/discounts)
        'trans_subtype',    # 8. e.g. payment method: CK check, CA cash, CP coupon, EF EBT food stamps, WC WIC
        'trans_status',     # 9. blank (typical), M member discount, V void, C coupon, 0 (likely blank), R return, J juice club card
        'department',       # 10. department number
        'quantity',         # 11. purchased quantity; can be very large for per-cent priced items
        'Scale',            # 12. scale reading; the capital letter is not a typo
        'cost',             # 13. per-unit cost to the Wedge; not uniformly populated
        'unitPrice',        # 14. per-unit cost to an owner; negative for returns and discounts
        'total',            # 15. price times quantity; can be negative
        'regPrice',         # 16. regular price; unitPrice plus discount should equal it
        'altPrice',         # 17. (no description available)
        'tax',              # 18. is the item taxable?
        'taxexempt',        # 19. mostly zero
        'foodstamp',        # 20. can be purchased with food stamps?
        'wicable',          # 21. can be purchased with WIC?
        'discount',         # 22. marker of any discounts
        'memDiscount',      # 23. member discounts on items
        'discountable',     # 24. meaning unknown
        'discounttype',     # 25. not decoded
        'voided',           # 26. item is a void, or was rung up and then voided
        'percentDiscount',  # 27. not used
        'ItemQtty',         # 28. meaning unknown
        'volDiscType',      # 29. meaning unknown
        'volume',           # 30. meaning unknown
        'VolSpecial',       # 31. meaning unknown
        'mixMatch',         # 32. meaning unknown
        'matched',          # 33. meaning unknown
        'memType',          # 34. mostly NULL or 1; significance unclear
        'staff',            # 35. possibly staff transactions
        'numflag',          # 36. bitflag encoding other information; not critical here
        'Itemstatus',       # 37. meaning unknown
        'tenderstatus',     # 38. meaning unknown
        'charflag',         # 39. meaning unknown
        'varflag',          # 40. meaning unknown
        'batchHeaderID',    # 41. meaning unknown
        'local',            # 42. is the item local?
        'organic',          # 43. is the item organic?
        'display',          # 44. meaning unknown
        'receipt',          # 45. meaning unknown
        'card_no',          # 46. masked owner number (integer); 3 means non-owner
        'store',            # 47. 1 main store, 512 catering
        'branch',           # 48. 0 main store, 3 Wedge Table (opened January 2015)
        'match_id',         # 49. meaning unknown
        'trans_id',         # 50. counter over the line items of a receipt
    )
    return dict.fromkeys(column_names, 'string')



In [4]:
# BigQuery connection settings. The first three values are specific to your
# machine and project. Earlier settings, kept for reference:
#   service_path = "/Users/chandler/Dropbox/Teaching/"
#   service_file = 'umt-msba-037daf11ee16.json'
#   gbq_proj_id = 'umt-msba'
service_path = "/media/psf/Home/Repos/"  # folder holding the service-account key
service_file = 'dev-firmament-363722-cdd41b116dec.json'  # change this to your authentication information
gbq_proj_id = 'bmkt670-fall2022-wedge-project'  # change this to your project
dataset_id = 'wedgedataset'

# Full path of the key file. This line should stay the same.
private_key = service_path + service_file

# Load the service-account credentials so Python has permission to access
# the project.
credentials = service_account.Credentials.from_service_account_file(private_key)

# Finally, open the BigQuery connection for the project.
client = bigquery.Client(project=gbq_proj_id, credentials=credentials)

# for item in client.list_datasets() : 
#     print(item.full_dataset_id)

### Checking for and deleting monthly tables

We'll list every table in our Wedge dataset whose name matches our monthly-table pattern and delete only those tables, so that we do not accidentally delete the item lookup table that we put in this dataset in class.

## Phase 1, Upload Clean Files

In [5]:
# Extract the archive into working_directory (only the members that are not
# already on disk) and collect the relative path of every data file.
# NOTE: members are treated as data files directly; the zip-of-zips archives
# would need a second extraction pass for each inner zip.

from zipfile import ZipFile # usually you'd do all these imports at the beginning

# Master list of all data files (paths relative to working_directory)
data_files = []

with ZipFile(zip_file_name, 'r') as zf:
    print('Extracting all the files now...')

    zipped_files = zf.namelist()

    for file_name in zipped_files:
        # Skip __MACOSX hidden entries and folder entries.
        if file_name.startswith('__') or file_name.endswith('/'):
            continue

        # Instead of always extracting everything, extract a member only when
        # it is not on disk yet.
        already_extracted = os.path.exists(working_directory + file_name)
        status = "File Exists, skipping" if already_extracted else "Need to Extract"
        print(status)
        print(file_name)
        if not already_extracted:
            zf.extract(file_name, working_directory)

        data_files.append(file_name)

    print('Done Extracting!')

print("Done building file list")
                




Extracting all the files now...
File Exists, skipping
clean-files/transArchive_201001_201003_clean.csv
File Exists, skipping
clean-files/transArchive_201004_201006_clean.csv
File Exists, skipping
clean-files/transArchive_201007_201009_clean.csv
File Exists, skipping
clean-files/transArchive_201010_201012_clean.csv
File Exists, skipping
clean-files/transArchive_201101_201103_clean.csv
File Exists, skipping
clean-files/transArchive_201104_clean.csv
File Exists, skipping
clean-files/transArchive_201105_clean.csv
File Exists, skipping
clean-files/transArchive_201106_clean.csv
File Exists, skipping
clean-files/transArchive_201107_201109_clean.csv
File Exists, skipping
clean-files/transArchive_201110_201112_clean.csv
File Exists, skipping
clean-files/transArchive_201201_201203_clean.csv
File Exists, skipping
clean-files/transArchive_201201_201203_inactive_clean.csv
File Exists, skipping
clean-files/transArchive_201204_201206_clean.csv
File Exists, skipping
clean-files/transArchive_201204_201

## Verify csv Files

In [6]:
# Sanity check: print the name and on-disk size of every extracted data file.
# print(data_files)
display_file_contents(data_files)


File:  clean-files/transArchive_201001_201003_clean.csv  Size: 580722504
File:  clean-files/transArchive_201004_201006_clean.csv  Size: 615914501
File:  clean-files/transArchive_201007_201009_clean.csv  Size: 579071066
File:  clean-files/transArchive_201010_201012_clean.csv  Size: 576701531
File:  clean-files/transArchive_201101_201103_clean.csv  Size: 571099632
File:  clean-files/transArchive_201104_clean.csv  Size: 207906842
File:  clean-files/transArchive_201105_clean.csv  Size: 208014327
File:  clean-files/transArchive_201106_clean.csv  Size: 192943550
File:  clean-files/transArchive_201107_201109_clean.csv  Size: 584955463
File:  clean-files/transArchive_201110_201112_clean.csv  Size: 608577984
File:  clean-files/transArchive_201201_201203_clean.csv  Size: 582549927
File:  clean-files/transArchive_201201_201203_inactive_clean.csv  Size: 48420527
File:  clean-files/transArchive_201204_201206_clean.csv  Size: 599406739
File:  clean-files/transArchive_201204_201206_inactive_clean.csv

## Uploading...

In [16]:
# Upload every extracted CSV to BigQuery, one table per file.

# The column names and dtypes are the same for every file, so build them once
# up front instead of on every iteration (data_columns() also prints the whole
# list each time it is called).
column_names = data_columns()
column_dtypes = dtype_columns()

for file_name in data_files:

    # header=None with names=...: the file is read as having no header row and
    # the columns are labelled from column_names.
    # https://stackoverflow.com/a/27232309
    # NOTE(review): if the clean CSVs do contain a header row, it will be
    # loaded as a data row -- confirm against the files.
    transactions = pd.read_csv(working_directory + file_name,
                               header=None,
                               names=column_names,
                               dtype=column_dtypes)

    # The table name is derived from the file's relative path, with "." and
    # "/" replaced by "-".
    table_name = "wedge_" + file_name.replace(".", "-").replace("/", "-")
    table_id = ".".join([gbq_proj_id, dataset_id, table_name])

    # if_exists="replace" overwrites an existing table of the same name, so
    # the cell can be re-run without creating duplicates.
    pandas_gbq.to_gbq(transactions, table_id, project_id=gbq_proj_id, if_exists="replace")

print("Completed Uploads!")

['datetime', 'register_no', 'emp_no', 'trans_no', 'Upc', 'description', 'trans_type', 'trans_subtype', 'trans_status', 'department', 'quantity', 'Scale', 'cost', 'unitPrice', 'total', 'regPrice', 'altPrice', 'tax', 'taxexempt', 'foodstamp', 'wicable', 'discount', 'memDiscount', 'discountable', 'discounttype', 'voided', 'percentDiscount', 'ItemQtty', 'volDiscType', 'volume', 'VolSpecial', 'mixMatch', 'matched', 'memType', 'staff', 'numflag', 'Itemstatus', 'tenderstatus', 'charflag', 'varflag', 'batchHeaderID', 'local', 'organic', 'display', 'receipt', 'card_no', 'store', 'branch', 'match_id', 'trans_id']
{'datetime': 'string', 'register_no': 'string', 'emp_no': 'string', 'trans_no': 'string', 'Upc': 'string', 'description': 'string', 'trans_type': 'string', 'trans_subtype': 'string', 'trans_status': 'string', 'department': 'string', 'quantity': 'string', 'Scale': 'string', 'cost': 'string', 'unitPrice': 'string', 'total': 'string', 'regPrice': 'string', 'altPrice': 'string', 'tax': 'str

100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 2685.21it/s]

Completed Uploads!





## Phase 2, Clean & Upload

In [None]:
# for file_name in data_files :
#     process_file(file_name)

In [None]:
# Phase 2 (draft): clean each file and upload it one month per table.
# NOTE(review): this cell looks carried over from an earlier exercise. The
# columns it cleans (gross_sales, net_sales, modifiers_applied, sku, date) are
# not among the Wedge column names, and reformat_date is not defined in the
# visible notebook. Confirm both before running.

# 1. Read in the items files one at a time.
for file_name in zipped_files:
    # Ignore __MACOSX hidden files and folder entries (neither can be read as
    # a CSV).
    if file_name.startswith('__') or file_name.endswith('/'):
        continue

    item_lu = pd.read_csv("./eggs/" + file_name, low_memory=False)

    # 2. Do the same cleaning we did in Part 1 (clean names,
    #    make sku an empty string, fix dollars, make modifiers_applied a string)

    # Clean the names with the janitor package.
    item_lu = janitor.clean_names(item_lu)

    for column in ('gross_sales', 'discounts', 'net_sales', 'tax'):
        # Convert the dollar fields into numeric data by stripping dollar
        # signs and thousands separators before casting.
        item_lu[column] = (item_lu[column]
                           .str.replace("$", '', regex=False)
                           .str.replace(",", '', regex=False)
                           .astype(float))

    # Change the type of the column `modifiers_applied` to string.
    item_lu['modifiers_applied'] = item_lu['modifiers_applied'].astype(str)

    # Replace the `sku` column with a column of empty strings.
    item_lu['sku'] = ''

    # Index by date so the rows can be grouped by calendar month.
    item_lu.index = pd.to_datetime(item_lu['date'])

    # https://stackoverflow.com/a/24083253
    grouped = item_lu.groupby(pd.Grouper(freq='M'))
    for name, group in grouped:
        # A month with no rows would only produce an empty table; skip it.
        if group.empty:
            continue

        # Construct table name from the group's month label.
        table_name = "dram_items_" + reformat_date(name.strftime('%Y-%m-%d'))

        # 3. For each month in the file, subset the data to that month and
        #    upload the data to a table called `dram_items_YYYYMM01`.
        table_id = ".".join([gbq_proj_id, dataset_id, table_name])

        # Upload only this month's rows, so each table holds a single month
        # rather than the whole file.
        pandas_gbq.to_gbq(group, table_id, project_id=gbq_proj_id, if_exists="replace")


# Cleanup ALL Local Files

In [None]:
# Remove the extraction directory and everything inside it.
# https://linuxize.com/post/python-delete-files-and-directories/
# The "Exit Code" lines are informational messages only; the process exit
# status is not changed.
try:
    shutil.rmtree(working_directory)
except OSError as err:
    print(f"Error: {working_directory} : {err.strerror}")
    print("Completed Exit Code -1")
else:
    print('Done Cleanup')
    print("Completed Exit Code 0")
