In [1]:
from google.cloud import bigquery
import os
import zipfile
import polars as pl
import pandas as pd
from datetime import datetime
from pandas_gbq import to_gbq

# Fully-qualified BigQuery dataset ("<project>.<dataset>").
# Google Cloud project IDs are lowercase-only, so the project part must be
# spelled exactly like the project passed to the client and to the uploads.
dataset_id = 'wedge-project-jbangtson.the_wedge_dataset'

# Initialize a BigQuery client
client = bigquery.Client(project='wedge-project-jbangtson')


# Folder holding the unzipped Wedge CSV files. Set the WEDGE_DATA_DIR
# environment variable to run the notebook on another machine; the default
# is the original local path.
data_directory = os.environ.get(
    "WEDGE_DATA_DIR",
    "E:\\College\\Fall 2024\\ADA\\Wedge\\Wedge_Project\\data\\unzipped\\",
)

## Importing the files in as lazy dataframes.

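This cell lazily loads every file in the data directory into its own Polars LazyFrame, so nothing is read into memory until a frame is collected. Files named like `<prefix>_<start>_<end>_inactive.csv` are scanned with a semicolon (`;`) separator; all other files use the default comma. The token `\N` is read as null, and `ignore_errors=True` lets the scan continue past values that fail to parse instead of raising. Finally, the first frame is collected so its column names can be reused as the canonical header (`clean_columns`) for every file.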

In [3]:
# Load every file in the data directory into its own LazyFrame.
# https://chatgpt.com/share/66e4ad8b-ea5c-8000-9117-d884dd0bbfb3

# List the directory once. Position i in lazy_df_list corresponds to
# position i in this listing, so no filtering or re-ordering is done here.
data_files = os.listdir(data_directory)

# One LazyFrame per file, in listing order.
lazy_df_list = []

for file in data_files:
    file_path = os.path.join(data_directory, file)

    # Names shaped like "<prefix>_<start>_<end>_inactive.csv" are scanned
    # with a semicolon separator; everything else uses the default comma.
    name_parts = file.split("_")
    is_inactive = len(name_parts) >= 4 and name_parts[3] == "inactive.csv"

    # "\N" is the null marker in the exports; ignore_errors keeps the scan
    # going when a value cannot be parsed.
    scan_options = dict(has_header=True, null_values=["\\N"], ignore_errors=True)
    if is_inactive:
        scan_options["separator"] = ";"

    lazy_df_list.append(pl.scan_csv(file_path, **scan_options))

# Fail with a clear message instead of an IndexError on an empty folder.
if not lazy_df_list:
    raise FileNotFoundError(f"No files found in data directory: {data_directory!r}")

# Materialize the first file to inspect it and to capture the canonical
# column names that are later applied to every file.
# NOTE(review): this collects the whole first file just to read its header.
df = lazy_df_list[0].collect()

clean_columns = df.columns


## Creating GBQ Schema

This code defines a schema for a BigQuery table using bigquery.SchemaField objects. The schema specifies the structure of the data, including the column names, data types (e.g., FLOAT, STRING, BOOLEAN, TIMESTAMP), and whether each field is nullable. This schema can be used to load, query, and manage structured transaction data in BigQuery.

In [4]:

# BigQuery schema for the Wedge transaction tables: one NULLABLE field per
# column, listed in the same order as the columns in the source files.
wedge_schema = [
    bigquery.SchemaField(field_name, field_type, mode="NULLABLE")
    for field_name, field_type in (
        ("datetime", "TIMESTAMP"),
        ("register_no", "FLOAT"),
        ("emp_no", "FLOAT"),
        ("trans_no", "FLOAT"),
        ("upc", "STRING"),
        ("description", "STRING"),
        ("trans_type", "STRING"),
        ("trans_subtype", "STRING"),
        ("trans_status", "STRING"),
        ("department", "FLOAT"),
        ("quantity", "FLOAT"),
        ("scale", "FLOAT"),
        ("cost", "FLOAT"),
        ("unitPrice", "FLOAT"),
        ("total", "FLOAT"),
        ("regPrice", "FLOAT"),
        ("altPrice", "FLOAT"),
        ("tax", "FLOAT"),
        ("taxexempt", "FLOAT"),
        ("foodstamp", "FLOAT"),
        ("wicable", "FLOAT"),
        ("discount", "FLOAT"),
        ("memDiscount", "FLOAT"),
        ("discountable", "FLOAT"),
        ("discounttype", "FLOAT"),
        ("voided", "FLOAT"),
        ("percentDiscount", "FLOAT"),
        ("itemQtty", "FLOAT"),
        ("volDiscType", "FLOAT"),
        ("volume", "FLOAT"),
        ("volSpecial", "FLOAT"),
        ("mixMatch", "FLOAT"),
        ("matched", "FLOAT"),
        ("memType", "BOOLEAN"),
        ("staff", "BOOLEAN"),
        ("numflag", "FLOAT"),
        ("itemstatus", "FLOAT"),
        ("tenderstatus", "FLOAT"),
        ("charflag", "STRING"),
        ("varflag", "FLOAT"),
        ("batchHeaderID", "BOOLEAN"),
        ("local", "FLOAT"),
        ("organic", "FLOAT"),
        ("display", "BOOLEAN"),
        ("receipt", "FLOAT"),
        ("card_no", "FLOAT"),
        ("store", "FLOAT"),
        ("branch", "FLOAT"),
        ("match_id", "FLOAT"),
        ("trans_id", "FLOAT"),
    )
]



----
## Cleaning/Casting Methods

### Wedge Cleaner 
The wedge_cleaner function processes a pandas DataFrame to ensure data consistency and cleanliness by applying specific type-safe casting functions to each column based on its name. It handles columns differently based on their expected data type:

    Datetime Columns: Applies datetime_safe_cast for proper datetime formatting.
    Float Columns: Applies float_safe_cast to ensure values are converted to floats.
    String Columns: Applies string_safe_cast for consistent string formatting.
    Boolean Columns: Applies boolean_safe_cast to correctly convert values to boolean.

The function iterates over each column, performs the appropriate casting, and returns the cleaned DataFrame.

In [5]:

def wedge_cleaner(pandas_df, verbose=True):
    """Cast each known column of a Wedge DataFrame to its expected type.

    Every column whose name is recognised is passed, value by value, through
    the matching ``*_safe_cast`` helper (datetime, float, string or boolean).
    Columns with unrecognised names are left untouched.

    Column names are matched case-insensitively: the source files use
    headers such as ``Scale``, ``ItemQtty`` and ``VolSpecial``, which an
    exact-case comparison against ``scale`` / ``itemQtty`` / ``volSpecial``
    would skip.

    Parameters
    ----------
    pandas_df : pandas.DataFrame
        Frame to clean. It is modified in place.
    verbose : bool, default True
        When True, print one ``Column: <name>`` line per column.

    Returns
    -------
    pandas.DataFrame
        The same ``pandas_df`` object, after casting.
    """
    float_columns = [
        "register_no", "emp_no", "trans_no", "department", "quantity", "scale",
        "cost", "unitPrice", "total", "regPrice", "altPrice", "tax",
        "taxexempt", "foodstamp", "wicable", "discount", "memDiscount",
        "discountable", "discounttype", "voided", "percentDiscount",
        "itemQtty", "volDiscType", "volume", "volSpecial", "mixMatch",
        "matched", "numflag", "itemstatus", "tenderstatus", "varflag",
        "local", "organic", "receipt", "card_no", "store", "branch",
        "match_id", "trans_id",
    ]
    string_columns = ["upc", "description", "trans_type", "trans_subtype", "trans_status", "charflag"]
    boolean_columns = ["memType", "staff", "batchHeaderID", "display"]

    # Lower-cased column name -> cast helper. Each column has exactly one
    # caster, so no cast is applied twice.
    casters = {"datetime": datetime_safe_cast}
    casters.update({name.lower(): float_safe_cast for name in float_columns})
    casters.update({name.lower(): string_safe_cast for name in string_columns})
    casters.update({name.lower(): boolean_safe_cast for name in boolean_columns})

    for col_name in pandas_df:
        if verbose:
            print(f'Column: {col_name}')

        caster = casters.get(str(col_name).lower())
        if caster is not None:
            pandas_df[col_name] = pandas_df[col_name].apply(caster)

    return pandas_df

### Datetime

In [6]:
from datetime import datetime

# Date time

def datetime_safe_cast(val):
    """Convert a value to a ``datetime``, or return None if it cannot be parsed.

    Parameters
    ----------
    val : object
        Usually a string in ``YYYY-MM-DD HH:MM:SS`` form. Values that are
        already ``datetime`` instances are returned unchanged.

    Returns
    -------
    datetime or None
        The parsed timestamp, or None for missing, non-string or malformed
        input.
    """
    # strptime only accepts strings; without this pass-through an
    # already-parsed timestamp would raise TypeError and be turned into None.
    if isinstance(val, datetime):
        return val
    try:
        return datetime.strptime(val, '%Y-%m-%d %H:%M:%S')
    except (ValueError, TypeError):
        return None  # missing or malformed timestamp

        

### Float

In [7]:
# Float
def float_safe_cast(val):
    """Convert a value to ``float``, or return None if it cannot be converted.

    Parameters
    ----------
    val : object
        Value to convert; None is passed through as None.

    Returns
    -------
    float or None
        ``float(val)``, or None when ``val`` is None or the conversion fails.
    """
    if val is None:
        return None
    try:
        return float(val)
    except (ValueError, TypeError, OverflowError):
        # ValueError: unparsable text; TypeError: non-scalar / NA-like
        # objects; OverflowError: integers too large for a float.
        return None




### String

In [8]:
# String
def string_safe_cast(val):
    """Convert a value to ``str`` while keeping missing values missing.

    Parameters
    ----------
    val : object
        Value to convert.

    Returns
    -------
    str or None
        ``str(val)`` for real values; None when ``val`` is None or a float
        NaN, so that nulls are not stored as the literal text "None" / "nan".
        An empty string is returned if ``str(val)`` itself raises.
    """
    if val is None:
        return None
    # NaN is the only float that is not equal to itself.
    if isinstance(val, float) and val != val:
        return None
    try:
        return str(val)
    except (ValueError, TypeError):
        return ""  # value could not be rendered as text



### Boolean

In [9]:

# Boolean
def boolean_safe_cast(val):
    """Convert a value to ``bool``, or return None when it is missing or invalid.

    Parameters
    ----------
    val : object
        Value to convert. Strings are interpreted by content rather than by
        emptiness: ``"true"/"t"/"yes"/"y"`` and ``"false"/"f"/"no"/"n"``
        (case-insensitive), or any numeric text such as ``"0"`` / ``"1.0"``.

    Returns
    -------
    bool or None
        The boolean value, or None for None, NaN, blank strings and strings
        that are neither a recognised word nor a number.
    """
    if val is None:
        return None
    # NaN is the only float that is not equal to itself; bool(NaN) would
    # otherwise report a missing value as True.
    if isinstance(val, float) and val != val:
        return None

    if isinstance(val, str):
        token = val.strip().lower()
        if token == "":
            return None
        if token in ("true", "t", "yes", "y"):
            return True
        if token in ("false", "f", "no", "n"):
            return False
        try:
            number = float(token)
        except ValueError:
            return None
        if number != number:  # the text "nan"
            return None
        return bool(number)

    try:
        return bool(val)
    except (ValueError, TypeError):
        return None  # object has no usable truth value

## Cleans and Uploads to GBQ (🙏)

This script processes a list of Polars LazyFrame objects by converting them to pandas DataFrames, cleaning them using the wedge_cleaner function, and then uploading the cleaned DataFrames to Google BigQuery (GBQ).

Steps:

1. **Iterate over LazyFrames:** for each LazyFrame in `lazy_df_list`, print the file number and name.
2. **Collect and clean data:** collect the LazyFrame, set its column names to `clean_columns`, convert it to a pandas DataFrame, and clean it with `wedge_cleaner`.
3. **Upload to GBQ:** print status messages when cleaning finishes and when the upload starts, define the project ID and destination table name, and upload the cleaned DataFrame with `to_gbq()` using `if_exists='replace'`, which replaces the table if it already exists.

This ensures that each file is processed, cleaned, and uploaded to its own BigQuery table, named after the source file.

In [10]:
# Clean every file and upload it to BigQuery, one table per source file.

# Upload target; identical for every file, so defined once.
project_id = 'wedge-project-jbangtson'

# List the directory once instead of re-scanning it for every message.
# Entry i is taken to be the file behind lazy_df_list[i]; this assumes the
# directory has not changed since the LazyFrames were created.
file_names = os.listdir(data_directory)

for idx, lazy_df in enumerate(lazy_df_list):
    file_name = file_names[idx]
    print(f"Cleaning file number: {idx}\nFile Name: {file_name}")

    # Materialize the file, force the canonical header, then hand it to pandas.
    df = lazy_df.collect()
    df.columns = clean_columns
    clean_panda_df = df.to_pandas()

    clean_panda_df = wedge_cleaner(clean_panda_df)

    print(f"Finished cleaning file number: {idx}\nFile Name: {file_name}!\n\n------------------------------------")

    print(f"Uploading file number: {idx}\nFile Name: {file_name} to GBQ")

    # Table name is the file name up to its first "."
    table_name = str(file_name).split(".")[0]
    destination_table = f'the_wedge_dataset.{table_name}'

    # if_exists='replace' overwrites a table left over from an earlier run.
    to_gbq(clean_panda_df, destination_table, project_id=project_id, if_exists='replace')
   

Cleaning file number: 0
File Name: transArchive_201001_201003.csv
Column: datetime
Column: register_no
Column: emp_no
Column: trans_no
Column: upc
Column: description
Column: trans_type
Column: trans_subtype
Column: trans_status
Column: department
Column: quantity
Column: Scale
Column: cost
Column: unitPrice
Column: total
Column: regPrice
Column: altPrice
Column: tax
Column: taxexempt
Column: foodstamp
Column: wicable
Column: discount
Column: memDiscount
Column: discountable
Column: discounttype
Column: voided
Column: percentDiscount
Column: ItemQtty
Column: volDiscType
Column: volume
Column: VolSpecial
Column: mixMatch
Column: matched
Column: memType
Column: staff
Column: numflag
Column: itemstatus
Column: tenderstatus
Column: charflag
Column: varflag
Column: batchHeaderID
Column: local
Column: organic
Column: display
Column: receipt
Column: card_no
Column: store
Column: branch
Column: match_id
Column: trans_id
Finished cleaning file number: 0
File Name: transArchive_201001_201003.csv

100%|██████████| 1/1 [00:00<?, ?it/s]


Cleaning file number: 1
File Name: transArchive_201004_201006.csv
Column: datetime
Column: register_no
Column: emp_no
Column: trans_no
Column: upc
Column: description
Column: trans_type
Column: trans_subtype
Column: trans_status
Column: department
Column: quantity
Column: Scale
Column: cost
Column: unitPrice
Column: total
Column: regPrice
Column: altPrice
Column: tax
Column: taxexempt
Column: foodstamp
Column: wicable
Column: discount
Column: memDiscount
Column: discountable
Column: discounttype
Column: voided
Column: percentDiscount
Column: ItemQtty
Column: volDiscType
Column: volume
Column: VolSpecial
Column: mixMatch
Column: matched
Column: memType
Column: staff
Column: numflag
Column: itemstatus
Column: tenderstatus
Column: charflag
Column: varflag
Column: batchHeaderID
Column: local
Column: organic
Column: display
Column: receipt
Column: card_no
Column: store
Column: branch
Column: match_id
Column: trans_id
Finished cleaning file number: 1
File Name: transArchive_201004_201006.csv

100%|██████████| 1/1 [00:00<?, ?it/s]


Cleaning file number: 2
File Name: transArchive_201007_201009.csv
Column: datetime
Column: register_no
Column: emp_no
Column: trans_no
Column: upc
Column: description
Column: trans_type
Column: trans_subtype
Column: trans_status
Column: department
Column: quantity
Column: Scale
Column: cost
Column: unitPrice
Column: total
Column: regPrice
Column: altPrice
Column: tax
Column: taxexempt
Column: foodstamp
Column: wicable
Column: discount
Column: memDiscount
Column: discountable
Column: discounttype
Column: voided
Column: percentDiscount
Column: ItemQtty
Column: volDiscType
Column: volume
Column: VolSpecial
Column: mixMatch
Column: matched
Column: memType
Column: staff
Column: numflag
Column: itemstatus
Column: tenderstatus
Column: charflag
Column: varflag
Column: batchHeaderID
Column: local
Column: organic
Column: display
Column: receipt
Column: card_no
Column: store
Column: branch
Column: match_id
Column: trans_id
Finished cleaning file number: 2
File Name: transArchive_201007_201009.csv

100%|██████████| 1/1 [00:00<00:00, 1659.80it/s]


Cleaning file number: 3
File Name: transArchive_201010_201012.csv
Column: datetime
Column: register_no
Column: emp_no
Column: trans_no
Column: upc
Column: description
Column: trans_type
Column: trans_subtype
Column: trans_status
Column: department
Column: quantity
Column: Scale
Column: cost
Column: unitPrice
Column: total
Column: regPrice
Column: altPrice
Column: tax
Column: taxexempt
Column: foodstamp
Column: wicable
Column: discount
Column: memDiscount
Column: discountable
Column: discounttype
Column: voided
Column: percentDiscount
Column: ItemQtty
Column: volDiscType
Column: volume
Column: VolSpecial
Column: mixMatch
Column: matched
Column: memType
Column: staff
Column: numflag
Column: itemstatus
Column: tenderstatus
Column: charflag
Column: varflag
Column: batchHeaderID
Column: local
Column: organic
Column: display
Column: receipt
Column: card_no
Column: store
Column: branch
Column: match_id
Column: trans_id
Finished cleaning file number: 3
File Name: transArchive_201010_201012.csv

100%|██████████| 1/1 [00:00<?, ?it/s]


Cleaning file number: 4
File Name: transArchive_201101_201103.csv
Column: datetime
Column: register_no
Column: emp_no
Column: trans_no
Column: upc
Column: description
Column: trans_type
Column: trans_subtype
Column: trans_status
Column: department
Column: quantity
Column: Scale
Column: cost
Column: unitPrice
Column: total
Column: regPrice
Column: altPrice
Column: tax
Column: taxexempt
Column: foodstamp
Column: wicable
Column: discount
Column: memDiscount
Column: discountable
Column: discounttype
Column: voided
Column: percentDiscount
Column: ItemQtty
Column: volDiscType
Column: volume
Column: VolSpecial
Column: mixMatch
Column: matched
Column: memType
Column: staff
Column: numflag
Column: itemstatus
Column: tenderstatus
Column: charflag
Column: varflag
Column: batchHeaderID
Column: local
Column: organic
Column: display
Column: receipt
Column: card_no
Column: store
Column: branch
Column: match_id
Column: trans_id
Finished cleaning file number: 4
File Name: transArchive_201101_201103.csv

100%|██████████| 1/1 [00:00<?, ?it/s]


Cleaning file number: 5
File Name: transArchive_201104.csv
Column: datetime
Column: register_no
Column: emp_no
Column: trans_no
Column: upc
Column: description
Column: trans_type
Column: trans_subtype
Column: trans_status
Column: department
Column: quantity
Column: Scale
Column: cost
Column: unitPrice
Column: total
Column: regPrice
Column: altPrice
Column: tax
Column: taxexempt
Column: foodstamp
Column: wicable
Column: discount
Column: memDiscount
Column: discountable
Column: discounttype
Column: voided
Column: percentDiscount
Column: ItemQtty
Column: volDiscType
Column: volume
Column: VolSpecial
Column: mixMatch
Column: matched
Column: memType
Column: staff
Column: numflag
Column: itemstatus
Column: tenderstatus
Column: charflag
Column: varflag
Column: batchHeaderID
Column: local
Column: organic
Column: display
Column: receipt
Column: card_no
Column: store
Column: branch
Column: match_id
Column: trans_id
Finished cleaning file number: 5
File Name: transArchive_201104.csv!

-----------

100%|██████████| 1/1 [00:00<?, ?it/s]


Cleaning file number: 6
File Name: transArchive_201105.csv
Column: datetime
Column: register_no
Column: emp_no
Column: trans_no
Column: upc
Column: description
Column: trans_type
Column: trans_subtype
Column: trans_status
Column: department
Column: quantity
Column: Scale
Column: cost
Column: unitPrice
Column: total
Column: regPrice
Column: altPrice
Column: tax
Column: taxexempt
Column: foodstamp
Column: wicable
Column: discount
Column: memDiscount
Column: discountable
Column: discounttype
Column: voided
Column: percentDiscount
Column: ItemQtty
Column: volDiscType
Column: volume
Column: VolSpecial
Column: mixMatch
Column: matched
Column: memType
Column: staff
Column: numflag
Column: itemstatus
Column: tenderstatus
Column: charflag
Column: varflag
Column: batchHeaderID
Column: local
Column: organic
Column: display
Column: receipt
Column: card_no
Column: store
Column: branch
Column: match_id
Column: trans_id
Finished cleaning file number: 6
File Name: transArchive_201105.csv!

-----------

100%|██████████| 1/1 [00:00<?, ?it/s]


Cleaning file number: 7
File Name: transArchive_201106.csv
Column: datetime
Column: register_no
Column: emp_no
Column: trans_no
Column: upc
Column: description
Column: trans_type
Column: trans_subtype
Column: trans_status
Column: department
Column: quantity
Column: Scale
Column: cost
Column: unitPrice
Column: total
Column: regPrice
Column: altPrice
Column: tax
Column: taxexempt
Column: foodstamp
Column: wicable
Column: discount
Column: memDiscount
Column: discountable
Column: discounttype
Column: voided
Column: percentDiscount
Column: ItemQtty
Column: volDiscType
Column: volume
Column: VolSpecial
Column: mixMatch
Column: matched
Column: memType
Column: staff
Column: numflag
Column: itemstatus
Column: tenderstatus
Column: charflag
Column: varflag
Column: batchHeaderID
Column: local
Column: organic
Column: display
Column: receipt
Column: card_no
Column: store
Column: branch
Column: match_id
Column: trans_id
Finished cleaning file number: 7
File Name: transArchive_201106.csv!

-----------

100%|██████████| 1/1 [00:00<?, ?it/s]


Cleaning file number: 8
File Name: transArchive_201107_201109.csv
Column: datetime
Column: register_no
Column: emp_no
Column: trans_no
Column: upc
Column: description
Column: trans_type
Column: trans_subtype
Column: trans_status
Column: department
Column: quantity
Column: Scale
Column: cost
Column: unitPrice
Column: total
Column: regPrice
Column: altPrice
Column: tax
Column: taxexempt
Column: foodstamp
Column: wicable
Column: discount
Column: memDiscount
Column: discountable
Column: discounttype
Column: voided
Column: percentDiscount
Column: ItemQtty
Column: volDiscType
Column: volume
Column: VolSpecial
Column: mixMatch
Column: matched
Column: memType
Column: staff
Column: numflag
Column: itemstatus
Column: tenderstatus
Column: charflag
Column: varflag
Column: batchHeaderID
Column: local
Column: organic
Column: display
Column: receipt
Column: card_no
Column: store
Column: branch
Column: match_id
Column: trans_id
Finished cleaning file number: 8
File Name: transArchive_201107_201109.csv

100%|██████████| 1/1 [00:00<?, ?it/s]


Cleaning file number: 9
File Name: transArchive_201110_201112.csv
Column: datetime
Column: register_no
Column: emp_no
Column: trans_no
Column: upc
Column: description
Column: trans_type
Column: trans_subtype
Column: trans_status
Column: department
Column: quantity
Column: Scale
Column: cost
Column: unitPrice
Column: total
Column: regPrice
Column: altPrice
Column: tax
Column: taxexempt
Column: foodstamp
Column: wicable
Column: discount
Column: memDiscount
Column: discountable
Column: discounttype
Column: voided
Column: percentDiscount
Column: ItemQtty
Column: volDiscType
Column: volume
Column: VolSpecial
Column: mixMatch
Column: matched
Column: memType
Column: staff
Column: numflag
Column: itemstatus
Column: tenderstatus
Column: charflag
Column: varflag
Column: batchHeaderID
Column: local
Column: organic
Column: display
Column: receipt
Column: card_no
Column: store
Column: branch
Column: match_id
Column: trans_id
Finished cleaning file number: 9
File Name: transArchive_201110_201112.csv

100%|██████████| 1/1 [00:00<?, ?it/s]


Cleaning file number: 10
File Name: transArchive_201201_201203.csv
Column: datetime
Column: register_no
Column: emp_no
Column: trans_no
Column: upc
Column: description
Column: trans_type
Column: trans_subtype
Column: trans_status
Column: department
Column: quantity
Column: Scale
Column: cost
Column: unitPrice
Column: total
Column: regPrice
Column: altPrice
Column: tax
Column: taxexempt
Column: foodstamp
Column: wicable
Column: discount
Column: memDiscount
Column: discountable
Column: discounttype
Column: voided
Column: percentDiscount
Column: ItemQtty
Column: volDiscType
Column: volume
Column: VolSpecial
Column: mixMatch
Column: matched
Column: memType
Column: staff
Column: numflag
Column: itemstatus
Column: tenderstatus
Column: charflag
Column: varflag
Column: batchHeaderID
Column: local
Column: organic
Column: display
Column: receipt
Column: card_no
Column: store
Column: branch
Column: match_id
Column: trans_id
Finished cleaning file number: 10
File Name: transArchive_201201_201203.c

100%|██████████| 1/1 [00:00<?, ?it/s]


Cleaning file number: 11
File Name: transArchive_201201_201203_inactive.csv
Column: datetime
Column: register_no
Column: emp_no
Column: trans_no
Column: upc
Column: description
Column: trans_type
Column: trans_subtype
Column: trans_status
Column: department
Column: quantity
Column: Scale
Column: cost
Column: unitPrice
Column: total
Column: regPrice
Column: altPrice
Column: tax
Column: taxexempt
Column: foodstamp
Column: wicable
Column: discount
Column: memDiscount
Column: discountable
Column: discounttype
Column: voided
Column: percentDiscount
Column: ItemQtty
Column: volDiscType
Column: volume
Column: VolSpecial
Column: mixMatch
Column: matched
Column: memType
Column: staff
Column: numflag
Column: itemstatus
Column: tenderstatus
Column: charflag
Column: varflag
Column: batchHeaderID
Column: local
Column: organic
Column: display
Column: receipt
Column: card_no
Column: store
Column: branch
Column: match_id
Column: trans_id
Finished cleaning file number: 11
File Name: transArchive_201201

100%|██████████| 1/1 [00:00<?, ?it/s]


Cleaning file number: 12
File Name: transArchive_201204_201206.csv
Column: datetime
Column: register_no
Column: emp_no
Column: trans_no
Column: upc
Column: description
Column: trans_type
Column: trans_subtype
Column: trans_status
Column: department
Column: quantity
Column: Scale
Column: cost
Column: unitPrice
Column: total
Column: regPrice
Column: altPrice
Column: tax
Column: taxexempt
Column: foodstamp
Column: wicable
Column: discount
Column: memDiscount
Column: discountable
Column: discounttype
Column: voided
Column: percentDiscount
Column: ItemQtty
Column: volDiscType
Column: volume
Column: VolSpecial
Column: mixMatch
Column: matched
Column: memType
Column: staff
Column: numflag
Column: itemstatus
Column: tenderstatus
Column: charflag
Column: varflag
Column: batchHeaderID
Column: local
Column: organic
Column: display
Column: receipt
Column: card_no
Column: store
Column: branch
Column: match_id
Column: trans_id
Finished cleaning file number: 12
File Name: transArchive_201204_201206.c

GenericGBQException: Reason: 403 Quota exceeded: Your project exceeded quota for free storage for projects. For more information, see https://cloud.google.com/bigquery/docs/troubleshoot-quotas; reason: quotaExceeded, location: free.storage, message: Quota exceeded: Your project exceeded quota for free storage for projects. For more information, see https://cloud.google.com/bigquery/docs/troubleshoot-quotas

In [19]:
# Resume the clean-and-upload run at file index 12 (the previous run stopped
# there with a BigQuery "Quota exceeded" error).

# Upload target; identical for every file, so defined once.
project_id = 'wedge-project-jbangtson'

# List the directory once instead of re-scanning it for every message.
# Entry i is taken to be the file behind lazy_df_list[i]; this assumes the
# directory has not changed since the LazyFrames were created.
file_names = os.listdir(data_directory)

for idx, lazy_df in enumerate(lazy_df_list[12:], start=12):
    file_name = file_names[idx]
    print(f"Cleaning file number: {idx}\nFile Name: {file_name}")

    # Materialize the file, force the canonical header, then hand it to pandas.
    df = lazy_df.collect()
    df.columns = clean_columns
    clean_panda_df = df.to_pandas()

    clean_panda_df = wedge_cleaner(clean_panda_df)

    print(f"Finished cleaning file number: {idx}\nFile Name: {file_name}!\n\n------------------------------------")

    print(f"Uploading file number: {idx}\nFile Name: {file_name} to GBQ")

    # Table name is the file name up to its first "."
    table_name = str(file_name).split(".")[0]
    destination_table = f'the_wedge_dataset.{table_name}'

    # if_exists='replace' overwrites a table left over from an earlier run.
    to_gbq(clean_panda_df, destination_table, project_id=project_id, if_exists='replace')
   

Cleaning file number: 12
File Name: transArchive_201204_201206.csv
Column: datetime
Column: register_no
Column: emp_no
Column: trans_no
Column: upc
Column: description
Column: trans_type
Column: trans_subtype
Column: trans_status
Column: department
Column: quantity
Column: Scale
Column: cost
Column: unitPrice
Column: total
Column: regPrice
Column: altPrice
Column: tax
Column: taxexempt
Column: foodstamp
Column: wicable
Column: discount
Column: memDiscount
Column: discountable
Column: discounttype
Column: voided
Column: percentDiscount
Column: ItemQtty
Column: volDiscType
Column: volume
Column: VolSpecial
Column: mixMatch
Column: matched
Column: memType
Column: staff
Column: numflag
Column: itemstatus
Column: tenderstatus
Column: charflag
Column: varflag
Column: batchHeaderID
Column: local
Column: organic
Column: display
Column: receipt
Column: card_no
Column: store
Column: branch
Column: match_id
Column: trans_id
Finished cleaning file number: 12
File Name: transArchive_201204_201206.c

100%|██████████| 1/1 [00:00<?, ?it/s]


Cleaning file number: 13
File Name: transArchive_201204_201206_inactive.csv
Column: datetime
Column: register_no
Column: emp_no
Column: trans_no
Column: upc
Column: description
Column: trans_type
Column: trans_subtype
Column: trans_status
Column: department
Column: quantity
Column: Scale
Column: cost
Column: unitPrice
Column: total
Column: regPrice
Column: altPrice
Column: tax
Column: taxexempt
Column: foodstamp
Column: wicable
Column: discount
Column: memDiscount
Column: discountable
Column: discounttype
Column: voided
Column: percentDiscount
Column: ItemQtty
Column: volDiscType
Column: volume
Column: VolSpecial
Column: mixMatch
Column: matched
Column: memType
Column: staff
Column: numflag
Column: itemstatus
Column: tenderstatus
Column: charflag
Column: varflag
Column: batchHeaderID
Column: local
Column: organic
Column: display
Column: receipt
Column: card_no
Column: store
Column: branch
Column: match_id
Column: trans_id
Finished cleaning file number: 13
File Name: transArchive_201204

100%|██████████| 1/1 [00:00<?, ?it/s]


Cleaning file number: 14
File Name: transArchive_201207_201209.csv
Column: datetime
Column: register_no
Column: emp_no
Column: trans_no
Column: upc
Column: description
Column: trans_type
Column: trans_subtype
Column: trans_status
Column: department
Column: quantity
Column: Scale
Column: cost
Column: unitPrice
Column: total
Column: regPrice
Column: altPrice
Column: tax
Column: taxexempt
Column: foodstamp
Column: wicable
Column: discount
Column: memDiscount
Column: discountable
Column: discounttype
Column: voided
Column: percentDiscount
Column: ItemQtty
Column: volDiscType
Column: volume
Column: VolSpecial
Column: mixMatch
Column: matched
Column: memType
Column: staff
Column: numflag
Column: itemstatus
Column: tenderstatus
Column: charflag
Column: varflag
Column: batchHeaderID
Column: local
Column: organic
Column: display
Column: receipt
Column: card_no
Column: store
Column: branch
Column: match_id
Column: trans_id
Finished cleaning file number: 14
File Name: transArchive_201207_201209.c

100%|██████████| 1/1 [00:00<?, ?it/s]


Cleaning file number: 15
File Name: transArchive_201207_201209_inactive.csv
Column: datetime
Column: register_no
Column: emp_no
Column: trans_no
Column: upc
Column: description
Column: trans_type
Column: trans_subtype
Column: trans_status
Column: department
Column: quantity
Column: Scale
Column: cost
Column: unitPrice
Column: total
Column: regPrice
Column: altPrice
Column: tax
Column: taxexempt
Column: foodstamp
Column: wicable
Column: discount
Column: memDiscount
Column: discountable
Column: discounttype
Column: voided
Column: percentDiscount
Column: ItemQtty
Column: volDiscType
Column: volume
Column: VolSpecial
Column: mixMatch
Column: matched
Column: memType
Column: staff
Column: numflag
Column: itemstatus
Column: tenderstatus
Column: charflag
Column: varflag
Column: batchHeaderID
Column: local
Column: organic
Column: display
Column: receipt
Column: card_no
Column: store
Column: branch
Column: match_id
Column: trans_id
Finished cleaning file number: 15
File Name: transArchive_201207

100%|██████████| 1/1 [00:00<?, ?it/s]


Cleaning file number: 16
File Name: transArchive_201210_201212.csv
Column: datetime
Column: register_no
Column: emp_no
Column: trans_no
Column: upc
Column: description
Column: trans_type
Column: trans_subtype
Column: trans_status
Column: department
Column: quantity
Column: Scale
Column: cost
Column: unitPrice
Column: total
Column: regPrice
Column: altPrice
Column: tax
Column: taxexempt
Column: foodstamp
Column: wicable
Column: discount
Column: memDiscount
Column: discountable
Column: discounttype
Column: voided
Column: percentDiscount
Column: ItemQtty
Column: volDiscType
Column: volume
Column: VolSpecial
Column: mixMatch
Column: matched
Column: memType
Column: staff
Column: numflag
Column: itemstatus
Column: tenderstatus
Column: charflag
Column: varflag
Column: batchHeaderID
Column: local
Column: organic
Column: display
Column: receipt
Column: card_no
Column: store
Column: branch
Column: match_id
Column: trans_id
Finished cleaning file number: 16
File Name: transArchive_201210_201212.c

100%|██████████| 1/1 [00:00<?, ?it/s]


Cleaning file number: 17
File Name: transArchive_201210_201212_inactive.csv
Column: datetime
Column: register_no
Column: emp_no
Column: trans_no
Column: upc
Column: description
Column: trans_type
Column: trans_subtype
Column: trans_status
Column: department
Column: quantity
Column: Scale
Column: cost
Column: unitPrice
Column: total
Column: regPrice
Column: altPrice
Column: tax
Column: taxexempt
Column: foodstamp
Column: wicable
Column: discount
Column: memDiscount
Column: discountable
Column: discounttype
Column: voided
Column: percentDiscount
Column: ItemQtty
Column: volDiscType
Column: volume
Column: VolSpecial
Column: mixMatch
Column: matched
Column: memType
Column: staff
Column: numflag
Column: itemstatus
Column: tenderstatus
Column: charflag
Column: varflag
Column: batchHeaderID
Column: local
Column: organic
Column: display
Column: receipt
Column: card_no
Column: store
Column: branch
Column: match_id
Column: trans_id
Finished cleaning file number: 17
File Name: transArchive_201210

100%|██████████| 1/1 [00:00<?, ?it/s]


Cleaning file number: 18
File Name: transArchive_201301_201303.csv
Column: datetime
Column: register_no
Column: emp_no
Column: trans_no
Column: upc
Column: description
Column: trans_type
Column: trans_subtype
Column: trans_status
Column: department
Column: quantity
Column: Scale
Column: cost
Column: unitPrice
Column: total
Column: regPrice
Column: altPrice
Column: tax
Column: taxexempt
Column: foodstamp
Column: wicable
Column: discount
Column: memDiscount
Column: discountable
Column: discounttype
Column: voided
Column: percentDiscount
Column: ItemQtty
Column: volDiscType
Column: volume
Column: VolSpecial
Column: mixMatch
Column: matched
Column: memType
Column: staff
Column: numflag
Column: itemstatus
Column: tenderstatus
Column: charflag
Column: varflag
Column: batchHeaderID
Column: local
Column: organic
Column: display
Column: receipt
Column: card_no
Column: store
Column: branch
Column: match_id
Column: trans_id
Finished cleaning file number: 18
File Name: transArchive_201301_201303.c

100%|██████████| 1/1 [00:00<?, ?it/s]


Cleaning file number: 19
File Name: transArchive_201301_201303_inactive.csv
Column: datetime
Column: register_no
Column: emp_no
Column: trans_no
Column: upc
Column: description
Column: trans_type
Column: trans_subtype
Column: trans_status
Column: department
Column: quantity
Column: Scale
Column: cost
Column: unitPrice
Column: total
Column: regPrice
Column: altPrice
Column: tax
Column: taxexempt
Column: foodstamp
Column: wicable
Column: discount
Column: memDiscount
Column: discountable
Column: discounttype
Column: voided
Column: percentDiscount
Column: ItemQtty
Column: volDiscType
Column: volume
Column: VolSpecial
Column: mixMatch
Column: matched
Column: memType
Column: staff
Column: numflag
Column: itemstatus
Column: tenderstatus
Column: charflag
Column: varflag
Column: batchHeaderID
Column: local
Column: organic
Column: display
Column: receipt
Column: card_no
Column: store
Column: branch
Column: match_id
Column: trans_id
Finished cleaning file number: 19
File Name: transArchive_201301

100%|██████████| 1/1 [00:00<?, ?it/s]


Cleaning file number: 20
File Name: transArchive_201304_201306.csv
Column: datetime
Column: register_no
Column: emp_no
Column: trans_no
Column: upc
Column: description
Column: trans_type
Column: trans_subtype
Column: trans_status
Column: department
Column: quantity
Column: Scale
Column: cost
Column: unitPrice
Column: total
Column: regPrice
Column: altPrice
Column: tax
Column: taxexempt
Column: foodstamp
Column: wicable
Column: discount
Column: memDiscount
Column: discountable
Column: discounttype
Column: voided
Column: percentDiscount
Column: ItemQtty
Column: volDiscType
Column: volume
Column: VolSpecial
Column: mixMatch
Column: matched
Column: memType
Column: staff
Column: numflag
Column: itemstatus
Column: tenderstatus
Column: charflag
Column: varflag
Column: batchHeaderID
Column: local
Column: organic
Column: display
Column: receipt
Column: card_no
Column: store
Column: branch
Column: match_id
Column: trans_id
Finished cleaning file number: 20
File Name: transArchive_201304_201306.c

100%|██████████| 1/1 [00:00<?, ?it/s]


Cleaning file number: 21
File Name: transArchive_201304_201306_inactive.csv
Column: datetime
Column: register_no
Column: emp_no
Column: trans_no
Column: upc
Column: description
Column: trans_type
Column: trans_subtype
Column: trans_status
Column: department
Column: quantity
Column: Scale
Column: cost
Column: unitPrice
Column: total
Column: regPrice
Column: altPrice
Column: tax
Column: taxexempt
Column: foodstamp
Column: wicable
Column: discount
Column: memDiscount
Column: discountable
Column: discounttype
Column: voided
Column: percentDiscount
Column: ItemQtty
Column: volDiscType
Column: volume
Column: VolSpecial
Column: mixMatch
Column: matched
Column: memType
Column: staff
Column: numflag
Column: itemstatus
Column: tenderstatus
Column: charflag
Column: varflag
Column: batchHeaderID
Column: local
Column: organic
Column: display
Column: receipt
Column: card_no
Column: store
Column: branch
Column: match_id
Column: trans_id
Finished cleaning file number: 21
File Name: transArchive_201304

100%|██████████| 1/1 [00:00<?, ?it/s]


Cleaning file number: 22
File Name: transArchive_201307_201309.csv
Column: datetime
Column: register_no
Column: emp_no
Column: trans_no
Column: upc
Column: description
Column: trans_type
Column: trans_subtype
Column: trans_status
Column: department
Column: quantity
Column: Scale
Column: cost
Column: unitPrice
Column: total
Column: regPrice
Column: altPrice
Column: tax
Column: taxexempt
Column: foodstamp
Column: wicable
Column: discount
Column: memDiscount
Column: discountable
Column: discounttype
Column: voided
Column: percentDiscount
Column: ItemQtty
Column: volDiscType
Column: volume
Column: VolSpecial
Column: mixMatch
Column: matched
Column: memType
Column: staff
Column: numflag
Column: itemstatus
Column: tenderstatus
Column: charflag
Column: varflag
Column: batchHeaderID
Column: local
Column: organic
Column: display
Column: receipt
Column: card_no
Column: store
Column: branch
Column: match_id
Column: trans_id
Finished cleaning file number: 22
File Name: transArchive_201307_201309.c

100%|██████████| 1/1 [00:00<?, ?it/s]


Cleaning file number: 23
File Name: transArchive_201307_201309_inactive.csv
Column: datetime
Column: register_no
Column: emp_no
Column: trans_no
Column: upc
Column: description
Column: trans_type
Column: trans_subtype
Column: trans_status
Column: department
Column: quantity
Column: Scale
Column: cost
Column: unitPrice
Column: total
Column: regPrice
Column: altPrice
Column: tax
Column: taxexempt
Column: foodstamp
Column: wicable
Column: discount
Column: memDiscount
Column: discountable
Column: discounttype
Column: voided
Column: percentDiscount
Column: ItemQtty
Column: volDiscType
Column: volume
Column: VolSpecial
Column: mixMatch
Column: matched
Column: memType
Column: staff
Column: numflag
Column: itemstatus
Column: tenderstatus
Column: charflag
Column: varflag
Column: batchHeaderID
Column: local
Column: organic
Column: display
Column: receipt
Column: card_no
Column: store
Column: branch
Column: match_id
Column: trans_id
Finished cleaning file number: 23
File Name: transArchive_201307

100%|██████████| 1/1 [00:00<?, ?it/s]


Cleaning file number: 24
File Name: transArchive_201310_201312.csv
Column: datetime
Column: register_no
Column: emp_no
Column: trans_no
Column: upc
Column: description
Column: trans_type
Column: trans_subtype
Column: trans_status
Column: department
Column: quantity
Column: Scale
Column: cost
Column: unitPrice
Column: total
Column: regPrice
Column: altPrice
Column: tax
Column: taxexempt
Column: foodstamp
Column: wicable
Column: discount
Column: memDiscount
Column: discountable
Column: discounttype
Column: voided
Column: percentDiscount
Column: ItemQtty
Column: volDiscType
Column: volume
Column: VolSpecial
Column: mixMatch
Column: matched
Column: memType
Column: staff
Column: numflag
Column: itemstatus
Column: tenderstatus
Column: charflag
Column: varflag
Column: batchHeaderID
Column: local
Column: organic
Column: display
Column: receipt
Column: card_no
Column: store
Column: branch
Column: match_id
Column: trans_id
Finished cleaning file number: 24
File Name: transArchive_201310_201312.c

100%|██████████| 1/1 [00:00<?, ?it/s]


Cleaning file number: 25
File Name: transArchive_201310_201312_inactive.csv
Column: datetime
Column: register_no
Column: emp_no
Column: trans_no
Column: upc
Column: description
Column: trans_type
Column: trans_subtype
Column: trans_status
Column: department
Column: quantity
Column: Scale
Column: cost
Column: unitPrice
Column: total
Column: regPrice
Column: altPrice
Column: tax
Column: taxexempt
Column: foodstamp
Column: wicable
Column: discount
Column: memDiscount
Column: discountable
Column: discounttype
Column: voided
Column: percentDiscount
Column: ItemQtty
Column: volDiscType
Column: volume
Column: VolSpecial
Column: mixMatch
Column: matched
Column: memType
Column: staff
Column: numflag
Column: itemstatus
Column: tenderstatus
Column: charflag
Column: varflag
Column: batchHeaderID
Column: local
Column: organic
Column: display
Column: receipt
Column: card_no
Column: store
Column: branch
Column: match_id
Column: trans_id
Finished cleaning file number: 25
File Name: transArchive_201310

100%|██████████| 1/1 [00:00<?, ?it/s]


Cleaning file number: 26
File Name: transArchive_201401_201403.csv
Column: datetime
Column: register_no
Column: emp_no
Column: trans_no
Column: upc
Column: description
Column: trans_type
Column: trans_subtype
Column: trans_status
Column: department
Column: quantity
Column: Scale
Column: cost
Column: unitPrice
Column: total
Column: regPrice
Column: altPrice
Column: tax
Column: taxexempt
Column: foodstamp
Column: wicable
Column: discount
Column: memDiscount
Column: discountable
Column: discounttype
Column: voided
Column: percentDiscount
Column: ItemQtty
Column: volDiscType
Column: volume
Column: VolSpecial
Column: mixMatch
Column: matched
Column: memType
Column: staff
Column: numflag
Column: itemstatus
Column: tenderstatus
Column: charflag
Column: varflag
Column: batchHeaderID
Column: local
Column: organic
Column: display
Column: receipt
Column: card_no
Column: store
Column: branch
Column: match_id
Column: trans_id
Finished cleaning file number: 26
File Name: transArchive_201401_201403.c

100%|██████████| 1/1 [00:00<?, ?it/s]


Cleaning file number: 27
File Name: transArchive_201401_201403_inactive.csv
Column: datetime
Column: register_no
Column: emp_no
Column: trans_no
Column: upc
Column: description
Column: trans_type
Column: trans_subtype
Column: trans_status
Column: department
Column: quantity
Column: Scale
Column: cost
Column: unitPrice
Column: total
Column: regPrice
Column: altPrice
Column: tax
Column: taxexempt
Column: foodstamp
Column: wicable
Column: discount
Column: memDiscount
Column: discountable
Column: discounttype
Column: voided
Column: percentDiscount
Column: ItemQtty
Column: volDiscType
Column: volume
Column: VolSpecial
Column: mixMatch
Column: matched
Column: memType
Column: staff
Column: numflag
Column: itemstatus
Column: tenderstatus
Column: charflag
Column: varflag
Column: batchHeaderID
Column: local
Column: organic
Column: display
Column: receipt
Column: card_no
Column: store
Column: branch
Column: match_id
Column: trans_id
Finished cleaning file number: 27
File Name: transArchive_201401

100%|██████████| 1/1 [00:00<?, ?it/s]


Cleaning file number: 28
File Name: transArchive_201404_201406.csv
Column: datetime
Column: register_no
Column: emp_no
Column: trans_no
Column: upc
Column: description
Column: trans_type
Column: trans_subtype
Column: trans_status
Column: department
Column: quantity
Column: Scale
Column: cost
Column: unitPrice
Column: total
Column: regPrice
Column: altPrice
Column: tax
Column: taxexempt
Column: foodstamp
Column: wicable
Column: discount
Column: memDiscount
Column: discountable
Column: discounttype
Column: voided
Column: percentDiscount
Column: ItemQtty
Column: volDiscType
Column: volume
Column: VolSpecial
Column: mixMatch
Column: matched
Column: memType
Column: staff
Column: numflag
Column: itemstatus
Column: tenderstatus
Column: charflag
Column: varflag
Column: batchHeaderID
Column: local
Column: organic
Column: display
Column: receipt
Column: card_no
Column: store
Column: branch
Column: match_id
Column: trans_id
Finished cleaning file number: 28
File Name: transArchive_201404_201406.c

100%|██████████| 1/1 [00:00<?, ?it/s]


Cleaning file number: 29
File Name: transArchive_201404_201406_inactive.csv
Column: datetime
Column: register_no
Column: emp_no
Column: trans_no
Column: upc
Column: description
Column: trans_type
Column: trans_subtype
Column: trans_status
Column: department
Column: quantity
Column: Scale
Column: cost
Column: unitPrice
Column: total
Column: regPrice
Column: altPrice
Column: tax
Column: taxexempt
Column: foodstamp
Column: wicable
Column: discount
Column: memDiscount
Column: discountable
Column: discounttype
Column: voided
Column: percentDiscount
Column: ItemQtty
Column: volDiscType
Column: volume
Column: VolSpecial
Column: mixMatch
Column: matched
Column: memType
Column: staff
Column: numflag
Column: itemstatus
Column: tenderstatus
Column: charflag
Column: varflag
Column: batchHeaderID
Column: local
Column: organic
Column: display
Column: receipt
Column: card_no
Column: store
Column: branch
Column: match_id
Column: trans_id
Finished cleaning file number: 29
File Name: transArchive_201404

100%|██████████| 1/1 [00:00<?, ?it/s]


Cleaning file number: 30
File Name: transArchive_201407_201409.csv
Column: datetime
Column: register_no
Column: emp_no
Column: trans_no
Column: upc
Column: description
Column: trans_type
Column: trans_subtype
Column: trans_status
Column: department
Column: quantity
Column: Scale
Column: cost
Column: unitPrice
Column: total
Column: regPrice
Column: altPrice
Column: tax
Column: taxexempt
Column: foodstamp
Column: wicable
Column: discount
Column: memDiscount
Column: discountable
Column: discounttype
Column: voided
Column: percentDiscount
Column: ItemQtty
Column: volDiscType
Column: volume
Column: VolSpecial
Column: mixMatch
Column: matched
Column: memType
Column: staff
Column: numflag
Column: itemstatus
Column: tenderstatus
Column: charflag
Column: varflag
Column: batchHeaderID
Column: local
Column: organic
Column: display
Column: receipt
Column: card_no
Column: store
Column: branch
Column: match_id
Column: trans_id
Finished cleaning file number: 30
File Name: transArchive_201407_201409.c

100%|██████████| 1/1 [00:00<?, ?it/s]


Cleaning file number: 31
File Name: transArchive_201407_201409_inactive.csv
Column: datetime
Column: register_no
Column: emp_no
Column: trans_no
Column: upc
Column: description
Column: trans_type
Column: trans_subtype
Column: trans_status
Column: department
Column: quantity
Column: Scale
Column: cost
Column: unitPrice
Column: total
Column: regPrice
Column: altPrice
Column: tax
Column: taxexempt
Column: foodstamp
Column: wicable
Column: discount
Column: memDiscount
Column: discountable
Column: discounttype
Column: voided
Column: percentDiscount
Column: ItemQtty
Column: volDiscType
Column: volume
Column: VolSpecial
Column: mixMatch
Column: matched
Column: memType
Column: staff
Column: numflag
Column: itemstatus
Column: tenderstatus
Column: charflag
Column: varflag
Column: batchHeaderID
Column: local
Column: organic
Column: display
Column: receipt
Column: card_no
Column: store
Column: branch
Column: match_id
Column: trans_id
Finished cleaning file number: 31
File Name: transArchive_201407

100%|██████████| 1/1 [00:00<?, ?it/s]


Cleaning file number: 32
File Name: transArchive_201410_201412.csv
Column: datetime
Column: register_no
Column: emp_no
Column: trans_no
Column: upc
Column: description
Column: trans_type
Column: trans_subtype
Column: trans_status
Column: department
Column: quantity
Column: Scale
Column: cost
Column: unitPrice
Column: total
Column: regPrice
Column: altPrice
Column: tax
Column: taxexempt
Column: foodstamp
Column: wicable
Column: discount
Column: memDiscount
Column: discountable
Column: discounttype
Column: voided
Column: percentDiscount
Column: ItemQtty
Column: volDiscType
Column: volume
Column: VolSpecial
Column: mixMatch
Column: matched
Column: memType
Column: staff
Column: numflag
Column: itemstatus
Column: tenderstatus
Column: charflag
Column: varflag
Column: batchHeaderID
Column: local
Column: organic
Column: display
Column: receipt
Column: card_no
Column: store
Column: branch
Column: match_id
Column: trans_id
Finished cleaning file number: 32
File Name: transArchive_201410_201412.c

100%|██████████| 1/1 [00:00<?, ?it/s]


Cleaning file number: 33
File Name: transArchive_201410_201412_inactive.csv
Column: datetime
Column: register_no
Column: emp_no
Column: trans_no
Column: upc
Column: description
Column: trans_type
Column: trans_subtype
Column: trans_status
Column: department
Column: quantity
Column: Scale
Column: cost
Column: unitPrice
Column: total
Column: regPrice
Column: altPrice
Column: tax
Column: taxexempt
Column: foodstamp
Column: wicable
Column: discount
Column: memDiscount
Column: discountable
Column: discounttype
Column: voided
Column: percentDiscount
Column: ItemQtty
Column: volDiscType
Column: volume
Column: VolSpecial
Column: mixMatch
Column: matched
Column: memType
Column: staff
Column: numflag
Column: itemstatus
Column: tenderstatus
Column: charflag
Column: varflag
Column: batchHeaderID
Column: local
Column: organic
Column: display
Column: receipt
Column: card_no
Column: store
Column: branch
Column: match_id
Column: trans_id
Finished cleaning file number: 33
File Name: transArchive_201410

100%|██████████| 1/1 [00:00<?, ?it/s]


Cleaning file number: 34
File Name: transArchive_201501_201503.csv
Column: datetime
Column: register_no
Column: emp_no
Column: trans_no
Column: upc
Column: description
Column: trans_type
Column: trans_subtype
Column: trans_status
Column: department
Column: quantity
Column: Scale
Column: cost
Column: unitPrice
Column: total
Column: regPrice
Column: altPrice
Column: tax
Column: taxexempt
Column: foodstamp
Column: wicable
Column: discount
Column: memDiscount
Column: discountable
Column: discounttype
Column: voided
Column: percentDiscount
Column: ItemQtty
Column: volDiscType
Column: volume
Column: VolSpecial
Column: mixMatch
Column: matched
Column: memType
Column: staff
Column: numflag
Column: itemstatus
Column: tenderstatus
Column: charflag
Column: varflag
Column: batchHeaderID
Column: local
Column: organic
Column: display
Column: receipt
Column: card_no
Column: store
Column: branch
Column: match_id
Column: trans_id
Finished cleaning file number: 34
File Name: transArchive_201501_201503.c

100%|██████████| 1/1 [00:00<00:00, 977.92it/s]


Cleaning file number: 35
File Name: transArchive_201504_201506.csv
Column: datetime
Column: register_no
Column: emp_no
Column: trans_no
Column: upc
Column: description
Column: trans_type
Column: trans_subtype
Column: trans_status
Column: department
Column: quantity
Column: Scale
Column: cost
Column: unitPrice
Column: total
Column: regPrice
Column: altPrice
Column: tax
Column: taxexempt
Column: foodstamp
Column: wicable
Column: discount
Column: memDiscount
Column: discountable
Column: discounttype
Column: voided
Column: percentDiscount
Column: ItemQtty
Column: volDiscType
Column: volume
Column: VolSpecial
Column: mixMatch
Column: matched
Column: memType
Column: staff
Column: numflag
Column: itemstatus
Column: tenderstatus
Column: charflag
Column: varflag
Column: batchHeaderID
Column: local
Column: organic
Column: display
Column: receipt
Column: card_no
Column: store
Column: branch
Column: match_id
Column: trans_id
Finished cleaning file number: 35
File Name: transArchive_201504_201506.c

100%|██████████| 1/1 [00:00<?, ?it/s]


Cleaning file number: 36
File Name: transArchive_201507_201509.csv
Column: datetime
Column: register_no
Column: emp_no
Column: trans_no
Column: upc
Column: description
Column: trans_type
Column: trans_subtype
Column: trans_status
Column: department
Column: quantity
Column: Scale
Column: cost
Column: unitPrice
Column: total
Column: regPrice
Column: altPrice
Column: tax
Column: taxexempt
Column: foodstamp
Column: wicable
Column: discount
Column: memDiscount
Column: discountable
Column: discounttype
Column: voided
Column: percentDiscount
Column: ItemQtty
Column: volDiscType
Column: volume
Column: VolSpecial
Column: mixMatch
Column: matched
Column: memType
Column: staff
Column: numflag
Column: itemstatus
Column: tenderstatus
Column: charflag
Column: varflag
Column: batchHeaderID
Column: local
Column: organic
Column: display
Column: receipt
Column: card_no
Column: store
Column: branch
Column: match_id
Column: trans_id
Finished cleaning file number: 36
File Name: transArchive_201507_201509.c

100%|██████████| 1/1 [00:00<?, ?it/s]


Cleaning file number: 37
File Name: transArchive_201510.csv
Column: datetime
Column: register_no
Column: emp_no
Column: trans_no
Column: upc
Column: description
Column: trans_type
Column: trans_subtype
Column: trans_status
Column: department
Column: quantity
Column: Scale
Column: cost
Column: unitPrice
Column: total
Column: regPrice
Column: altPrice
Column: tax
Column: taxexempt
Column: foodstamp
Column: wicable
Column: discount
Column: memDiscount
Column: discountable
Column: discounttype
Column: voided
Column: percentDiscount
Column: ItemQtty
Column: volDiscType
Column: volume
Column: VolSpecial
Column: mixMatch
Column: matched
Column: memType
Column: staff
Column: numflag
Column: itemstatus
Column: tenderstatus
Column: charflag
Column: varflag
Column: batchHeaderID
Column: local
Column: organic
Column: display
Column: receipt
Column: card_no
Column: store
Column: branch
Column: match_id
Column: trans_id
Finished cleaning file number: 37
File Name: transArchive_201510.csv!

---------

100%|██████████| 1/1 [00:00<?, ?it/s]


Cleaning file number: 38
File Name: transArchive_201511.csv
Column: datetime
Column: register_no
Column: emp_no
Column: trans_no
Column: upc
Column: description
Column: trans_type
Column: trans_subtype
Column: trans_status
Column: department
Column: quantity
Column: Scale
Column: cost
Column: unitPrice
Column: total
Column: regPrice
Column: altPrice
Column: tax
Column: taxexempt
Column: foodstamp
Column: wicable
Column: discount
Column: memDiscount
Column: discountable
Column: discounttype
Column: voided
Column: percentDiscount
Column: ItemQtty
Column: volDiscType
Column: volume
Column: VolSpecial
Column: mixMatch
Column: matched
Column: memType
Column: staff
Column: numflag
Column: itemstatus
Column: tenderstatus
Column: charflag
Column: varflag
Column: batchHeaderID
Column: local
Column: organic
Column: display
Column: receipt
Column: card_no
Column: store
Column: branch
Column: match_id
Column: trans_id
Finished cleaning file number: 38
File Name: transArchive_201511.csv!

---------

100%|██████████| 1/1 [00:00<?, ?it/s]


Cleaning file number: 39
File Name: transArchive_201512.csv
Column: datetime
Column: register_no
Column: emp_no
Column: trans_no
Column: upc
Column: description
Column: trans_type
Column: trans_subtype
Column: trans_status
Column: department
Column: quantity
Column: Scale
Column: cost
Column: unitPrice
Column: total
Column: regPrice
Column: altPrice
Column: tax
Column: taxexempt
Column: foodstamp
Column: wicable
Column: discount
Column: memDiscount
Column: discountable
Column: discounttype
Column: voided
Column: percentDiscount
Column: ItemQtty
Column: volDiscType
Column: volume
Column: VolSpecial
Column: mixMatch
Column: matched
Column: memType
Column: staff
Column: numflag
Column: itemstatus
Column: tenderstatus
Column: charflag
Column: varflag
Column: batchHeaderID
Column: local
Column: organic
Column: display
Column: receipt
Column: card_no
Column: store
Column: branch
Column: match_id
Column: trans_id
Finished cleaning file number: 39
File Name: transArchive_201512.csv!

---------

100%|██████████| 1/1 [00:00<?, ?it/s]


Cleaning file number: 40
File Name: transArchive_201601.csv
Column: datetime
Column: register_no
Column: emp_no
Column: trans_no
Column: upc
Column: description
Column: trans_type
Column: trans_subtype
Column: trans_status
Column: department
Column: quantity
Column: Scale
Column: cost
Column: unitPrice
Column: total
Column: regPrice
Column: altPrice
Column: tax
Column: taxexempt
Column: foodstamp
Column: wicable
Column: discount
Column: memDiscount
Column: discountable
Column: discounttype
Column: voided
Column: percentDiscount
Column: ItemQtty
Column: volDiscType
Column: volume
Column: VolSpecial
Column: mixMatch
Column: matched
Column: memType
Column: staff
Column: numflag
Column: itemstatus
Column: tenderstatus
Column: charflag
Column: varflag
Column: batchHeaderID
Column: local
Column: organic
Column: display
Column: receipt
Column: card_no
Column: store
Column: branch
Column: match_id
Column: trans_id
Finished cleaning file number: 40
File Name: transArchive_201601.csv!

---------

100%|██████████| 1/1 [00:00<?, ?it/s]


Cleaning file number: 41
File Name: transArchive_201602.csv
Column: datetime
Column: register_no
Column: emp_no
Column: trans_no
Column: upc
Column: description
Column: trans_type
Column: trans_subtype
Column: trans_status
Column: department
Column: quantity
Column: Scale
Column: cost
Column: unitPrice
Column: total
Column: regPrice
Column: altPrice
Column: tax
Column: taxexempt
Column: foodstamp
Column: wicable
Column: discount
Column: memDiscount
Column: discountable
Column: discounttype
Column: voided
Column: percentDiscount
Column: ItemQtty
Column: volDiscType
Column: volume
Column: VolSpecial
Column: mixMatch
Column: matched
Column: memType
Column: staff
Column: numflag
Column: itemstatus
Column: tenderstatus
Column: charflag
Column: varflag
Column: batchHeaderID
Column: local
Column: organic
Column: display
Column: receipt
Column: card_no
Column: store
Column: branch
Column: match_id
Column: trans_id
Finished cleaning file number: 41
File Name: transArchive_201602.csv!

---------

100%|██████████| 1/1 [00:00<?, ?it/s]


Cleaning file number: 42
File Name: transArchive_201603.csv
Column: datetime
Column: register_no
Column: emp_no
Column: trans_no
Column: upc
Column: description
Column: trans_type
Column: trans_subtype
Column: trans_status
Column: department
Column: quantity
Column: Scale
Column: cost
Column: unitPrice
Column: total
Column: regPrice
Column: altPrice
Column: tax
Column: taxexempt
Column: foodstamp
Column: wicable
Column: discount
Column: memDiscount
Column: discountable
Column: discounttype
Column: voided
Column: percentDiscount
Column: ItemQtty
Column: volDiscType
Column: volume
Column: VolSpecial
Column: mixMatch
Column: matched
Column: memType
Column: staff
Column: numflag
Column: itemstatus
Column: tenderstatus
Column: charflag
Column: varflag
Column: batchHeaderID
Column: local
Column: organic
Column: display
Column: receipt
Column: card_no
Column: store
Column: branch
Column: match_id
Column: trans_id
Finished cleaning file number: 42
File Name: transArchive_201603.csv!

---------

100%|██████████| 1/1 [00:00<?, ?it/s]


Cleaning file number: 43
File Name: transArchive_201604.csv
Column: datetime
Column: register_no
Column: emp_no
Column: trans_no
Column: upc
Column: description
Column: trans_type
Column: trans_subtype
Column: trans_status
Column: department
Column: quantity
Column: Scale
Column: cost
Column: unitPrice
Column: total
Column: regPrice
Column: altPrice
Column: tax
Column: taxexempt
Column: foodstamp
Column: wicable
Column: discount
Column: memDiscount
Column: discountable
Column: discounttype
Column: voided
Column: percentDiscount
Column: ItemQtty
Column: volDiscType
Column: volume
Column: VolSpecial
Column: mixMatch
Column: matched
Column: memType
Column: staff
Column: numflag
Column: itemstatus
Column: tenderstatus
Column: charflag
Column: varflag
Column: batchHeaderID
Column: local
Column: organic
Column: display
Column: receipt
Column: card_no
Column: store
Column: branch
Column: match_id
Column: trans_id
Finished cleaning file number: 43
File Name: transArchive_201604.csv!

---------

100%|██████████| 1/1 [00:00<?, ?it/s]


Cleaning file number: 44
File Name: transArchive_201605.csv
Column: datetime
Column: register_no
Column: emp_no
Column: trans_no
Column: upc
Column: description
Column: trans_type
Column: trans_subtype
Column: trans_status
Column: department
Column: quantity
Column: Scale
Column: cost
Column: unitPrice
Column: total
Column: regPrice
Column: altPrice
Column: tax
Column: taxexempt
Column: foodstamp
Column: wicable
Column: discount
Column: memDiscount
Column: discountable
Column: discounttype
Column: voided
Column: percentDiscount
Column: ItemQtty
Column: volDiscType
Column: volume
Column: VolSpecial
Column: mixMatch
Column: matched
Column: memType
Column: staff
Column: numflag
Column: itemstatus
Column: tenderstatus
Column: charflag
Column: varflag
Column: batchHeaderID
Column: local
Column: organic
Column: display
Column: receipt
Column: card_no
Column: store
Column: branch
Column: match_id
Column: trans_id
Finished cleaning file number: 44
File Name: transArchive_201605.csv!

---------

100%|██████████| 1/1 [00:00<?, ?it/s]


Cleaning file number: 45
File Name: transArchive_201606.csv
Column: datetime
Column: register_no
Column: emp_no
Column: trans_no
Column: upc
Column: description
Column: trans_type
Column: trans_subtype
Column: trans_status
Column: department
Column: quantity
Column: Scale
Column: cost
Column: unitPrice
Column: total
Column: regPrice
Column: altPrice
Column: tax
Column: taxexempt
Column: foodstamp
Column: wicable
Column: discount
Column: memDiscount
Column: discountable
Column: discounttype
Column: voided
Column: percentDiscount
Column: ItemQtty
Column: volDiscType
Column: volume
Column: VolSpecial
Column: mixMatch
Column: matched
Column: memType
Column: staff
Column: numflag
Column: itemstatus
Column: tenderstatus
Column: charflag
Column: varflag
Column: batchHeaderID
Column: local
Column: organic
Column: display
Column: receipt
Column: card_no
Column: store
Column: branch
Column: match_id
Column: trans_id
Finished cleaning file number: 45
File Name: transArchive_201606.csv!

---------

100%|██████████| 1/1 [00:00<?, ?it/s]


Cleaning file number: 46
File Name: transArchive_201607.csv
Column: datetime
Column: register_no
Column: emp_no
Column: trans_no
Column: upc
Column: description
Column: trans_type
Column: trans_subtype
Column: trans_status
Column: department
Column: quantity
Column: Scale
Column: cost
Column: unitPrice
Column: total
Column: regPrice
Column: altPrice
Column: tax
Column: taxexempt
Column: foodstamp
Column: wicable
Column: discount
Column: memDiscount
Column: discountable
Column: discounttype
Column: voided
Column: percentDiscount
Column: ItemQtty
Column: volDiscType
Column: volume
Column: VolSpecial
Column: mixMatch
Column: matched
Column: memType
Column: staff
Column: numflag
Column: itemstatus
Column: tenderstatus
Column: charflag
Column: varflag
Column: batchHeaderID
Column: local
Column: organic
Column: display
Column: receipt
Column: card_no
Column: store
Column: branch
Column: match_id
Column: trans_id
Finished cleaning file number: 46
File Name: transArchive_201607.csv!

---------

100%|██████████| 1/1 [00:00<?, ?it/s]


Cleaning file number: 47
File Name: transArchive_201608.csv
Column: datetime
Column: register_no
Column: emp_no
Column: trans_no
Column: upc
Column: description
Column: trans_type
Column: trans_subtype
Column: trans_status
Column: department
Column: quantity
Column: Scale
Column: cost
Column: unitPrice
Column: total
Column: regPrice
Column: altPrice
Column: tax
Column: taxexempt
Column: foodstamp
Column: wicable
Column: discount
Column: memDiscount
Column: discountable
Column: discounttype
Column: voided
Column: percentDiscount
Column: ItemQtty
Column: volDiscType
Column: volume
Column: VolSpecial
Column: mixMatch
Column: matched
Column: memType
Column: staff
Column: numflag
Column: itemstatus
Column: tenderstatus
Column: charflag
Column: varflag
Column: batchHeaderID
Column: local
Column: organic
Column: display
Column: receipt
Column: card_no
Column: store
Column: branch
Column: match_id
Column: trans_id
Finished cleaning file number: 47
File Name: transArchive_201608.csv!

---------

100%|██████████| 1/1 [00:00<00:00, 1000.55it/s]


Cleaning file number: 48
File Name: transArchive_201609.csv
Column: datetime
Column: register_no
Column: emp_no
Column: trans_no
Column: upc
Column: description
Column: trans_type
Column: trans_subtype
Column: trans_status
Column: department
Column: quantity
Column: Scale
Column: cost
Column: unitPrice
Column: total
Column: regPrice
Column: altPrice
Column: tax
Column: taxexempt
Column: foodstamp
Column: wicable
Column: discount
Column: memDiscount
Column: discountable
Column: discounttype
Column: voided
Column: percentDiscount
Column: ItemQtty
Column: volDiscType
Column: volume
Column: VolSpecial
Column: mixMatch
Column: matched
Column: memType
Column: staff
Column: numflag
Column: itemstatus
Column: tenderstatus
Column: charflag
Column: varflag
Column: batchHeaderID
Column: local
Column: organic
Column: display
Column: receipt
Column: card_no
Column: store
Column: branch
Column: match_id
Column: trans_id
Finished cleaning file number: 48
File Name: transArchive_201609.csv!

---------

100%|██████████| 1/1 [00:00<?, ?it/s]


Cleaning file number: 49
File Name: transArchive_201610.csv
Column: datetime
Column: register_no
Column: emp_no
Column: trans_no
Column: upc
Column: description
Column: trans_type
Column: trans_subtype
Column: trans_status
Column: department
Column: quantity
Column: Scale
Column: cost
Column: unitPrice
Column: total
Column: regPrice
Column: altPrice
Column: tax
Column: taxexempt
Column: foodstamp
Column: wicable
Column: discount
Column: memDiscount
Column: discountable
Column: discounttype
Column: voided
Column: percentDiscount
Column: ItemQtty
Column: volDiscType
Column: volume
Column: VolSpecial
Column: mixMatch
Column: matched
Column: memType
Column: staff
Column: numflag
Column: itemstatus
Column: tenderstatus
Column: charflag
Column: varflag
Column: batchHeaderID
Column: local
Column: organic
Column: display
Column: receipt
Column: card_no
Column: store
Column: branch
Column: match_id
Column: trans_id
Finished cleaning file number: 49
File Name: transArchive_201610.csv!

---------

100%|██████████| 1/1 [00:00<?, ?it/s]


Cleaning file number: 50
File Name: transArchive_201611.csv
Column: datetime
Column: register_no
Column: emp_no
Column: trans_no
Column: upc
Column: description
Column: trans_type
Column: trans_subtype
Column: trans_status
Column: department
Column: quantity
Column: Scale
Column: cost
Column: unitPrice
Column: total
Column: regPrice
Column: altPrice
Column: tax
Column: taxexempt
Column: foodstamp
Column: wicable
Column: discount
Column: memDiscount
Column: discountable
Column: discounttype
Column: voided
Column: percentDiscount
Column: ItemQtty
Column: volDiscType
Column: volume
Column: VolSpecial
Column: mixMatch
Column: matched
Column: memType
Column: staff
Column: numflag
Column: itemstatus
Column: tenderstatus
Column: charflag
Column: varflag
Column: batchHeaderID
Column: local
Column: organic
Column: display
Column: receipt
Column: card_no
Column: store
Column: branch
Column: match_id
Column: trans_id
Finished cleaning file number: 50
File Name: transArchive_201611.csv!

---------

100%|██████████| 1/1 [00:00<?, ?it/s]


Cleaning file number: 51
File Name: transArchive_201612.csv
Column: datetime
Column: register_no
Column: emp_no
Column: trans_no
Column: upc
Column: description
Column: trans_type
Column: trans_subtype
Column: trans_status
Column: department
Column: quantity
Column: Scale
Column: cost
Column: unitPrice
Column: total
Column: regPrice
Column: altPrice
Column: tax
Column: taxexempt
Column: foodstamp
Column: wicable
Column: discount
Column: memDiscount
Column: discountable
Column: discounttype
Column: voided
Column: percentDiscount
Column: ItemQtty
Column: volDiscType
Column: volume
Column: VolSpecial
Column: mixMatch
Column: matched
Column: memType
Column: staff
Column: numflag
Column: itemstatus
Column: tenderstatus
Column: charflag
Column: varflag
Column: batchHeaderID
Column: local
Column: organic
Column: display
Column: receipt
Column: card_no
Column: store
Column: branch
Column: match_id
Column: trans_id
Finished cleaning file number: 51
File Name: transArchive_201612.csv!

---------

100%|██████████| 1/1 [00:00<?, ?it/s]


Cleaning file number: 52
File Name: transArchive_201701.csv
Column: datetime
Column: register_no
Column: emp_no
Column: trans_no
Column: upc
Column: description
Column: trans_type
Column: trans_subtype
Column: trans_status
Column: department
Column: quantity
Column: Scale
Column: cost
Column: unitPrice
Column: total
Column: regPrice
Column: altPrice
Column: tax
Column: taxexempt
Column: foodstamp
Column: wicable
Column: discount
Column: memDiscount
Column: discountable
Column: discounttype
Column: voided
Column: percentDiscount
Column: ItemQtty
Column: volDiscType
Column: volume
Column: VolSpecial
Column: mixMatch
Column: matched
Column: memType
Column: staff
Column: numflag
Column: itemstatus
Column: tenderstatus
Column: charflag
Column: varflag
Column: batchHeaderID
Column: local
Column: organic
Column: display
Column: receipt
Column: card_no
Column: store
Column: branch
Column: match_id
Column: trans_id
Finished cleaning file number: 52
File Name: transArchive_201701.csv!

---------

100%|██████████| 1/1 [00:00<00:00, 1028.27it/s]


In [16]:
fruit = ["apple", "banana", "cherry"]

# Demo: skip the first element and number the remaining items starting at 1,
# so each printed index equals the item's position in the original list.
for idx in range(1, len(fruit)):
    x = fruit[idx]
    print(idx, x)

1 banana
2 cherry
