In [1]:
#cleaning data
import pandas as pd
import numpy as np
from zipfile import ZipFile
import os
import io
import csv
import datetime

import data_functions as datafun

In [2]:
# Path to the outer archive; its entries are themselves ZIP files that each hold CSV data.
outer = "Data/WedgeZipOfZips.zip"

# Empty mapping created up front. NOTE(review): not referenced by the visible cells —
# presumably intended to record the delimiter detected for each file; confirm or remove.
delimiters = {}


In [3]:
# Column names applied to every loaded CSV, in file order. They are passed as
# `names=` for files without a header row and assigned over the parsed header otherwise.
# Written as whitespace-separated text so the 50 names read as a simple grid.
schema_columns = """
    datetime register_no emp_no trans_no upc description
    trans_type trans_subtype trans_status department quantity
    Scale cost unitPrice total regPrice altPrice tax
    taxexempt foodstamp wicable discount memDiscount
    discountable discounttype voided percentDiscount ItemQtty
    volDiscType volume VolSpecial mixMatch matched memType
    staff numflag itemstatus tenderstatus charflag varflag
    batchHeaderID local organic display receipt card_no
    store branch match_id trans_id
""".split()



In [4]:
# Walk the outer archive and, for every CSV found inside each nested ZIP:
# detect its format, load it with the shared schema, clean it, and upload it.

# Number of characters read from the start of each CSV for format detection.
SNIFF_SAMPLE_CHARS = 3030


def _detect_csv_format(sample):
    """Guess the delimiter and header presence of a CSV from a text sample.

    Parameters
    ----------
    sample : str
        Leading characters of the CSV file.

    Returns
    -------
    tuple[str, bool]
        ``(delimiter, has_header)``. The delimiter falls back to ``','`` and
        ``has_header`` falls back to ``False`` when ``csv.Sniffer`` cannot decide.
    """
    sniffer = csv.Sniffer()

    try:
        delimiter = sniffer.sniff(sample, delimiters=[',', ';', '\t', ':']).delimiter
        print(f"Detected delimiter: {delimiter}")
    except csv.Error:
        delimiter = ','  # Default to comma if delimiter detection fails
        print("Could not detect delimiter, using default ','")

    # has_header() runs its own dialect detection internally and can raise
    # csv.Error as well, so it needs the same protection as sniff() above.
    try:
        has_header = sniffer.has_header(sample)
    except csv.Error:
        # NOTE(review): assuming "no header" here means a real header row would be
        # loaded as data and surface downstream rather than silently dropping a row.
        has_header = False
        print("Could not detect header, assuming no header row")

    return delimiter, has_header


# Step 1: Open the outer ZIP file
with ZipFile(outer, 'r') as outer_zip:
    outer_files = outer_zip.namelist()

    # Only nested archives are processed, so only they count towards progress.
    inner_zip_names = [name for name in outer_files if name.endswith('.zip')]

    processed_files = 0   # number of CSV files loaded and uploaded
    count_of_nulls = 0    # running total of nulls in 'trans_subtype' after cleaning

    # Step 2: Loop through the nested ZIP files
    for archive_index, outer_file in enumerate(inner_zip_names, start=1):
        print(f"Found inner ZIP file: {outer_file}")

        # Step 3: Open the nested archive straight from the outer one (no extraction to disk)
        with outer_zip.open(outer_file) as inner_zip_file, ZipFile(inner_zip_file) as inner_zip:
            for file in inner_zip.namelist():
                if not file.endswith('.csv'):
                    continue
                print(f"Processing CSV file: {file}")

                # Step 4: Read the CSV file as UTF-8 text
                with inner_zip.open(file, 'r') as raw_csv:
                    csv_file = io.TextIOWrapper(raw_csv, encoding="utf-8")

                    # Read a sample to detect the format, then rewind for the full parse
                    sample = csv_file.read(SNIFF_SAMPLE_CHARS)
                    csv_file.seek(0)

                    delimiter, has_header = _detect_csv_format(sample)
                    print(f"Has header: {has_header}")

                    # Either way the frame ends up with the schema column names:
                    # an existing header row is parsed and then overwritten.
                    if has_header:
                        df = pd.read_csv(csv_file, delimiter=delimiter)
                        df.columns = schema_columns
                    else:
                        df = pd.read_csv(csv_file, delimiter=delimiter, header=None, names=schema_columns)

                    # Step 5: Clean the data (logic lives in data_functions.clean_data)
                    print("Cleaning data...")
                    df = datafun.clean_data(df)
                    print("Data cleaned.")

                    # Count null values and add to count_of_nulls
                    count_of_nulls += df["trans_subtype"].isnull().sum()

                    # Table id: the CSV's base name up to its first dot, lower-cased.
                    # basename() keeps a folder prefix inside the archive out of the id.
                    table_stem = os.path.basename(file).split(".")[0].lower()
                    table_name = "wedgeproject-438019.wedgeproject." + table_stem
                    datafun.upload_to_bigquery(df, table_name)

                    processed_files += 1

        # Progress is reported per finished nested archive, so it cannot pass 100%.
        progress = archive_index / len(inner_zip_names) * 100
        print(f"Progress: {progress:.2f}%")


print(count_of_nulls)

print("All files processed.")

#count null values in trans_subtype

                               
                                



Found inner ZIP file: transArchive_201001_201003.zip
Processing CSV file: transArchive_201001_201003.csv
Detected delimiter: ,
Has header: True
Cleaning data...


  df['memType'] = df['memType'].fillna(False)  # fill with False
  df['batchHeaderID'] = df['batchHeaderID'].fillna(False)  # fill with False
  df['display'] = df['display'].fillna(False)  # fill with False


Data cleaned.
Uploading data to the cloud...
Checking if table exists...
Deleting table wedgeproject-438019.wedgeproject.transarchive_201001_201003...
Successfully loaded 2998330 rows into wedgeproject-438019.wedgeproject.transarchive_201001_201003
Progress: 1.89%
Found inner ZIP file: transArchive_201004_201006.zip
Processing CSV file: transArchive_201004_201006.csv
Detected delimiter: ,
Has header: True
Cleaning data...


  df['memType'] = df['memType'].fillna(False)  # fill with False
  df['batchHeaderID'] = df['batchHeaderID'].fillna(False)  # fill with False
  df['display'] = df['display'].fillna(False)  # fill with False


Data cleaned.
Uploading data to the cloud...
Checking if table exists...
Deleting table wedgeproject-438019.wedgeproject.transarchive_201004_201006...
Successfully loaded 3185807 rows into wedgeproject-438019.wedgeproject.transarchive_201004_201006
Progress: 3.77%
Found inner ZIP file: transArchive_201007_201009.zip
Processing CSV file: transArchive_201007_201009.csv
Detected delimiter: ,
Has header: True
Cleaning data...


  df['memType'] = df['memType'].fillna(False)  # fill with False
  df['batchHeaderID'] = df['batchHeaderID'].fillna(False)  # fill with False
  df['display'] = df['display'].fillna(False)  # fill with False


Data cleaned.
Uploading data to the cloud...
Checking if table exists...
Deleting table wedgeproject-438019.wedgeproject.transarchive_201007_201009...
Successfully loaded 2992585 rows into wedgeproject-438019.wedgeproject.transarchive_201007_201009
Progress: 5.66%
Found inner ZIP file: transArchive_201010_201012.zip
Processing CSV file: transArchive_201010_201012.csv
Detected delimiter: ,
Has header: True


  df = pd.read_csv(csv_file, delimiter=delimiter)


Cleaning data...


  df['memType'] = df['memType'].fillna(False)  # fill with False
  df['batchHeaderID'] = df['batchHeaderID'].fillna(False)  # fill with False


Data cleaned.
Uploading data to the cloud...
Checking if table exists...
Deleting table wedgeproject-438019.wedgeproject.transarchive_201010_201012...
Successfully loaded 2957586 rows into wedgeproject-438019.wedgeproject.transarchive_201010_201012
Progress: 7.55%
Found inner ZIP file: transArchive_201101_201103.zip
Processing CSV file: transArchive_201101_201103.csv
Detected delimiter: ,
Has header: True
Cleaning data...


  df['memType'] = df['memType'].fillna(False)  # fill with False
  df['staff'] = df['staff'].fillna(False)      # fill with False
  df['batchHeaderID'] = df['batchHeaderID'].fillna(False)  # fill with False


Data cleaned.
Uploading data to the cloud...
Checking if table exists...
Deleting table wedgeproject-438019.wedgeproject.transarchive_201101_201103...
Successfully loaded 2920826 rows into wedgeproject-438019.wedgeproject.transarchive_201101_201103
Progress: 9.43%
Found inner ZIP file: transArchive_201104.zip
Processing CSV file: transArchive_201104.csv
Detected delimiter: ,
Has header: True
Cleaning data...


  df['memType'] = df['memType'].fillna(False)  # fill with False
  df['staff'] = df['staff'].fillna(False)      # fill with False
  df['batchHeaderID'] = df['batchHeaderID'].fillna(False)  # fill with False


Data cleaned.
Uploading data to the cloud...
Checking if table exists...
Deleting table wedgeproject-438019.wedgeproject.transarchive_201104...
Successfully loaded 1066334 rows into wedgeproject-438019.wedgeproject.transarchive_201104
Progress: 11.32%
Found inner ZIP file: transArchive_201105.zip
Processing CSV file: transArchive_201105.csv
Detected delimiter: ,
Has header: True
Cleaning data...


  df['memType'] = df['memType'].fillna(False)  # fill with False
  df['staff'] = df['staff'].fillna(False)      # fill with False
  df['batchHeaderID'] = df['batchHeaderID'].fillna(False)  # fill with False


Data cleaned.
Uploading data to the cloud...
Checking if table exists...
Deleting table wedgeproject-438019.wedgeproject.transarchive_201105...
Successfully loaded 1068515 rows into wedgeproject-438019.wedgeproject.transarchive_201105
Progress: 13.21%
Found inner ZIP file: transArchive_201106.zip
Processing CSV file: transArchive_201106.csv
Detected delimiter: ,
Has header: True
Cleaning data...


  df['staff'] = df['staff'].fillna(False)      # fill with False
  df['batchHeaderID'] = df['batchHeaderID'].fillna(False)  # fill with False


Data cleaned.
Uploading data to the cloud...
Checking if table exists...
Deleting table wedgeproject-438019.wedgeproject.transarchive_201106...
Successfully loaded 992906 rows into wedgeproject-438019.wedgeproject.transarchive_201106
Progress: 15.09%
Found inner ZIP file: transArchive_201107_201109.zip
Processing CSV file: transArchive_201107_201109.csv
Detected delimiter: ,
Has header: True
Cleaning data...


  df['memType'] = df['memType'].fillna(False)  # fill with False
  df['staff'] = df['staff'].fillna(False)      # fill with False
  df['batchHeaderID'] = df['batchHeaderID'].fillna(False)  # fill with False


Data cleaned.
Uploading data to the cloud...
Checking if table exists...
Deleting table wedgeproject-438019.wedgeproject.transarchive_201107_201109...
Successfully loaded 3011935 rows into wedgeproject-438019.wedgeproject.transarchive_201107_201109
Progress: 16.98%
Found inner ZIP file: transArchive_201110_201112.zip
Processing CSV file: transArchive_201110_201112.csv
Detected delimiter: ,
Has header: True
Cleaning data...


  df['memType'] = df['memType'].fillna(False)  # fill with False
  df['staff'] = df['staff'].fillna(False)      # fill with False
  df['batchHeaderID'] = df['batchHeaderID'].fillna(False)  # fill with False


Data cleaned.
Uploading data to the cloud...
Checking if table exists...
Deleting table wedgeproject-438019.wedgeproject.transarchive_201110_201112...
Successfully loaded 3121117 rows into wedgeproject-438019.wedgeproject.transarchive_201110_201112
Progress: 18.87%
Found inner ZIP file: transArchive_201201_201203.zip
Processing CSV file: transArchive_201201_201203.csv
Detected delimiter: ,
Has header: True
Cleaning data...


  df['memType'] = df['memType'].fillna(False)  # fill with False
  df['staff'] = df['staff'].fillna(False)      # fill with False
  df['batchHeaderID'] = df['batchHeaderID'].fillna(False)  # fill with False


Data cleaned.
Uploading data to the cloud...
Checking if table exists...
Deleting table wedgeproject-438019.wedgeproject.transarchive_201201_201203...
Successfully loaded 2989644 rows into wedgeproject-438019.wedgeproject.transarchive_201201_201203
Progress: 20.75%
Found inner ZIP file: transArchive_201201_201203_inactive.zip
Processing CSV file: transArchive_201201_201203_inactive.csv
Detected delimiter: ;
Has header: True


  df = pd.read_csv(csv_file, delimiter=delimiter)
  df['memType'] = df['memType'].fillna(False)  # fill with False


Cleaning data...
Data cleaned.
Uploading data to the cloud...
Checking if table exists...
Deleting table wedgeproject-438019.wedgeproject.transarchive_201201_201203_inactive...
Successfully loaded 245772 rows into wedgeproject-438019.wedgeproject.transarchive_201201_201203_inactive
Progress: 22.64%
Found inner ZIP file: transArchive_201204_201206.zip
Processing CSV file: transArchive_201204_201206.csv
Detected delimiter: ,
Has header: True


  df = pd.read_csv(csv_file, delimiter=delimiter)


Cleaning data...


  df['batchHeaderID'] = df['batchHeaderID'].fillna(False)  # fill with False


Data cleaned.
Uploading data to the cloud...
Checking if table exists...
Deleting table wedgeproject-438019.wedgeproject.transarchive_201204_201206...
Successfully loaded 3083546 rows into wedgeproject-438019.wedgeproject.transarchive_201204_201206
Progress: 24.53%
Found inner ZIP file: transArchive_201204_201206_inactive.zip
Processing CSV file: transArchive_201204_201206_inactive.csv
Detected delimiter: ;
Has header: True


  df = pd.read_csv(csv_file, delimiter=delimiter)
  df['memType'] = df['memType'].fillna(False)  # fill with False


Cleaning data...
Data cleaned.
Uploading data to the cloud...
Checking if table exists...
Deleting table wedgeproject-438019.wedgeproject.transarchive_201204_201206_inactive...
Successfully loaded 237990 rows into wedgeproject-438019.wedgeproject.transarchive_201204_201206_inactive
Progress: 26.42%
Found inner ZIP file: transArchive_201207_201209.zip
Processing CSV file: transArchive_201207_201209.csv
Detected delimiter: ,
Has header: True


  df = pd.read_csv(csv_file, delimiter=delimiter)


Cleaning data...


  df['batchHeaderID'] = df['batchHeaderID'].fillna(False)  # fill with False


Data cleaned.
Uploading data to the cloud...
Checking if table exists...
Deleting table wedgeproject-438019.wedgeproject.transarchive_201207_201209...
Successfully loaded 2925608 rows into wedgeproject-438019.wedgeproject.transarchive_201207_201209
Progress: 28.30%
Found inner ZIP file: transArchive_201207_201209_inactive.zip
Processing CSV file: transArchive_201207_201209_inactive.csv
Detected delimiter: ;
Has header: True


  df = pd.read_csv(csv_file, delimiter=delimiter)


Cleaning data...
Data cleaned.
Uploading data to the cloud...
Checking if table exists...
Deleting table wedgeproject-438019.wedgeproject.transarchive_201207_201209_inactive...
Successfully loaded 190877 rows into wedgeproject-438019.wedgeproject.transarchive_201207_201209_inactive
Progress: 30.19%
Found inner ZIP file: transArchive_201210_201212.zip
Processing CSV file: transArchive_201210_201212.csv
Detected delimiter: ,
Has header: True


  df = pd.read_csv(csv_file, delimiter=delimiter)


Cleaning data...


  df['batchHeaderID'] = df['batchHeaderID'].fillna(False)  # fill with False


Data cleaned.
Uploading data to the cloud...
Checking if table exists...
Deleting table wedgeproject-438019.wedgeproject.transarchive_201210_201212...
Successfully loaded 2893637 rows into wedgeproject-438019.wedgeproject.transarchive_201210_201212
Progress: 32.08%
Found inner ZIP file: transArchive_201210_201212_inactive.zip
Processing CSV file: transArchive_201210_201212_inactive.csv
Detected delimiter: ;
Has header: True


  df = pd.read_csv(csv_file, delimiter=delimiter)


Cleaning data...
Data cleaned.
Uploading data to the cloud...
Checking if table exists...
Deleting table wedgeproject-438019.wedgeproject.transarchive_201210_201212_inactive...
Successfully loaded 162988 rows into wedgeproject-438019.wedgeproject.transarchive_201210_201212_inactive
Progress: 33.96%
Found inner ZIP file: transArchive_201301_201303.zip
Processing CSV file: transArchive_201301_201303.csv
Detected delimiter: ,
Has header: True


  df = pd.read_csv(csv_file, delimiter=delimiter)


Cleaning data...


  df['batchHeaderID'] = df['batchHeaderID'].fillna(False)  # fill with False


Data cleaned.
Uploading data to the cloud...
Checking if table exists...
Deleting table wedgeproject-438019.wedgeproject.transarchive_201301_201303...
Successfully loaded 2903987 rows into wedgeproject-438019.wedgeproject.transarchive_201301_201303
Progress: 35.85%
Found inner ZIP file: transArchive_201301_201303_inactive.zip
Processing CSV file: transArchive_201301_201303_inactive.csv
Detected delimiter: ;
Has header: True


  df = pd.read_csv(csv_file, delimiter=delimiter)


Cleaning data...
Data cleaned.
Uploading data to the cloud...
Checking if table exists...
Deleting table wedgeproject-438019.wedgeproject.transarchive_201301_201303_inactive...
Successfully loaded 148623 rows into wedgeproject-438019.wedgeproject.transarchive_201301_201303_inactive
Progress: 37.74%
Found inner ZIP file: transArchive_201304_201306.zip
Processing CSV file: transArchive_201304_201306.csv
Detected delimiter: ,
Has header: True


  df = pd.read_csv(csv_file, delimiter=delimiter)


Cleaning data...


  df['batchHeaderID'] = df['batchHeaderID'].fillna(False)  # fill with False


Data cleaned.
Uploading data to the cloud...
Checking if table exists...
Deleting table wedgeproject-438019.wedgeproject.transarchive_201304_201306...
Successfully loaded 3025434 rows into wedgeproject-438019.wedgeproject.transarchive_201304_201306
Progress: 39.62%
Found inner ZIP file: transArchive_201304_201306_inactive.zip
Processing CSV file: transArchive_201304_201306_inactive.csv
Detected delimiter: ;
Has header: True


  df = pd.read_csv(csv_file, delimiter=delimiter)


Cleaning data...
Data cleaned.
Uploading data to the cloud...
Checking if table exists...
Deleting table wedgeproject-438019.wedgeproject.transarchive_201304_201306_inactive...
Successfully loaded 137628 rows into wedgeproject-438019.wedgeproject.transarchive_201304_201306_inactive
Progress: 41.51%
Found inner ZIP file: transArchive_201307_201309.zip
Processing CSV file: transArchive_201307_201309.csv
Detected delimiter: ,
Has header: True


  df = pd.read_csv(csv_file, delimiter=delimiter)


Cleaning data...


  df['batchHeaderID'] = df['batchHeaderID'].fillna(False)  # fill with False


Data cleaned.
Uploading data to the cloud...
Checking if table exists...
Deleting table wedgeproject-438019.wedgeproject.transarchive_201307_201309...
Successfully loaded 2997135 rows into wedgeproject-438019.wedgeproject.transarchive_201307_201309
Progress: 43.40%
Found inner ZIP file: transArchive_201307_201309_inactive.zip
Processing CSV file: transArchive_201307_201309_inactive.csv
Detected delimiter: ;
Has header: True


  df = pd.read_csv(csv_file, delimiter=delimiter)
  df['memType'] = df['memType'].fillna(False)  # fill with False


Cleaning data...
Data cleaned.
Uploading data to the cloud...
Checking if table exists...
Deleting table wedgeproject-438019.wedgeproject.transarchive_201307_201309_inactive...
Successfully loaded 104468 rows into wedgeproject-438019.wedgeproject.transarchive_201307_201309_inactive
Progress: 45.28%
Found inner ZIP file: transArchive_201310_201312.zip
Processing CSV file: transArchive_201310_201312.csv
Detected delimiter: ,
Has header: True


  df = pd.read_csv(csv_file, delimiter=delimiter)


Cleaning data...


  df['batchHeaderID'] = df['batchHeaderID'].fillna(False)  # fill with False


Data cleaned.
Uploading data to the cloud...
Checking if table exists...
Deleting table wedgeproject-438019.wedgeproject.transarchive_201310_201312...
Successfully loaded 2922057 rows into wedgeproject-438019.wedgeproject.transarchive_201310_201312
Progress: 47.17%
Found inner ZIP file: transArchive_201310_201312_inactive.zip
Processing CSV file: transArchive_201310_201312_inactive.csv
Detected delimiter: ;
Has header: True
Cleaning data...


  df['memType'] = df['memType'].fillna(False)  # fill with False


Data cleaned.
Uploading data to the cloud...
Checking if table exists...
Deleting table wedgeproject-438019.wedgeproject.transarchive_201310_201312_inactive...
Successfully loaded 79156 rows into wedgeproject-438019.wedgeproject.transarchive_201310_201312_inactive
Progress: 49.06%
Found inner ZIP file: transArchive_201401_201403.zip
Processing CSV file: transArchive_201401_201403.csv
Detected delimiter: ,
Has header: True


  df = pd.read_csv(csv_file, delimiter=delimiter)


Cleaning data...


  df['batchHeaderID'] = df['batchHeaderID'].fillna(False)  # fill with False


Data cleaned.
Uploading data to the cloud...
Checking if table exists...
Deleting table wedgeproject-438019.wedgeproject.transarchive_201401_201403...
Successfully loaded 2916194 rows into wedgeproject-438019.wedgeproject.transarchive_201401_201403
Progress: 50.94%
Found inner ZIP file: transArchive_201401_201403_inactive.zip
Processing CSV file: transArchive_201401_201403_inactive.csv
Detected delimiter: ;
Has header: True
Cleaning data...


  df = pd.read_csv(csv_file, delimiter=delimiter)
  df['memType'] = df['memType'].fillna(False)  # fill with False


Data cleaned.
Uploading data to the cloud...
Checking if table exists...
Deleting table wedgeproject-438019.wedgeproject.transarchive_201401_201403_inactive...
Successfully loaded 52614 rows into wedgeproject-438019.wedgeproject.transarchive_201401_201403_inactive
Progress: 52.83%
Found inner ZIP file: transArchive_201404_201406.zip
Processing CSV file: transArchive_201404_201406.csv
Detected delimiter: ,
Has header: True


  df = pd.read_csv(csv_file, delimiter=delimiter)


Cleaning data...


  df['batchHeaderID'] = df['batchHeaderID'].fillna(False)  # fill with False


Data cleaned.
Uploading data to the cloud...
Checking if table exists...
Deleting table wedgeproject-438019.wedgeproject.transarchive_201404_201406...
Successfully loaded 3154267 rows into wedgeproject-438019.wedgeproject.transarchive_201404_201406
Progress: 54.72%
Found inner ZIP file: transArchive_201404_201406_inactive.zip
Processing CSV file: transArchive_201404_201406_inactive.csv
Detected delimiter: ;
Has header: True
Cleaning data...


  df['memType'] = df['memType'].fillna(False)  # fill with False


Data cleaned.
Uploading data to the cloud...
Checking if table exists...
Deleting table wedgeproject-438019.wedgeproject.transarchive_201404_201406_inactive...
Successfully loaded 49069 rows into wedgeproject-438019.wedgeproject.transarchive_201404_201406_inactive
Progress: 56.60%
Found inner ZIP file: transArchive_201407_201409.zip
Processing CSV file: transArchive_201407_201409.csv
Detected delimiter: ,
Has header: True
Cleaning data...


  df['memType'] = df['memType'].fillna(False)  # fill with False
  df['batchHeaderID'] = df['batchHeaderID'].fillna(False)  # fill with False
  df['display'] = df['display'].fillna(False)  # fill with False


Data cleaned.
Uploading data to the cloud...
Checking if table exists...
Deleting table wedgeproject-438019.wedgeproject.transarchive_201407_201409...
Successfully loaded 3030409 rows into wedgeproject-438019.wedgeproject.transarchive_201407_201409
Progress: 58.49%
Found inner ZIP file: transArchive_201407_201409_inactive.zip
Processing CSV file: transArchive_201407_201409_inactive.csv
Detected delimiter: ;
Has header: True
Cleaning data...


  df['memType'] = df['memType'].fillna(False)  # fill with False
  df['staff'] = df['staff'].fillna(False)      # fill with False
  df['batchHeaderID'] = df['batchHeaderID'].fillna(False)  # fill with False
  df['display'] = df['display'].fillna(False)  # fill with False


Data cleaned.
Uploading data to the cloud...
Checking if table exists...
Deleting table wedgeproject-438019.wedgeproject.transarchive_201407_201409_inactive...
Successfully loaded 28323 rows into wedgeproject-438019.wedgeproject.transarchive_201407_201409_inactive
Progress: 60.38%
Found inner ZIP file: transArchive_201410_201412.zip
Processing CSV file: transArchive_201410_201412.csv
Detected delimiter: ,
Has header: True
Cleaning data...


  df['staff'] = df['staff'].fillna(False)      # fill with False
  df['batchHeaderID'] = df['batchHeaderID'].fillna(False)  # fill with False
  df['display'] = df['display'].fillna(False)  # fill with False


Data cleaned.
Uploading data to the cloud...
Checking if table exists...
Deleting table wedgeproject-438019.wedgeproject.transarchive_201410_201412...
Successfully loaded 2931416 rows into wedgeproject-438019.wedgeproject.transarchive_201410_201412
Progress: 62.26%
Found inner ZIP file: transArchive_201410_201412_inactive.zip
Processing CSV file: transArchive_201410_201412_inactive.csv
Detected delimiter: ;
Has header: True
Cleaning data...
Data cleaned.
Uploading data to the cloud...


  df['staff'] = df['staff'].fillna(False)      # fill with False
  df['batchHeaderID'] = df['batchHeaderID'].fillna(False)  # fill with False
  df['display'] = df['display'].fillna(False)  # fill with False


Checking if table exists...
Deleting table wedgeproject-438019.wedgeproject.transarchive_201410_201412_inactive...
Successfully loaded 7964 rows into wedgeproject-438019.wedgeproject.transarchive_201410_201412_inactive
Progress: 64.15%
Found inner ZIP file: transArchive_201501_201503.zip
Processing CSV file: transArchive_201501_201503.csv
Detected delimiter: ,
Has header: True
Cleaning data...
Data cleaned.
Uploading data to the cloud...
Checking if table exists...
Deleting table wedgeproject-438019.wedgeproject.transarchive_201501_201503...
Successfully loaded 3041129 rows into wedgeproject-438019.wedgeproject.transarchive_201501_201503
Progress: 66.04%
Found inner ZIP file: transArchive_201504_201506.zip
Processing CSV file: transArchive_201504_201506.csv
Detected delimiter: ,
Has header: True
Cleaning data...


  df['staff'] = df['staff'].fillna(False)      # fill with False
  df['batchHeaderID'] = df['batchHeaderID'].fillna(False)  # fill with False


Data cleaned.
Uploading data to the cloud...
Checking if table exists...
Deleting table wedgeproject-438019.wedgeproject.transarchive_201504_201506...
Successfully loaded 3274964 rows into wedgeproject-438019.wedgeproject.transarchive_201504_201506
Progress: 67.92%
Found inner ZIP file: transArchive_201507_201509.zip
Processing CSV file: transArchive_201507_201509.csv
Detected delimiter: ,
Has header: True
Cleaning data...


  df['staff'] = df['staff'].fillna(False)      # fill with False
  df['batchHeaderID'] = df['batchHeaderID'].fillna(False)  # fill with False
  df['display'] = df['display'].fillna(False)  # fill with False


Data cleaned.
Uploading data to the cloud...
Checking if table exists...
Deleting table wedgeproject-438019.wedgeproject.transarchive_201507_201509...
Successfully loaded 3124699 rows into wedgeproject-438019.wedgeproject.transarchive_201507_201509
Progress: 69.81%
Found inner ZIP file: transArchive_201510.zip
Processing CSV file: transArchive_201510.csv
Detected delimiter: ,
Has header: True
Cleaning data...
Data cleaned.
Uploading data to the cloud...
Checking if table exists...
Deleting table wedgeproject-438019.wedgeproject.transarchive_201510...
Successfully loaded 1006055 rows into wedgeproject-438019.wedgeproject.transarchive_201510
Progress: 71.70%
Found inner ZIP file: transArchive_201511.zip
Processing CSV file: transArchive_201511.csv
Detected delimiter: ,
Has header: False


  df = pd.read_csv(csv_file, delimiter=delimiter, header=None, names=schema_columns)


Cleaning data...


  df = df.replace(r'\\N', np.nan, regex=True)


Data cleaned.
Uploading data to the cloud...
Checking if table exists...
Deleting table wedgeproject-438019.wedgeproject.transarchive_201511...
Successfully loaded 993744 rows into wedgeproject-438019.wedgeproject.transarchive_201511
Progress: 73.58%
Found inner ZIP file: transArchive_201512.zip
Processing CSV file: transArchive_201512.csv
Detected delimiter: ,
Has header: False
Cleaning data...


  df['staff'] = df['staff'].fillna(False)      # fill with False
  df['batchHeaderID'] = df['batchHeaderID'].fillna(False)  # fill with False
  df['display'] = df['display'].fillna(False)  # fill with False


Data cleaned.
Uploading data to the cloud...
Checking if table exists...
Deleting table wedgeproject-438019.wedgeproject.transarchive_201512...
Successfully loaded 960017 rows into wedgeproject-438019.wedgeproject.transarchive_201512
Progress: 75.47%
Found inner ZIP file: transArchive_201601.zip
Processing CSV file: transArchive_201601.csv
Detected delimiter: ,
Has header: False
Cleaning data...


  df = df.replace(r'\\N', np.nan, regex=True)


Data cleaned.
Uploading data to the cloud...
Checking if table exists...
Deleting table wedgeproject-438019.wedgeproject.transarchive_201601...
Successfully loaded 979408 rows into wedgeproject-438019.wedgeproject.transarchive_201601
Progress: 77.36%
Found inner ZIP file: transArchive_201602.zip
Processing CSV file: transArchive_201602.csv
Detected delimiter: ,
Has header: False
Cleaning data...


  df = df.replace(r'\\N', np.nan, regex=True)


Data cleaned.
Uploading data to the cloud...
Checking if table exists...
Deleting table wedgeproject-438019.wedgeproject.transarchive_201602...
Successfully loaded 874853 rows into wedgeproject-438019.wedgeproject.transarchive_201602
Progress: 79.25%
Found inner ZIP file: transArchive_201603.zip
Processing CSV file: transArchive_201603.csv
Detected delimiter: ,
Has header: False
Cleaning data...


  df = df.replace(r'\\N', np.nan, regex=True)


Data cleaned.
Uploading data to the cloud...
Checking if table exists...
Deleting table wedgeproject-438019.wedgeproject.transarchive_201603...
Successfully loaded 964635 rows into wedgeproject-438019.wedgeproject.transarchive_201603
Progress: 81.13%
Found inner ZIP file: transArchive_201604.zip
Processing CSV file: transArchive_201604.csv
Detected delimiter: ,
Has header: False
Cleaning data...


  df = df.replace(r'\\N', np.nan, regex=True)


Data cleaned.
Uploading data to the cloud...
Checking if table exists...
Deleting table wedgeproject-438019.wedgeproject.transarchive_201604...
Successfully loaded 930359 rows into wedgeproject-438019.wedgeproject.transarchive_201604
Progress: 83.02%
Found inner ZIP file: transArchive_201605.zip
Processing CSV file: transArchive_201605.csv
Detected delimiter: ,
Has header: False


  df = pd.read_csv(csv_file, delimiter=delimiter, header=None, names=schema_columns)


Cleaning data...


  df = df.replace(r'\\N', np.nan, regex=True)


Data cleaned.
Uploading data to the cloud...
Checking if table exists...
Deleting table wedgeproject-438019.wedgeproject.transarchive_201605...
Successfully loaded 938769 rows into wedgeproject-438019.wedgeproject.transarchive_201605
Progress: 84.91%
Found inner ZIP file: transArchive_201606.zip
Processing CSV file: transArchive_201606.csv
Detected delimiter: ,
Has header: False
Cleaning data...


  df = df.replace(r'\\N', np.nan, regex=True)


Data cleaned.
Uploading data to the cloud...
Checking if table exists...
Deleting table wedgeproject-438019.wedgeproject.transarchive_201606...
Successfully loaded 862329 rows into wedgeproject-438019.wedgeproject.transarchive_201606
Progress: 86.79%
Found inner ZIP file: transArchive_201607.zip
Processing CSV file: transArchive_201607.csv
Detected delimiter: ,
Has header: False


  df = pd.read_csv(csv_file, delimiter=delimiter, header=None, names=schema_columns)


Cleaning data...


  df = df.replace(r'\\N', np.nan, regex=True)


Data cleaned.
Uploading data to the cloud...
Checking if table exists...
Deleting table wedgeproject-438019.wedgeproject.transarchive_201607...
Successfully loaded 872161 rows into wedgeproject-438019.wedgeproject.transarchive_201607
Progress: 88.68%
Found inner ZIP file: transArchive_201608.zip
Processing CSV file: transArchive_201608.csv
Detected delimiter: ,
Has header: False


  df = pd.read_csv(csv_file, delimiter=delimiter, header=None, names=schema_columns)


Cleaning data...


  df = df.replace(r'\\N', np.nan, regex=True)


Data cleaned.
Uploading data to the cloud...
Checking if table exists...
Deleting table wedgeproject-438019.wedgeproject.transarchive_201608...
Successfully loaded 858168 rows into wedgeproject-438019.wedgeproject.transarchive_201608
Progress: 90.57%
Found inner ZIP file: transArchive_201609.zip
Processing CSV file: transArchive_201609.csv
Detected delimiter: ,
Has header: False


  df = pd.read_csv(csv_file, delimiter=delimiter, header=None, names=schema_columns)


Cleaning data...


  df = df.replace(r'\\N', np.nan, regex=True)


Data cleaned.
Uploading data to the cloud...
Checking if table exists...
Deleting table wedgeproject-438019.wedgeproject.transarchive_201609...
Successfully loaded 861248 rows into wedgeproject-438019.wedgeproject.transarchive_201609
Progress: 92.45%
Found inner ZIP file: transArchive_201610.zip
Processing CSV file: transArchive_201610.csv
Detected delimiter: ,
Has header: False


  df = pd.read_csv(csv_file, delimiter=delimiter, header=None, names=schema_columns)


Cleaning data...


  df = df.replace(r'\\N', np.nan, regex=True)


Data cleaned.
Uploading data to the cloud...
Checking if table exists...
Deleting table wedgeproject-438019.wedgeproject.transarchive_201610...
Successfully loaded 905092 rows into wedgeproject-438019.wedgeproject.transarchive_201610
Progress: 94.34%
Found inner ZIP file: transArchive_201611.zip
Processing CSV file: transArchive_201611.csv
Detected delimiter: ,
Has header: False
Cleaning data...


  df = df.replace(r'\\N', np.nan, regex=True)


Data cleaned.
Uploading data to the cloud...
Checking if table exists...
Deleting table wedgeproject-438019.wedgeproject.transarchive_201611...
Successfully loaded 925314 rows into wedgeproject-438019.wedgeproject.transarchive_201611
Progress: 96.23%
Found inner ZIP file: transArchive_201612.zip
Processing CSV file: transArchive_201612.csv
Detected delimiter: ,
Has header: False


  df = pd.read_csv(csv_file, delimiter=delimiter, header=None, names=schema_columns)


Cleaning data...


  df = df.replace(r'\\N', np.nan, regex=True)


Data cleaned.
Uploading data to the cloud...
Checking if table exists...
Deleting table wedgeproject-438019.wedgeproject.transarchive_201612...
Successfully loaded 915707 rows into wedgeproject-438019.wedgeproject.transarchive_201612
Progress: 98.11%
Found inner ZIP file: transArchive_201701.zip
Processing CSV file: transArchive_201701.csv
Detected delimiter: ,
Has header: False


  df = pd.read_csv(csv_file, delimiter=delimiter, header=None, names=schema_columns)


Cleaning data...


  df = df.replace(r'\\N', np.nan, regex=True)


Data cleaned.
Uploading data to the cloud...
Checking if table exists...
Deleting table wedgeproject-438019.wedgeproject.transarchive_201701...
Successfully loaded 936741 rows into wedgeproject-438019.wedgeproject.transarchive_201701
Progress: 100.00%
72189676
All files processed.
