In [None]:
import pandas as pd
from datetime import datetime

# Column schema reused by the CSV loads in this notebook: every listed column
# is read as a string instead of letting pandas infer a type.
dtype = dict.fromkeys(
    [
        'BUSINESS_GROUP',
        'SOURCE_DATA',
        'INVOICE_DATE',
        'SUPPLIER_NBR',
        'SUPPLIER_NORMALIZED',
        'SUPPLIER_PARENT',
        'HOLDING_COMPANY',
    ],
    'str',
)

# Load the current holding-company master file (UTF-8 with BOM).
master_file = pd.read_csv(
    'prepared-holding_company_master 6.14.24.csv',
    dtype=dtype,
    encoding='UTF-8-SIG',
)

# Summarise the load: run date, (rows, columns), and distinct supplier counts.
# nunique() ignores missing values by default.
run_date = datetime.now().strftime('%B %d, %Y')
master_nbr_distinct = master_file['SUPPLIER_NBR'].nunique()
master_norm_distinct = master_file['SUPPLIER_NORMALIZED'].nunique()
print(f"Holding master shape as for {run_date} {master_file.shape}")
print(f"Supplier number distinct: {master_nbr_distinct}")
print(f"Supplier Normalized Distinct: {master_norm_distinct}")

Holding master shape as for July 01, 2024 (1040, 7)
Supplier number distinct: 1025
Supplier Normalized Distinct: 989


In [None]:
# Load the latest per-wave extract, using the same string column schema
# (`dtype`) as the master file.
per_wave = pd.read_csv(
    'per-wave-holding_company_7.1.24.csv',
    dtype=dtype,
    encoding='UTF-8-SIG',
)

# Report its (rows, columns) and distinct supplier counts.
# nunique() ignores missing values by default.
per_wave_nbr_distinct = per_wave['SUPPLIER_NBR'].nunique()
per_wave_norm_distinct = per_wave['SUPPLIER_NORMALIZED'].nunique()
print(f"Holding perwavr shape {per_wave.shape}")
print(f"Supplier number distinct: {per_wave_nbr_distinct}")
print(f"Supplier Normalized Distinct: {per_wave_norm_distinct}")

Holding perwavr shape (3, 7)
Supplier number distinct: 3
Supplier Normalized Distinct: 3


In [None]:
# Stack the per-wave rows underneath the master rows (plain union, no
# de-duplication) with a fresh 0..n-1 index, then upper-case the whole
# INVOICE_DATE text so that the month abbreviation is in capitals.
combined = pd.concat([master_file, per_wave], ignore_index=True).assign(
    INVOICE_DATE=lambda frame: frame['INVOICE_DATE'].str.upper()
)

# Show the resulting (rows, columns) and distinct supplier counts.
# nunique() ignores missing values by default.
combined_nbr_distinct = combined['SUPPLIER_NBR'].nunique()
combined_norm_distinct = combined['SUPPLIER_NORMALIZED'].nunique()
print(f"Holding final shape {combined.shape}")
print(f"Supplier number distinct: {combined_nbr_distinct}")
print(f"Supplier Normalized Distinct: {combined_norm_distinct}")

Holding final shape (1043, 7)
Supplier number distinct: 1028
Supplier Normalized Distinct: 992


In [None]:
import os
import zipfile

# Export the combined table to CSV (UTF-8 with BOM, no index column).
csv_filename = 'prepared-holding_company_master_7.1.24.csv'
combined.to_csv(csv_filename, encoding='UTF-8-SIG', index=False)

# Build the archive name by swapping ONLY the trailing extension.
# A plain str.replace('csv', 'zip') would also rewrite any other "csv"
# substring appearing earlier in the file name.
zip_filename = os.path.splitext(csv_filename)[0] + '.zip'

# Create a deflate-compressed zip containing the CSV that was just written.
with zipfile.ZipFile(zip_filename, 'w', zipfile.ZIP_DEFLATED) as zipf:
    # Add the CSV file to the zip archive
    zipf.write(csv_filename)