In [None]:
import os
import re
import glob
import zipfile
import fnmatch
import datetime
from zipfile import ZipFile
import pandas as pd
from pandas import DataFrame

In [None]:
def find_sheet(sheet_list, sheet_name):
    """
    Return the worksheet names that match a pattern, ignoring case.

    Parameters
    ----------
    sheet_list : iterable of str
        Worksheet names to search.
    sheet_name : str
        Regular expression searched for anywhere within each worksheet name.

    Returns
    -------
    list of str
        The matching names, in their original order and casing. Empty when
        nothing matches.
    """
    # Match case-insensitively with a flag instead of lower-casing the pattern
    # text: lower-casing would silently turn classes such as \D or \W into
    # \d or \w and change what the pattern means.
    pattern = re.compile(sheet_name, re.IGNORECASE)
    return [x for x in sheet_list if pattern.search(x)]

In [None]:
def create_timestamp():
    """
    Return today's local date as a ``YYYY-MM-DD`` string.

    Month and day are always zero-padded to two digits, so the values have a
    fixed width and sort chronologically when used in file names.
    """
    return datetime.date.today().isoformat()

In [None]:
# Check to see if the package working directories exist.
# If they do not, create them.

data_dir = ".\\excel\\data\\"
archive_dir = ".\\excel\\archive\\"

# Each directory is handled on its own: previously data_dir was only created
# when archive_dir was also missing. exist_ok=True makes the call a no-op for
# a directory that is already present, so this cell is safe to re-run.
for working_dir in (data_dir, archive_dir):
    os.makedirs(working_dir, exist_ok=True)
# exit() Use in stand-alone script.

In [None]:
# Check to see if there are old Excel files in the working
# directory. If there are, delete them.

# glob returns a list of paths and os.remove accepts a single path, so each
# match is removed individually. An empty list simply skips the loop.
for stale_xlsx in glob.glob(data_dir + "*.xlsx"):
    os.remove(stale_xlsx)

In [None]:
# Find the zip archives in the working directory, extract each one
# into that same directory, then delete the archive.

zip_names = [
    entry for entry in os.listdir(data_dir) if fnmatch.fnmatch(entry, "*.zip")
]

for data_file in zip_names:
    zip_path = data_dir + data_file
    with zipfile.ZipFile(zip_path, mode="r") as zip_obj:
        zip_obj.extractall(path=data_dir)
    # The archive is only removed after it has been closed.
    os.remove(zip_path)

In [None]:
# Version 1
#
# Pick out the category workbooks by file name and stack their sheets into a
# single frame. Sheets are read by position; find_sheet() could be used to
# select them by name instead.

file_list = glob.glob(data_dir + "*.xlsx")

# Frames gathered during THIS run only, so a workbook that is missing cannot
# be masked by a df_* variable left in the kernel from an earlier run.
frames = {}

for xl_file in file_list:
    xl_name = xl_file.lower()

    # Open each workbook once; the context manager closes its file handle.
    with pd.ExcelFile(xl_file) as workbook:
        if fnmatch.fnmatch(xl_name, "*grocery*.xlsx"):
            # Sheets 0-2 of this workbook are read as grocery, frozen and dairy.
            df_grocery = workbook.parse(0)
            df_frozen = workbook.parse(1)
            df_dairy = workbook.parse(2)
            frames.update(grocery=df_grocery, frozen=df_frozen, dairy=df_dairy)
        elif fnmatch.fnmatch(xl_name, "*hbc*.xlsx"):
            df_hbc = workbook.parse(0)
            frames["hbc"] = df_hbc
        elif fnmatch.fnmatch(xl_name, "*perish*.xlsx"):
            df_perish = workbook.parse(0)
            frames["perish"] = df_perish

# Fail with a clear message instead of a NameError (or stale data) when an
# expected workbook was not found.
expected = ["grocery", "frozen", "dairy", "hbc", "perish"]
missing = [name for name in expected if name not in frames]
if missing:
    raise FileNotFoundError(
        f"No workbook found in {data_dir!r} for: {', '.join(missing)}"
    )

cs_list = [frames[name] for name in expected]
df_cs_data = pd.concat(cs_list)
df_cs_data

In [None]:
# Version 2
#
# Read every sheet of every workbook in the working directory into cs_list,
# one DataFrame per sheet.

xl_list = glob.glob(data_dir + "*.xlsx")
cs_list = []

for xl_file in xl_list:
    # The context manager closes the workbook's file handle once all of its
    # sheets have been parsed.
    with pd.ExcelFile(xl_file) as workbook:
        sheet_list = workbook.sheet_names

        # skiprows=1 with header=None drops the first row of each sheet and
        # leaves the columns labelled by integer position.
        for index in range(len(sheet_list)):
            cs_list.append(workbook.parse(index, skiprows=1, header=None))


In [None]:
# Load the pipe-delimited "*mic*.txt" file from the working directory.
txt_list = glob.glob(data_dir + "*.txt")

for txt_file in txt_list:
    # Skip any text file whose path does not contain "mic".
    if not fnmatch.fnmatch(txt_file.lower(), "*mic*.txt"):
        continue

    # The first line is skipped and the four columns are given integer labels;
    # the column labelled 1 is then discarded.
    mic_raw = pd.read_csv(txt_file, sep="|", skiprows=1, header=None)
    mic_raw.columns = [3, 24, 1, 25]
    df_mic = mic_raw.drop(columns=[1])

In [None]:
# Display df_mic (loaded from the "*mic*.txt" file) for inspection.
df_mic

In [None]:
# Stack every parsed sheet, left-join the df_mic columns on column 3 and
# drop rows that are exact duplicates.
df_cs_data = (
    pd.concat(cs_list)
    .merge(df_mic, how="left", on=3)
    .drop_duplicates()
)

# Correct a misspelled name in column 5.
df_cs_data[5] = [
    ("Rusty Ames" if name == "Rusty Amees" else name) for name in df_cs_data[5]
]

# Left-pad the values in columns 3 and 6 with zeros to a width of six.
zero_pad = '{:0>6}'.format
for code_col in (3, 6):
    df_cs_data[code_col] = df_cs_data[code_col].map(zero_pad)

# Append columns 26-30; any of these labels not already present come out of
# reindex as empty (NaN) columns.
placeholder_cols = [26, 27, 28, 29, 30]
df_cs_data = df_cs_data.reindex(
    columns=df_cs_data.columns.tolist() + placeholder_cols
)

# Output layout: (source column label, output header), in output order.
column_layout = [
    (0, 'GL'),
    (1, 'Location Name'),
    (2, 'Customer Code'),
    (3, 'Tops Code'),
    (4, 'Buyer Code'),
    (5, 'Category Business Manager'),
    (24, 'Category'),
    (22, 'Private Label Flag'),
    (23, 'Vendor Name'),
    (6, 'C&S Code'),
    (7, 'Item Description'),
    (8, 'Size'),
    (25, 'Brand'),
    (9, 'UPC - Vendor'),
    (10, 'UPC - Case'),
    (11, 'UPC - Item'),
    (26, 'WTD Category Unit Lift %'),
    (27, 'WTD Item Unit Lift %'),
    (12, 'Weekly Turn (Forecast)'),
    (13, 'BOH'),
    (14, 'Total On Order'),
    (28, 'OOS Yesterday'),
    (15, 'Next PO Due Date'),
    (16, 'Next PO Appt Date'),
    (17, 'Next PO Qty'),
    (18, 'Next Biceps PO#'),
    (19, 'Lead Time'),
    (20, 'Current Week Bookings'),
    (21, 'Future Bookings'),
    (29, 'Item Key'),
    (30, 'Manufacturer Status'),
]

# Reorder the columns to the layout above, then apply the headers.
df_cs_data = df_cs_data[[source for source, _ in column_layout]]
df_cs_data.columns = [header for _, header in column_layout]


In [None]:
# Display the combined, renamed frame for inspection.
df_cs_data

In [None]:
# TODO: use the code below later to clean the file.

# val = df_grocery.iloc[:, [0, 1, 2, 3, 4, 5, 22, 23, 6, 7, 8, 9, 10, 11, 12]]
# val

In [None]:
# Write the combined frame, without its index, to a date-stamped workbook
# in the archive directory.
archive_file = f"{archive_dir}CS-Sales-Change-{create_timestamp()}.xlsx"
df_cs_data.to_excel(archive_file, index=False)

In [None]:
# Read the .txt files in the working directory as "~"-delimited text with
# no header row.
txt_list = glob.glob(data_dir + "*.txt")

for txt_file in txt_list:
    # df_wip_sales is rebound on every pass, so only the last file read is kept.
    # NOTE(review): this glob also matches any "*mic*.txt" file, which is parsed
    # with sep="|" elsewhere in this notebook — confirm which file is intended.
    df_wip_sales = pd.read_csv(txt_file, sep="~", header=None)

# Display the resulting frame. This raises NameError if no .txt file was found.
df_wip_sales