In [None]:
import os
import re
import glob
import zipfile
import fnmatch
import datetime
from zipfile import ZipFile
import pandas as pd
import numpy as np
from pandas import DataFrame
import openpyxl
from openpyxl import load_workbook

In [None]:
def create_timestamp():
    """
    Creates a timestamp in DB format (YYYY-MM-DD) for today's local date.

    Month and day are zero-padded so that the resulting strings have a
    fixed width and sort chronologically (e.g. "2024-01-05" rather than
    "2024-1-5").

    Returns:
        str: today's date formatted as "YYYY-MM-DD".
    """
    # date.isoformat() always yields the zero-padded YYYY-MM-DD form.
    return datetime.date.today().isoformat()

In [None]:
# Working folders: input workbooks are read from data_dir, and the joined
# output is archived under archive_dir.
data_dir = ".\\excel\\data\\"
archive_dir = ".\\excel\\archive\\"

# Create each folder independently. Previously data_dir was only created when
# archive_dir was also missing, so it was skipped once the archive existed.
# exist_ok=True makes the cell safe to re-run.
for required_dir in (data_dir, archive_dir):
    os.makedirs(required_dir, exist_ok=True)

In [None]:
# Load every .xlsx workbook found in data_dir and build one DataFrame per
# report type. The report type is recognised from the (lower-cased) file path:
#   *reason* -> df_reason, *short* -> df_shorts, *base* -> df_base,
#   *export* -> df_cs.
# The first matching pattern wins; files matching none are ignored.
# NOTE(review): if a report type has no matching file, its DataFrame is never
# assigned here and any later cell that uses it will raise NameError.
xl_list = glob.glob(data_dir + "*.xlsx")

# Numeric shortage-reason columns of the "reason" report, paired with the
# label written to primaryReason. Order matters: labels are assigned in this
# order, so when several columns tie for the row maximum the last one wins.
reason_labels = [("outOfStock", "Out Of Stock"),
                 ("manufacIssue", "Manufacturer Issue"),
                 ("disc", "Discontinued"),
                 ("other", "Other"),
                 ("newItemIssue", "New Item Issue")]
reason_cols = [reason_col for reason_col, _ in reason_labels]

for xl_file in xl_list:
    xl_name = xl_file.lower()

    # Open each workbook exactly once and close it as soon as it is parsed,
    # so that no file handle is left open on the source files.
    with pd.ExcelFile(xl_file) as workbook:

        if fnmatch.fnmatch(xl_name, "*reason*.xlsx"):
            df_reason = workbook.parse(0, skiprows=2, header=None)
            df_reason.columns = ["dept",
                                 "category",
                                 "itemDesc",
                                 "itemCode"] + reason_cols
            # Left-pad item codes with zeros to 6 characters so they match
            # the itemCode keys of the other reports.
            df_reason["itemCode"] = df_reason["itemCode"].map('{:0>6}'.format)
            # Row-wise maximum across the five reason columns.
            df_reason["max"] = df_reason[reason_cols].max(axis=1)
            for reason_col, reason_label in reason_labels:
                df_reason.loc[df_reason["max"] == df_reason[reason_col],
                              "primaryReason"] = reason_label
            # Order by the largest reason count, then keep only the join key
            # and the derived primaryReason.
            df_reason = (df_reason
                         .sort_values(by=["max"], ascending=False)
                         .drop(columns=["dept",
                                        "category",
                                        "itemDesc"] + reason_cols + ["max"]))

        elif fnmatch.fnmatch(xl_name, "*short*.xlsx"):
            df_shorts = workbook.parse(0, skiprows=1, header=None)
            df_shorts.columns = ["itemDesc",
                                 "itemCode",
                                 "yesterdayOOS"]
            df_shorts["itemCode"] = df_shorts["itemCode"].map('{:0>6}'.format)
            df_shorts = df_shorts.drop(columns=["itemDesc"])

        elif fnmatch.fnmatch(xl_name, "*base*.xlsx"):
            df_base = workbook.parse(0, skiprows=1, header=None)
            df_base.columns = ["dept",
                               "category",
                               "itemDesc",
                               "itemCode",
                               "itemSize",
                               "pvtLblFlag",
                               "buyerCode",
                               "invUnitShipped",
                               "invCaseShipped",
                               "storeOrdProdQty",
                               "shortedQty",
                               "grossSvcLvl",
                               "netSvcLvl"]
            df_base["itemCode"] = df_base["itemCode"].map('{:0>6}'.format)
            # NOTE(review): buyer codes are scaled by 10; the reason is not
            # visible in this notebook -- confirm with the report owner.
            df_base["buyerCode"] = df_base["buyerCode"] * 10
            # Append the size to the description, separated by three spaces.
            df_base["itemDesc"] = df_base["itemDesc"] + "   " + df_base["itemSize"]

        elif fnmatch.fnmatch(xl_name, "*export*.xlsx"):
            # Skip the 3 leading and 20 trailing rows of the export sheet.
            df_cs = workbook.parse(0, skiprows=3, skipfooter=20, header=None)
            # Keep only the positional columns that are needed downstream.
            df_cs = df_cs.filter([0, 14, 15, 17, 34])
            df_cs.columns = ["custCode",
                             "poDueDate",
                             "poApptDate",
                             "inStock",
                             "daysOOS"]
            # The item code is taken from characters 9..14 of custCode.
            df_cs["itemCode"] = df_cs["custCode"].astype(str).str[9:15]
            df_cs = df_cs.drop(columns=["custCode"])

In [None]:
# Enrich the base report step by step. Every merge is a left join on itemCode,
# so all rows of df_base are kept and unmatched items get NaN in the added
# columns: first the primary shortage reason, then yesterday's shorts, then
# the CS export fields.
df_join_1 = pd.merge(df_base, df_reason, on="itemCode", how="left")
df_join_2 = pd.merge(df_join_1, df_shorts, on="itemCode", how="left")
df_join_3 = pd.merge(df_join_2, df_cs, on="itemCode", how="left")

# Disabled fallback: placeholder values for the CS columns on df_join_2.
# df_join_2["poDueDate"] = "NO CS DATA"
# df_join_2["poApptDate"] = "NO CS DATA"
# df_join_2["inStock"] = "NO CS DATA"
# df_join_2["daysOOS"] = "NO CS DATA"

# Show the fully joined frame as the cell output.
df_join_3

In [None]:
# Archive the fully joined data as a dated workbook. The path is built from
# archive_dir (the folder created earlier in the notebook) instead of
# repeating the literal, so the two cannot drift apart.
archive_path = archive_dir + f"oos-data-{create_timestamp()}.xlsx"
df_join_3.to_excel(archive_path)

In [None]:
# Write the joined data into the report template.
# NOTE(review): both writes below use df_join_2 (base + reason + shorts), not
# df_join_3 which also carries the CS export columns -- confirm this is
# intentional.

# 1) Load the template workbook and write the data into sheet "Data" of a new
#    file, 'file.xlsx', starting at row offset 2 / column offset 1.
workbook1 = openpyxl.load_workbook(".\\excel\\template.xlsx")

# The context manager saves and closes the writer exactly once on exit. This
# replaces the explicit save() + close() pair, which persisted the file twice
# and relies on a save() method that recent pandas releases no longer offer.
with pd.ExcelWriter('file.xlsx', engine='openpyxl') as writer:
    # NOTE(review): assigning writer.book is deprecated in newer pandas
    # releases -- confirm against the installed version before upgrading.
    writer.book = workbook1
    df_join_2.to_excel(writer, sheet_name='Data', index=False, startrow=2, startcol=1)

# 2) Append the data to the template itself as sheet "Data".
#    No writer.close() inside the block: leaving the `with` already closes the
#    writer, and closing it twice can fail on an already-closed file handle.
# NOTE(review): in append mode, what happens when a "Data" sheet already
# exists depends on the pandas version (if_sheet_exists) -- verify.
with pd.ExcelWriter(".\\excel\\template.xlsx", engine='openpyxl', mode='a') as writer:
    df_join_2.to_excel(writer, sheet_name='Data')