In [145]:
import pandas as pd
from pathlib import Path

In [4]:
FILE_PATH = Path('data').joinpath('STO Report 01.09.21-19.12.21.XLSX')

In [5]:
FILE_PATH

WindowsPath('data/STO Report 01.09.21-19.12.21.XLSX')

In [148]:
REQUIRED_COLUMNS = [
    'STO No.', 'STO Status', 'Receiving Plant', 
    'Item Description', 'Item SKU Qty', 'SKU UoM', 
    'Delivery No', 'PGI Quantity', 'PGI UoM','PGI Date'
]

COLUMN_CLEANED = [item.lower().replace(" ", "_").replace(".", "").replace("/", "") for item in REQUIRED_COLUMNS]

COLUMN_SCHEMA = dict(zip(REQUIRED_COLUMNS, COLUMN_CLEANED))

WEEKLY_RPD_REQUIRED_COLUMNS = ['STO No.', 'Item', 'Quantity (kg)', 'Production', 'FG']

WEEKLY_RPD_REQUIRED_COLUMNS_CLEANED = [
    item.lower()
    .replace(" ", "_")
    .replace(".", "")
    .replace("/", "")
    .replace("(", "")
    .replace(")", "")
    
    for item in REQUIRED_COLUMNS
]

WEEKLY_PRD_SCHEMA = dict(zip(WEEKLY_RPD_REQUIRED_COLUMNS, WEEKLY_RPD_REQUIRED_COLUMNS_CLEANED))

In [131]:
temp_df = pd.read_excel(FILE_PATH, sheet_name="Weekly Prod. Sche. 12.12-18.12", skiprows=2)

In [134]:
temp_df.head()

Unnamed: 0.1,Unnamed: 0,STO No.,Item,Quantity (kg),Production,Deviation,FG,Remarks,Estimated Delivery Date
0,"4500025193 Wrap-Fresh Single P.Film T.Tissue,Gold",4500025193,"Wrap-Fresh Single P.Film T.Tissue,Gold",2000.0,,,,,12/12/21-18/12/21
1,"4500025944 Wrap-Fresh Rstrt P.Npk P.Film-100S,Prf",4500025944,"Wrap-Fresh Rstrt P.Npk P.Film-100S,Prf",2000.0,2115.0,,,,
2,"4500025944 Wrap-Napkin Poly Film-100P,Prf",4500025944,"Wrap-Napkin Poly Film-100P,Prf",2000.0,1084.0,,,,
3,4500025944 Wrap-Hand Towel Poly Film 150Sheet ...,4500025944,Wrap-Hand Towel Poly Film 150Sheet Tisue,2000.0,907.0,,,,
4,"4500026199 Wrap-Fresh Wallet Tissue,Prf",4500026199,"Wrap-Fresh Wallet Tissue,Prf",3000.0,589.0,,,,


In [127]:
temp_df.columns

Index(['Unnamed: 0', 'Unnamed: 1', 'Unnamed: 2', 'Unnamed: 3', 'Unnamed: 4',
       'Unnamed: 5', 'Unnamed: 6', 'Unnamed: 7', 'Unnamed: 8'],
      dtype='object')

In [22]:
WEEKLY_COLUMN_SCHEMA = [""]

{'STO No.': 'sto_no',
 'STO Status': 'sto_status',
 'Receiving Plant': 'receiving_plant',
 'Item Description': 'item_description',
 'Item SKU Qty': 'item_sku_qty',
 'SKU UoM': 'sku_uom',
 'Delivery No': 'delivery_no',
 'PGI Quantity': 'pgi_quantity',
 'PGI UoM': 'pgi_uom',
 'PGI Date': 'pgi_date'}

In [123]:
def read_to_dataframe(
    path: str, 
    required_column: list, 
    column_schema: dict, 
    sheet_name: str
) -> pd.DataFrame:
    df = pd.read_excel(path, sheet_name=sheet_name)
    df = df[required_column]
    df = df.rename(columns=column_schema)
    df = df[(~df["sto_no"].isnull()) & (~df.item_description.isnull()) & (~df.receiving_plant.isnull())]
    return df

In [124]:
RAW_DF = read_to_dataframe(
    path=FILE_PATH, required_column=REQUIRED_COLUMNS, 
    column_schema=COLUMN_SCHEMA, sheet_name="Sheet1"
)

In [150]:
WEEKLY_PROD = read_to_dataframe(
    path=FILE_PATH, required_column=WEEKLY_RPD_REQUIRED_COLUMNS, 
    column_schema=WEEKLY_PRD_SCHEMA, sheet_name="Weekly Prod. Sche. 12.12-18.12"
)

KeyError: "None of [Index(['STO No.', 'Item', 'Quantity (kg)', 'Production', 'FG'], dtype='object')] are in the [columns]"

In [120]:
WEEKLY_PROD.head()

Unnamed: 0,sto_no,sto_status,receiving_plant,item_description,item_sku_qty,sku_uom,delivery_no,pgi_quantity,pgi_uom,pgi_date
0,4500023000.0,CLOSED,5101.0,Wrapper-Fresh Refined Sugar Pouch 1KG,15000.0,KG,86931369.0,3284,KG,2021-09-09
1,4500023000.0,CLOSED,5101.0,Wrapper-Fresh Refined Sugar Pouch 1KG,15000.0,KG,86994028.0,3642,KG,2021-09-30
2,4500023000.0,CLOSED,5101.0,Wrapper-Fresh Refined Sugar Pouch 1KG,15000.0,KG,87029546.0,2470,KG,2021-10-03
3,4500023000.0,CLOSED,5101.0,Wrapper-Fresh Refined Sugar Pouch 1KG,15000.0,KG,87062849.0,1027,KG,2021-10-10
4,4500023000.0,CLOSED,5101.0,Wrapper-Fresh Refined Sugar Pouch 1KG,15000.0,KG,87068430.0,2964,KG,2021-10-11


In [107]:
def create_uid(df: pd.DataFrame) -> pd.DataFrame:
    df["temp"] = df["sto_no"].astype('Int64').astype('str')
    df["uid"] = df["temp"] + " " + df["item_description"]
    df = df.drop("temp", axis=1)
    return df

In [111]:
def make_pgi_qty_df(df: pd.DataFrame) -> pd.DataFrame:
    selected_columns = [
        'sto_no', 'item_description', 'receiving_plant', 
        'delivery_no', 'pgi_date', 'pgi_quantity', 'pgi_uom'
    ]
    _df = df[selected_columns].copy()
    _df = create_uid(df=_df)
    new_col_list = [
        'uid', 'sto_no', 'item_description', 'receiving_plant', 
        'delivery_no', 'pgi_date', 'pgi_quantity', 'pgi_uom'
    ]
    _df = _df[new_col_list]
    return _df

In [112]:
PGI_QTY_DF = make_pgi_qty_df(RAW_DF)

In [113]:
PGI_QTY_DF.head()

Unnamed: 0,uid,sto_no,item_description,receiving_plant,delivery_no,pgi_date,pgi_quantity,pgi_uom
0,4500023106 Wrapper-Fresh Refined Sugar Pouch 1KG,4500023000.0,Wrapper-Fresh Refined Sugar Pouch 1KG,5101.0,86931369.0,2021-09-09,3284,KG
1,4500023106 Wrapper-Fresh Refined Sugar Pouch 1KG,4500023000.0,Wrapper-Fresh Refined Sugar Pouch 1KG,5101.0,86994028.0,2021-09-30,3642,KG
2,4500023106 Wrapper-Fresh Refined Sugar Pouch 1KG,4500023000.0,Wrapper-Fresh Refined Sugar Pouch 1KG,5101.0,87029546.0,2021-10-03,2470,KG
3,4500023106 Wrapper-Fresh Refined Sugar Pouch 1KG,4500023000.0,Wrapper-Fresh Refined Sugar Pouch 1KG,5101.0,87062849.0,2021-10-10,1027,KG
4,4500023106 Wrapper-Fresh Refined Sugar Pouch 1KG,4500023000.0,Wrapper-Fresh Refined Sugar Pouch 1KG,5101.0,87068430.0,2021-10-11,2964,KG


In [48]:
def has_gum_text(text: str):
    if "gum" in text.lower():
        return True
    return False

In [54]:
def x_square(x):
    return x*2

x_square_one_line = lambda x: x*x

In [58]:
RAW_DF["item_description"].head()

0    False
1    False
2    False
3    False
4    False
Name: item_description, dtype: bool

In [73]:
def make_summary_df(df: pd.DataFrame) -> pd.DataFrame:
    selected_columns = [
        "sto_no", "item_description", 
        "receiving_plant", "sku_uom", 
        "item_sku_qty", "pgi_quantity"
    ]
    _df = df[selected_columns]
    _df = _df[_df["sku_uom"] != "PC"]
    _df = _df[~df["item_description"].apply(lambda x: has_gum_text(x))]
    _df = (df
           .groupby(["sto_no", "item_description", "receiving_plant", "sku_uom", "item_sku_qty"])
           .agg(pgi_quantity=("pgi_quantity", "sum"))
           .reset_index()
          )
    _df["pending_qty"] = _df["item_sku_qty"] - _df["pgi_quantity"]
    _df_uid
    return _df.copy()

In [74]:
SUMMARY_DF = make_summary_df(RAW_DF)

  _df = _df[~df["item_description"].apply(lambda x: has_gum_text(x))]


In [75]:
SUMMARY_DF.head()

Unnamed: 0,sto_no,item_description,receiving_plant,sku_uom,item_sku_qty,pgi_quantity,pending_qty
0,4500023000.0,Wrapper-Fresh Refined Sugar Pouch 1KG,5101.0,KG,15000.0,15000,0.0
1,4500023000.0,Wrapper-Fresh Refined Sugar Pouch 500G,5101.0,KG,1500.0,1500,0.0
2,4500023000.0,Wrapper-No.1 Refined Sugar Pouch 1KG,5101.0,KG,1500.0,1500,0.0
3,4500023000.0,Wrapper-No.1 Refined Sugar Pouch 500G,5101.0,KG,500.0,488,12.0
4,4500023000.0,Wrap-No.1 Pusti Protidin Milk 15g,3001.0,KG,5100.0,5100,0.0


In [15]:
# test_list = []
# for item in raw_df.columns:
#     value = item.lower().replace(" ", "_").replace(".", "").replace("/", "")
#     test_list.append(value)

In [7]:
raw_df.head()

Unnamed: 0,STO No.,STO Date,STO Status,Receiving Plant,Receiving Plant Desc,Item Description,Item Qty,UOM,Item SKU Qty,SKU UoM,...,Vendor Name,Receipt Qty,Receipt UOM,Shortage Qty,Shortage Unit,Variance Reason,Excess Quantity,Excess Unit,GR SKU Qty,GR SKU UoM
0,4500023000.0,2021-09-01,CLOSED,5101.0,MSRL Plant,Wrapper-Fresh Refined Sugar Pouch 1KG,15000.0,KG,15000.0,KG,...,,3284.0,KG,0.0,,,0.0,,3284.0,KG
1,4500023000.0,2021-09-01,CLOSED,5101.0,MSRL Plant,Wrapper-Fresh Refined Sugar Pouch 1KG,15000.0,KG,15000.0,KG,...,,3642.0,KG,0.0,,,0.0,,3642.0,KG
2,4500023000.0,2021-09-01,CLOSED,5101.0,MSRL Plant,Wrapper-Fresh Refined Sugar Pouch 1KG,15000.0,KG,15000.0,KG,...,,2470.0,KG,0.0,,,0.0,,2470.0,KG
3,4500023000.0,2021-09-01,CLOSED,5101.0,MSRL Plant,Wrapper-Fresh Refined Sugar Pouch 1KG,15000.0,KG,15000.0,KG,...,,1027.0,KG,0.0,,,0.0,,1027.0,KG
4,4500023000.0,2021-09-01,CLOSED,5101.0,MSRL Plant,Wrapper-Fresh Refined Sugar Pouch 1KG,15000.0,KG,15000.0,KG,...,,2964.0,KG,0.0,,,0.0,,2964.0,KG
