In [1]:
import pandas as pd
import numpy as np

pd.options.mode.chained_assignment = None
pd.set_option('display.max_columns', 30)
pd.set_option('display.max_rows', 55)

In [2]:
df = pd.read_excel('../data/posco/xnk.xls', header=9)

In [3]:
'''
Take out E21, B13, A42 items
'''

e21 = df.loc[df['Mã loại hình'] == 'E21', ['Mã NPL/SP', 'Ngày ĐK', 'Đơn vị tính', 'Tổng số lượng']]
b13 = df.loc[df['Mã loại hình'] == 'B13', ['Mã NPL/SP', 'Ngày ĐK', 'Đơn vị tính', 'Tổng số lượng']]
a42 = df.loc[df['Mã loại hình'] == 'A42', ['Mã NPL/SP', 'Ngày ĐK', 'Đơn vị tính', 'Tổng số lượng']]

for df in [e21, b13, a42]:
    df.rename(columns={
        'Mã NPL/SP': 'Items',
        'Ngày ĐK': 'Date',
        'Đơn vị tính': 'Unit',
        'Tổng số lượng': 'Qty'
    }, inplace=True)

In [4]:
'''
Output for production
'''
take_out_free = pd.read_excel('../data/posco/take_out_free.xls', header=6)
take_out_free = take_out_free[['Date', 'Item Code', 'Unit', 'Qty', 'Weight']]
take_out_free['Date'] = pd.to_datetime(take_out_free['Date'], format="%d/%m/%Y", yearfirst=True)

take_out = pd.read_excel('../data/posco/take_out.xlsx', header=9)
del take_out['Date']
take_out.rename(columns={
    'Date.1': 'Date'
}, inplace=True)
take_out['Date'] = pd.to_datetime(take_out['Date'], format="%d/%m/%Y", yearfirst=True)
take_out = take_out[['Date', 'Item Code', 'Unit', 'Qty', 'Weight']]

output_for_production = pd.concat([take_out, take_out_free])
output_for_production.reset_index(inplace=True)
del output_for_production['index'], take_out, take_out_free
output_for_production.rename(columns={
    'Item Code' : 'Items'
}, inplace=True)

output_for_production['Qty'] = np.where(
    (
        (output_for_production['Unit'] == 'KG') &
        (
            (output_for_production['Weight'].notnull()) |
            (output_for_production['Weight'] != 0)
        )
    ), 
    output_for_production['Weight'], 
    output_for_production['Qty']
)
del output_for_production['Weight']

In [5]:
output_for_production

Unnamed: 0,Date,Items,Unit,Qty
0,2021-04-12,Z0101010010002,PCS,1.000
1,2021-06-21,Z0101010010002,PCS,2.000
2,2021-01-22,Z0101010010002,PCS,1.000
3,2021-03-25,Z0101010010002,PCS,1.000
4,2021-03-04,Z0101010010002,PCS,1.000
...,...,...,...,...
17150,2021-06-22,Z0106010000299$,PCS,-168.000
17151,2021-06-22,Z0106010000286$,PCS,-18.000
17152,2021-06-22,Z0106010000300$,PCS,-18.000
17153,2021-06-22,Z0106010000301$,PCS,-12.000


In [6]:
'''
Return
'''
return_df = pd.read_excel('../data/posco/return.xlsx', header=6)
return_df = return_df[['Date', 'Item Code', 'Unit', 'Return Qty', 'Return Weight']]
return_df['Date'] = pd.to_datetime(return_df['Date'], format="%Y%m%d", yearfirst=True)

# aggregation_functions = {
#     'Item Name': 'first',
#     'Unit': 'first',
#     'Return Qty': 'sum',
#     'Return Weight': 'sum'
# }
# return_df['Item Code'] = return_df['Item Code'].str.replace('$', '', regex=True)
# return_df = return_df.groupby(return_df['Item Code']).aggregate(aggregation_functions)

return_df.rename(columns={
    'Item Code' : 'Items',
    'Return Qty': 'Qty',
    'Return Weight': 'Weight'
}, inplace=True)

return_df['Qty'] = np.where(
    (
        (return_df['Unit'] == 'KG') &
        (
            (return_df['Weight'].notnull()) |
            (return_df['Weight'] != 0)
        )
    ), 
    return_df['Weight'], 
    return_df['Qty']
)
del return_df['Weight']

  warn("""Cannot parse header or footer so it will be ignored""")


In [7]:
'''
WAREHOUSE
'''
warehouse = pd.read_excel('../data/posco/iob.xlsx', header=9, skipfooter=10)
warehouse.rename(columns={
    'Unnamed: 0': 'Items',
    'Unnamed: 1': 'Name',
    'Unnamed: 3': 'Unit',
    'Unnamed: 2': 'Project',
    'Unnamed: 13': 'Qty'
}, inplace=True)
warehouse = warehouse[['Items', 'Project', 'Unit', 'Qty']]
# aggregation_functions = {
#     'Name': 'first',
#     'Unit': 'first',
#     'Qty': 'sum',
# }
# warehouse = warehouse.groupby(warehouse['Items']).aggregate(aggregation_functions)

In [8]:
transfer = pd.read_excel('../data/posco/transfer.xlsx', header=6)
transfer['Date'] = pd.to_datetime(transfer['Date'], format="%d/%m/%Y", yearfirst=True)
transfer = transfer[['Date', 'Item Code', 'Unit', 'Transfer Qty', 'Transfer Weight']]
transfer.rename(columns={
    'Item Code': 'Items',
    'Transfer Qty': 'Qty',
    'Transfer Weight': 'Weight'
}, inplace=True)
transfer['Qty'] = np.where(
    (
        (transfer['Unit'] == 'KG') &
        (
            (transfer['Weight'].notnull()) |
            (transfer['Weight'] != 0)
        )
    ), 
    transfer['Weight'], 
    transfer['Qty']
)
del transfer['Weight']

In [9]:

for df in [output_for_production, return_df, warehouse, transfer]:
#     trim_strings = lambda x: x.strip() if isinstance(x, str) else x
#     return df.applymap(trim_strings)
    df['Unit'] = df['Unit'].str.strip()
    conditions = [
        (df['Unit'].isin(['Bottle', 'Box', 'CAN', 'EA', 'PCS', 'PAIL'])),
        (df['Unit'] == 'KG'),
        (df['Unit'].isin(['SH', 'ROLL', 'COIL', 'L', 'M', 'M2'])),
        (df['Unit'].isin(['SET', 'SUITE', 'PAIR']))
    ]
    values = ['Cái/Chiếc', 'Kilogam', 'Cuộn', 'Bộ']
    df['Unit'] = np.select(conditions, values)


In [10]:
# conditions = [
#     (transfer['Unit'].isin(['Bottle', 'Box', 'CAN', 'EA', 'PCS', 'PAIL'])),
#     (transfer['Unit'] == 'KG'),
#     (transfer['Unit'].isin(['SH', 'ROLL', 'COIL', 'M', 'M2'])),
#     (transfer['Unit'].isin(['SET', 'SUITE', 'PAIR']))
# ]
# values = ['Cái/Chiếc', 'Kilogam', 'Cuộn', 'Bộ']
# transfer['Unit'] = np.select(conditions, values)
# pd.unique(transfer['Unit'])
pd.unique(warehouse['Unit'])

array(['Cái/Chiếc', 'Kilogam', 'Cuộn', 'Bộ', '0'], dtype=object)

In [11]:
'''
Feburary
'''
# e21 = e21.loc[e21['Date'].dt.month == 2]
# output_for_production = output_for_production.loc[output_for_production['Date'].dt.month == 2]
# return_df = return_df.loc[return_df['Date'].dt.month == 2]


# output_for_production = output_for_production.groupby(['Items','Unit'],as_index=False)['Qty'].sum()
# output_for_production.rename(columns={
#     'Qty': 'Output for production'
# }, inplace=True)

# e21 = e21.groupby(['Items','Unit'],as_index=False)['Qty'].sum()
# e21.rename(columns={
#     'Qty': 'Import'
# }, inplace=True)

# return_df['Qty'] = [int(x) for x in return_df['Qty']]
# return_df = return_df.groupby(['Items','Unit'],as_index=False)['Qty'].sum()
# return_df.rename(columns={
#     'Qty': 'Return'
# }, inplace=True)

# balanced = pd.merge(left=e21_02, right=output_for_production_02, how='left', left_on='Items', right_on='Items')

# grouped_e21 = e21_02.groupby(["Items", "Unit"])
# test_df = [None]
# for _, item in grouped_e21:
#     item.aggregate({
#     'Qty': 'sum'
#     })
#     test_df.append(item)
# warehouse = warehouse.groupby(warehouse['Items']).aggregate(aggregation_functions)


# test_df = e21_02.groupby(['Items','Unit'],as_index=False)['Quantity'].sum()
# test_df = pd.concat(test_df)


'\nFeburary\n'

In [12]:
# test_report = pd.merge(
#     e21, output_for_production,  how='left', 
#     left_on=['Items','Unit'], 
#     right_on = ['Items','Unit']
# )
# test_report = pd.merge(
#     test_report, return_df,  how='left', 
#     left_on=['Items','Unit'], 
#     right_on = ['Items','Unit']
# )
# from functools import reduce

# dfs = [e21, output_for_production, return_df]
# test_report = reduce(lambda left, right: pd.merge(left, right, how='left', 
#     left_on=['Items','Unit'], 
#     right_on = ['Items','Unit']), dfs
# )


In [13]:
# output_for_production_02
# grouped_output = output_for_production_02.groupby(["Items", "Unit"])
# output_df = [None]
# for _, item in grouped_output:
#     del item['Qty'], item['Weight']
#     item['Quantity'].sum()
#     output_df.append(item)
# output_df = pd.concat(output_df)


In [14]:

# output_df['Quantity'] = output_df['Quantity'].transform(lambda x : sum(x))
# output_df = output_df.groupby(output_df['Items', 'Unit']).aggregate({'Quantity': 'sum'})


In [15]:
'''
Input files: e21, b13, a42, output_for_production, return_df, warehouse, transfer
'''

'\nInput files: e21, b13, a42, output_for_production, return_df, warehouse, transfer\n'

In [26]:
'''
Snippet for final result
'''
E21_DF = [None] * 13
B13_DF = [None] * 13
A42_DF = [None] * 13
OUTPUT_DF = [None] * 13
RETURN_DF = [None] * 13
TRANSFER_DF = [None] * 13
WAREHOUSE_DF = [None] * 13
BALANCED_REPORT = [None] * 13

from functools import reduce

for month in range(1, 13):
    E21_DF[month] = e21.loc[e21['Date'].dt.month == month]
    B13_DF[month] = b13.loc[b13['Date'].dt.month == month]
    A42_DF[month] = a42.loc[a42['Date'].dt.month == month]
    OUTPUT_DF[month] = output_for_production.loc[output_for_production['Date'].dt.month == month]
    RETURN_DF[month] = return_df.loc[return_df['Date'].dt.month == month]
    
    E21_DF[month] = E21_DF[month].groupby(['Items','Unit'],as_index=False)['Qty'].sum()
    E21_DF[month].rename(columns={
        'Qty': 'Import'
    }, inplace=True)
    
    B13_DF[month] = B13_DF[month].groupby(['Items','Unit'],as_index=False)['Qty'].sum()
    B13_DF[month].rename(columns={
        'Qty': 'Re-export'
    }, inplace=True)
    
    A42_DF[month] = A42_DF[month].groupby(['Items','Unit'],as_index=False)['Qty'].sum()
    A42_DF[month].rename(columns={
        'Qty': 'Re-purpose'
    }, inplace=True)
    
    OUTPUT_DF[month] = OUTPUT_DF[month].groupby(['Items','Unit'],as_index=False)['Qty'].sum()
    OUTPUT_DF[month].rename(columns={
        'Qty': 'Output for production'
    }, inplace=True)

    RETURN_DF[month]['Qty'] = [int(x) for x in RETURN_DF[month]['Qty']]
    RETURN_DF[month] = RETURN_DF[month].groupby(['Items','Unit'],as_index=False)['Qty'].sum()
    RETURN_DF[month].rename(columns={
        'Qty': 'Return'
    }, inplace=True)
    
    dfs = [E21_DF[month], B13_DF[month], A42_DF[month], OUTPUT_DF[month], RETURN_DF[month]]
    BALANCED_REPORT[month] = reduce(lambda left, right: pd.merge(left, right, how='left', 
        left_on=['Items','Unit'], 
        right_on = ['Items','Unit']), dfs
    )

    BALANCED_REPORT[month].fillna(0, inplace=True)
    BALANCED_REPORT[month]['Ecus stock'] = (
    BALANCED_REPORT[month]['Import'] - BALANCED_REPORT[month]['Output for production'] - 
    BALANCED_REPORT[month]['Re-export'] - BALANCED_REPORT[month]['Re-purpose']
    )
    BALANCED_REPORT[month]['begin'] = np.nan
    #     if month > 1:
    #         BALANCED_REPORT[month]['begin'].loc[BALANCED_REPORT[month].isin(BALANCED_REPORT[month - 1]['Items']), ['Nonprofit']] = BALANCED_REPORT[month - 1][['Ecus stock']]
    #         BALANCED_REPORT[month]
    #         BALANCED_REPORT[month]['begin'] = BALANCED_REPORT[month - 1].lookup(BALANCED_REPORT[month - 1]['Items'], BALANCED_REPORT[month - 1]['Ecus stock'])
    #         BALANCED_REPORT[month]['begin']*df_lookup.loc[df.index.hour].values
    #         BALANCED_REPORT[month]['begin'] = BALANCED_REPORT[month - 1]['Ecus stock']*df_lookup.loc[df['Items']].values
    

In [28]:

with pd.ExcelWriter('../output/posco_result.xlsx') as writer:  
    for month in range(1, 13):
        BALANCED_REPORT[month].to_excel(writer, sheet_name=f'BALANCED_{month:02}')

Unnamed: 0,Items,Unit,Import,Re-export,Re-purpose,Output for production,Return,Ecus stock,begin
0,Z0101030030001,Kilogam,28944.0,0.0,0.0,0.0,0.0,28944.0,
1,Z0101030050001,Kilogam,23040.0,0.0,0.0,0.0,0.0,23040.0,
2,Z0101050390001,Kilogam,15120.0,0.0,0.0,0.0,0.0,15120.0,
3,Z0101050390002,Kilogam,1603.0,0.0,0.0,0.0,0.0,1603.0,
4,Z0101070090001,Kilogam,853.2,0.0,0.0,0.0,0.0,853.2,
...,...,...,...,...,...,...,...,...,...
95,Z0202060001025$,Cái/Chiếc,172.0,0.0,0.0,0.0,0.0,172.0,
96,Z0202060001026$,Cái/Chiếc,105.0,0.0,0.0,0.0,0.0,105.0,
97,Z0202060001027$,Cái/Chiếc,405.0,0.0,0.0,0.0,0.0,405.0,
98,Z0202060001028$,Cái/Chiếc,533.0,0.0,0.0,0.0,0.0,533.0,
