In [442]:
import pandas as pd
import numpy as np

# READ DATAFRAME
# Single place to change the input workbook.
REFUND_WORKBOOK = '../data/2022-05-28_REFUND.xlsx'

# Open the workbook once and parse every sheet from the same handle instead of
# re-opening the file for each sheet.
with pd.ExcelFile(REFUND_WORKBOOK) as workbook:
    bom_df = pd.read_excel(workbook, sheet_name='BOM')
    # 'Số TK' is forced to str so it is not parsed as a number.
    a12_df = pd.read_excel(workbook, converters={'Số TK': str}, sheet_name='2021-2022.A12', thousands=',')
    e62_df = pd.read_excel(workbook, converters={'Số TK': str}, sheet_name='E62', thousands=',')

def datetime_to_str(row):
    """Return the row's 'Ngày ĐK' value formatted as ``dd/mm/YYYY`` when possible.

    Parameters
    ----------
    row : mapping or pandas.Series
        Must provide the key 'Ngày ĐK'.

    Returns
    -------
    str or object
        The formatted date string when the value supports ``strftime``;
        otherwise the value itself, unchanged (e.g. a string, NaN or NaT).

    Raises
    ------
    KeyError
        If ``row`` has no 'Ngày ĐK' entry.
    """
    value = row['Ngày ĐK']
    try:
        return value.strftime('%d/%m/%Y')
    except (AttributeError, ValueError):
        # AttributeError: value has no strftime (str, float NaN, None).
        # ValueError: value has strftime but cannot be formatted (e.g. NaT).
        return value


# Both declaration sheets go through the same steps: sort, normalise the
# registration date to datetime, keep the working columns, sum the quantity per
# declaration line, sort again and flatten the index.
date_sort_keys = ['Mã NPL/SP', 'Ngày ĐK']

a12_df = a12_df.sort_values(date_sort_keys)
e62_df = e62_df.sort_values(date_sort_keys)

# --- A12 sheet -------------------------------------------------------------
a12_df['Ngày ĐK'] = a12_df.apply(datetime_to_str, axis=1)
a12_df['Ngày ĐK'] = pd.to_datetime(a12_df['Ngày ĐK'], dayfirst=True)
a12_df = a12_df[['Số TK', 'Ngày ĐK', 'Tổng số lượng', 'Mã NPL/SP', 'Đơn vị tính', 'Tổng trị giá', 'Thuế suất XNK']]
a12_group_keys = ['Số TK', 'Ngày ĐK', 'Mã NPL/SP', 'Đơn vị tính', 'Tổng trị giá', 'Thuế suất XNK']
a12_df = (
    a12_df
    .groupby(a12_group_keys, sort=False)
    .agg({'Tổng số lượng': 'sum'})
    .sort_values(date_sort_keys)
)

# --- E62 sheet -------------------------------------------------------------
e62_df['Ngày ĐK'] = e62_df.apply(datetime_to_str, axis=1)
e62_df['Ngày ĐK'] = pd.to_datetime(e62_df['Ngày ĐK'], dayfirst=True)
e62_df = e62_df[['Số TK', 'Ngày ĐK', 'Tổng số lượng', 'Mã NPL/SP', 'Đơn vị tính']]
e62_group_keys = ['Số TK', 'Ngày ĐK', 'Mã NPL/SP', 'Đơn vị tính']
e62_df = (
    e62_df
    .groupby(e62_group_keys, sort=False)
    .agg({'Tổng số lượng': 'sum'})
    .sort_values(date_sort_keys)
)

# Turn the group keys back into ordinary columns.
e62_df = e62_df.reset_index()
a12_df = a12_df.reset_index()

In [443]:
# CREATE BARE DF
# Rename the sheet headers to the internal column names used from here on.
# Keys that are not present in a frame are ignored by rename.
a12_column_names = {
    'Mã NPL/SP': 'nvl_code',
    'Số TK': 'import_cd',
    'Tên hàng': 'nvl_name',
    'Đơn vị tính': 'nvl_unit',
    'Tổng số lượng': 'import_qty'
}
e62_column_names = {
    'Mã NPL/SP': 'tp_code',
    'Tổng số lượng': 'tp_qty',
    'Số TK': 'export_cd',
}
a12_df = a12_df.rename(columns=a12_column_names)
e62_df = e62_df.rename(columns=e62_column_names)

# Attach the BOM rows to every export line through the shared 'tp_code'.
export_df = e62_df.merge(bom_df, how='left', on='tp_code')

# THIS IS THE DF WE WILL BE WORKING ON
df = a12_df.merge(export_df, how='outer', on=['nvl_code', 'Ngày ĐK'])

# Keep only the codes that appear both in the import sheet and in the BOM.
imported_codes = a12_df['nvl_code'].unique()
bom_codes = bom_df['nvl_code'].unique()
code_in_both = df['nvl_code'].isin(imported_codes) & df['nvl_code'].isin(bom_codes)
df = df.loc[code_in_both].sort_values(['nvl_code', 'Ngày ĐK']).reset_index(drop=True)

def nvl_unit(row):
    """Pick the unit for a row: 'nvl_unit_x' unless it is missing, else 'nvl_unit_y'."""
    preferred = row['nvl_unit_x']
    return row['nvl_unit_y'] if pd.isna(preferred) else preferred
# Collapse the two merge-suffixed unit columns into a single 'nvl_unit' column,
# then keep only the columns the following cells work with.
df['nvl_unit'] = df.apply(nvl_unit, axis=1)
working_columns = [
    'import_cd', 'export_cd', 'Ngày ĐK', 'import_qty', 'nvl_code', 'nvl_unit',
    'tp_code', 'Đơn vị tính', 'tp_qty', 'tp_unit', 'bom', 'Tổng trị giá', 'Thuế suất XNK',
]
df = df[working_columns]


In [444]:
# CLEAR DUPLICATED ROW
# The outer merge repeats a declaration on several rows. Only the first row of
# each (nvl_code, date, declaration) combination keeps its declaration number;
# on the repeated rows the number is blanked out. Rows whose number is already
# missing are left untouched.
repeated_import = df.duplicated(subset=['nvl_code', 'Ngày ĐK', 'import_cd'], keep='first')
df['import_cd'] = df['import_cd'].mask(repeated_import & df['import_cd'].notna(), np.nan)

# Same for exports; here the finished-product code is part of the identity.
repeated_export = df.duplicated(subset=['nvl_code', 'Ngày ĐK', 'export_cd', 'tp_code'], keep='first')
df['export_cd'] = df['export_cd'].mask(repeated_export & df['export_cd'].notna(), np.nan)

# Drop rows that no longer carry any declaration, then restore a clean
# 0..n-1 index ordered by code and date.
has_declaration = df['import_cd'].notna() | df['export_cd'].notna()
df = df.loc[has_declaration].sort_values(['nvl_code', 'Ngày ĐK']).reset_index(drop=True)

In [445]:
# REMOVE EXPORT FIRST
# Within every run of consecutive rows sharing an nvl_code, drop the leading
# rows that come before the first row carrying an import declaration.
# Every run is handled, including the last one in the frame.
code_run = (df['nvl_code'] != df['nvl_code'].shift()).cumsum()

# A row "has an import" when import_cd is neither missing nor empty.
has_import = df['import_cd'].map(lambda value: not (pd.isna(value) or not value))

# Number of imports seen so far inside the run; 0 means no import has
# appeared yet, i.e. the row is a leading export-only row.
imports_seen = has_import.astype(int).groupby(code_run).cumsum()

df = df.loc[imports_seen > 0].reset_index(drop=True)

# From here on the single date column is tracked separately for each side.
df['export_date'] = df['Ngày ĐK']
df = df.rename(columns={'Ngày ĐK': 'import_date'})

In [446]:
# MOVE TO TOP
# Within every run of consecutive rows sharing an nvl_code, pack the import
# records (rows with an import_cd) at the top of the run and, independently,
# pack the export records (rows with an export_cd) at the top of the run.
# The remainder of each run is padded with None so every rebuilt column keeps
# exactly one entry per row. The final run is padded like any other, so the
# rebuilt columns always match the frame length.

# Columns that travel together with an import declaration.
import_columns = ['import_cd', 'nvl_unit', 'import_date', 'import_qty', 'Tổng trị giá', 'Thuế suất XNK']

# Columns that travel together with an export declaration:
# destination column -> column the values are read from.
# 'tp_unit' is filled from 'Đơn vị tính'; 'Đơn vị tính' itself is left as is.
export_columns = {
    'export_cd': 'export_cd',
    'export_date': 'export_date',
    'tp_qty': 'tp_qty',
    'tp_code': 'tp_code',
    'tp_unit': 'Đơn vị tính',
    'bom': 'bom',
}

rebuilt = {name: [] for name in list(import_columns) + list(export_columns)}

code_run = (df['nvl_code'] != df['nvl_code'].shift()).cumsum()
for _, run_rows in df.groupby(code_run, sort=False):
    run_length = len(run_rows)
    import_rows = run_rows.loc[run_rows['import_cd'].notna()]
    export_rows = run_rows.loc[run_rows['export_cd'].notna()]

    import_padding = [None] * (run_length - len(import_rows))
    export_padding = [None] * (run_length - len(export_rows))

    for name in import_columns:
        rebuilt[name] += import_rows[name].tolist() + import_padding
    for target, source in export_columns.items():
        rebuilt[target] += export_rows[source].tolist() + export_padding

# All values were collected from the untouched frame above, so the columns can
# now be overwritten in any order.
for name, values in rebuilt.items():
    df[name] = values

# Rows left with neither an import nor an export after packing are dropped.
df = df.loc[df['import_cd'].notna() | df['export_cd'].notna()].reset_index(drop=True)

In [447]:
# ARRANGE INDEX AND DELETE
# Re-position the import records of every nvl_code group against the running
# export quantity of that group, and collect row indexes to drop afterwards.
# export_qty is tp_qty multiplied by the BOM ratio
# (presumably the material consumed by the exported product; confirm).
df['export_qty'] = df['tp_qty'] * df['bom']

# Rebuilt import columns for the whole frame, filled group by group.
nvl_units = []
import_cds = []
import_dates = []
import_qtys = []
tax_totals = []
tax_rates = []

# Values of the group currently being scanned (one entry per row, nulls included).
current_nvl_units = []
current_import_cds = []
current_import_dates = []
current_import_qtys = []
current_tax_totals = []
current_tax_rates = []

current_export_qtys = []

# Index labels to drop once the loop is done.
to_delete = []

current_code = None

# `count` is the index label yielded by iterrows; the arithmetic below treats it
# as a row position, so it assumes the index is 0..n-1 (TODO confirm upstream reset).
# A group is flushed either when the first row of the next group is seen, or
# when the last row of the frame is reached inside the same-code branch.
# NOTE(review): if the last row of the frame starts a new nvl_code, that one-row
# group never appears to be flushed, so the output lists would lack its entry; confirm.
for count, row in df.iterrows():
    # True on the first row (current_code is None); also true whenever the
    # previous code is falsy.
    if not current_code:
        current_nvl_units.append(row['nvl_unit'])
        current_import_cds.append(row['import_cd'])
        current_import_dates.append(row['import_date'])
        current_import_qtys.append(row['import_qty'])
        current_tax_totals.append(row['Tổng trị giá'])
        current_tax_rates.append(row['Thuế suất XNK'])

        current_export_qtys.append(row['export_qty'])
    else:
        if current_code == row['nvl_code']:
            # Same group: keep collecting.
            current_nvl_units.append(row['nvl_unit'])
            current_import_cds.append(row['import_cd'])
            current_import_dates.append(row['import_date'])
            current_import_qtys.append(row['import_qty'])
            current_tax_totals.append(row['Tổng trị giá'])
            current_tax_rates.append(row['Thuế suất XNK'])

            current_export_qtys.append(row['export_qty'])

            # Last row of the frame: flush the group now (same steps as the
            # code-change branch further down).
            if count == df.shape[0] - 1:
                # Non-null entries of each column, filtered column by column.
                # NOTE(review): the lists stay aligned only if a row with an
                # import has every one of these columns non-null; confirm.
                clear_nvl_units = [x for x in current_nvl_units if not pd.isna(x)]
                clear_import_cds = [x for x in current_import_cds if not pd.isna(x)]
                clear_import_dates = [x for x in current_import_dates if not pd.isna(x)]
                clear_import_qtys = [x for x in current_import_qtys if not pd.isna(x)]
                clear_tax_totals = [x for x in current_tax_totals if not pd.isna(x)]
                clear_tax_rates = [x for x in current_tax_rates if not pd.isna(x)]

                # GET SPLIT POINTS FOR DIVIDING THE LIST
                # `dice` holds the positions inside the group at which the next
                # import record is placed. `im` is the cumulative quantity popped
                # from current_import_qtys (the unfiltered list), `ex` the
                # cumulative export quantity. `k` is not used below.
                k = 0
                im = 0
                ex = 0
                dice = [0]
                # Group length, taken before the first pop.
                l = len(current_import_qtys)
                im = current_import_qtys.pop(0)
                
                for i in range(l):
                    ex += current_export_qtys[i]
                    if ex <= im:
                        pass
                    else:
                        # Exports exceed what was taken so far: pull in the next
                        # entry and mark this position. When nothing is left to
                        # pop, the position is still marked.
                        try:
                            im += current_import_qtys.pop(0)
                            dice.append(i)
                        except IndexError:
                            dice.append(i)
                    # The final position is marked as well when it is still covered.
                    if i == l - 1 and ex <= im:
                        dice.append(i)

                # De-duplicate and order the split points.
                dice = list(set(dice))
                dice = sorted(dice)
                # --------------------------

                temp_nvl_units = []
                temp_import_cds = []
                temp_import_dates = []
                temp_import_qtys = []
                temp_tax_totals = []
                temp_tax_rates = []

                # One non-null import record per split point, followed by None
                # entries for the gap up to the next split point. Stops early
                # once the non-null records run out.
                for i in range(len(dice)):
                    try:
                        temp_nvl_units.append(clear_nvl_units.pop(0))
                        temp_import_cds.append(clear_import_cds.pop(0))
                        temp_import_dates.append(clear_import_dates.pop(0))
                        temp_import_qtys.append(clear_import_qtys.pop(0))
                        temp_tax_totals.append(clear_tax_totals.pop(0))
                        temp_tax_rates.append(clear_tax_rates.pop(0))
                    except IndexError:
                        break

                    if i < len(dice) - 1:
                        temp_nvl_units += [None] * (dice[i+1] - dice[i] - 1)
                        temp_import_cds += [None] * (dice[i+1] - dice[i] - 1)
                        temp_import_dates += [None] * (dice[i+1] - dice[i] - 1)
                        temp_import_qtys += [None] * (dice[i+1] - dice[i] - 1)
                        temp_tax_totals += [None] * (dice[i+1] - dice[i] - 1)
                        temp_tax_rates += [None] * (dice[i+1] - dice[i] - 1)

                # Still shorter than the group: add one more empty slot.
                if len(temp_import_cds) < len(current_export_qtys):
                    temp_nvl_units.append(None)
                    temp_import_cds.append(None)
                    temp_import_dates.append(None)
                    temp_import_qtys.append(None)
                    temp_tax_totals.append(None)
                    temp_tax_rates.append(None)

                # Mark the rows directly before `count` for deletion; their number
                # is the shortfall between group length and rebuilt list length.
                # NOTE(review): here `count` is the group's own last row, while in
                # the code-change branch it is the first row of the next group, so
                # this range looks shifted by one row; confirm intent.
                to_delete += range(count-(len(current_export_qtys) - len(temp_import_qtys)), count)
                
                nvl_units += temp_nvl_units
                import_cds += temp_import_cds
                import_dates += temp_import_dates
                import_qtys += temp_import_qtys
                tax_totals += temp_tax_totals
                tax_rates += temp_tax_rates

        else:
            # The code changed: flush the finished group, then start collecting
            # the new one with the current row.
            # Non-null entries of each column, filtered column by column
            # (same alignment assumption as in the last-row branch above).
            clear_nvl_units = [x for x in current_nvl_units if not pd.isna(x)]
            clear_import_cds = [x for x in current_import_cds if not pd.isna(x)]
            clear_import_dates = [x for x in current_import_dates if not pd.isna(x)]
            clear_import_qtys = [x for x in current_import_qtys if not pd.isna(x)]
            clear_tax_totals = [x for x in current_tax_totals if not pd.isna(x)]
            clear_tax_rates = [x for x in current_tax_rates if not pd.isna(x)]

            # GET SPLIT POINTS FOR DIVIDING THE LIST
            # Positions inside the group at which the next import record is
            # placed; `im` / `ex` are the cumulative import / export quantities.
            # `k` is not used below.
            k = 0
            im = 0
            ex = 0
            dice = [0]
            # Group length, taken before the first pop.
            l = len(current_import_qtys)
            im = current_import_qtys.pop(0)
            
            for i in range(l):
                ex += current_export_qtys[i]
                if ex <= im:
                    pass
                else:
                    # Exports exceed what was taken so far: pull in the next entry
                    # and mark this position (also marked when nothing is left).
                    try:
                        im += current_import_qtys.pop(0)
                        dice.append(i)
                    except IndexError:
                        dice.append(i)
                # The final position is marked as well when it is still covered.
                if i == l - 1 and ex <= im:
                    dice.append(i)

            # De-duplicate and order the split points.
            dice = list(set(dice))
            dice = sorted(dice)
            # --------------------------

            temp_nvl_units = []
            temp_import_cds = []
            temp_import_dates = []
            temp_import_qtys = []
            temp_tax_totals = []
            temp_tax_rates = []

            # One non-null import record per split point, followed by None entries
            # for the gap up to the next split point.
            for i in range(len(dice)):
                try:
                    temp_nvl_units.append(clear_nvl_units.pop(0))
                    temp_import_cds.append(clear_import_cds.pop(0))
                    temp_import_dates.append(clear_import_dates.pop(0))
                    temp_import_qtys.append(clear_import_qtys.pop(0))
                    temp_tax_totals.append(clear_tax_totals.pop(0))
                    temp_tax_rates.append(clear_tax_rates.pop(0))
                except IndexError:
                    break

                if i < len(dice) - 1:
                    temp_nvl_units += [None] * (dice[i+1] - dice[i] - 1)
                    temp_import_cds += [None] * (dice[i+1] - dice[i] - 1)
                    temp_import_dates += [None] * (dice[i+1] - dice[i] - 1)
                    temp_import_qtys += [None] * (dice[i+1] - dice[i] - 1)
                    temp_tax_totals += [None] * (dice[i+1] - dice[i] - 1)
                    temp_tax_rates += [None] * (dice[i+1] - dice[i] - 1)

            # Still shorter than the group: add one more empty slot.
            if len(temp_import_cds) < len(current_export_qtys):
                temp_nvl_units.append(None)
                temp_import_cds.append(None)
                temp_import_dates.append(None)
                temp_import_qtys.append(None)
                temp_tax_totals.append(None)
                temp_tax_rates.append(None)

            # `count` is the first row of the new group, so this range covers the
            # tail rows of the finished group that have no rebuilt entry.
            to_delete += range(count-(len(current_export_qtys) - len(temp_import_qtys)), count)
            
            nvl_units += temp_nvl_units
            import_cds += temp_import_cds
            import_dates += temp_import_dates
            import_qtys += temp_import_qtys
            tax_totals += temp_tax_totals
            tax_rates += temp_tax_rates

            # Reset the per-group buffers and seed them with the current row.
            current_nvl_units = []
            current_import_cds = []
            current_import_dates = []
            current_import_qtys = []
            current_tax_totals = []
            current_tax_rates = []
            current_export_qtys = []

            current_nvl_units.append(row['nvl_unit'])
            current_import_cds.append(row['import_cd'])
            current_import_dates.append(row['import_date'])
            current_import_qtys.append(row['import_qty'])
            current_tax_totals.append(row['Tổng trị giá'])
            current_tax_rates.append(row['Thuế suất XNK'])

            current_export_qtys.append(row['export_qty'])

    current_code = row['nvl_code']

# Drop the rows marked during the arrangement pass, then install the rebuilt
# import columns (one list entry per remaining row).
df = df.drop(to_delete)

df['nvl_unit'] = nvl_units
df['import_cd'] = import_cds
df['import_date'] = import_dates
df['import_qty'] = import_qtys
df['Tổng trị giá'] = tax_totals
df['Thuế suất XNK'] = tax_rates

df = df.reset_index(drop=True)

# Running balance per nvl_code:
#   end   = cumulative imports - cumulative exports up to and including the row
#   begin = balance before the row's own import / export is applied
df["begin"] = 0
# Assign the filled column back explicitly: calling fillna(inplace=True) on a
# selected column is chained assignment, which does not update the frame
# under pandas Copy-on-Write.
df['import_qty'] = df['import_qty'].fillna(0)
df['export_qty'] = df['export_qty'].fillna(0)
df['temp_import_quantity'] = df.groupby('nvl_code')['import_qty'].cumsum()
df['temp_export_quantity'] = df.groupby('nvl_code')['export_qty'].cumsum()
df['end'] = df['begin'] + df['temp_import_quantity'] - df['temp_export_quantity']
df['begin'] = df['end'] + df['export_qty'] - df['import_qty']

# Remove rows whose balance is negative both before and after the row and
# that bring in no import.
df = df.drop(df[(df['begin'] < 0) & (df['end'] < 0) & (df['import_qty'] == 0)].index)

def revise_export_qty(row):
    """Cap a row's export quantity at what is available.

    When the row's 'end' is not below zero, 'export_qty' is returned unchanged.
    Otherwise the result is 'begin' + 'import_qty' if that sum is positive,
    else 0.
    """
    if not row['end'] < 0:
        return row['export_qty']

    available = row['begin'] + row['import_qty']
    return available if available > 0 else 0

def revise_import_qty(row, column):
    """Return ``row[column]``, with a value equal to 0 replaced by None."""
    value = row[column]
    return None if value == 0 else value


# Cap the export quantity on rows whose balance went negative.
df['export_qty'] = df.apply(revise_export_qty, axis=1)

# Recompute the running balance per nvl_code with the capped quantities:
#   end   = cumulative imports - cumulative exports up to and including the row
#   begin = balance before the row's own import / export is applied
df['begin'] = 0
# Assign the filled column back explicitly: calling fillna(inplace=True) on a
# selected column is chained assignment, which does not update the frame
# under pandas Copy-on-Write.
df['import_qty'] = df['import_qty'].fillna(0)
df['export_qty'] = df['export_qty'].fillna(0)
df['temp_import_quantity'] = df.groupby('nvl_code')['import_qty'].cumsum()
df['temp_export_quantity'] = df.groupby('nvl_code')['export_qty'].cumsum()
df['end'] = df['begin'] + df['temp_import_quantity'] - df['temp_export_quantity']
df['begin'] = df['end'] + df['export_qty'] - df['import_qty']

# tax = 'Tổng trị giá' * 'Thuế suất XNK' / 100; missing values count as 0.
df['Tổng trị giá'] = df['Tổng trị giá'].fillna(0).astype(float)
df['Thuế suất XNK'] = df['Thuế suất XNK'].fillna(0).astype(float)

df['tax'] = df['Tổng trị giá'] * df['Thuế suất XNK'] / 100

# Turn the zeros in the import-side columns back into missing values.
# Each column is rewritten from its own values only.
for column in ['import_qty', 'Tổng trị giá', 'Thuế suất XNK', 'tax']:
    df[column] = df.apply(lambda row: revise_import_qty(row, column=column), axis=1)