In [0]:
import pandas as pd
from collections import OrderedDict
import json
import numpy as np

In [0]:
%run "./fetch_data"

In [0]:
# ws_code_name_dict = spark.sql('select distinct int(wholesaler_pay_code) as payercode,  wholesaler_name as payername from budtech_brewdat_prod_ods.abi_cloud_wholesaler_ws_wholesaler').toPandas().set_index('payercode')
# ws_code_name_dict = dict(ws_code_name_dict['payername'])

In [0]:
def get_summary(df, month_name_index, last_month_n=-1):
    """Summarise write-off and still-valid balance for one group of the detail table.

    Args:
        df: Rows of a single group. Must carry an 'index' column that contains
            the label 'STW(+)'.
        month_name_index: Ordered list of the period column names.
        last_month_n: Negative position of the month being evaluated. -1 means
            the last column, in which case every entry of ``month_name_index``
            is in scope.

    Returns:
        pd.Series with the keys total_in_stw, write_off_stw, write_off_amt_pct,
        write_off_month_cnt, has_stw_month, write_off_month_pct and
        valid_stw_balance. Both ratios are set to None (pandas may store this
        as NaN) when their denominator is not positive.

    Note:
        Reads the notebook-level global ``expire_month``: the last
        ``expire_month + 1`` balance rows count as still valid, everything
        before them as written off.
    """
    if last_month_n == -1:
        # Evaluating the final month: the full month index is in scope.
        months_in_scope = month_name_index
    else:
        months_in_scope = month_name_index[:(last_month_n + 1)]

    inventory = df.set_index('index')

    # Denominator: volume bought in (STW) over the months in scope.
    stw_by_month = inventory.loc['STW(+)', months_in_scope]
    total_in_stw = stw_by_month.sum()

    # Balance left at the evaluated month for each STW row, opening balance included.
    balance_at_end = inventory.iloc[2:last_month_n, last_month_n]
    valid_window = expire_month + 1

    # Numerator: balance that is older than the validity window.
    expired_balance = balance_at_end[:-valid_window]
    write_off_stw = expired_balance.sum()

    # Written-off volume / bought-in volume.
    if total_in_stw > 0:
        write_off_amt_pct = write_off_stw / total_in_stw
    else:
        write_off_amt_pct = None

    write_off_month_cnt = (expired_balance > 0).sum()
    has_stw_month = (stw_by_month > 0).sum()
    if has_stw_month > 0:
        write_off_month_pct = write_off_month_cnt / has_stw_month
    else:
        write_off_month_pct = None

    valid_stw_balance = balance_at_end[-valid_window:].sum()

    summary = {
        "total_in_stw": total_in_stw,
        "write_off_stw": write_off_stw,
        "write_off_amt_pct": write_off_amt_pct,
        "write_off_month_cnt": write_off_month_cnt,
        "has_stw_month": has_stw_month,
        "write_off_month_pct": write_off_month_pct,
        "valid_stw_balance": valid_stw_balance,
    }
    return pd.Series(summary)


def read_detail(name):
    """Load one FIFO detail parquet dataset into pandas and normalise its key columns.

    Args:
        name: Path of the parquet dataset, read through the ambient ``spark`` session.

    Returns:
        pandas DataFrame with 'brand_family_comdb' renamed to 'brand' and
        'payercode' renamed to 'ws_code', keeping only rows whose 'ws_code'
        is not null.
    """
    print('reading:', name)
    detail = spark.read.parquet(name).toPandas()
    # # Temporary back-fill of expire_month for the all-brand rows (disabled)
    # detail['expire_month'] = detail['expire_month'].apply(lambda x: int(x) if not pd.isna(x) else expire_month)
    column_aliases = {"brand_family_comdb": "brand", 'payercode': "ws_code"}
    detail = detail.rename(columns=column_aliases)
    has_ws_code = detail['ws_code'].notna()
    return detail[has_ws_code]


def get_one_month_result(result, last_month_n=-1, is_t15_wf=False):
    """Run ``get_summary`` per wholesaler/brand group for one evaluation month.

    Args:
        result: Detail DataFrame; every column that is not one of the known
            identifier columns is treated as a period column.
        last_month_n: Negative position (within the period columns) of the
            month to evaluate; -1 is the last one.
        is_t15_wf: When True, 't1_code' and 't15_code' are added to the
            grouping keys.

    Returns:
        DataFrame with one row per group: the grouping keys, the fields
        produced by ``get_summary`` and an 'end_month' column holding the
        name of the evaluated month.
    """
    # Period columns: the opening balance plus the monthly columns.
    id_columns = ('index', 'ws_name', 'ws_code', 'bu', 'region', 'brand', 'expire_month', 't1_code', 't15_code', )
    month_name_index = [col for col in list(result.columns) if col not in id_columns]

    last_month_name = month_name_index[last_month_n]   # e.g. '202410'
    print(f"current calculating month {last_month_name}")

    group_keys = (
        ['ws_name', 'ws_code', 't1_code', 't15_code', 'bu', 'region', 'brand', 'expire_month']
        if is_t15_wf
        else ['ws_name', 'ws_code', 'bu', 'region', 'brand', 'expire_month']
    )

    def _summarise(group):
        return get_summary(group, month_name_index, last_month_n)

    per_group = result.groupby(group_keys, dropna=False).apply(_summarise)
    per_group['end_month'] = last_month_name
    return per_group.reset_index()


# Write-off inventory of the T1.5 wholesalers attached to (hanging under) a T1.
def get_t15_wf_items(t15_wf_df, t1_code, brand):
    """Collect the per-T1.5 write-off volumes under one T1 wholesaler and brand.

    Args:
        t15_wf_df: DataFrame with the columns 't1_code', 't15_code', 'brand'
            and 'write_off_stw'.
        t1_code: Code of the T1 wholesaler; converted with ``str`` before it
            is compared against the 't1_code' column.
        brand: Brand to match against the 'brand' column.

    Returns:
        Tuple ``(total, detail)``. ``detail`` is a JSON object string mapping
        each 't15_code' to its write-off volume, considering only rows whose
        'write_off_stw' exceeds 0.01; ``total`` is the sum of those volumes.
        When no row qualifies the result is ``(0, '')``.
    """
    is_match = (t15_wf_df['t1_code'] == str(t1_code)) & (t15_wf_df['brand'] == brand)
    matched = t15_wf_df[is_match]
    matched = matched[matched['write_off_stw'] > 0.01]
    if matched.empty:
        # Nothing to report: empty detail string instead of '{}'.
        return 0, ''

    # Aggregate per t15_code so that several rows for the same code are summed
    # rather than the later row silently overwriting the earlier one in the dict.
    # sort=False keeps first-appearance order; dropna=False keeps NaN codes.
    write_off_by_t15 = (
        matched.groupby('t15_code', sort=False, dropna=False)['write_off_stw']
        .sum()
        .to_dict()
    )
    total = sum(write_off_by_t15.values())
    detail = json.dumps(write_off_by_t15, ensure_ascii=True)
    return total, detail

In [0]:
# Ending inventory for month 202412, using the '期末库存' (ending inventory) quantity column.
# NOTE(review): get_ending_inv_month is not defined in the visible cells — presumably it
# comes from the `%run "./fetch_data"` cell. Later cells merge on this object's index with
# (wholesaler code, brand) keys and read an `inv_hl` column after the merge — confirm its shape.
end_inv_by_ws_by_brand = get_ending_inv_month(month='202412', used_qty_col='期末库存')

没对应上SKUCode/HL的wccs product id ['P02816' 'P02747' 'P02746' 'P02931' 'P03012' 'P03002' 'P02896' 'P02820'
 'P02650' 'P02843' 'P02708' 'P03001' 'P03071' 'P03020' 'P00012' 'P00007'
 'P01040' 'P00024' 'P00043' 'P00033' 'P00038' 'P00040' 'P00032' 'P00028'
 'P00020' 'P00025' 'P00015' 'P00034' 'P00036' 'P00031' 'P00029' 'P00019'
 'P00014' 'P00027' 'P00035' 'P00830' 'P00211' 'P00022' 'P00030' 'P00627'
 'P02935' 'P03006' 'P02757' 'P00315' 'P00047' 'P00045' 'P00076' 'P00046'
 'P00055' 'P00098' 'P00053' 'P00051' 'P00050' 'P00064' 'P00062' 'P00058'
 'P00059' 'P01018' 'P00061' 'P00063' 'P00867' 'P00078' 'P02791' 'P02799'
 'P00073' 'P00074' 'P00056' 'P00065' 'P00067' 'P00072' 'P00057' 'P00233'
 'P00408' 'P00329' 'P00307' 'P00267' 'P00314' 'P00272' 'P00317' 'P00359'
 'P00444' 'P00481' 'P00343' 'P00704' 'P00249' 'P00301' 'P00254' 'P00260'
 'P00327' 'P00273' 'P00278' 'P00446' 'P00299' 'P00316' 'P00225' 'P00633'
 'P00401' 'P00631' 'P00469' 'P00295' 'P00653' 'P00558' 'P00231' 'P00854'
 'P00585' 'P00308' '

In [0]:
# Notebook-level run configuration.
# expire_month is read as a global by get_summary and is also interpolated into the data paths.
expire_month = 12
data_type = 't1'    # or t15
end_month = -3   # negative column position of the month to evaluate: the last month of the data is 202412; -3 is 202410

reading: /mnt/srf/inv/fifo_inventory_details_month12_t1_to_all_v2025010302
reading: /mnt/srf/inv/fifo_inventory_details_month12_t15_to_poc_write_off_v20250107


In [0]:
# T1 flow: summarise the T1 detail and attach, per T1 and brand, the write-off of the
# T1.5 wholesalers under it. Binds notebook-level names used by later cells
# (data_version, t1_t15_data_version, t1_detail, t15_detail, t1_df, t15_wf_df).
if data_type == "t1":
    # A data_version with "_no_str" appended holds the results for wholesalers without STR.
    data_version = 'v2025010302'
    t1_t15_data_version = 'v20250107'

    t1_name = f'/mnt/srf/inv/fifo_inventory_details_month{expire_month}_t1_to_all_{data_version}'
    t15_name = f'/mnt/srf/inv/fifo_inventory_details_month{expire_month}_t15_to_poc_write_off_{t1_t15_data_version}'

    # # Paths to use in the future
    # t1_name = f'/mnt/srf/inv/fifo_inventory_details_month{expire_month}_t1_{data_version}'
    # t15_name = f'/mnt/srf/inv/fifo_inventory_details_month{expire_month}_t1_t15_{t1_t15_data_version}'

    t1_detail = read_detail(t1_name)
    t15_detail = read_detail(t15_name)
    # Per (wholesaler, brand) summary of the T1 detail at the configured end_month.
    t1_df = get_one_month_result(t1_detail, last_month_n=end_month, is_t15_wf=False)
    # Cast in place to str; presumably so the codes become string keys of the JSON
    # detail built by get_t15_wf_items — confirm.
    t15_detail['t15_code'] = t15_detail['t15_code'].astype(str)
    # Same summary for the T1.5 detail, additionally grouped by t1_code / t15_code.
    t15_wf_df = get_one_month_result(t15_detail, last_month_n=end_month, is_t15_wf=True)
    # get_t15_wf_items(t15_wf_df, '30018524', 'BUD')
    # Row-wise lookup: get_t15_wf_items returns (total, JSON detail), expanded into two columns.
    t1_df[['w1.5_writeoff', 'w1.5_wrriteoff_detail']] = t1_df.apply(lambda x: get_t15_wf_items(t15_wf_df, x['ws_code'], x['brand']), axis=1, result_type='expand')

current calculating month 202410
current calculating month 202410


In [0]:
# T1 flow: join the ending inventory onto the T1 summary and derive the T1's own write-off.
if data_type == 't1':
    # Left join on (ws_code, brand) against the index of end_inv_by_ws_by_brand.
    t1_summary_final = pd.merge(t1_df, end_inv_by_ws_by_brand, left_on=['ws_code', 'brand'], right_index=True, how='left')
    # NOTE(review): the cell that builds t1_df already names these columns with underscores,
    # so this rename looks like a no-op kept for older, space-named columns — confirm.
    t1_summary_final = t1_summary_final.rename(columns={'w1.5 writeoff': 'w1.5_writeoff', 'w1.5 wrriteoff detail': 'w1.5_wrriteoff_detail'})
    # Write-off attributable to the T1 itself: total write-off minus the T1.5 part.
    t1_summary_final['t1_self_write_off'] = t1_summary_final['write_off_stw'] - t1_summary_final['w1.5_writeoff']
    # Row-wise minimum of the T1's own write-off and inv_hl. DataFrame.min skips NaN by
    # default, so a row with a missing inv_hl keeps t1_self_write_off.
    t1_summary_final['min_write_off'] = t1_summary_final[['t1_self_write_off', 'inv_hl']].min(axis=1)
    t1_summary_final.display()

ws_name,ws_code,bu,region,brand,expire_month,total_in_stw,write_off_stw,write_off_amt_pct,write_off_month_cnt,has_stw_month,write_off_month_pct,valid_stw_balance,end_month,w1.5_writeoff,w1.5_wrriteoff_detail,inv_hl,t1_self_write_off,min_write_off
七台河兴龙批发部,30012455,东北|BU_NE,黑龙江东区|Region_Heilongjiang_East,HBO,12,2977.5600000000004,609.3408000000002,0.2046443396606617,3.0,7.0,0.4285714285714285,0.0,202410,0.0,,0.0,609.3408000000002,0.0
七台河市瀛和商贸有限公司,30017751,东北|BU_NE,黑龙江东区|Region_Heilongjiang_East,BUD,12,21263.224800000007,128.3075999999984,0.0060342493298569,1.0,37.0,0.027027027027027,1569.96,202410,0.0875999999999948,"{""30015510"": 0.0875999999999948}",0.0,128.21999999999838,0.0
七台河市瀛和商贸有限公司,30017751,东北|BU_NE,黑龙江东区|Region_Heilongjiang_East,HBI,12,14962.840000000002,893.2496000000003,0.0596978648438398,2.0,37.0,0.054054054054054,951.98,202410,0.0,,0.0,893.2496000000003,0.0
七台河市瀛和商贸有限公司,30017751,东北|BU_NE,黑龙江东区|Region_Heilongjiang_East,HBO,12,148800.23119999998,5971.884800000006,0.0401335720505251,2.0,37.0,0.054054054054054,12162.12,202410,1543.4640000000002,"{""30015510"": 1543.4640000000002}",0.0,4428.420800000006,0.0
七台河市瀛和商贸有限公司,30017751,东北|BU_NE,黑龙江东区|Region_Heilongjiang_East,HKOW,12,2422.438,624.9568,0.2579867059549099,9.0,28.0,0.3214285714285714,134.57,202410,0.0,,0.0,624.9568,0.0
七台河市瀛和商贸有限公司,30017751,东北|BU_NE,黑龙江东区|Region_Heilongjiang_East,ISP,12,3366.222,193.372,0.0574448149884351,4.0,34.0,0.1176470588235294,218.36,202410,64.04296000000002,"{""30017246"": 20.47152, ""30012510"": 10.193519999999998, ""30012506"": 33.37792000000002}",0.0,129.32904,0.0
七台河市瀛泉商贸有限公司,30020605,东北|BU_NE,黑龙江东区|Region_Heilongjiang_East,BUD,12,8380.029999999999,0.0,0.0,0.0,10.0,0.0,0.0,202410,0.0,,1555.07328,0.0,0.0
七台河市瀛泉商贸有限公司,30020605,东北|BU_NE,黑龙江东区|Region_Heilongjiang_East,HBI,12,4494.110000000001,0.0,0.0,0.0,10.0,0.0,0.0,202410,0.0,,326.1504,0.0,0.0
七台河市瀛泉商贸有限公司,30020605,东北|BU_NE,黑龙江东区|Region_Heilongjiang_East,HBO,12,38432.27,0.0,0.0,0.0,10.0,0.0,4522.696400000001,202410,0.0,,5691.556799999999,0.0,0.0
七台河市瀛泉商贸有限公司,30020605,东北|BU_NE,黑龙江东区|Region_Heilongjiang_East,HKOW,12,158.26000000000002,0.0,0.0,0.0,3.0,0.0,0.0,202410,0.0,,42.3264,0.0,0.0


In [0]:
# Add the write-off rows of the T1.5 wholesalers under each T1, in detail (row per T1.5, not aggregated).
if data_type == 't1':
    # Overwrites ws_code on t15_wf_df in place with the T1 code as int, so it can be
    # joined to the ws_code of t1_summary_final.
    t15_wf_df['ws_code'] = t15_wf_df['t1_code'].astype(int)
    # Left join from the T1 (ws_code, brand) pairs; pairs without any T1.5 row are dropped.
    t15_summary_final = pd.merge(t1_summary_final[['ws_code', 'brand']], t15_wf_df, how='left', on=['ws_code', 'brand']).dropna(subset=['t15_code'])
    # Same 0.01 threshold that get_t15_wf_items applies to write_off_stw.
    t15_summary_final['is_write_off_gt_1H'] = t15_summary_final['write_off_stw'] > 0.01
    # Cast back to int; presumably to match the key type of end_inv_by_ws_by_brand's index — confirm.
    t15_summary_final['t15_code'] = t15_summary_final['t15_code'].astype(int)
    # Ending inventory of the T1.5 itself, joined on (t15_code, brand).
    t15_summary_final = pd.merge(t15_summary_final, end_inv_by_ws_by_brand, left_on=['t15_code', 'brand'], right_index=True, how='left')
    # Row-wise minimum of write_off_stw and inv_hl (NaN is skipped by default).
    t15_summary_final['min_write_off'] = t15_summary_final[['write_off_stw', 'inv_hl']].min(axis=1)
    t15_summary_final.display()

ws_code,brand,ws_name,t1_code,t15_code,bu,region,expire_month,total_in_stw,write_off_stw,write_off_amt_pct,write_off_month_cnt,has_stw_month,write_off_month_pct,valid_stw_balance,end_month,is_write_off_gt_1H,inv_hl,min_write_off
30017751,BUD,七台河市新兴区鸿丽酒水经销处,30017751,30015510,东北|BU_NE,黑龙江东区|Region_Heilongjiang_East,12.0,425.6016,0.0875999999999948,0.00020582629388610097,1.0,21.0,0.0476190476190476,0.0,202410,True,0.0,0.0
30017751,HBO,七台河市新兴区鸿丽酒水经销处,30017751,30015510,东北|BU_NE,黑龙江东区|Region_Heilongjiang_East,12.0,1626.1439999999998,1543.4640000000002,0.9491557943207984,4.0,10.0,0.4,0.0,202410,True,0.0,0.0
30017751,ISP,七台河市新兴区鸿丽酒水经销处,30017751,30015510,东北|BU_NE,黑龙江东区|Region_Heilongjiang_East,12.0,5.039999999999999,0.0,0.0,0.0,3.0,0.0,0.0,202410,False,0.0,0.0
30017751,ISP,勃利县昌顺啤酒销售有限公司,30017751,30017246,东北|BU_NE,黑龙江东区|Region_Heilongjiang_East,12.0,201.67752,20.47152,0.1015062065420082,1.0,23.0,0.0434782608695652,0.0,202410,True,17.244,17.244
30017751,ISP,密山市东义啤酒销售有限公司,30017751,30019025,东北|BU_NE,黑龙江南区|Region_Heilongjiang_South,12.0,5.76,0.0,0.0,0.0,4.0,0.0,0.0,202410,False,0.0,0.0
30017751,ISP,虎林市鸿众酒业有限公司,30017751,30012510,东北|BU_NE,黑龙江南区|Region_Heilongjiang_South,12.0,37.57151999999999,10.193519999999998,0.2713097580294861,3.0,15.0,0.2,0.0,202410,True,11.952,10.193519999999998
30017751,ISP,鸡西伟海酒业销售有限公司,30017751,30012506,东北|BU_NE,黑龙江南区|Region_Heilongjiang_South,12.0,1122.4643199999996,33.37792000000002,0.0297362859605194,2.0,23.0,0.0869565217391304,0.0,202410,True,0.0,0.0
30020605,ISP,勃利县昌顺啤酒销售有限公司,30020605,30017246,东北|BU_NE,黑龙江东区|Region_Heilongjiang_East,12.0,51.413520000000005,20.47152,0.398173865551318,1.0,6.0,0.1666666666666666,0.0,202410,True,17.244,17.244
30020644,BUD,张家口一牛商贸有限公司,30020644,30020398,北区|BU_N,河北北区|Region_Hebei_North,12.0,228.528,0.0,0.0,0.0,3.0,0.0,0.0,202410,False,19.8528,0.0
30003079,BUD,上海吟纹贸易有限公司,30003079,30019831,东区|BU_E,上海市|Region_Shanghai,12.0,3750.0,0.0,0.0,0.0,10.0,0.0,0.0,202410,False,420.9,0.0


In [0]:
# T1.5 flow: total purchases of the T1.5 (regardless of source, ABI or T1) and total STR sold to POC.
if data_type == 't15':
    data_version = 'v20250108'
    name = f'/mnt/srf/inv/fifo_inventory_details_month{expire_month}_new_t15_poc_{data_version}'
    # Use the path below in the future
    # name = f'/mnt/srf/inv/fifo_inventory_details_month{expire_month}_t15_{data_version}'
    result = read_detail(name)
    # NOTE(review): this rebinds the notebook-level end_month set in the config cell.
    end_month = -1   # last month of the data, 202412
    t15_df = get_one_month_result(result, last_month_n=end_month, is_t15_wf=False)  # overview
    # Left join of the ending inventory on (ws_code, brand) against the index of end_inv_by_ws_by_brand.
    t15_df_summary = pd.merge(t15_df, end_inv_by_ws_by_brand, left_on=['ws_code', 'brand'], right_index=True, how='left')  
    # Row-wise minimum of write_off_stw and inv_hl (NaN is skipped by default).
    t15_df_summary['min_write_off'] = t15_df_summary[['write_off_stw', 'inv_hl']].min(axis=1)
    # Persist to a table (overwrites the table and its schema)
    table_name = f"finance_ds_inventory_dmt.fifo_inv_overview_{expire_month}_{data_version}_t15_full"
    spark.createDataFrame(t15_df_summary).write.mode("overwrite").format("delta").option('overwriteSchema',True).saveAsTable(table_name)
    print(table_name)

reading: /mnt/srf/inv/fifo_inventory_details_month12_new_t15_poc_v20250108


In [0]:
# Persist the T1 outputs as Delta tables (overwriting table and schema).
# Guarded like the other T1 cells: t1_summary_final, t15_summary_final and
# t1_t15_data_version only exist in the T1 flow, and in the T1.5 flow data_version
# is rebound to the T1.5 version, which would send the T1 summary to the wrong table.
if data_type == 't1':
    # T1 overview: one row per (wholesaler, brand).
    table_name = f"finance_ds_inventory_dmt.fifo_inv_overview_{expire_month}_{data_version}"
    spark.createDataFrame(t1_summary_final).write.mode("overwrite").format("delta").option('overwriteSchema',True).saveAsTable(table_name)
    print(table_name)

    # Detail of the T1.5 wholesalers under each T1.
    table_name = f"finance_ds_inventory_dmt.fifo_inv_overview_t1_t15_{expire_month}_{t1_t15_data_version}"
    spark.createDataFrame(t15_summary_final).write.mode("overwrite").format("delta").option('overwriteSchema',True).saveAsTable(table_name)
    print(table_name)

finance_ds_inventory_dmt.fifo_inv_overview_t1_t15_12_v20250107


In [0]:
%sql
-- delete from finance_ds_inventory_dmt.fifo_inv_overview_t1_t15_12_v2025010302 where 1=1
-- drop table finance_ds_inventory_dmt.fifo_inv_overview_t1_t15_12_v2025010302

In [0]:
# Ad-hoc inspection of the detail matrix of a single wholesaler / brand / expiry setting.
# NOTE(review): `expire_month = 9` rebinds the notebook-level expire_month (12 in the config
# cell), which get_summary reads as a global — any summary re-run after this cell uses 9.
# NOTE(review): in the visible cells `result` is only assigned in the data_type == 't15'
# branch, so this cell relies on that branch having been run in the current kernel — confirm.
payercode =  30012451
brand = 'HBO'
expire_month = 9
# Keep only the period columns, indexed by the row label.
one_ws_inv = result.query('(ws_code == @payercode) & (brand == @brand) & (expire_month == @expire_month)').drop(['ws_code', 'brand', 'expire_month', 'ws_name', 'bu', 'region'], axis=1).set_index('index')
one_ws_inv

Unnamed: 0_level_0,Openning Balance,202101,202102,202103,202104,202105,202106,202107,202108,202109,202110,202111,202112,202201,202202,202203,202204,202205,202206,202207,202208,202209,202210,202211,202212,202301,202302,202303,202304,202305,202306,202307,202308,202309,202310,202311,202312,202401,202402,202403,202404,202405,202406,202407,202408,202409,202410
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1


In [0]:
# Sanity check: column-wise total of the 'STW(+)' rows over every wholesaler and brand in the T1 detail.
# The trailing [:-2] drops the last two entries of the summed Series — presumably the two
# months after the evaluated end_month; confirm.
t1_detail.query('index == "STW(+)"').set_index(['ws_code', 'brand', 'expire_month', 'ws_name', 'bu', 'region', 'index']).sum()[:-2]

Openning Balance    7.912355e+05
202101              1.509472e+06
202102              6.694327e+05
202103              1.008184e+06
202104              9.666091e+05
202105              1.227183e+06
202106              1.262082e+06
202107              1.248418e+06
202108              1.234563e+06
202109              1.048322e+06
202110              6.654848e+05
202111              8.740087e+05
202112              7.552920e+05
202201              1.456576e+06
202202              6.899766e+05
202203              9.015778e+05
202204              6.394719e+05
202205              1.094486e+06
202206              1.335303e+06
202207              1.324297e+06
202208              1.314656e+06
202209              9.518928e+05
202210              7.300773e+05
202211              6.015377e+05
202212              8.084582e+05
202301              1.309832e+06
202302              8.062214e+05
202303              1.054930e+06
202304              9.808261e+05
202305              1.128805e+06
202306    