# Q4 POS/渠道异常分析

目标（依据PDF示例提示）：
- 4.1 店间调拨（inter-store transfer）识别与样例展示；
- 4.2 商品报废（goods scrapped）识别与原因统计；
- 4.3 免费礼物（Gift_*）识别与交易展示；
- 4.4 异常交易日期/时间：周末交易、过早（06:00 之前）/过晚（23:00 及之后）的交易，或交易时间缺失/无法解析。

开发阶段仅读取前 20 万行（`nrows=200000`）进行分析与可视化，因此 4.1–4.4 中的条数与图表均为子集结果；全量结果见文末“附加：全量运行与结果导出”。

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# Show wide frames in full instead of truncating columns in the notebook output.
pd.set_option('display.max_columns', 200)
pd.set_option('display.width', 200)

# Every chart in this notebook has Chinese titles and axis labels. Matplotlib's
# bundled default font (DejaVu Sans) has no CJK glyphs, so without a CJK-capable
# font the labels render as empty boxes. Put commonly available CJK fonts ahead
# of the existing sans-serif fallbacks; families that are not installed are
# skipped by the font lookup.
_cjk_fonts = ['Noto Sans CJK SC', 'WenQuanYi Micro Hei', 'SimHei', 'Microsoft YaHei', 'PingFang SC', 'Arial Unicode MS']
plt.rcParams['font.sans-serif'] = _cjk_fonts + [f for f in plt.rcParams['font.sans-serif'] if f not in _cjk_fonts]
# Many CJK fonts lack the Unicode minus sign; fall back to the ASCII hyphen on tick labels.
plt.rcParams['axes.unicode_minus'] = False

# Development subset: only the first 200,000 rows of the POS summary sheet are loaded.
pos_sum = pd.read_excel('/workspace/Sample Data.Case 4.xlsx', sheet_name='POS Trans_Summary', nrows=200000)

# Locate the flag/reason columns by case-insensitive substring match on the
# column name; each variable is None when no column matches.
flag_col_transfer = next((c for c in pos_sum.columns if 'inter-store transfer' in str(c).lower()), None)
# The scrap flag must not be confused with the scrap *reason* column, hence the exclusion.
flag_col_scrap = next((c for c in pos_sum.columns if 'goods scrapped' in str(c).lower() and 'reason' not in str(c).lower()), None)
reason_col = next((c for c in pos_sum.columns if 'scrapped_reason' in str(c).lower()), None)

print('列名样例：', list(pos_sum.columns)[:30])


## 4.1 店间调拨

In [None]:
# Section 4.1: count and preview the transactions flagged as inter-store transfers.
if not flag_col_transfer:
    # No column name matched the transfer flag pattern when the data was loaded.
    print('未发现店间调拨标志列')
else:
    # A row is treated as a transfer when its flag column equals 1.
    is_transfer = pos_sum[flag_col_transfer].eq(1)
    trans = pos_sum.loc[is_transfer]
    print('店间调拨（子集）条数：', len(trans))
    preview_cols = ['Transaction_ID', 'Trans_Date', 'Trans_Start_Time', 'SUM_TRANS', flag_col_transfer]
    display(trans.loc[:, preview_cols].head(10))

## 4.2 商品报废与原因

In [None]:
# Section 4.2: count and preview scrapped-goods transactions, then chart the
# ten most frequent scrap reasons when a reason column is available.
if not flag_col_scrap:
    # No column name matched the scrap flag pattern when the data was loaded.
    print('未发现报废标志列')
else:
    # A row is treated as scrapped when its flag column equals 1.
    scr = pos_sum.loc[pos_sum[flag_col_scrap].eq(1)]

    cols = ['Transaction_ID', 'Trans_Date', 'Trans_Start_Time', 'SUM_TRANS']
    if reason_col:
        cols.append(reason_col)

    print('商品报废（子集）条数：', len(scr))
    display(scr.loc[:, cols].head(10))

    if reason_col:
        # value_counts() orders by descending frequency and, by default, leaves
        # out rows whose reason is missing, so the chart covers recorded reasons only.
        scr_reason = scr[reason_col].value_counts().iloc[:10]
        reason_labels = scr_reason.index.astype(str)

        plt.figure(figsize=(8, 4))
        sns.barplot(x=reason_labels, y=scr_reason.to_numpy(), color='darkorange')
        plt.title('报废原因Top10（子集）')
        plt.xlabel('原因编码/描述')
        plt.ylabel('次数')
        plt.xticks(rotation=45, ha='right')
        plt.tight_layout()
        plt.show()

## 4.3 免费礼物交易

In [None]:
# Section 4.3: flag transactions that contain a free gift, preview them, and
# chart how often each Gift column occurs.

# Gift columns are recognised by a case-sensitive 'Gift_' or 'Gift ' in the column name.
gift_cols = [c for c in pos_sum.columns if 'Gift_' in str(c) or 'Gift ' in str(c)]

# A transaction has a gift when its Gift columns (missing treated as 0) sum to
# more than zero; the flag is stored on pos_sum itself for later use.
pos_sum['any_gift'] = pos_sum[gift_cols].fillna(0).sum(axis=1) > 0 if gift_cols else False

if gift_cols:
    gifts = pos_sum[pos_sum['any_gift']]
    print('含礼物交易（子集）条数：', gifts.shape[0])
    # Only the first ten Gift columns are shown to keep the preview readable.
    show_cols = ['Transaction_ID', 'Trans_Date', 'Trans_Start_Time', 'SUM_TRANS'] + gift_cols[:10]
    display(gifts[show_cols].head(10))

    # The chart is titled and labelled as an occurrence count, so count the
    # transactions in which each Gift column is positive. Summing the raw
    # values would overstate the count whenever a column holds quantities or
    # amounts rather than 0/1 flags (for pure 0/1 flags both give the same result).
    gift_counts = gifts[gift_cols].fillna(0).gt(0).sum().sort_values(ascending=False).head(10)

    plt.figure(figsize=(10, 4))
    sns.barplot(x=gift_counts.index.astype(str), y=gift_counts.values, color='green')
    plt.xticks(rotation=45, ha='right')
    plt.title('Gift类目Top10出现次数（子集）')
    plt.ylabel('次数')
    plt.xlabel('Gift列')
    plt.tight_layout()
    plt.show()
else:
    print('未发现Gift相关列')

## 4.4 异常交易日期/时间

In [None]:
# Section 4.4: flag transactions on weekends, outside 06:00-22:59, or with a
# missing/unparseable start time, then plot the hour-of-day distribution.


def _hour_of_day(time_col):
    """Return the clock hour (0-23) of each value in *time_col*, NaN where it cannot be parsed.

    read_excel can return time-formatted cells as ``datetime.time`` objects.
    ``pd.to_datetime`` cannot convert those, and with ``errors='coerce'`` it
    silently turns them into NaT, which would mark every row as "hour missing".
    Parsing the string form ('HH:MM:SS') handles time objects, timestamps and
    plain strings alike.
    """
    if pd.api.types.is_datetime64_any_dtype(time_col):
        return time_col.dt.hour
    # Keep genuinely missing cells as NaN instead of the literal text 'nan'.
    as_text = time_col.astype(str).where(time_col.notna())
    return pd.to_datetime(as_text, errors='coerce').dt.hour


pos_sum['Trans_Date'] = pd.to_datetime(pos_sum['Trans_Date'], errors='coerce')
pos_sum['hour'] = _hour_of_day(pos_sum['Trans_Start_Time'])

# dayofweek: Monday=0 ... Sunday=6, so >= 5 selects Saturday and Sunday.
# NOTE(review): a Trans_Date that failed to parse is NaT and therefore never
# counts as a weekend here; confirm whether such rows should also be flagged.
is_weekend = pos_sum['Trans_Date'].dt.dayofweek >= 5
# Off-hours: no usable start time, before 06:00, or 23:00 and later.
is_off_hours = pos_sum['hour'].isna() | (pos_sum['hour'] < 6) | (pos_sum['hour'] > 22)
abn = pos_sum[is_weekend | is_off_hours]
print('异常日期/时间（子集）条数：', abn.shape[0])
display(abn[['Transaction_ID','Trans_Date','Trans_Start_Time','hour','SUM_TRANS']].head(10))

plt.figure(figsize=(8,4))
# Fix the bin range to the full day so each of the 24 bins is exactly one clock
# hour; without binrange the 24 bins would split only the observed min-max span.
sns.histplot(pos_sum['hour'].dropna(), bins=24, binrange=(0, 24), color='slateblue')
plt.title('交易发生小时分布（子集）')
plt.xlabel('小时')
plt.ylabel('频次')
plt.tight_layout()
plt.show()

### 结果与审计建议
- 店间调拨与报废应有完整审批与原因记录，建议抽样核查流程合规性与权限控制。
- 含免费礼物交易需与促销政策匹配，检查是否存在超范围赠送或非授权操作。
- 异常日期/时间交易需结合门店营业时间与值班安排，排查是否存在手工补录或不合规操作。

## 附加：全量运行与结果导出

In [None]:
import pandas as pd
from pathlib import Path
# Full-data run: reload the whole POS summary sheet and export each Q4 finding
# (transfers, scrapped goods + reason counts, gift transactions, abnormal
# date/time) as a CSV file under outputs_path.


def _hour_of_day(time_col):
    """Return the clock hour (0-23) of each value in *time_col*, NaN where it cannot be parsed.

    read_excel can return time-formatted cells as ``datetime.time`` objects.
    ``pd.to_datetime`` cannot convert those, and with ``errors='coerce'`` it
    silently turns them into NaT, which would mark every row as "hour missing".
    Parsing the string form ('HH:MM:SS') handles time objects, timestamps and
    plain strings alike.
    """
    if pd.api.types.is_datetime64_any_dtype(time_col):
        return time_col.dt.hour
    # Keep genuinely missing cells as NaN instead of the literal text 'nan'.
    as_text = time_col.astype(str).where(time_col.notna())
    return pd.to_datetime(as_text, errors='coerce').dt.hour


outputs_path = Path('/workspace/KPMG_HW1/outputs')
outputs_path.mkdir(parents=True, exist_ok=True)

# No nrows limit here: the complete sheet is read.
pos_sum_all = pd.read_excel('/workspace/Sample Data.Case 4.xlsx', sheet_name='POS Trans_Summary')

# Locate the flag/reason columns by case-insensitive substring match; None when absent.
flag_col_transfer = next((c for c in pos_sum_all.columns if 'inter-store transfer' in str(c).lower()), None)
flag_col_scrap = next((c for c in pos_sum_all.columns if 'goods scrapped' in str(c).lower() and 'reason' not in str(c).lower()), None)
reason_col = next((c for c in pos_sum_all.columns if 'scrapped_reason' in str(c).lower()), None)

# Track what was really written so the final message cannot claim an export
# that was skipped because its column was not found.
exported = []
skipped = []

# Inter-store transfers (flag == 1).
if flag_col_transfer:
    trans_all = pos_sum_all[pos_sum_all[flag_col_transfer]==1]
    trans_all.to_csv(outputs_path/'Q4_inter_store_transfers.csv', index=False)
    exported.append('店间调拨')
else:
    skipped.append('店间调拨')

# Scrapped goods (flag == 1), plus a per-reason count table when a reason column exists.
if flag_col_scrap:
    scr_all = pos_sum_all[pos_sum_all[flag_col_scrap]==1]
    scr_all.to_csv(outputs_path/'Q4_goods_scrapped.csv', index=False)
    if reason_col:
        scr_reason_counts = scr_all[reason_col].value_counts().reset_index()
        # Name the two columns explicitly rather than relying on reset_index defaults.
        scr_reason_counts.columns = ['reason','count']
        scr_reason_counts.to_csv(outputs_path/'Q4_goods_scrapped_reason_counts.csv', index=False)
    exported.append('报废')
else:
    skipped.append('报废')

# Transactions containing a gift: Gift columns (missing treated as 0) sum to more than zero.
gift_cols_all = [c for c in pos_sum_all.columns if 'Gift_' in str(c) or 'Gift ' in str(c)]
pos_sum_all['any_gift'] = pos_sum_all[gift_cols_all].fillna(0).sum(axis=1) > 0 if gift_cols_all else False
if gift_cols_all:
    gifts_all = pos_sum_all[pos_sum_all['any_gift']]
    gifts_all.to_csv(outputs_path/'Q4_gifts_transactions.csv', index=False)
    exported.append('礼物')
else:
    skipped.append('礼物')

# Abnormal date/time: weekend (dayofweek >= 5), missing/unparseable start time,
# before 06:00, or 23:00 and later.
pos_sum_all['Trans_Date'] = pd.to_datetime(pos_sum_all['Trans_Date'], errors='coerce')
pos_sum_all['hour'] = _hour_of_day(pos_sum_all['Trans_Start_Time'])
abn_all = pos_sum_all[(pos_sum_all['Trans_Date'].dt.dayofweek>=5) | (pos_sum_all['hour'].isna()) | (pos_sum_all['hour']<6) | (pos_sum_all['hour']>22)]
abn_all.to_csv(outputs_path/'Q4_abnormal_datetime.csv', index=False)
exported.append('异常时间')

# When nothing is skipped this prints exactly the original completion message.
print('导出完成：' + '、'.join(exported))
if skipped:
    print('未导出（未发现对应列）：' + '、'.join(skipped))