In [None]:
import os
import tempfile
from pathlib import Path
from copy import copy
import win32com.client as win32
from openpyxl import load_workbook
from openpyxl.styles import PatternFill
import pandas as pd

# ==== Paths ====
# Source workbook and the "_v1" workbook the results are saved to.  Both are
# resolved to absolute paths against the current working directory.
_INPUT_FILE = r'.\data\原始数据模板_财务分析_2025Q1.xlsx'
_OUTPUT_FILE = r'.\data\原始数据模板_财务分析_2025Q1_v1.xlsx'
inp = Path(_INPUT_FILE).resolve()
out = Path(_OUTPUT_FILE).resolve()

# ==== Metric list ====
# Row labels to append to the sheet, in display order, grouped by section.
# '同比', '环比' and '杜邦分析' are section-header rows.  Note that some labels
# ('应收账款', '存货', '应付票据和账款（短期）') appear in two sections.
_RATIO_LABELS = [
    '流动比率（扣除预收账款）',
    '现金比率（扣除预收账款）',
    '资产负债率',
    'Net Gearing',
    '应收/总资产',
    '货币资金/有息负债',
]
_YOY_LABELS = [
    '同比',
    '应收款',
    '应收账款',
    '预收账款',
    '现金',
    '存货',
    '应付票据和账款（短期）',
]
_QOQ_LABELS = [
    '环比',
    '应收账款',
    '其他应收款',
    '固定资产',
    '资产总计',
    '应付票据和账款（短期）',
    '存货',
    '现金(环比）',
    '应收变化/收入变化',
]
_DUPONT_LABELS = [
    '杜邦分析',
    'ROE(pro forma)',
    '净利率',
    '总资产周转率',
    '杠杆',
    '存货周转率',
    '应收账款周转率',
]
new_metrics = _RATIO_LABELS + _YOY_LABELS + _QOQ_LABELS + _DUPONT_LABELS

# ==== Formula definitions ====
# Every formula receives the numeric DataFrame `d` and returns one value per
# column.  `d.iloc[n]` picks the n-th row (0-based, positional) as a Series,
# so `.shift(k)` moves values k positions along the columns.  As the original
# note says, the formulas that use shift() expect `d` to keep the default
# integer row index.
# NOTE(review): whether shift(4) / shift(1) points at the previous year /
# quarter depends on the column order of the sheet — confirm against the
# template.


def _add_rows(d, rows, lag=0):
    """Add the given rows of `d` left to right, shifting each by `lag` when non-zero."""
    total = None
    for r in rows:
        series = d.iloc[r].shift(lag) if lag else d.iloc[r]
        total = series if total is None else total + series
    return total


def _change(d, rows, lag):
    """Return (sum of `rows`) / (same sum shifted by `lag`) - 1."""
    return _add_rows(d, rows) / _add_rows(d, rows, lag) - 1


formulas = {
    # Ratios
    '流动比率（扣除预收账款）': lambda d: d.iloc[37] / _add_rows(d, (105,)).sub(d.iloc[79]),
    '现金比率（扣除预收账款）': lambda d: d.iloc[9] / _add_rows(d, (105,)).sub(d.iloc[79]),
    '资产负债率': lambda d: d.iloc[123] / d.iloc[71],
    'Net Gearing': lambda d: (
        _add_rows(d, (73, 89, 107, 77, 78, 93, 92, 108)) - d.iloc[9]
    ) / d.iloc[140],
    '应收/总资产': lambda d: _add_rows(d, (13, 14, 15, 17)) / d.iloc[71],
    '货币资金/有息负债': lambda d: d.iloc[9] / _add_rows(d, (73, 92, 107, 108)),
    # Year-on-year (shift by 4 columns)
    '应收款': lambda d: _change(d, (13, 14, 15, 17), 4),
    '应收账款': lambda d: _change(d, (14,), 4),
    '预收账款': lambda d: _change(d, (79,), 4),
    '现金': lambda d: _change(d, (9,), 4),
    '存货': lambda d: _change(d, (22,), 4),
    '应付票据和账款（短期）': lambda d: _change(d, (77, 78), 4),
    # Quarter-on-quarter (shift by 1 column)
    '应收账款（环比）': lambda d: _change(d, (14,), 1),
    '其他应收款': lambda d: _change(d, (17,), 1),
    '固定资产': lambda d: _change(d, (51,), 1),
    '资产总计': lambda d: _change(d, (71,), 1),
    '应付票据和账款（短期）（环比）': lambda d: _change(d, (77, 78), 1),
    '存货（环比）': lambda d: _change(d, (22,), 1),
    '现金(环比）': lambda d: _change(d, (9,), 1),
}

# Labels whose computed values are written with a percentage number format
# rather than the plain two-decimal format.
_PCT_RATIO_LABELS = ('资产负债率', 'Net Gearing', '应收/总资产')
_PCT_YOY_LABELS = (
    '应收款',
    '应收账款',
    '预收账款',
    '现金',
    '存货',
    '应付票据和账款（短期）',
)
percentage_metrics = {*_PCT_RATIO_LABELS, *_PCT_YOY_LABELS}

# ==== add_new_metrics: append styled metric rows below the existing data ====
def _write_styled_row(ws, row_idx, template_row, label):
    """Write one row at `row_idx`: `label` in column 1, empty cells elsewhere,
    with font/alignment/border/fill/number format cloned from `template_row`."""
    for col, ref_cell in enumerate(template_row, start=1):
        cell = ws.cell(row=row_idx, column=col)
        cell.value = label if col == 1 else None
        cell.font = copy(ref_cell.font)
        cell.alignment = copy(ref_cell.alignment)
        cell.border = copy(ref_cell.border)
        cell.fill = copy(ref_cell.fill)
        cell.number_format = ref_cell.number_format


def add_new_metrics(ws_formula, new_metrics):
    """Append the metric rows below the sheet's last row and report where each landed.

    Layout written, starting at ``max_row + 1``: two blank styled rows, then
    one row per entry of ``new_metrics`` (label in column 1, other cells
    empty), plus one extra blank row after selected labels.  Cell styles are
    cloned from the sheet's last two rows, alternating between them per metric.

    Args:
        ws_formula: openpyxl worksheet, modified in place; needs at least two
            rows to serve as style templates.
        new_metrics: row labels in display order; labels may repeat.

    Returns:
        dict mapping label -> 1-based row number.  The first occurrence of a
        label is stored under the label itself.  A repeated label that follows
        a section header ('同比', '环比', '杜邦分析') is additionally stored
        under ``f"{label}（{header}）"`` (e.g. '应收账款（环比）'), so each row
        stays addressable instead of the later row overwriting the earlier one.

    Raises:
        ValueError: if the sheet has fewer than two rows.
    """
    max_row = ws_formula.max_row
    if max_row < 2:
        raise ValueError("模板工作表至少需要两行作为样式模板")

    # Style templates: the last two rows are taken as the "white" and "gray"
    # rows (assumes the sheet already alternates them).
    row_white = ws_formula[max_row - 1]
    row_gray = ws_formula[max_row]

    # Labels followed by an extra blank row.
    insert_blank_after = {
        '货币资金/有息负债',
        '应付票据和账款（短期）',
        '应收变化/收入变化',
        '杠杆'
    }
    # Labels that start a new section; used only to disambiguate repeats.
    section_headers = {'同比', '环比', '杜邦分析'}

    green_fill_metrics = {'现金', '存货', '现金(环比）', '应收变化/收入变化'}
    yellow_fill_metrics = {'存货周转率', '应收账款周转率'}
    green_fill = PatternFill(fill_type="solid", start_color="C6EFCE", end_color="C6EFCE")
    yellow_fill = PatternFill(fill_type="solid", start_color="FFF2CC", end_color="FFF2CC")

    # Two blank rows separate the new area from the existing data.
    current_row = max_row + 1
    for _ in range(2):
        _write_styled_row(ws_formula, current_row, row_white, '')
        current_row += 1

    row_map = {}
    section = None
    for position, metric in enumerate(new_metrics):
        # Alternate templates: white first, then gray, and so on.
        template_row = row_gray if position % 2 else row_white
        _write_styled_row(ws_formula, current_row, template_row, metric)

        # Highlight colour overrides the template fill on the label cell only.
        label_cell = ws_formula.cell(row=current_row, column=1)
        if metric in green_fill_metrics:
            label_cell.fill = green_fill
        elif metric in yellow_fill_metrics:
            label_cell.fill = yellow_fill

        if metric in section_headers:
            section = metric
        if metric in row_map and section is not None:
            # Repeated label: keep the first row under the plain label and
            # register this one under a section-qualified key.
            row_map.setdefault(f"{metric}（{section}）", current_row)
        else:
            row_map.setdefault(metric, current_row)
        current_row += 1

        if metric in insert_blank_after:
            _write_styled_row(ws_formula, current_row, row_white, '')
            current_row += 1
    return row_map

# ==== Step 1: break external links through Excel automation ====
def break_external_links(input_path: str) -> str:
    """Save a copy of the workbook with its external links broken.

    Opens ``input_path`` read-only in a hidden Excel instance without updating
    links, breaks every link source the workbook reports, and saves the result
    as an .xlsx file at a fresh temporary path.

    Args:
        input_path: path of the workbook to open.

    Returns:
        Path of the temporary .xlsx copy.  The caller is responsible for
        deleting it.

    Raises:
        Whatever the Excel COM calls raise.  In that case the workbook is
        closed, Excel is quit and the temporary file is removed before the
        exception propagates.
    """
    excel = win32.gencache.EnsureDispatch('Excel.Application')
    wb = None
    tmp_path = None
    try:
        excel.Visible = False
        excel.DisplayAlerts = False

        # Reserve a unique file name; Excel overwrites the empty placeholder.
        tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".xlsx")
        tmp.close()
        tmp_path = tmp.name

        wb = excel.Workbooks.Open(Filename=input_path, UpdateLinks=0, ReadOnly=True)
        links = wb.LinkSources(Type=1)
        if links:
            for link in links:
                wb.BreakLink(Name=link, Type=1)
        wb.SaveAs(tmp_path, FileFormat=51)  # explicitly request the xlsx format
    except BaseException:
        # Do not leave a half-written temp file behind on failure.
        if tmp_path is not None and os.path.exists(tmp_path):
            os.remove(tmp_path)
        raise
    finally:
        # Always release the workbook and the Excel process, even on error.
        if wb is not None:
            wb.Close(SaveChanges=False)
        excel.Quit()
    return tmp_path

# ==== 步骤 2：数据处理并保存新文件 ====
import pandas as pd

def _write_metric_value(cell, value, as_percent):
    """Store `value` in `cell` with the matching number format.

    Values that are missing or not finite (None, NaN, +/-inf from a division
    by zero) leave the cell empty, because they are not valid numeric cell
    content for the saved workbook.
    """
    import math

    try:
        number = float(value)
    except (TypeError, ValueError):
        number = None
    if number is None or not math.isfinite(number):
        cell.value = None
        return
    # Excel's percent format multiplies the stored value by 100 when
    # displaying it, so the raw fraction is stored (0.253 shows as 25.3%).
    cell.value = number
    cell.number_format = '0.0%' if as_percent else '0.00'


def process_excel(cleaned_path: str, output_path: str):
    """Append the analysis metrics to the cleaned workbook and save it.

    Steps: replace B3 of the active sheet with its cached display value, check
    that A5 holds the expected balance-sheet title, append the metric rows,
    evaluate every entry of ``formulas`` on the sheet's numeric data, write the
    results into the matching rows starting at column B, and save.

    Args:
        cleaned_path: workbook with external links already broken.
        output_path: where the resulting workbook is saved.

    Raises:
        ValueError: if cell A5 does not contain '资产负债表(ORIG,元)'.
    """
    wb_formula = load_workbook(cleaned_path)
    wb_values = load_workbook(cleaned_path, data_only=True)
    ws_formula = wb_formula.active
    ws_values = wb_values.active

    # Replace the formula in B3 with the value it displays.
    ws_formula['B3'].value = ws_values['B3'].value

    # Sanity check that the sheet has the expected layout.
    if str(ws_values['A5'].value).strip() != '资产负债表(ORIG,元)':
        raise ValueError(f"第5行第1列非预期: {ws_values['A5'].value}")

    # Insert the metric rows and remember which row each one occupies.
    row_map = add_new_metrics(ws_formula, new_metrics)

    # Read the same sheet that is being edited (not merely the first one) and
    # coerce everything right of the label column to numbers; blanks become 0.
    df_raw = pd.read_excel(cleaned_path, sheet_name=ws_formula.title, header=None)
    df = df_raw.iloc[:, 1:].apply(pd.to_numeric, errors='coerce').fillna(0)

    # Evaluate each formula; a failing formula is reported and skipped so the
    # remaining metrics are still produced.
    computed_results = {}
    for metric, func in formulas.items():
        try:
            computed_results[metric] = func(df)
        except Exception as e:
            print(f"计算 {metric} 出错: {e}")
            computed_results[metric] = None

    # Write results: a Series fills the row from column B onwards, anything
    # else goes into column B only.
    for metric, row in row_map.items():
        if metric not in computed_results:
            continue
        values = computed_results[metric]
        as_percent = metric in percentage_metrics
        if isinstance(values, pd.Series):
            for offset, v in enumerate(values):
                _write_metric_value(ws_formula.cell(row=row, column=2 + offset), v, as_percent)
        else:
            _write_metric_value(ws_formula.cell(row=row, column=2), values, as_percent)

    wb_formula.save(output_path)
    print(f"成功保存为: {output_path}")


# ==== Main ====
# Break external links -> process -> always remove the intermediate file.
if inp.exists():
    temp_cleaned = break_external_links(str(inp))
    try:
        process_excel(temp_cleaned, str(out))
    finally:
        os.remove(temp_cleaned)
        print("已删除中间文件。")
else:
    raise FileNotFoundError(f"找不到文件: {inp}")


成功保存为: E:\code\Simple-Bot-for-Stock-and-Financial-Report-Data-Processing\data\原始数据模板_财务分析_2025Q1_v1.xlsx
已删除中间文件。


In [14]:
import pandas as pd

# Load the workbook without a header row; blanks become 0 (only valid if the
# business logic allows treating missing values as zero).
df = pd.read_excel(r'.\data\原始数据模板_财务分析_2025Q1.xlsx', header=None).fillna(0)

# Spot-check the second column (0-based index 1) at the rows of interest.
col_index = 1
rows_to_check = [37, 79, 105]
for row in rows_to_check:
    value = df.iloc[row, col_index]
    print(f"第 {row+1} 行，第 2 列的值：{value}，类型：{type(value)}")

# Re-run the current-ratio computation on whole rows and show operand types.
try:
    v1, v2, v3 = df.iloc[37], df.iloc[105], df.iloc[79]
    print(f"类型检查：d.iloc[37]={v1} ({type(v1)}), d.iloc[105]={v2} ({type(v2)}), d.iloc[79]={v3} ({type(v3)})")
    result = v1 / (v2 - v3)
    print(f"计算结果：{result}")
except Exception as e:
    print(f"计算 流动比率（扣除预收账款） 出错: {e}")


第 38 行，第 2 列的值：2325701337.46，类型：<class 'float'>
第 80 行，第 2 列的值：0，类型：<class 'int'>
第 106 行，第 2 列的值：1436504702.25，类型：<class 'float'>
类型检查：d.iloc[37]=0             流动资产合计
1      2325701337.46
2      2381879275.58
3      2211056219.37
4      2102845221.71
5       1998458114.9
6      2106340791.52
7      2011704741.89
8      2361985140.53
9      1652413558.91
10      1717018998.7
11      1618857233.9
12     1553672497.63
13     1478642357.82
14     1556904534.48
15     1681374992.68
16     1640783862.88
17     1660965477.53
18     1782056307.86
19     1430773452.11
20     1427531047.02
21     1431478980.08
22     1286892411.07
23     1220736237.81
24     1272326680.17
25     1267958806.99
26     1303136236.63
27     1443810903.03
28      1750658795.6
29     1580027963.43
30      779345291.09
31       718153932.8
32      623017726.04
33      676618099.72
34      644010032.59
35      579355505.95
36      739893405.57
37      714292708.61
38      740633752.25
39     1046227601.74
40     100807