In [7]:
import pandas as pd
import numpy as np

# 1. Mock data: ten tickers (S1..S10), each with a P/E ratio and a sector label.
data = dict(
    Stock=['S' + str(i) for i in range(1, 11)],
    PE_Ratio=[15.2, 8.1, 22.5, 9.0, 11.5, 35.0, 18.3, 7.8, 14.1, 10.9],
    Sector=[
        'Tech', 'Energy', 'Tech', 'Energy', 'Finance',
        'Tech', 'Finance', 'Energy', 'Tech', 'Finance',
    ],
)

# Index the frame by ticker so rows can be looked up by stock name.
df = pd.DataFrame(data)
df = df.set_index('Stock')

# N is the universe size; it is used later to flip ranks into scores.
N = df.shape[0]

In [8]:
# --- A. Building the raw factor signal (Factor Signal S) ---

# 1. Rank the P/E ratios in ascending order so the smallest P/E receives
#    rank 1; tied values share the average of their ranks.
df['PE_Rank'] = df['PE_Ratio'].rank(ascending=True, method='average')

# 2. Flip the ranking into the raw signal S = (N + 1) - Rank, so that a low
#    P/E maps to a high score. `rsub` computes (N + 1) - PE_Rank.
df['Factor_Signal_S'] = df['PE_Rank'].rsub(N + 1)

print("--- 步骤 A 结果：原始因子信号 S ---")
print(
    df.loc[:, ['PE_Ratio', 'Sector', 'Factor_Signal_S']]
      .sort_values('Factor_Signal_S', ascending=False)
)

--- 步骤 A 结果：原始因子信号 S ---
       PE_Ratio   Sector  Factor_Signal_S
Stock                                    
S8          7.8   Energy             10.0
S2          8.1   Energy              9.0
S4          9.0   Energy              8.0
S10        10.9  Finance              7.0
S5         11.5  Finance              6.0
S9         14.1     Tech              5.0
S1         15.2     Tech              4.0
S7         18.3  Finance              3.0
S3         22.5     Tech              2.0
S6         35.0     Tech              1.0


In [9]:
# --- B. Sector neutralization and Z-score standardization ---

def _sector_zscore(scores):
    """Standardize one sector's Factor_Signal_S values to a Z-score.

    Parameters
    ----------
    scores : pd.Series
        Factor_Signal_S values of every stock in a single sector.

    Returns
    -------
    pd.Series
        (scores - mean) / sample std, aligned with ``scores``. When the
        sector's sample std is undefined (a single stock) or zero (all signals
        tied), every valid score maps to 0.0 instead of NaN.
    """
    spread = scores.std()
    if pd.isna(spread) or spread == 0:
        # Nothing to compare against inside this sector: treat every member as
        # exactly average. Multiplying by 0.0 keeps missing inputs as NaN.
        return scores * 0.0
    return (scores - scores.mean()) / spread


# 3. Sector-neutral Z-score:
# standardize Factor_Signal_S within each sector group, so a stock only earns
# a high score when its signal is above its own sector's average.
df['Value_ZScore_V'] = df.groupby('Sector')['Factor_Signal_S'].transform(_sector_zscore)

print("\n--- 步骤 B 结果：行业中性化 Z-Score (Value_ZScore_V) ---")
print(df[['PE_Ratio', 'Sector', 'Value_ZScore_V']].sort_values(by='Value_ZScore_V', ascending=False))


--- 步骤 B 结果：行业中性化 Z-Score (Value_ZScore_V) ---
       PE_Ratio   Sector  Value_ZScore_V
Stock                                   
S9         14.1     Tech        1.095445
S8          7.8   Energy        1.000000
S10        10.9  Finance        0.800641
S1         15.2     Tech        0.547723
S5         11.5  Finance        0.320256
S2          8.1   Energy        0.000000
S3         22.5     Tech       -0.547723
S4          9.0   Energy       -1.000000
S6         35.0     Tech       -1.095445
S7         18.3  Finance       -1.120897
