In [4]:
import pandas as pd
import statsmodels.api as sm
import numpy as np

# Leverage / outlier screening for the simple regression of Sunday on Daily
# circulation. Steps: load the workbook, fit OLS, pull the influence
# diagnostics, flag rows against rule-of-thumb cut-offs, and print the
# flagged rows.

# 1. Load the data -------------------------------------------------------
file_path = r"D:\课堂\大三上\回归分析\Newspaper.xlsx"
df = pd.read_excel(file_path)
print(df.columns.tolist())
# The workbook is assumed to contain columns named exactly "Sunday" and "Daily".
y = df["Sunday"]
X = df["Daily"]

# 2. Fit the simple regression: Sunday = β0 + β1 * Daily ------------------
X_const = sm.add_constant(X)  # prepend the intercept column
model = sm.OLS(y, X_const).fit()

# 3. Influence diagnostics -----------------------------------------------
infl = model.get_influence()
# Diagonal of the hat matrix, i.e. the leverage of each observation.
leverage = infl.hat_matrix_diag
# Externally studentized residuals (preferred for outlier detection).
stud_resid = infl.resid_studentized_external

# 4. Cut-offs ------------------------------------------------------------
n = len(df)  # sample size
p = 1        # number of predictors (simple regression)
# Common leverage rules are 2(p+1)/n or 3(p+1)/n; the factor-2 version is
# used here, which equals 4/n for a single predictor.
leverage_threshold = 2 * (p + 1) / n
# An observation is treated as an outlier when |t| > 2.
outlier_threshold = 2

# 5. Flag high-leverage rows and outlying rows ---------------------------
leverage_mask = leverage > leverage_threshold
outlier_mask = np.abs(stud_resid) > outlier_threshold

# 6. Attach the diagnostics to a copy of the data for easy inspection ----
df_result = df.assign(
    leverage=leverage,
    stud_resid=stud_resid,
    is_leverage=leverage_mask,
    is_outlier=outlier_mask,
)

# 7. Report --------------------------------------------------------------
leverage_points = df_result.loc[df_result["is_leverage"]]
outlier_points = df_result.loc[df_result["is_outlier"]]
# Rows that satisfy both criteria at once.
both = df_result.loc[df_result["is_leverage"] & df_result["is_outlier"]]

print("\n=== 杠杆点(Leverage points) ===", leverage_points, sep="\n")
print("\n=== 异常点(Outliers, by |studentized resid|>2) ===", outlier_points, sep="\n")
print("\n=== 同时是杠杆点又是异常点的 ===", both, sep="\n")


['Newspaper', 'Daily', 'Sunday']

=== 杠杆点(Leverage points) ===
            Newspaper     Daily    Sunday  leverage  stud_resid  is_leverage  \
13  Los Angeles Times  1164.388  1531.527  0.254323   -0.441554         True   
18     New York Times  1209.225  1762.015  0.282663    1.403740         True   

    is_outlier  
13       False  
18       False  

=== 异常点(Outliers, by |studentized resid|>2) ===
                Newspaper    Daily   Sunday  leverage  stud_resid  \
2           Boston Herald  355.628  235.084  0.031785   -2.569235   
22  Philadelphia Inquirer  515.523  982.663  0.032402    2.859634   
23       Pittsburgh Press  220.465  557.000  0.047938    2.505007   

    is_leverage  is_outlier  
2         False        True  
22        False        True  
23        False        True  

=== 同时是杠杆点又是异常点的 ===
Empty DataFrame
Columns: [Newspaper, Daily, Sunday, leverage, stud_resid, is_leverage, is_outlier]
Index: []
