In [12]:
import numpy as np
from scipy import stats

# Partition counts used for the runs: 2, 4, ..., 16
partitions = np.arange(2, 17, 2)

# Basic version: detection rate (%), comparison time (ms), total time (ms)
basic_rates = np.array(
    [20.00, 50.00, 50.00, 50.00, 83.33, 66.67, 33.33, 66.67]
)
basic_compare_times = np.array(
    [147.6, 196.3, 190.1, 210.8, 229.2, 227.9, 245.8, 303.9]
)
basic_total_times = np.array(
    [297.8, 396.4, 378.9, 414.9, 466.4, 448.3, 470.6, 600.3]
)  # stored as a NumPy array

# Improved version: detection rate (%), comparison time (ms), total time (ms)
# NOTE(review): improved_rates is value-for-value identical to basic_rates —
# confirm these are measured values and not a copy-paste.
improved_rates = np.array(
    [20.00, 50.00, 50.00, 50.00, 83.33, 66.67, 33.33, 66.67]
)
improved_compare_times = np.array(
    [10.2, 13.9, 17.6, 18.8, 17.7, 17.6, 18.5, 18.2]
)
improved_total_times = np.array(
    [555.8, 481.4, 630.9, 493.9, 655.8, 716.4, 555.7, 607.5]
)  # stored as a NumPy array

def calculate_correlation(x, y, method='pearson'):
    """Compute a correlation coefficient and its p-value.

    Args:
        x: First sequence of observations.
        y: Second sequence of observations, paired element-wise with ``x``.
        method: ``'pearson'`` (default) uses ``stats.pearsonr``;
            ``'spearman'`` uses ``stats.spearmanr``.

    Returns:
        A ``(r, p)`` tuple: the correlation coefficient and the p-value
        reported by the selected SciPy routine.

    Raises:
        ValueError: If ``method`` is neither ``'pearson'`` nor ``'spearman'``.
    """
    if method == 'pearson':
        r, p = stats.pearsonr(x, y)
    elif method == 'spearman':
        # Previously commented out, although the error message below
        # advertises 'spearman' as a valid choice.
        r, p = stats.spearmanr(x, y)
    else:
        raise ValueError("方法必须是 'pearson' 或 'spearman'")
    return r, p

def print_results(name, rates, compare_times, total_times):
    """Print Pearson correlations between partition count and each metric.

    Three sections are printed: detection rate, comparison time and total
    time. Each correlates the module-level ``partitions`` array with one
    series via ``calculate_correlation`` and prints the coefficient, the
    p-value and the value range (plus the median for the two time series).
    Every section runs inside its own ``try`` block: on any exception an
    error line is printed and the remaining sections still run.

    Args:
        name: Label inserted into the heading line.
        rates: Detection rates (printed with a ``%`` suffix); must provide
            ``.min()`` and ``.max()``.
        compare_times: Comparison times (printed with an ``ms`` suffix);
            must provide ``.min()`` and ``.max()``.
        total_times: Total times (printed with an ``ms`` suffix); read with
            the built-in ``min``/``max`` and ``np.median``.
    """
    print(f"\n=== {name}方法 ===")

    # Section 1: detection rate vs. partition count
    try:
        rate_r, rate_p = calculate_correlation(partitions, rates)
        print("分区数量与错误检出率:")
        print(f"  Pearson r = {rate_r:.3f}, p = {rate_p:.4f}")
        lowest_rate, highest_rate = rates.min(), rates.max()
        print(f"  检出率范围: {lowest_rate:.2f}% - {highest_rate:.2f}%")
    except Exception as exc:
        print(f"错误检出率分析出错: {exc}")

    # Section 2: comparison time vs. partition count
    try:
        cmp_r, cmp_p = calculate_correlation(partitions, compare_times, 'pearson')
        print("\n分区数量与比对时间:")
        print(f"  Pearson r = {cmp_r:.3f}, p = {cmp_p:.4f}")
        fastest_cmp, slowest_cmp = compare_times.min(), compare_times.max()
        print(f"  比对时间范围: {fastest_cmp:.1f}ms - {slowest_cmp:.1f}ms")
        cmp_median = np.median(compare_times)
        print(f"  时间中位数: {cmp_median:.1f}ms")
    except Exception as exc:
        print(f"比对时间分析出错: {exc}")

    # Section 3: total time vs. partition count
    try:
        total_r, total_p = calculate_correlation(partitions, total_times, 'pearson')
        print("\n分区数量与总时间:")
        print(f"  Pearson r = {total_r:.3f}, p = {total_p:.4f}")
        fastest_total, slowest_total = min(total_times), max(total_times)
        print(f"  总时间范围: {fastest_total:.1f}ms - {slowest_total:.1f}ms")
        total_median = np.median(total_times)
        print(f"  时间中位数: {total_median:.1f}ms")
    except Exception as exc:
        print(f"总时间分析出错: {exc}")

# Print the correlation report for the basic version
print_results("基础版", basic_rates, basic_compare_times, basic_total_times)

# Print the correlation report for the improved version
print_results("升级版", improved_rates, improved_compare_times, improved_total_times)

# Paired t-test on the comparison times of the two methods
try:
    # Both arrays are passed to ttest_rel as paired samples (same index = same pair).
    t_stat, p_value = stats.ttest_rel(basic_compare_times, improved_compare_times)
    print(f"\n=== 方法间比对时间差异检验 ===")
    print(f"配对t检验: t = {t_stat:.3f}, p = {p_value:.6f}")
    # Difference of the mean comparison times, absolute and relative to the basic mean
    improvement = basic_compare_times.mean() - improved_compare_times.mean()
    improvement_pct = (improvement / basic_compare_times.mean()) * 100
    print(f"平均改进幅度: {improvement:.1f}ms (降低{improvement_pct:.1f}%)")
except Exception as e:
    # Report the failure instead of aborting the cell
    print(f"方法间比对时间差异检验出错: {str(e)}")

# Theoretical complexity check (commented out: this part may be problematic)
# try:
#     print("\n=== 理论复杂度验证 ===")
#     log_n = np.log(partitions)
#     expected = partitions * log_n
#     norm_compare = (improved_compare_times - improved_compare_times.min()) / (improved_compare_times.max() - improved_compare_times.min())
#     norm_expected = (expected - expected.min()) / (expected.max() - expected.min())
#     r_theory, p_theory = stats.pearsonr(norm_expected, norm_compare)
#     print(f"比对时间与NlogN理论复杂度相关性: r = {r_theory:.3f}, p = {p_theory:.4f}")
# except Exception as e:
#     print(f"理论复杂度验证出错: {str(e)}")


=== 基础版方法 ===
分区数量与错误检出率:
  Pearson r = 0.477, p = 0.2323
  检出率范围: 20.00% - 83.33%

分区数量与比对时间:
  Pearson r = 0.940, p = 0.0005
  比对时间范围: 147.6ms - 303.9ms
  时间中位数: 219.4ms

分区数量与总时间:
  Pearson r = 0.916, p = 0.0014
  总时间范围: 297.8ms - 600.3ms
  时间中位数: 431.6ms

=== 升级版方法 ===
分区数量与错误检出率:
  Pearson r = 0.477, p = 0.2323
  检出率范围: 20.00% - 83.33%

分区数量与比对时间:
  Pearson r = 0.760, p = 0.0285
  比对时间范围: 10.2ms - 18.8ms
  时间中位数: 17.6ms

分区数量与总时间:
  Pearson r = 0.416, p = 0.3051
  总时间范围: 481.4ms - 716.4ms
  时间中位数: 581.6ms

=== 方法间比对时间差异检验 ===
配对t检验: t = 13.117, p = 0.000003
平均改进幅度: 202.4ms (降低92.4%)


In [None]:
import numpy as np
from scipy import stats

# Input data: comparison times of both versions, in ms
basic_times = np.array(
    [147.6, 196.3, 190.1, 210.8, 229.2, 227.9, 245.8, 303.9]
)  # basic version
improved_times = np.array(
    [10.2, 13.9, 17.6, 18.8, 17.7, 17.6, 18.5, 18.2]
)  # improved version

# 计算基本统计量
def print_stats(times, name):
    """Print summary statistics of ``times`` under a ``name`` heading.

    A blank line and the heading are printed first, followed by one line
    each for the mean, median, minimum, maximum and standard deviation.
    Every value is formatted with one decimal place and an ``ms`` suffix.
    The standard deviation comes from ``np.std`` with its default arguments.

    Args:
        times: Numeric sequence accepted by the NumPy reduction functions.
        name: Text placed inside the heading line.
    """
    print(f"\n=== {name} ===")
    summary = (
        ("平均值", np.mean),
        ("中位数", np.median),
        ("最小值", np.min),
        ("最大值", np.max),
        ("标准差", np.std),  # measure of spread
    )
    # Each statistic is evaluated right before its own line is printed.
    for label, statistic in summary:
        print(f"{label}: {statistic(times):.1f} ms")

print_stats(basic_times, "基础版比对时间")
print_stats(improved_times, "升级版比对时间")

# Efficiency gain: per-measurement time saved and its share of the basic time
time_reduction = basic_times - improved_times
improvement_ratio = (time_reduction / basic_times) * 100

print("\n=== 效率提升分析 ===")
print(f"平均降低时间: {np.mean(time_reduction):.1f} ms")
print(f"最小提升比例: {np.min(improvement_ratio):.1f}%")
print(f"最大提升比例: {np.max(improvement_ratio):.1f}%")
print(f"平均提升比例: {np.mean(improvement_ratio):.1f}%")

# Statistical significance: paired t-test on the two timing series
t_stat, p_value = stats.ttest_rel(basic_times, improved_times)
print("\n=== 统计检验 ===")
print(f"配对t检验结果: t = {t_stat:.3f}, p = {p_value:.6f}")
# ttest_rel is two-sided by default, so a small p-value alone does not say
# which version is faster. The sign of t gives the direction: t > 0 means the
# basic times are larger on average, i.e. the improved version is faster.
if p_value < 0.05 and t_stat > 0:
    conclusion = "升级版显著更快"
elif p_value < 0.05 and t_stat < 0:
    conclusion = "升级版显著更慢"
else:
    # Also reached when p or t is NaN (every comparison above is False).
    conclusion = "无显著差异"
print("结论：" + conclusion)

# Stability analysis: coefficient of variation (std / mean, in percent)
cv_basic = np.std(basic_times) / np.mean(basic_times) * 100
cv_improved = np.std(improved_times) / np.mean(improved_times) * 100
# Derive the labels from the computed values instead of hard-coding them.
if cv_basic > cv_improved:
    basic_note, improved_note = "离散程度较高", "更稳定"
elif cv_basic < cv_improved:
    basic_note, improved_note = "更稳定", "离散程度较高"
else:
    basic_note = improved_note = "离散程度相当"
print("\n=== 稳定性分析 ===")
print(f"基础版变异系数: {cv_basic:.1f}% ({basic_note})")
print(f"升级版变异系数: {cv_improved:.1f}% ({improved_note})")

In [8]:
import numpy as np
from scipy import stats

# Partition counts used for the runs: 2, 4, ..., 16
partitions = np.arange(2, 17, 2)

# Basic version: detection rate (%), comparison time (ms), total time (ms)
basic_rates = np.array(
    [20.00, 50.00, 50.00, 50.00, 83.33, 66.67, 33.33, 66.67]
)
basic_compare_times = np.array(
    [147.6, 196.3, 190.1, 210.8, 229.2, 227.9, 245.8, 303.9]
)
basic_total_times = np.array(
    [297.8, 396.4, 378.9, 414.9, 466.4, 448.3, 470.6, 600.3]
)  # stored as a NumPy array

# Improved version: detection rate (%), comparison time (ms), total time (ms)
# NOTE(review): improved_rates is value-for-value identical to basic_rates —
# confirm these are measured values and not a copy-paste.
improved_rates = np.array(
    [20.00, 50.00, 50.00, 50.00, 83.33, 66.67, 33.33, 66.67]
)
improved_compare_times = np.array(
    [10.2, 13.9, 17.6, 18.8, 17.7, 17.6, 18.5, 18.2]
)
improved_total_times = np.array(
    [555.8, 481.4, 630.9, 493.9, 655.8, 716.4, 555.7, 607.5]
)  # stored as a NumPy array

def calculate_correlation(x, y, method='pearson'):
    """Return the correlation coefficient and p-value of ``x`` and ``y``.

    Args:
        x: First sequence of observations.
        y: Second sequence of observations, paired element-wise with ``x``.
        method: ``'pearson'`` (default) selects ``stats.pearsonr``;
            ``'spearman'`` selects ``stats.spearmanr``.

    Returns:
        A ``(coefficient, p_value)`` tuple from the selected SciPy routine.

    Raises:
        ValueError: If ``method`` is neither ``'pearson'`` nor ``'spearman'``.
    """
    if method == 'pearson':
        coefficient, p_value = stats.pearsonr(x, y)
        return coefficient, p_value
    if method == 'spearman':
        coefficient, p_value = stats.spearmanr(x, y)
        return coefficient, p_value
    # Anything else is rejected.
    raise ValueError("方法必须是 'pearson' 或 'spearman'")

def print_results(name, rates, compare_times, total_times):
    """Print correlations between partition count and each metric.

    Three sections are printed: detection rate (Pearson, the default of
    ``calculate_correlation``), comparison time (Spearman) and total time
    (Spearman). Each correlates the module-level ``partitions`` array with
    one series and prints the coefficient, the p-value and the value range
    (plus the median for the two time series). Every section runs inside its
    own ``try`` block: on any exception an error line is printed and the
    remaining sections still run.

    Args:
        name: Label inserted into the heading line.
        rates: Detection rates (printed with a ``%`` suffix); must provide
            ``.min()`` and ``.max()``.
        compare_times: Comparison times (printed with an ``ms`` suffix);
            must provide ``.min()`` and ``.max()``.
        total_times: Total times (printed with an ``ms`` suffix); read with
            the built-in ``min``/``max`` and ``np.median``.
    """
    print(f"\n=== {name}方法 ===")

    # Section 1: detection rate vs. partition count (Pearson)
    try:
        rate_r, rate_p = calculate_correlation(partitions, rates)
        print("分区数量与错误检出率:")
        print(f"  Pearson r = {rate_r:.3f}, p = {rate_p:.4f}")
        lowest_rate, highest_rate = rates.min(), rates.max()
        print(f"  检出率范围: {lowest_rate:.2f}% - {highest_rate:.2f}%")
    except Exception as exc:
        print(f"错误检出率分析出错: {exc}")

    # Section 2: comparison time vs. partition count (Spearman)
    try:
        cmp_rho, cmp_p = calculate_correlation(partitions, compare_times, 'spearman')
        print("\n分区数量与比对时间:")
        print(f"  Spearman ρ = {cmp_rho:.3f}, p = {cmp_p:.4f}")
        fastest_cmp, slowest_cmp = compare_times.min(), compare_times.max()
        print(f"  比对时间范围: {fastest_cmp:.1f}ms - {slowest_cmp:.1f}ms")
        cmp_median = np.median(compare_times)
        print(f"  时间中位数: {cmp_median:.1f}ms")
    except Exception as exc:
        print(f"比对时间分析出错: {exc}")

    # Section 3: total time vs. partition count (Spearman)
    try:
        total_rho, total_p = calculate_correlation(partitions, total_times, 'spearman')
        print("\n分区数量与总时间:")
        print(f"  Spearman ρ = {total_rho:.3f}, p = {total_p:.4f}")
        fastest_total, slowest_total = min(total_times), max(total_times)
        print(f"  总时间范围: {fastest_total:.1f}ms - {slowest_total:.1f}ms")
        total_median = np.median(total_times)
        print(f"  时间中位数: {total_median:.1f}ms")
    except Exception as exc:
        print(f"总时间分析出错: {exc}")

# Print the correlation report for the basic version
print_results("基础版", basic_rates, basic_compare_times, basic_total_times)

# Print the correlation report for the improved version
print_results("升级版", improved_rates, improved_compare_times, improved_total_times)

# Paired t-test on the comparison times of the two methods
try:
    # Both arrays are passed to ttest_rel as paired samples (same index = same pair).
    t_stat, p_value = stats.ttest_rel(basic_compare_times, improved_compare_times)
    print(f"\n=== 方法间比对时间差异检验 ===")
    print(f"配对t检验: t = {t_stat:.3f}, p = {p_value:.6f}")
    # Difference of the mean comparison times, absolute and relative to the basic mean
    improvement = basic_compare_times.mean() - improved_compare_times.mean()
    improvement_pct = (improvement / basic_compare_times.mean()) * 100
    print(f"平均改进幅度: {improvement:.1f}ms (降低{improvement_pct:.1f}%)")
except Exception as e:
    # Report the failure instead of aborting the cell
    print(f"方法间比对时间差异检验出错: {str(e)}")

# Theoretical complexity check (commented out: this part may be problematic)
# try:
#     print("\n=== 理论复杂度验证 ===")
#     log_n = np.log(partitions)
#     expected = partitions * log_n
#     norm_compare = (improved_compare_times - improved_compare_times.min()) / (improved_compare_times.max() - improved_compare_times.min())
#     norm_expected = (expected - expected.min()) / (expected.max() - expected.min())
#     r_theory, p_theory = stats.pearsonr(norm_expected, norm_compare)
#     print(f"比对时间与NlogN理论复杂度相关性: r = {r_theory:.3f}, p = {p_theory:.4f}")
# except Exception as e:
#     print(f"理论复杂度验证出错: {str(e)}")


=== 基础版方法 ===
分区数量与错误检出率:
  Pearson r = 0.477, p = 0.2323
  检出率范围: 20.00% - 83.33%

分区数量与比对时间:
  Spearman ρ = 0.952, p = 0.0003
  比对时间范围: 147.6ms - 303.9ms
  时间中位数: 219.4ms

分区数量与总时间:
  Spearman ρ = 0.952, p = 0.0003
  总时间范围: 297.8ms - 600.3ms
  时间中位数: 431.6ms

=== 升级版方法 ===
分区数量与错误检出率:
  Pearson r = 0.477, p = 0.2323
  检出率范围: 20.00% - 83.33%

分区数量与比对时间:
  Spearman ρ = 0.683, p = 0.0621
  比对时间范围: 10.2ms - 18.8ms
  时间中位数: 17.6ms

分区数量与总时间:
  Spearman ρ = 0.333, p = 0.4198
  总时间范围: 481.4ms - 716.4ms
  时间中位数: 581.6ms

=== 方法间比对时间差异检验 ===
配对t检验: t = 13.117, p = 0.000003
平均改进幅度: 202.4ms (降低92.4%)
