In [None]:
import pandas as pd
import os

# Trait metadata table (TSV); the driver loop in this cell reads its `trait`, `n` and `file_prefix` columns.
bbj_info_path = "/data1/jiapl_group/lishuhua/BBJ/bbj_info.tsv"
# Root directory searched for one `hum0197.v3.BBJ.<file_prefix>.v1/` sub-directory per trait.
gwas_base_dir = "/data1/jiapl_group/lishuhua/BBJ/general/"
# Destination directory for the reformatted per-trait summary statistics (`<trait>.txt`).
output_base_dir = "/data1/jiapl_group/lishuhua/project/PRS_benchmark/real_data/BBJ/r2_with_ukb/data/"

def get_summary_stats(gwas_file, n_sample, output_path, chunksize=500000):
    """Reformat one GWAS summary-statistics table into a 7-column TSV.

    The input is read as a tab-separated file in chunks of ``chunksize`` rows so
    the full table never has to be held in memory. Only the columns ``SNP``,
    ``ALLELE1``, ``ALLELE0``, ``A1FREQ``, ``BETA`` and ``SE`` are loaded. They
    are renamed and reordered to ``rsid, a1, a2, af, N, beta, SE`` and a
    constant ``N`` column holding ``n_sample`` is added.

    The result is written to ``<output_path>.tmp`` first and moved to
    ``output_path`` only after every chunk has been written. An interrupted or
    failed run therefore never leaves a truncated file at ``output_path`` that
    an "output already exists" check would mistake for a finished result.

    Args:
        gwas_file: Path of the tab-separated input table.
        n_sample: Sample size written into every row of the ``N`` column.
        output_path: Destination path of the reformatted TSV.
        chunksize: Number of input rows processed per chunk.

    Raises:
        Any exception raised by pandas while reading or writing is propagated
        after the temporary file has been removed.
    """
    # NOTE(review): ALLELE1 is mapped to a2 and ALLELE0 to a1 (not 1->1, 0->0).
    # Presumably deliberate to match the allele convention of the downstream
    # tool -- confirm against that tool's documentation.
    column_map = {
        "SNP": "rsid",
        "ALLELE0": "a1",
        "ALLELE1": "a2",
        "A1FREQ": "af",
        "BETA": "beta",
        "SE": "SE",
    }
    output_columns = ["rsid", "a1", "a2", "af", "N", "beta", "SE"]
    tmp_path = f"{output_path}.tmp"

    reader = pd.read_csv(gwas_file, sep="\t", usecols=list(column_map), chunksize=chunksize)
    wrote_any = False
    finished = False
    try:
        for chunk in reader:
            formatted = chunk.rename(columns=column_map).assign(N=n_sample)[output_columns]
            # The first chunk creates the file with a header; later chunks append rows only.
            formatted.to_csv(
                tmp_path,
                sep="\t",
                index=False,
                mode="a" if wrote_any else "w",
                header=not wrote_any,
            )
            wrote_any = True
        if wrote_any:
            # Publish the complete file in a single step.
            os.replace(tmp_path, output_path)
        finished = True
    finally:
        reader.close()
        # On any failure (including a kernel interrupt) drop the partial temp file.
        if not finished and os.path.exists(tmp_path):
            os.remove(tmp_path)

# Driver: for every trait listed in the metadata table, locate its GWAS summary
# file and reformat it with get_summary_stats, skipping traits already done.
bbj_info = pd.read_csv(bbj_info_path, sep="\t")
# Create the destination up front so the first write cannot fail on a missing directory.
os.makedirs(output_base_dir, exist_ok=True)
for _, row in bbj_info.iterrows():
    trait = row['trait']
    n_sample = int(row["n"])
    file_prefix = row["file_prefix"]
    gwas_file_dir = os.path.join(gwas_base_dir, f"hum0197.v3.BBJ.{file_prefix}.v1/")
    # A missing per-trait directory is skipped like a missing file; os.listdir
    # would otherwise raise and abort every remaining trait.
    if not os.path.isdir(gwas_file_dir):
        print(f"GWAS directory not found for trait {trait}: {gwas_file_dir}")
        continue
    # os.listdir returns entries in arbitrary order; sort so the same file is
    # chosen on every run when more than one name matches.
    candidates = sorted(
        file_name
        for file_name in os.listdir(gwas_file_dir)
        if file_name.startswith("GWASsummary_") and file_name.endswith(".auto.txt")
    )
    if not candidates:
        print(f"GWAS file not found for trait {trait}")
        continue
    gwas_file = os.path.join(gwas_file_dir, candidates[0])
    output_path = os.path.join(output_base_dir, f"{trait}.txt")
    if os.path.exists(output_path):
        print(f"Output file already exists for trait {trait}, skipping...")
        continue
    print(f"Processing trait {trait}...")
    get_summary_stats(gwas_file, n_sample, output_path)

In [None]:
import pandas as pd
import numpy as np
import os

# Per-trait summary statistics passed to popcorn as --sfile2 (EAS) and --sfile1 (EUR).
eas_gwas_base_dir = "/data1/jiapl_group/lishuhua/project/PRS_benchmark/real_data/BBJ/r2_with_ukb/data/"
eur_gwas_base_dir = "/data1/jiapl_group/lishuhua/project/PRS_benchmark/software/popcorn/gwas/EUR/"
# Directory receiving one popcorn result file per trait.
output_base_dir = "/data1/jiapl_group/lishuhua/project/PRS_benchmark/real_data/BBJ/r2_with_ukb/res/"
# Score file passed to popcorn via --cfile.
score_file_path = "/data1/jiapl_group/lishuhua/software/general/Popcorn-master/score/EUR_EAS_all_gen_eff.cscore"

trait_list = ['waist', 'height', 'pulse', 'dbp', 'sbp', 'smoke', 'drink', 'bmi', 'wbc', 'rbc', 'hb', 'plt', 'lymph', 'mono', 'neut', 'eos', 'alt', 'ast', 'bun', 'cholesterol', 'creatinine', 'glucose', 'ggt', 'hdl', 'ldl', 'triglycerides', 'ua']

# Create the result directory up front so popcorn is not asked to write into a missing path.
os.makedirs(output_base_dir, exist_ok=True)

# Traits whose popcorn run returned a non-zero status, reported after the loop.
failed_traits = []

for trait_name in trait_list:
    eas_sumstats_path = os.path.join(eas_gwas_base_dir, f"{trait_name}.txt")
    eur_sumstats_path = os.path.join(eur_gwas_base_dir, f"{trait_name}.txt")
    if not os.path.exists(eas_sumstats_path):
        print(f"EAS summary stats file not found for trait {trait_name}, skipping...")
        continue
    if not os.path.exists(eur_sumstats_path):
        print(f"EUR summary stats file not found for trait {trait_name}, skipping...")
        continue
    output_path = os.path.join(output_base_dir, f"{trait_name}.txt")
    if os.path.exists(output_path):
        print(f"Output file already exists for trait {trait_name}, skipping...")
        continue
    print(f"Processing trait {trait_name}...")
    # The paths are interpolated into the shell command unquoted, so they must
    # not contain spaces or shell metacharacters.
    cmd = f"popcorn fit -v 1 --cfile {score_file_path} --gen_effect --sfile1 {eur_sumstats_path} --sfile2 {eas_sumstats_path} {output_path}"
    status = os.system(cmd)
    # os.system does not raise when the command fails; surface the failure here
    # instead of silently moving on to the next trait.
    if status != 0:
        print(f"popcorn returned non-zero status {status} for trait {trait_name}")
        failed_traits.append(trait_name)

if failed_traits:
    print(f"popcorn failed for {len(failed_traits)} trait(s): {', '.join(failed_traits)}")

In [None]:
import pandas as pd
import numpy as np
import os


def parse_popcorn_output(file_path):
    """Extract the ``h1^2``, ``h2^2`` and ``pge`` rows from one Popcorn output file.

    The first line of the file is treated as a header and skipped. Every later
    line is split on whitespace. Lines whose first field is one of the three
    parameters contribute one record built from the second field (value) and
    the third field (standard error). Blank lines and all other rows are ignored.

    Args:
        file_path: Path of the result file. The trait name is the file's base
            name with a trailing ``.txt`` removed.

    Returns:
        A list of dicts with the keys ``Trait``, ``Parameter``, ``Value`` and
        ``SE``, in the order the rows appear in the file.

    Raises:
        ValueError: If a matching row has fewer than three fields or its
            value / SE field cannot be parsed as a float.
    """
    wanted_parameters = ("h1^2", "h2^2", "pge")

    file_name = os.path.basename(file_path)
    # Strip only a trailing ".txt"; str.replace would also delete ".txt"
    # occurring in the middle of the name.
    if file_name.endswith(".txt"):
        trait_name = file_name[:-len(".txt")]
    else:
        trait_name = file_name

    results = []
    with open(file_path, 'r') as f:
        # Skip the header line (a no-op for an empty file).
        next(f, None)
        for line_number, line in enumerate(f, start=2):
            parts = line.split()
            if not parts or parts[0] not in wanted_parameters:
                continue
            if len(parts) < 3:
                raise ValueError(
                    f"{file_path}: line {line_number} has {len(parts)} field(s), "
                    f"expected at least 3 for parameter {parts[0]!r}"
                )
            results.append({
                "Trait": trait_name,
                "Parameter": parts[0],
                "Value": float(parts[1]),
                "SE": float(parts[2]),
            })

    return results

def main():
    """Collect the Popcorn results of every ``.txt`` file in the result directory into one TSV.

    Each ``.txt`` file in ``input_directory`` is parsed with
    ``parse_popcorn_output``. A file that raises is reported and skipped so one
    bad file does not stop the others. The combined records are written as a
    tab-separated table and printed. If nothing could be extracted, a warning
    is printed and no file is written.
    """
    # *** Change this to the directory that holds your Popcorn result files ***
    input_directory = '/data1/jiapl_group/lishuhua/project/PRS_benchmark/real_data/BBJ/r2_with_ukb/res/'

    all_results = []

    print(f"开始扫描目录: {os.path.abspath(input_directory)}")

    # os.listdir returns names in arbitrary order; sort them so the row order of
    # the output table is the same on every run.
    for filename in sorted(os.listdir(input_directory)):
        if not filename.endswith(".txt"):
            continue
        file_path = os.path.join(input_directory, filename)
        print(f"正在处理文件: {filename}")
        try:
            all_results.extend(parse_popcorn_output(file_path))
        except Exception as e:
            # Best effort: report the failing file and keep going.
            print(f"处理文件 {filename} 时出错: {e}")

    if not all_results:
        print("警告: 未找到任何有效的Popcorn结果文件或未能提取任何数据。")
        return

    # Convert the collected records to a DataFrame and save them as a TSV file.
    df = pd.DataFrame(all_results)
    output_path = "/data1/jiapl_group/lishuhua/project/PRS_benchmark/real_data/BBJ/r2_with_ukb/res/popcorn_results_bbj_ukb.tsv"
    df.to_csv(output_path, sep='\t', index=False)

    print(f"\n处理完成！数据已成功保存到: {output_path}")
    print("文件内容预览:")
    print(df)

# Entry point: run the result aggregation when this code is executed as the main module.
if __name__ == "__main__":
    main()