In [1]:
import os
import glob
import pandas as pd

def merge_csv_files(input_dir: str, output_path: str, **read_csv_kwargs) -> pd.DataFrame:
    """
    디렉토리 내 모든 CSV 파일을 병합하여 단일 DataFrame으로 반환하고, 파일로 저장합니다.

    Parameters
    ----------
    input_dir : str
        CSV 파일들이 들어있는 디렉토리 경로.
    output_path : str
        병합된 CSV를 저장할 파일 경로 (예: 'merged.csv').
    read_csv_kwargs : dict, optional
        pandas.read_csv에 전달할 추가 인자 (인코딩, 구분자 등).

    Returns
    -------
    pd.DataFrame
        병합된 결과 DataFrame.
    """
    # 1. 디렉토리 내 .csv 파일 목록 수집
    pattern = os.path.join(input_dir, "*.csv")
    csv_files = glob.glob(pattern)
    if not csv_files:
        raise FileNotFoundError(f"No CSV files found in directory: {input_dir}")

    # 2. 각 파일을 읽어서 리스트에 추가
    df_list = []
    for fp in csv_files:
        print(f"Reading {os.path.basename(fp)}...")
        df = pd.read_csv(fp, **read_csv_kwargs)
        df_list.append(df)

    # 3. DataFrame 병합
    merged_df = pd.concat(df_list, ignore_index=True)
    print(f"Merged {len(csv_files)} files → {len(merged_df)} rows")

    # 4. 결과 저장
    merged_df.to_csv(output_path, index=False)
    print(f"Saved merged CSV to {output_path}")

    return merged_df

if __name__ == "__main__":
    # 예시 사용법
    input_dir = "./pipeline_results_ABAG/"
    output_path = "pipeline_ABAG_native_data.csv"
    # 예: 한글 인코딩이 필요할 때 encoding='cp949' 등을 추가
    merged_df = merge_csv_files(input_dir, output_path, encoding='utf-8')

Reading metrics_7kf1.csv...
Reading metrics_7su1.csv...
Reading metrics_7kf0.csv...
Reading metrics_7su0.csv...
Reading metrics_7sgm.csv...
Reading metrics_7sbg.csv...
Reading metrics_7sjo.csv...
Reading metrics_7swn.csv...
Reading metrics_7shy.csv...
Reading metrics_7soc.csv...
Reading metrics_7sjn.csv...
Reading metrics_7np1.csv...
Reading metrics_7t25.csv...
Reading metrics_7nx3.csv...
Reading metrics_7vng.csv...
Reading metrics_7ew5.csv...
Reading metrics_7z2m.csv...
Reading metrics_6x97.csv...
Reading metrics_7vyr.csv...
Reading metrics_7x7o.csv...
Reading metrics_7wrv.csv...
Reading metrics_7z4t.csv...
Reading metrics_7zf9.csv...
Reading metrics_7ued.csv...
Reading metrics_7s0e.csv...
Reading metrics_7tfo.csv...
Reading metrics_7tee.csv...
Reading metrics_7r40.csv...
Reading metrics_7zr7.csv...
Reading metrics_7o9w.csv...
Reading metrics_7yqx.csv...
Reading metrics_7wo5.csv...
Reading metrics_7yqz.csv...
Merged 33 files → 1650 rows
Saved merged CSV to pipeline_ABAG_native_data.cs

In [14]:
input_dir = "./pipeline_results_AbNb/"
output_path = "pipeline_AbNb_native_data.csv"
# 예: 한글 인코딩이 필요할 때 encoding='cp949' 등을 추가
merged_df = merge_csv_files(input_dir, output_path, encoding='utf-8')

Reading metrics_7uvf_7uvf.csv...
Reading metrics_7vux_7vux.csv...
Reading metrics_7unb_7unb.csv...
Reading metrics_7u5b_7u5b.csv...
Reading metrics_7wro_7wro.csv...
Reading metrics_7q6c_7q6c.csv...
Reading metrics_7wrl_7wrl.csv...
Reading metrics_7r58_7r58.csv...
Reading metrics_7uvh_7uvh.csv...
Reading metrics_7wog_7wog.csv...
Reading metrics_7wsc_7wsc.csv...
Reading metrics_7tp4_7tp4.csv...
Reading metrics_7t01_7t01.csv...
Reading metrics_7uzc_7uzc.csv...
Reading metrics_7tp3_7tp3.csv...
Reading metrics_7t7b_7t7b.csv...
Reading metrics_7u2d_7u2d.csv...
Reading metrics_7qnw_7qnw.csv...
Reading metrics_7wg3_7wg3.csv...
Reading metrics_7sww_7sww.csv...
Reading metrics_7q0i_7q0i.csv...
Reading metrics_7t17_7t17.csv...
Reading metrics_7xy8_7xy8.csv...
Reading metrics_7yru_7yru.csv...
Reading metrics_8d9z_8d9z.csv...
Reading metrics_7z2m_7z2m.csv...
Reading metrics_7x8p_7x8p.csv...
Reading metrics_7yds_7yds.csv...
Reading metrics_7upk_7upk.csv...
Reading metrics_7upb_7upb.csv...
Reading me

In [15]:
input_dir = "./pipeline_results_AbNb_decoy//"
output_path = "pipeline_AbNb_decoy_data.csv"
# 예: 한글 인코딩이 필요할 때 encoding='cp949' 등을 추가
merged_df = merge_csv_files(input_dir, output_path, encoding='utf-8')

Reading metrics_7tl0_7yru.csv...
Reading metrics_7t01_8f6l.csv...
Reading metrics_7t01_8g4p.csv...
Reading metrics_7so5_8f6l.csv...
Reading metrics_7t7b_8f6o.csv...
Reading metrics_7t17_7vux.csv...
Reading metrics_7t01_8tbq.csv...
Reading metrics_7so5_7r58.csv...
Reading metrics_7q0i_7vux.csv...
Reading metrics_7sww_7uvh.csv...
Reading metrics_7qnw_8ath.csv...
Reading metrics_7so5_8ath.csv...
Reading metrics_7t7b_8ath.csv...
Reading metrics_7t7b_7q6c.csv...
Reading metrics_7r58_8gzz.csv...
Reading metrics_7tl0_7u2d.csv...
Reading metrics_7r58_7tp4.csv...
Reading metrics_7t17_7tp3.csv...
Reading metrics_7t17_7uzc.csv...
Reading metrics_7qnw_7t7b.csv...
Reading metrics_7t17_8c3v.csv...
Reading metrics_7t01_7xcz.csv...
Reading metrics_7q6c_7u2d.csv...
Reading metrics_7r58_7xcz.csv...
Reading metrics_7so5_8c3v.csv...
Reading metrics_7sww_8fgx.csv...
Reading metrics_7qnw_8fgx.csv...
Reading metrics_7qnw_8il3.csv...
Reading metrics_7q6c_7wg3.csv...
Reading metrics_7r58_7t17.csv...
Reading me

In [16]:
input_dir = "./pipeline_results_ABAG_decoy/"
output_path = "pipeline_ABAG_decoy_data.csv"
# 예: 한글 인코딩이 필요할 때 encoding='cp949' 등을 추가
merged_df = merge_csv_files(input_dir, output_path, encoding='utf-8')

Reading metrics_7su1_7sgm.csv...
Reading metrics_7sjn_7sbg.csv...
Reading metrics_7su1_7soc.csv...
Reading metrics_7su1_7t25.csv...
Reading metrics_7soc_7sgm.csv...
Reading metrics_7nx3_7su1.csv...
Reading metrics_7shy_7sgm.csv...
Reading metrics_7np1_7z4t.csv...
Reading metrics_7sjn_7sjo.csv...
Reading metrics_7o9w_7zf9.csv...
Reading metrics_7sjn_7soc.csv...
Reading metrics_7ew5_7wrv.csv...
Reading metrics_7su0_7soc.csv...
Reading metrics_7su1_7ew5.csv...
Reading metrics_7t25_7su1.csv...
Reading metrics_7soc_7ew5.csv...
Reading metrics_7ued_7sbg.csv...
Reading metrics_7r40_7tfo.csv...
Reading metrics_7shy_6x97.csv...
Reading metrics_7tee_7sgm.csv...
Reading metrics_7tfo_7z4t.csv...
Reading metrics_7wo5_7sgm.csv...
Reading metrics_7swn_7nx3.csv...
Reading metrics_7vng_7sjn.csv...
Reading metrics_7vng_7zf9.csv...
Reading metrics_7wrv_7shy.csv...
Reading metrics_7zf9_7su0.csv...
Reading metrics_6x97_7su0.csv...
Reading metrics_7z4t_7wrv.csv...
Reading metrics_7tfo_6x97.csv...
Reading me

In [4]:
directory='/home/cseomoon/project/ABAG/DB/AbNb_structure/AF3/native'

query_list=[
        name
        for name in os.listdir(directory)
        if os.path.isdir(os.path.join(directory, name))
    ]
query_list

['7q0i_7q0i',
 '7q6c_7q6c',
 '7qnw_7qnw',
 '7r58_7r58',
 '7so5_7so5',
 '7sww_7sww',
 '7t01_7t01',
 '7t17_7t17',
 '7t7b_7t7b',
 '7tl0_7tl0',
 '7tp3_7tp3',
 '7tp4_7tp4',
 '7u2d_7u2d',
 '7u5b_7u5b',
 '7unb_7unb',
 '7upb_7upb',
 '7upk_7upk',
 '7uvf_7uvf',
 '7uvh_7uvh',
 '7uz7_7uz7',
 '7uzc_7uzc',
 '7vux_7vux',
 '7wbh_7wbh',
 '7wcp_7wcp',
 '7wg3_7wg3',
 '7wog_7wog',
 '7wpe_7wpe',
 '7wrl_7wrl',
 '7wro_7wro',
 '7wsc_7wsc',
 '7x6a_7x6a',
 '7x7t_7x7t',
 '7x8p_7x8p',
 '7xcz_7xcz',
 '7xdb_7xdb',
 '7xy8_7xy8',
 '7yds_7yds',
 '7yru_7yru',
 '7z2m_7z2m',
 '7zlk_7zlk',
 '7zwi_7zwi',
 '8ath_8ath',
 '8c3v_8c3v',
 '8cii_8cii',
 '8cin_8cin',
 '8cz5_8cz5',
 '8d9z_8d9z',
 '8dad_8dad',
 '8dg9_8dg9',
 '8ds5_8ds5',
 '8dxt_8dxt',
 '8dxu_8dxu',
 '8e6j_8e6j',
 '8ez3_8ez3',
 '8f6l_8f6l',
 '8f6o_8f6o',
 '8fgx_8fgx',
 '8g3p_8g3p',
 '8g4p_8g4p',
 '8gb6_8gb6',
 '8gzz_8gzz',
 '8hc4_8hc4',
 '8heb_8heb',
 '8hp9_8hp9',
 '8hpv_8hpv',
 '8il3_8il3',
 '8j7t_8j7t',
 '8oxw_8oxw',
 '8qrg_8qrg',
 '8r1d_8r1d',
 '8sdg_8sdg',
 '8sgn

In [8]:
df=pd.read_csv('pipeline_AbNb_native_data.csv')
query_set=set(df['query'].unique())

In [9]:
results_set=set(query_list)

In [12]:
targets=results_set-query_set

In [13]:
targets

{'8fgx_8fgx', '8hp9_8hp9'}