In [1]:
#!/usr/bin/env python
# pilot_test_pitm_pis.py

import sys
sys.path.append('/home/cseomoon/appl/af_analysis-0.1.4/src')

from af_analysis.data import Data
import pandas as pd
import os

# base_path='/home/cseomoon/project/ABAG/2025_H_L_A/20250504_seeds_10/negative/af3_results/results1'
# query='7np1_7z4t'

# data_path=os.path.join(base_path, query)

# 1. Load the full decoy table.
print("Loading data...")
data = Data(csv="/home/cseomoon/appl/af_analysis-0.1.4/data/sequence_classification/train_set_AbNb/decoy/AbNb_decoy_data_250523.csv")


print(f"Total models: {len(data.df)}")
print(f"Available columns: {data.df.columns.tolist()}")

# 2. Pilot run on a small subset (at most the first 10 rows).
# Slice once and size everything from that slice, so all column lists have the
# same length even when the table holds fewer than 10 models.
pilot_rows = data.df.head(10)
n_pilot = len(pilot_rows)
print(f"\n=== Pilot Test with first {n_pilot} models ===")
pilot_data = Data(data_dict={
    'pdb': pilot_rows['pdb'].tolist(),
    'query': pilot_rows['query'].tolist(),
    # Fall back to placeholders when the source table has no data_file column.
    'data_file': pilot_rows['data_file'].tolist() if 'data_file' in pilot_rows.columns else [None] * n_pilot
})

# 3. Compute piTM / pIS for the pilot subset.
print("\nCalculating piTM/pIS...")
pilot_data.add_pitm_pis(cutoff=8.0, verbose=True)

# 4. Inspect the global scores plus every per-chain piTM_* column.
print("\n=== Results ===")
result_columns = ['query', 'piTM', 'pIS'] + [col for col in pilot_data.df.columns if col.startswith('piTM_')]
print(pilot_data.df[result_columns])

# 5. Summary statistics over the models that produced a value.
pitm_values = pilot_data.df['piTM'].dropna()
pis_values = pilot_data.df['pIS'].dropna()

for label, values in (('piTM', pitm_values), ('pIS', pis_values)):
    if len(values) > 0:
        print(f"\n{label} statistics:")
        print(f"  Mean: {values.mean():.3f}")
        print(f"  Std:  {values.std():.3f}")
        print(f"  Min:  {values.min():.3f}")
        print(f"  Max:  {values.max():.3f}")

Loading data...
Total models: 18250
Available columns: ['pdb', 'query', 'seed', 'sample', 'data_file', 'chain_iptm', 'chain_pair_iptm', 'chain_pair_pae_min', 'chain_ptm', 'fraction_disordered', 'has_clash', 'ipTM', 'pTM', 'ranking_confidence', 'format', 'pdockq_AH', 'pdockq_AL', 'pdockq_HL', 'pdockq', 'pdockq2_AH', 'pdockq2_AL', 'pdockq2_HL', 'pdockq2', 'mpdockq_AH', 'mpdockq_AL', 'mpdockq_HL', 'mpdockq', 'LIS', 'LIS_AH', 'LIS_AL', 'LIS_HL', 'avg_LIS', 'contacts_AH', 'interface_plddt_AH', 'interface_pae_AH', 'contacts_AL', 'interface_plddt_AL', 'interface_pae_AL', 'contacts_HL', 'interface_plddt_HL', 'interface_pae_HL', 'total_contacts', 'avg_interface_plddt', 'avg_interface_pae', 'chain_plddt_A', 'chain_pae_A', 'chain_plddt_H', 'chain_pae_H', 'chain_plddt_L', 'chain_pae_L', 'chain_pair_pae_AH', 'chain_pair_pae_AL', 'chain_pair_pae_HL', 'avg_model_plddt', 'avg_internal_pae', 'avg_pair_pae', 'model_avg_RMSD', 'query_avg_RMSD', 'scaled_RMSD_ratio', 'scaled_model_RMSD', 'scaled_query_RMSD

Calculating piTM/pIS:   0%|          | 0/10 [00:00<?, ?it/s]

Successfully calculated piTM/pIS for 10/10 models
Added columns: piTM, pIS, piTM_A, piTM_H, piTM_L

=== Results ===
       query      piTM       pIS    piTM_A    piTM_H    piTM_L
0  7tl0_7yru  0.659368  0.627089  0.001027  0.307372  0.318690
1  7tl0_7yru  0.617060  0.585280  0.005291  0.351155  0.228834
2  7tl0_7yru  0.632691  0.608110  0.004800  0.335809  0.267501
3  7tl0_7yru  0.648340  0.622060  0.003141  0.350733  0.268186
4  7tl0_7yru  0.604958  0.575167  0.003117  0.327257  0.244793
5  7tl0_7yru  0.643827  0.620288  0.001938  0.311098  0.307252
6  7tl0_7yru  0.619931  0.587331  0.004619  0.337059  0.245653
7  7tl0_7yru  0.639386  0.614120  0.001967  0.306846  0.305307
8  7tl0_7yru  0.598407  0.568642  0.006061  0.331391  0.231190
9  7tl0_7yru  0.658023  0.630367  0.001444  0.336308  0.292615

piTM statistics:
  Mean: 0.632
  Std:  0.021
  Min:  0.598
  Max:  0.659

pIS statistics:
  Mean: 0.604
  Std:  0.023
  Min:  0.569
  Max:  0.630


In [3]:
# Show the pilot table, including the piTM/pIS columns added by add_pitm_pis().
pilot_data.df

Unnamed: 0,pdb,query,data_file,format,piTM,pIS,piTM_A,piTM_H,piTM_L
0,/home/cseomoon/project/ABAG/AbNb_benchmark/AF3...,7tl0_7yru,/home/cseomoon/project/ABAG/AbNb_benchmark/AF3...,custom,0.659368,0.627089,0.001027,0.307372,0.31869
1,/home/cseomoon/project/ABAG/AbNb_benchmark/AF3...,7tl0_7yru,/home/cseomoon/project/ABAG/AbNb_benchmark/AF3...,custom,0.61706,0.58528,0.005291,0.351155,0.228834
2,/home/cseomoon/project/ABAG/AbNb_benchmark/AF3...,7tl0_7yru,/home/cseomoon/project/ABAG/AbNb_benchmark/AF3...,custom,0.632691,0.60811,0.0048,0.335809,0.267501
3,/home/cseomoon/project/ABAG/AbNb_benchmark/AF3...,7tl0_7yru,/home/cseomoon/project/ABAG/AbNb_benchmark/AF3...,custom,0.64834,0.62206,0.003141,0.350733,0.268186
4,/home/cseomoon/project/ABAG/AbNb_benchmark/AF3...,7tl0_7yru,/home/cseomoon/project/ABAG/AbNb_benchmark/AF3...,custom,0.604958,0.575167,0.003117,0.327257,0.244793
5,/home/cseomoon/project/ABAG/AbNb_benchmark/AF3...,7tl0_7yru,/home/cseomoon/project/ABAG/AbNb_benchmark/AF3...,custom,0.643827,0.620288,0.001938,0.311098,0.307252
6,/home/cseomoon/project/ABAG/AbNb_benchmark/AF3...,7tl0_7yru,/home/cseomoon/project/ABAG/AbNb_benchmark/AF3...,custom,0.619931,0.587331,0.004619,0.337059,0.245653
7,/home/cseomoon/project/ABAG/AbNb_benchmark/AF3...,7tl0_7yru,/home/cseomoon/project/ABAG/AbNb_benchmark/AF3...,custom,0.639386,0.61412,0.001967,0.306846,0.305307
8,/home/cseomoon/project/ABAG/AbNb_benchmark/AF3...,7tl0_7yru,/home/cseomoon/project/ABAG/AbNb_benchmark/AF3...,custom,0.598407,0.568642,0.006061,0.331391,0.23119
9,/home/cseomoon/project/ABAG/AbNb_benchmark/AF3...,7tl0_7yru,/home/cseomoon/project/ABAG/AbNb_benchmark/AF3...,custom,0.658023,0.630367,0.001444,0.336308,0.292615


In [1]:
# !/usr/bin/env python
# pilot_rosetta_metrics.py

import sys
import os
sys.path.append('/home/cseomoon/appl/af_analysis-0.1.4/src')

import pandas as pd
from af_analysis.rosetta_metrics import extract_metrics_from_file, batch_extract
import time

def test_single_file():
    """단일 파일로 기본 기능 테스트"""
    print("=== Single File Test ===")
    
    # 테스트할 CSV에서 첫 번째 PDB 파일 가져오기
    csv_path = "/home/cseomoon/appl/af_analysis-0.1.4/data/sequence_classification/train_set_AbNb/decoy/AbNb_decoy_data_250523.csv"
    df = pd.read_csv(csv_path)
    
    # 유효한 PDB 파일 찾기
    test_pdb = None
    for pdb_path in df['pdb'].head(10):
        if pdb_path and os.path.exists(pdb_path):
            test_pdb = pdb_path
            break
    
    if not test_pdb:
        print("No valid PDB file found in first 10 rows")
        return None
    
    print(f"Testing with: {test_pdb}")
    
    # 기본 메트릭 추출
    start_time = time.time()
    result, error = extract_metrics_from_file(
        test_pdb,
        antibody_mode=True
    )
    elapsed = time.time() - start_time
    
    print(f"Processing time: {elapsed:.2f} seconds")
    
    if error:
        print(f"Error: {error}")
        return None
    
    if result:
        print("=== Basic Metrics ===")
        for key, value in result.items():
            if isinstance(value, float):
                print(f"{key}: {value:.3f}")
            else:
                print(f"{key}: {value}")
    
    return result

def test_enhanced_metrics():
    """새로 추가된 메트릭들 테스트"""
    print("\n=== Enhanced Metrics Test ===")
    
    # 테스트할 CSV에서 PDB 파일 가져오기
    csv_path = "/home/cseomoon/appl/af_analysis-0.1.4/data/sequence_classification/train_set_AbNb/decoy/AbNb_decoy_data_250523.csv"
    df = pd.read_csv(csv_path)
    
    test_pdb = None
    for pdb_path in df['pdb'].head(5):
        if pdb_path and os.path.exists(pdb_path):
            test_pdb = pdb_path
            break
    
    if not test_pdb:
        print("No valid PDB file found")
        return None
    
    print(f"Testing enhanced metrics with: {test_pdb}")
    
    start_time = time.time()
    
    result, error = extract_metrics_from_file(
        test_pdb,
        antibody_mode=True
    )
    
    elapsed = time.time() - start_time
    print(f"Enhanced processing time: {elapsed:.2f} seconds")
    
    if error:
        print(f"Error: {error}")
        return None
    
    if result:
        print("=== All Metrics (including new ones) ===")
        # Shape Complementarity 제거, 실제 사용 가능한 메트릭들로 변경
        new_metrics = ['interface_hbonds', 'hbond_density']
        
        print("New metrics:")
        for metric in new_metrics:
            if metric in result:
                print(f"  {metric}: {result[metric]:.3f}")
            else:
                print(f"  {metric}: NOT FOUND")
        
        print("\nAll metrics:")
        for key, value in sorted(result.items()):
            if isinstance(value, float):
                print(f"  {key}: {value:.3f}")
            else:
                print(f"  {key}: {value}")
    
    return result

def test_batch_processing():
    """소량 배치 처리 테스트"""
    print("\n=== Batch Processing Test ===")
    
    # CSV에서 처음 3개 유효한 PDB 파일 수집
    csv_path = "/home/cseomoon/appl/af_analysis-0.1.4/data/sequence_classification/train_set_AbNb/decoy/AbNb_decoy_data_250523.csv"
    df = pd.read_csv(csv_path)
    
    test_files = []
    for pdb_path in df['pdb'].head(10):
        if pdb_path and os.path.exists(pdb_path) and len(test_files) < 3:
            test_files.append(pdb_path)
    
    if len(test_files) < 2:
        print("Not enough valid PDB files for batch test")
        return None
    
    print(f"Testing batch processing with {len(test_files)} files")
    for i, pdb in enumerate(test_files, 1):
        print(f"  {i}. {os.path.basename(pdb)}")
    
    # 배치 처리 (단일 프로세스로 테스트)
    start_time = time.time()
    
    try:
        results_df = batch_extract(
            test_files,
            n_jobs=1,  # 단일 프로세스로 안전하게 테스트
            verbose=True,
            antibody_mode=True
        )
        
        elapsed = time.time() - start_time
        print(f"\nBatch processing time: {elapsed:.2f} seconds")
        print(f"Average time per file: {elapsed/len(test_files):.2f} seconds")
        
        print(f"\n=== Batch Results Summary ===")
        print(f"Successfully processed: {len(results_df)}/{len(test_files)} files")
        
        if len(results_df) > 0:
            print("\nMetric statistics:")
            numeric_cols = results_df.select_dtypes(include=['float64', 'int64']).columns
            for col in numeric_cols:
                if col != 'pdb':
                    values = results_df[col].dropna()
                    if len(values) > 0:
                        print(f"  {col}: mean={values.mean():.3f}, std={values.std():.3f}")
            
            # 새로운 메트릭들 확인
            new_metrics = ['shape_complementarity', 'interface_hbonds', 'hbond_density']
            print(f"\nNew metrics availability:")
            for metric in new_metrics:
                if metric in results_df.columns:
                    non_null = results_df[metric].notna().sum()
                    print(f"  {metric}: {non_null}/{len(results_df)} files")
                else:
                    print(f"  {metric}: NOT FOUND")
        
        return results_df
        
    except Exception as e:
        print(f"Batch processing failed: {e}")
        import traceback
        traceback.print_exc()
        return None

def test_error_handling():
    """에러 처리 테스트"""
    print("\n=== Error Handling Test ===")
    
    # 존재하지 않는 파일로 테스트
    fake_pdb = "/fake/path/nonexistent.pdb"
    result, error = extract_metrics_from_file(fake_pdb, antibody_mode=True)
    
    print(f"Non-existent file test:")
    print(f"  Result: {result}")
    print(f"  Error: {error}")
    
    # 잘못된 형식 파일로 테스트 (있다면)
    print("Error handling works as expected.")

def save_pilot_results(results_df, single_result, output_dir=None):
    """Write the pilot batch results and a short text summary to disk.

    Nothing is written when ``results_df`` is ``None`` or empty.

    Args:
        results_df: DataFrame of batch metrics; saved as
            ``pilot_rosetta_metrics.csv`` (without the index).
        single_result: Result of the single-file test; appended to the text
            summary when truthy.
        output_dir: Directory for both output files, created if missing.
            Defaults to the ``results`` folder of the af_analysis checkout.

    Returns:
        None.
    """
    if results_df is None or len(results_df) == 0:
        return

    if output_dir is None:
        output_dir = "/home/cseomoon/appl/af_analysis-0.1.4/results"
    os.makedirs(output_dir, exist_ok=True)

    output_path = os.path.join(output_dir, "pilot_rosetta_metrics.csv")
    results_df.to_csv(output_path, index=False)
    print(f"\nPilot results saved to: {output_path}")

    # Human-readable summary next to the CSV.
    summary_path = os.path.join(output_dir, "pilot_summary.txt")
    # Explicit encoding so the file does not depend on the locale default.
    with open(summary_path, 'w', encoding='utf-8') as f:
        f.write("=== Pilot Test Summary ===\n")
        f.write(f"Total files processed: {len(results_df)}\n")
        f.write(f"Columns: {list(results_df.columns)}\n")
        f.write("\n=== Sample Results ===\n")
        f.write(str(results_df.head()))
        if single_result:
            f.write("\n\n=== Single File Result ===\n")
            f.write(f"{single_result}\n")

    print(f"Summary saved to: {summary_path}")

if __name__ == "__main__":
    # Opening banner.
    print("Starting Rosetta Metrics Pilot Test")
    print("=" * 50)

    # The single-file check gates every other step: if it yields nothing,
    # the remaining tests are skipped.
    single_result = test_single_file()

    if not single_result:
        print("Single file test failed. Skipping other tests.")
    else:
        enhanced_result = test_enhanced_metrics()
        batch_results = test_batch_processing()
        test_error_handling()

        # Persist the batch output together with the single-file result.
        save_pilot_results(batch_results, single_result)

    # Closing banner.
    print(f"\n{'=' * 50}")
    print("Pilot test completed!")

Starting Rosetta Metrics Pilot Test
=== Single File Test ===
Testing with: /home/cseomoon/project/ABAG/AbNb_benchmark/AF3_results/7tl0_7yru/seed-1_sample-0/model.cif

[DEBUG] Processing file: /home/cseomoon/project/ABAG/AbNb_benchmark/AF3_results/7tl0_7yru/seed-1_sample-0/model.cif
┌──────────────────────────────────────────────────────────────────────────────┐
│                                 PyRosetta-4                                  │
│              Created in JHU by Sergey Lyskov and PyRosetta Team              │
│              (C) Copyright Rosetta Commons Member Institutions               │
│                                                                              │
│ NOTE: USE OF PyRosetta FOR COMMERCIAL PURPOSES REQUIRE PURCHASE OF A LICENSE │
│         See LICENSE.PyRosetta.md or email license@uw.edu for details         │
└──────────────────────────────────────────────────────────────────────────────┘
PyRosetta-4 2024 [Rosetta PyRosetta4.conda.ubuntu.cxx11thread.seriali

Interface metrics:   0%|          | 0/3 [00:00<?, ?it/s]


[DEBUG] Processing file: /home/cseomoon/project/ABAG/AbNb_benchmark/AF3_results/7tl0_7yru/seed-1_sample-0/model.cif
[DEBUG] Converting mmCIF to PDB...
[DEBUG] Temporary PDB created at: /tmp/tmpb9vz8287.pdb
[DEBUG] Reading chains from: /tmp/tmpb9vz8287.pdb
[DEBUG] Found chains: ['A', 'H', 'L']
[DEBUG] Antibody chains: ['H', 'L']
[DEBUG] Antigen chains: ['A']
[DEBUG] Interface string: HL_A
[DEBUG] Loading pose from PDB...
[DEBUG] Running interface analyzer...
[DEBUG] Creating score function...
[DEBUG] Setting up InterfaceAnalyzerMover...
[DEBUG] Applying InterfaceAnalyzerMover...
[DEBUG] Extracting basic metrics...
[DEBUG] Creating metrics dictionary...
[DEBUG] Metrics extraction completed
[DEBUG] Extracted metrics: {'dG_separated': 11.109011924858464, 'dSASA_int': 891.7039884141126, 'nres_int': 34, 'delta_unsatHbonds': 4, 'packstat': 0.6706582244515593, 'dG_dSASA_norm': 0.012458183510668984}
[DEBUG] Processing completed successfully
[DEBUG] Cleaning up temporary file: /tmp/tmpb9vz8287.

In [3]:
import pyrosetta
from pyrosetta import init, pose_from_pdb, create_score_function
from pyrosetta.rosetta.protocols.analysis import InterfaceAnalyzerMover

# Option flags handed to PyRosetta's init(), joined into one string.
# "-mute all" is always included.
opts = " ".join([
    "-ignore_unrecognized_res",
    "-ignore_zero_occupancy false",
    "-load_PDB_components false",
    "-no_fconfig",
    "-check_cdr_chainbreaks false",
    "-mute all",
])

init(opts)
# Marker that init() has been called in this kernel. At notebook top level a
# plain assignment already creates a module-level name, so no `global`
# statement is needed.
_pyrosetta_initialized = True

scorefxn = create_score_function('ref2015')

print("[DEBUG] Setting up InterfaceAnalyzerMover...")
iam = InterfaceAnalyzerMover()
iam.set_scorefunction(scorefxn)
# Same interface string the extraction debug log reports for this complex
# (antibody chains H and L, antigen chain A).
iam.set_interface('HL_A')
iam.set_pack_input(True)
iam.set_pack_separated(True)
iam.set_compute_interface_sc(True)
iam.set_calc_dSASA(True)
iam.set_compute_packstat(True)

┌──────────────────────────────────────────────────────────────────────────────┐
│                                 PyRosetta-4                                  │
│              Created in JHU by Sergey Lyskov and PyRosetta Team              │
│              (C) Copyright Rosetta Commons Member Institutions               │
│                                                                              │
│ NOTE: USE OF PyRosetta FOR COMMERCIAL PURPOSES REQUIRE PURCHASE OF A LICENSE │
│         See LICENSE.PyRosetta.md or email license@uw.edu for details         │
└──────────────────────────────────────────────────────────────────────────────┘
PyRosetta-4 2024 [Rosetta PyRosetta4.conda.ubuntu.cxx11thread.serialization.Ubuntu.python310.Release 2024.42+release.3366cf78a3df04339d1982e94531b77b098ddb99 2024-10-11T08:24:04] retrieved from: http://www.pyrosetta.org
[DEBUG] Setting up InterfaceAnalyzerMover...


In [4]:
# Exploratory: list the attributes and methods exposed by the configured mover.
dir(iam)

['__class__',
 '__delattr__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '_pybind11_conduit_v1_',
 'add_score_info_to_pose',
 'apply',
 'apply_const',
 'assign',
 'clear_info',
 'clone',
 'create',
 'fresh_instance',
 'get_additional_output',
 'get_all_data',
 'get_all_per_residue_data',
 'get_centroid_dG',
 'get_chain_groups',
 'get_complex_energy',
 'get_complexed_sasa',
 'get_crossterm_interface_energy',
 'get_crossterm_interface_energy_ratio',
 'get_current_job',
 'get_current_tag',
 'get_fixed_chains',
 'get_gly_interface_energy',
 'get_input_pose',
 'get_interface_dG',
 'get_interface_delta_hbond_unsat',
 'get_interface_delta_sasa',
 'get_interface_packstat',
 'get_interface_set',
 'get_last_move_status',
 'get_metr

In [21]:
# NOTE(review): the returned object does not look like a dict. Its dir() output
# exposes append/front/back/capacity and no keys()/items(), so it appears to be
# a sequence-style container. Confirm against the PyRosetta API before
# indexing it by metric name.
metrics = iam.get_metrics()  # container of metrics (not a plain dict, see note above)
dir(metrics)

['__bool__',
 '__class__',
 '__contains__',
 '__delattr__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getitem__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__iter__',
 '__le__',
 '__len__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__setitem__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '_pybind11_conduit_v1_',
 'append',
 'back',
 'capacity',
 'clear',
 'count',
 'erase',
 'extend',
 'front',
 'l',
 'max_size',
 'pop',
 'remove',
 'reserve',
 'resize',
 'shrink_to_fit',
 'u']