# 03_analysis.ipynb

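Assess crosslink satisfaction for the baseline and crosslink-guided PDB models: for each crosslink, measure the distance between the two linked residues and compare it with the allowed maximum (`dmax`).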

In [None]:
from pathlib import Path
import numpy as np
import pandas as pd

def read_simple_ca_pdb(pdb_path):
    """Read one C-alpha coordinate per residue from a PDB file.

    Only ``ATOM`` records whose atom name (columns 13-16) is ``CA`` are
    used, so a full-atom model yields the same result as a CA-only one.
    If the same chain/residue number occurs more than once, the last
    record read wins.

    Parameters
    ----------
    pdb_path : str or path-like
        PDB file to read.

    Returns
    -------
    dict
        Maps chain ID (column 22, whitespace-stripped) to an array of
        shape ``(n_residues, 3)`` holding x, y, z. Rows are ordered by
        ascending residue number; row ``i`` is the residue with the
        i-th smallest number, which equals residue ``i + 1`` only when
        numbering starts at 1 and has no gaps.
    """
    coords = {}
    with open(pdb_path) as f:
        for line in f:
            if not line.startswith('ATOM'):
                continue
            # Keep the C-alpha only. Without this filter each atom of a
            # residue overwrites the previous one and the last atom listed
            # (not the CA) ends up representing the residue.
            if line[12:16].strip() != 'CA':
                continue
            chain = line[21].strip()
            resseq = int(line[22:26])
            xyz = np.array([
                float(line[30:38]),
                float(line[38:46]),
                float(line[46:54]),
            ])
            coords.setdefault(chain, {})[resseq] = xyz
    # Convert each chain's {resseq: xyz} mapping to an array sorted by resseq.
    coords_arr = {
        chain: np.vstack([by_res[i] for i in sorted(by_res)])
        for chain, by_res in coords.items()
    }
    return coords_arr

# Input files: the two predicted models and the crosslink table.
pdb_baseline = '../outputs/run_af3_baseline.pdb'
pdb_guided = '../outputs/run_crosslink_guided.pdb'
crosslink_file = '../data/crosslinks/barnase_barstar.tsv'

import os

# Load each model that exists on disk. For a missing model, print which
# notebook produces it and leave its coordinate variable unassigned.
if os.path.exists(pdb_baseline):
    coords_base = read_simple_ca_pdb(pdb_baseline)
else:
    print('Baseline PDB not found. Run 01_run_af3.ipynb first.')

if os.path.exists(pdb_guided):
    coords_guided = read_simple_ca_pdb(pdb_guided)
else:
    print('Guided PDB not found. Run 02_add_crosslinks.ipynb first.')

# Crosslink table (tab-separated). Unlike the models, it is read
# unconditionally, so a missing file raises here.
df = pd.read_csv(crosslink_file, sep='\t')
def check_crosslinks(coords, df):
    """Measure each crosslink's residue-residue distance and test it against dmax.

    Parameters
    ----------
    coords : dict
        Maps chain ID to an indexable sequence of 3-D points. Residue
        number ``r`` is looked up at position ``r - 1``, i.e. residue
        numbering is taken to start at 1 without gaps.
    df : pandas.DataFrame
        One crosslink per row, with columns ``chain1``, ``res1``,
        ``chain2``, ``res2`` and ``dmax``.

    Returns
    -------
    pandas.DataFrame
        Columns ``c1``, ``r1``, ``c2``, ``r2``, ``dist`` (Euclidean
        distance between the two points) and ``satisfied``
        (``dist <= dmax``), one row per crosslink in input order.

    Raises
    ------
    KeyError
        If a chain ID is not present in ``coords``.
    IndexError
        If a residue number is outside ``1..len(coords[chain])``.
    """
    results = []
    for _, row in df.iterrows():
        c1, r1 = row['chain1'], int(row['res1'])
        c2, r2 = row['chain2'], int(row['res2'])
        dmax = float(row['dmax'])

        ends = []
        for chain, res in ((c1, r1), (c2, r2)):
            chain_xyz = coords[chain]
            # A residue number <= 0 would become a negative index and
            # silently wrap around to the end of the chain, so reject it
            # explicitly instead of measuring the wrong residue.
            if not 1 <= res <= len(chain_xyz):
                raise IndexError(
                    'residue {} of chain {!r} is outside 1..{}'.format(
                        res, chain, len(chain_xyz)))
            ends.append(chain_xyz[res - 1])

        d = np.linalg.norm(ends[0] - ends[1])
        results.append((c1, r1, c2, r2, d, d <= dmax))
    return pd.DataFrame(results, columns=['c1', 'r1', 'c2', 'r2', 'dist', 'satisfied'])

# Report per-crosslink distances for whichever models are present on disk.
# The header is printed before the table is computed, so it still appears
# if the check itself fails.
if os.path.exists(pdb_baseline):
    print('\nBaseline crosslink satisfaction:')
    baseline_table = check_crosslinks(coords_base, df)
    print(baseline_table)

if os.path.exists(pdb_guided):
    print('\nGuided crosslink satisfaction:')
    guided_table = check_crosslinks(coords_guided, df)
    print(guided_table)