In [None]:
import shutil
from pathlib import Path

import pandas as pd
from Bio.PDB import PDBParser, PDBIO
from Bio.PDB.Polypeptide import one_to_three


def extract_num(p: Path):
    """Return the integer that follows the last underscore in the path's stem.

    For example ``length_100`` -> 100 and ``sample_7.pdb`` -> 7. A stem
    without any underscore is converted as a whole. ``int`` raises
    ``ValueError`` when the selected text is not a valid integer.
    """
    # rpartition yields (head, sep, tail); tail is the whole stem if no '_' exists.
    _, _, suffix = p.stem.rpartition('_')
    return int(suffix)

In [None]:
# Export the "SM" samples into the shared evaluation layout under output_path:
#   B/  - the sample's backbone PDB (sample_1.pdb), copied unchanged
#   Q/  - FASTA holding the sc_results.csv sequence with the highest tm_score
#   BQ/ - the backbone PDB with residue names replaced by that sequence
input_path = Path("/home/tz365/WS/se3_diffusion/sample_output/se3-ras")
output_path = Path("/home/tz365/WS/PROTEVAL/data/RAS/generated")
method = "SM"

# exist_ok=False makes this cell fail instead of mixing new files into the
# output of a previous run; output_path itself must already exist.
(output_path / "B").mkdir(exist_ok=False)
(output_path / "Q").mkdir(exist_ok=False)
(output_path / "BQ").mkdir(exist_ok=False)

# A running counter (rather than the index of the run directory) guarantees a
# unique output name per exported sample, even when one run directory holds
# several length_* folders.
i = 0
for run_dir in sorted(input_path.iterdir()):
    # Sort numerically so the numbering is reproducible across file systems.
    for length_dir in sorted(run_dir.glob("length_*"), key=extract_num):
        sample_dir = length_dir / "sample_0"  # only the first sample is exported
        print(i, sample_dir)

        # -> B
        aa_pdb = output_path / f"B/{method}_{i}.pdb"
        shutil.copy(sample_dir / "sample_1.pdb", aa_pdb)

        # -> Q
        sc_results = pd.read_csv(sample_dir / "self_consistency" / "sc_results.csv")
        best_sequence = sc_results.loc[sc_results['tm_score'].idxmax(), 'sequence']
        with open(output_path / f"Q/{method}_{i}.fasta", "w") as f:
            f.write(f">{method}_{i}\n")
            # Trailing newline keeps the record well-formed when FASTA files
            # are concatenated.
            f.write(f"{best_sequence}\n")

        # -> BQ
        bq_pdb = output_path / f"BQ/{method}_{i}.pdb"
        structure = PDBParser(QUIET=True).get_structure("prot", aa_pdb)
        chain = next(structure.get_chains())  # only the first chain is renamed
        residues = list(chain)
        # zip() would silently stop at the shorter input and leave a partially
        # renamed structure, so refuse mismatched lengths explicitly.
        if len(residues) != len(best_sequence):
            raise ValueError(
                f"{aa_pdb}: chain has {len(residues)} residues but the best "
                f"sequence has {len(best_sequence)} letters"
            )
        for res, aa in zip(residues, best_sequence):
            res.resname = one_to_three(aa)
        io = PDBIO()
        io.set_structure(structure)
        io.save(str(bq_pdb))

        # -> BQS   (not produced by this cell)
        # -> BQSH  (not produced by this cell)
        i += 1

In [None]:
# Export the "FM" samples into the shared evaluation layout under output_path:
#   B/  - the sample's backbone PDB (sample_1.pdb), copied unchanged
#   Q/  - FASTA holding the sc_results.csv sequence with the highest tm_score
#   BQ/ - the backbone PDB with residue names replaced by that sequence
input_path = Path("/home/tz365/WS/FoldFlow/sample_output/ff-ras-ot")
output_path = Path("/home/tz365/WS/PROTEVAL/data/RAS/generated")
method = "FM"

# This cell only adds files; the output folders must have been created before.
assert (output_path / "B").exists(), "missing output folder B"
assert (output_path / "Q").exists(), "missing output folder Q"
assert (output_path / "BQ").exists(), "missing output folder BQ"

i = 0
for length_dir in sorted(input_path.glob("length_*"), key=extract_num):
    # Numeric ordering (sample_2 before sample_10), matching how the length_*
    # folders are ordered above; a plain sorted() would order by text.
    for sample_dir in sorted(length_dir.glob("sample_*"), key=extract_num):
        print(i, sample_dir)

        # -> B
        aa_pdb = output_path / f"B/{method}_{i}.pdb"
        shutil.copy(sample_dir / "sample_1.pdb", aa_pdb)

        # -> Q
        sc_results = pd.read_csv(sample_dir / "self_consistency" / "sc_results.csv")
        best_sequence = sc_results.loc[sc_results['tm_score'].idxmax(), 'sequence']
        with open(output_path / f"Q/{method}_{i}.fasta", "w") as f:
            f.write(f">{method}_{i}\n")
            # Trailing newline keeps the record well-formed when FASTA files
            # are concatenated.
            f.write(f"{best_sequence}\n")

        # -> BQ
        bq_pdb = output_path / f"BQ/{method}_{i}.pdb"
        structure = PDBParser(QUIET=True).get_structure("prot", aa_pdb)
        chain = next(structure.get_chains())  # only the first chain is renamed
        residues = list(chain)
        # zip() would silently stop at the shorter input and leave a partially
        # renamed structure, so refuse mismatched lengths explicitly.
        if len(residues) != len(best_sequence):
            raise ValueError(
                f"{aa_pdb}: chain has {len(residues)} residues but the best "
                f"sequence has {len(best_sequence)} letters"
            )
        for res, aa in zip(residues, best_sequence):
            res.resname = one_to_three(aa)
        io = PDBIO()
        io.set_structure(structure)
        io.save(str(bq_pdb))

        # -> BQS   (not produced by this cell)
        # -> BQSH  (not produced by this cell)
        i += 1