In [5]:
from Bio.PDB import PDBParser
from Bio.SeqUtils import seq1

def pdb_to_seq_with_length(pdb_file, chain_id):
    """
    Extract the sequence and length of a specific chain from a PDB file.

    Parameters:
    - pdb_file (str): Path to the PDB file.
    - chain_id (str): The chain identifier to extract the sequence.

    Returns:
    - tuple: A tuple containing the sequence as a one-letter code string and its length.
    """
    parser = PDBParser(QUIET=True)
    structure = parser.get_structure("structure", pdb_file)

    sequence = []
    print("=== Debugging Output ===")
    for model in structure:
        for chain in model:
            print(f"Found chain: {chain.id}")  # Print all chains in the structure
            if chain.id == chain_id:
                print(f"Processing chain {chain_id}")
                for residue in chain:
                    print(f"Residue ID: {residue.id}, Residue Name: {residue.get_resname()}")
                    if residue.id[0] == " ":  # Only consider standard residues
                        residue_name = residue.get_resname()
                        try:
                            one_letter = seq1(residue_name)
                            sequence.append(one_letter)
                            print(f"Converted {residue_name} to {one_letter}")
                        except KeyError:
                            print(f"Skipping unknown residue: {residue_name}")
                break  # Stop after finding the correct chain
        if sequence:  # Stop if the chain is found and processed
            break

    seq_str = "".join(sequence)
    print("=== End of Debugging Output ===")
    return seq_str, len(seq_str)

# Example usage
pdb_file_path = "original_mousecys.pdb"  # Replace with your PDB file path
chain_identifier = "X"  # Replace with the desired chain ID
sequence, length = pdb_to_seq_with_length(pdb_file_path, chain_identifier)

print(f"\nSequence for chain {chain_identifier}: {sequence}")
print(f"Length of the sequence: {length}")


=== Debugging Output ===
Found chain: X
Processing chain X
Residue ID: (' ', 1, ' '), Residue Name: PRO
Converted PRO to P
Residue ID: (' ', 2, ' '), Residue Name: GLN
Converted GLN to Q
Residue ID: (' ', 3, ' '), Residue Name: LYS
Converted LYS to K
Residue ID: (' ', 4, ' '), Residue Name: SER
Converted SER to S
Residue ID: (' ', 5, ' '), Residue Name: LYS
Converted LYS to K
Residue ID: (' ', 6, ' '), Residue Name: VAL
Converted VAL to V
Residue ID: (' ', 7, ' '), Residue Name: ASP
Converted ASP to D
Residue ID: (' ', 8, ' '), Residue Name: CYX
Converted CYX to X
Residue ID: (' ', 9, ' '), Residue Name: ASN
Converted ASN to N
Residue ID: (' ', 10, ' '), Residue Name: LYS
Converted LYS to K
Residue ID: (' ', 11, ' '), Residue Name: GLY
Converted GLY to G
Residue ID: (' ', 12, ' '), Residue Name: VAL
Converted VAL to V
Residue ID: (' ', 13, ' '), Residue Name: THR
Converted THR to T
Residue ID: (' ', 14, ' '), Residue Name: GLY
Converted GLY to G
Residue ID: (' ', 15, ' '), Residue Name