In [9]:
import os

def extract_sequence_from_atom(pdb_file):
    """Collect the residue names of each chain from the ATOM lines of a PDB file.

    Every line that starts with "ATOM" is read by fixed character position:
    residue name from [17:20], chain ID from [21], residue number from [22:26]
    and insertion code from [26].  A residue is emitted once per chain, the
    first time its (residue number, insertion code) pair is seen, so the many
    atom lines belonging to one residue contribute a single entry.

    Args:
        pdb_file: Path of the PDB file to read.

    Returns:
        dict mapping chain ID (the empty string when that column is blank) to
        the list of residue names in order of first appearance.  An empty dict
        is returned when the file cannot be read or yields no usable ATOM line.

    Notes:
        Problems are reported with print() instead of being raised: read errors
        produce a message and an empty result, and ATOM lines that are too
        short to hold a chain ID or whose residue number is not an integer are
        skipped and counted in one summary message.
    """
    try:
        with open(pdb_file, 'r') as file:
            lines = file.readlines()
    except FileNotFoundError:
        print(f"File not found: {pdb_file}")
        return {}
    except Exception as e:
        print(f"An error occurred while reading the file: {e}")
        return {}

    sequence_dict = {}
    seen_residues = {}
    skipped_records = 0
    for line in lines:
        if not line.startswith("ATOM"):
            continue

        # line[21] is the chain ID; anything shorter cannot be indexed safely.
        if len(line) <= 21:
            skipped_records += 1
            continue
        try:
            residue_number = int(line[22:26].strip())
        except ValueError:
            skipped_records += 1
            continue

        chain_id = line[21].strip()
        residue_name = line[17:20].strip()
        # Slice (not index) so a line that ends right after the residue number
        # simply yields an empty insertion code.
        insertion_code = line[26:27].strip()

        # Identify a residue by its position (number + insertion code) rather
        # than by its name: residues 52 and 52A stay distinct even when they
        # share a name, and a position listed under two different residue
        # names (alternate locations) is counted only once.
        residue_key = (residue_number, insertion_code)
        chain_seen = seen_residues.setdefault(chain_id, set())
        chain_residues = sequence_dict.setdefault(chain_id, [])
        if residue_key not in chain_seen:
            chain_seen.add(residue_key)
            chain_residues.append(residue_name)

    if skipped_records:
        print(f"Skipped {skipped_records} malformed ATOM record(s) in {pdb_file}")

    return sequence_dict

def main(pdb_file_path="original_mousecys.pdb"):
    """Print the per-chain residue sequence found in a PDB file.

    Args:
        pdb_file_path: Path of the PDB file to process.  Defaults to
            "original_mousecys.pdb", so calling main() with no argument
            behaves as before.

    Returns:
        None.  All results and error messages are written with print().
    """
    # Reject a missing path (or a directory) before attempting to parse it.
    if not os.path.isfile(pdb_file_path):
        print(f"Invalid file path: {pdb_file_path}")
        return

    print(f"Reading file: {pdb_file_path}")
    sequence = extract_sequence_from_atom(pdb_file_path)

    # An empty mapping means either a read failure or no ATOM lines at all.
    if not sequence:
        print("No ATOM records found or file could not be read.")
        return

    for chain, residues in sequence.items():
        print(f"Chain {chain}: {' '.join(residues)}")

# Call main() only when this code runs as the top-level program
# (__name__ == "__main__"), not when it is imported from another module.
if __name__ == "__main__":
    main()


Reading file: original_mousecys.pdb
Chain X: PRO GLN LYS SER LYS VAL ASP CYX ASN LYS GLY VAL THR GLY THR VAL TYR GLU TYR GLY ALA ASN THR ILE ASP GLY GLY GLU PHE VAL ASN PHE GLN GLN TYR ALA GLY LYS HID ILE LEU PHE VAL ASN VAL ALA SER PHE CYS GLY LEU THR ALA THR TYR PRO GLU LEU ASN THR LEU GLN GLU GLU LEU LYS PRO PHE ASN VAL THR VAL LEU GLY PHE PRO CYS ASN GLN PHE GLY LYS GLN GLU PRO GLY LYS ASN SER GLU ILE LEU LEU GLY LEU LYS TYR VAL ARG PRO GLY GLY GLY TYR VAL PRO ASN PHE GLN LEU PHE GLU LYS GLY ASP VAL ASN GLY ASP ASN GLU GLN LYS VAL PHE SER PHE LEU LYS ASN SER CYX PRO PRO THR SER GLU LEU PHE GLY SER PRO GLU HID LEU PHE TRP ASP PRO MET LYS VAL HID ASP ILE ARG TRP ASN PHE GLU LYS PHE LEU VAL GLY PRO ASP GLY VAL PRO VAL MET ARG TRP PHE HID HID THR PRO VAL ARG ILE VAL GLN SER ASP ILE MET GLU TYR LEU ASN GLN THR SER
