In [2]:
import prody
import pandas as pd

In [3]:
def convert_amino_acid_abbr(three_letter_abbr: str) -> str:
    """
    Converts a three-letter amino acid abbreviation to its one-character equivalent.

    Args:
        three_letter_abbr: A string representing the three-letter amino acid abbreviation
                           (e.g., "Ala", "Gly", "Lys"). Case-insensitive.

    Returns:
        A string representing the one-character amino acid equivalent.
        Returns "Unknown" if the abbreviation is not found in the mapping.

    Examples:
        >>> convert_amino_acid_abbr("Ala")
        'A'
        >>> convert_amino_acid_abbr("gly")
        'G'
        >>> convert_amino_acid_abbr("Lys")
        'K'
        >>> convert_amino_acid_abbr("XYZ")
        'Unknown'
    """
    # Create a dictionary mapping three-letter abbreviations to one-character codes
    # All keys are stored in uppercase for case-insensitive lookup
    amino_acid_map = {
        "ALA": "A",  # Alanine
        "ARG": "R",  # Arginine
        "ASN": "N",  # Asparagine
        "ASP": "D",  # Aspartic Acid
        "CYS": "C",  # Cysteine
        "GLN": "Q",  # Glutamine
        "GLU": "E",  # Glutamic Acid
        "GLY": "G",  # Glycine
        "HIS": "H",  # Histidine
        "ILE": "I",  # Isoleucine
        "LEU": "L",  # Leucine
        "LYS": "K",  # Lysine
        "MET": "M",  # Methionine
        "PHE": "F",  # Phenylalanine
        "PRO": "P",  # Proline
        "SER": "S",  # Serine
        "THR": "T",  # Threonine
        "TRP": "W",  # Tryptophan
        "TYR": "Y",  # Tyrosine
        "VAL": "V",  # Valine
        # Common ambiguous or special codes
        "ASX": "B",  # Asparagine or Aspartic Acid
        "GLX": "Z",  # Glutamine or Glutamic Acid
        "XAA": "X",  # Any amino acid
        "SEC": "U",  # Selenocysteine
        "PYL": "O",  # Pyrrolysine
    }

    # Convert the input abbreviation to uppercase to ensure case-insensitivity
    upper_abbr = three_letter_abbr.upper()

    # Look up the one-character code in the map, return "Unknown" if not found
    return amino_acid_map.get(upper_abbr, "Unknown")

def mutate_sequence(atm_grp, res_num_list, new_res=None):
    prot = atm_grp.select("protein")
    df = pd.DataFrame({"resnum" : prot.getResnums(),
                       "resname" : prot.getResnames()})
    df['single'] = df.resname.apply(convert_amino_acid_abbr)
    df.drop_duplicates(subset="resnum",inplace=True)
    if new_res is not None:
        df.loc[[x in res_num_list for x in df.resnum],'single'] = new_res
    return "".join(df.query("single != 'Unknown'").drop_duplicates(subset="resnum").single.values)

In [19]:
prot = prody.parsePDB("8x61")

In [20]:
asite_res = list(set(prot.select("protein and (chid A or chid B) within 3.5 of resname ATP").getResnums()))

In [21]:
asite_res

[195, 36, 37, 38, 39, 40, 41, 42, 11, 43, 136, 139, 141, 142, 15, 86]

Get the wild type sequence

In [22]:
mutate_sequence(prot, asite_res)

'MIRFEHVSKAYLGGRQALQGVTFHMQPGEMAFLTGHSGAGKSTLLKLICGIERPSAGKIWFSGHDITRLKNREVPFLRRQIGMIFQDHHLLMDRTVYDNVAIPLIIAGASGDDIRRRVSAALDKVGLLDKAKNFPIQLSGGEQQRVGIARAVVNKPAVLLADQPTGNLDDALSEGILRLFEEFNRVGVTVLMATHDINLISRRSYRMLTLSDGHLHNEQVRYAFHGALQDLKSKPFATFLTVMVIAISLTLPSVCYMVYKNVNVGRVSAMIGVLMVAAVFLVIGNSVRLSIFARRDSINVQKLIGATDGFILRPFLYGGALLGFSGALLSLILSEILVLRLNGLSFDECLLLLLVCSMIGWVAAWLATVQHLRHFTPLIE'

Get the sequence with the active site residues mutated to **GLY**. 

In [23]:
mutate_sequence(prot, asite_res, 'G')

'MIRFEHVSKAGLGGGQALQGVTFHMQPGEMAFLTGGGGGGGGGLLKLICGIERPSAGKIWFSGHDITRLKNREVPFLRRQIGMIFGDHHLLMDRTVYDNVAIPLIIAGASGDDIRRRVSAALDKVGLLDKAKNFPGQLGGGGQQRVGIARAVVNKPAVLLADQPTGNLDDALSEGILRLFEEFNRVGVTVLMATGDINLISRRSYRMLTLSDGHLHNEQVRYAFHGALQDLKSKPFATFLTVMVIAISLTLPSVCYMVYKNVNVGRVSAMIGVLMVAAVFLVIGNSVRLSIFARRDSINVQKLIGATDGFILRPFLYGGALLGFSGALLSLILSEILVLRLNGLSFDECLLLLLVCSMIGWVAAWLATVQHLRHFTPLIE'

Get the sequence with the active site residues mutated to **PHE**.

In [24]:
mutate_sequence(prot, asite_res, 'F')

'MIRFEHVSKAFLGGFQALQGVTFHMQPGEMAFLTGFFFFFFFFLLKLICGIERPSAGKIWFSGHDITRLKNREVPFLRRQIGMIFFDHHLLMDRTVYDNVAIPLIIAGASGDDIRRRVSAALDKVGLLDKAKNFPFQLFGFFQQRVGIARAVVNKPAVLLADQPTGNLDDALSEGILRLFEEFNRVGVTVLMATFDINLISRRSYRMLTLSDGHLHNEQVRYAFHGALQDLKSKPFATFLTVMVIAISLTLPSVCYMVYKNVNVGRVSAMIGVLMVAAVFLVIGNSVRLSIFARRDSINVQKLIGATDGFILRPFLYGGALLGFSGALLSLILSEILVLRLNGLSFDECLLLLLVCSMIGWVAAWLATVQHLRHFTPLIE'

The SMILES for [ATP](https://www.rcsb.org/ligand/ATP) is `c1nc(c2c(n1)n(cn2)C3C(C(C(O3)COP(=O)(O)OP(=O)(O)OP(=O)(O)O)O)O)N ATP`

In [25]:
!pwd

/Users/pwalters/DATA/BLOG/LILL
