In [None]:
! pip install biopython

Collecting biopython
  Downloading biopython-1.84-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Downloading biopython-1.84-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.2/3.2 MB[0m [31m19.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: biopython
Successfully installed biopython-1.84


In [None]:
from Bio import PDB
import os

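# Expects a single-chain PDB file as input.
# To change which mutants are generated, edit the `mutations` list in the
# `if __name__ == "__main__":` block at the bottom of this cell.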

# Lookup table: one-letter amino acid code -> three-letter residue name as used in PDB files.
# Covers the 20 standard amino acids plus selenocysteine (U) and pyrrolysine (O).
one_to_three_dict = {
    "A": "ALA",  # Alanine
    "R": "ARG",  # Arginine
    "N": "ASN",  # Asparagine
    "D": "ASP",  # Aspartate
    "C": "CYS",  # Cysteine
    "E": "GLU",  # Glutamate
    "Q": "GLN",  # Glutamine
    "G": "GLY",  # Glycine
    "H": "HIS",  # Histidine
    "I": "ILE",  # Isoleucine
    "L": "LEU",  # Leucine
    "K": "LYS",  # Lysine
    "M": "MET",  # Methionine
    "F": "PHE",  # Phenylalanine
    "P": "PRO",  # Proline
    "S": "SER",  # Serine
    "T": "THR",  # Threonine
    "W": "TRP",  # Tryptophan
    "Y": "TYR",  # Tyrosine
    "V": "VAL",  # Valine
    "U": "SEC",  # Selenocysteine
    "O": "PYL",  # Pyrrolysine
}

# Function to mutate the residue
def mutate_residue(structure, mutation):
    """Relabel one residue of ``structure`` as a different amino acid, in place.

    Only the first model (``structure[0]``) is edited. In every chain of that
    model, the first residue whose sequence number matches the mutation is
    replaced by a new residue named after the mutant amino acid. The new
    residue keeps the backbone atoms of the wild-type residue (plus CB unless
    the target is glycine); wild-type side-chain atoms beyond CB are dropped
    because their names are not valid for the new residue type. No new
    side-chain atoms are built.

    Args:
        structure: Bio.PDB structure to modify.
        mutation: Mutation string of the form
            ``<wild-type letter><residue number><mutant letter>``, e.g. "G12A".

    Returns:
        The same ``structure`` object. It is returned unchanged when the
        residue is not found, when the wild-type letter does not match the
        residue in the structure, or when wild type and mutant are identical.

    Raises:
        ValueError: If ``mutation`` is malformed or uses a one-letter code
            that is not in ``one_to_three_dict``.
    """
    # Parse the mutation (e.g., "G12A" -> "G", 12, "A")
    if len(mutation) < 3:
        raise ValueError(f"Invalid mutation {mutation!r}: expected a form like 'G12A'")
    wt_residue = mutation[0]  # Wild-type amino acid
    mut_residue = mutation[-1]  # Mutant amino acid
    try:
        res_id = int(mutation[1:-1])  # Residue number
    except ValueError:
        raise ValueError(f"Invalid mutation {mutation!r}: expected a form like 'G12A'") from None

    # Reject unknown codes up front instead of failing with a KeyError mid-edit
    for code in (wt_residue, mut_residue):
        if code not in one_to_three_dict:
            raise ValueError(f"Invalid mutation {mutation!r}: unknown amino acid code {code!r}")

    if wt_residue == mut_residue:
        print(f"Mutation {mutation} does not change the residue. Skipping mutation.")
        return structure

    model = structure[0]  # Considering the first model of the structure
    mutated = False
    for chain in model:
        # Locate the target before editing so the chain is never modified
        # while it is being iterated.
        target = next((res for res in chain if res.get_id()[1] == res_id), None)
        if target is None:
            continue

        # Ensure that the wild-type amino acid matches the PDB residue
        if target.get_resname() != one_to_three_dict[wt_residue]:
            print(f"Residue at position {res_id} is not {wt_residue}. Skipping mutation.")
            return structure

        # Remember where the residue sits so the mutant takes the same slot
        residue_index = chain.child_list.index(target)
        chain.detach_child(target.get_id())

        # Keep the insertion code and segment id of the residue being replaced
        mutant_residue = PDB.Residue.Residue(
            (' ', res_id, target.get_id()[2]),
            one_to_three_dict[mut_residue],
            target.get_segid(),
        )

        for atom in target:
            if not _keep_atom_for_mutant(atom.get_name(), mut_residue):
                continue
            new_atom = PDB.Atom.Atom(
                atom.get_name(),
                atom.get_coord().copy(),
                atom.get_bfactor(),
                atom.get_occupancy(),
                atom.get_altloc(),
                atom.get_fullname(),
                atom.get_serial_number(),  # atom serial, not the residue number
                element=atom.element,
            )
            mutant_residue.add(new_atom)

        # Entity.insert registers the residue in child_list AND child_dict and
        # sets its parent, so chain[res_id] keeps working after the edit.
        chain.insert(residue_index, mutant_residue)
        print(f"Mutated {wt_residue}{res_id} to {mut_residue}{res_id}")
        mutated = True

    if not mutated:
        print(f"Residue {res_id} not found in the structure. Skipping mutation.")
    return structure


# Heavy atoms carried over unconditionally (OXT is the extra C-terminal oxygen).
_BACKBONE_HEAVY_ATOMS = frozenset({"N", "CA", "C", "O", "OXT"})


def _keep_atom_for_mutant(atom_name, mut_residue):
    """Return True if a wild-type atom with this name is carried over to the mutant residue."""
    if atom_name in _BACKBONE_HEAVY_ATOMS:
        return True
    if atom_name == "CB":
        return mut_residue != "G"  # Glycine has no CB
    if atom_name == "H":
        return mut_residue != "P"  # Proline does not have a hydrogen atom on the nitrogen
    if atom_name == "HA":
        return mut_residue != "G"  # Glycine names its alpha hydrogens HA2/HA3
    # Everything else is wild-type side chain; its names do not fit the new residue
    return False

# Generate mutant PDB files for a list of mutations
def generate_mutant_pdbs(input_pdb_file, mutations, output_dir):
    """Write one mutant PDB file per mutation that actually changes the structure.

    For every entry in ``mutations`` the input file is parsed afresh, passed
    to ``mutate_residue`` and, if a residue name changed, saved as
    ``mutant_<mutation>.pdb`` inside ``output_dir``. Mutations that leave the
    structure untouched (position not found, wild-type mismatch) are reported
    and no file is written for them, so a copy of the wild type is never
    stored under a mutant's name.

    Args:
        input_pdb_file: Path of the wild-type PDB file.
        mutations: Iterable of mutation strings such as "G12A".
        output_dir: Directory that receives the mutant files; created if missing.

    Raises:
        FileNotFoundError: If ``input_pdb_file`` is not an existing file.
    """
    # Fail before creating anything if the input is missing
    if not os.path.isfile(input_pdb_file):
        raise FileNotFoundError(f"Input PDB file not found: {input_pdb_file}")

    # exist_ok avoids the check-then-create race of a separate exists() test
    os.makedirs(output_dir, exist_ok=True)

    parser = PDB.PDBParser(QUIET=True)
    io = PDB.PDBIO()

    for mutation in mutations:
        # Reload the structure for each mutation to avoid cumulative changes
        structure = parser.get_structure('input', input_pdb_file)
        names_before = [res.get_resname() for res in structure[0].get_residues()]

        # Mutate the structure
        mutated_structure = mutate_residue(structure, mutation)
        names_after = [res.get_resname() for res in mutated_structure[0].get_residues()]

        # Identical residue names mean the mutation was skipped
        if names_after == names_before:
            print(f"No residue was changed for {mutation}; no PDB file written.")
            continue

        # Save the mutated pdb
        output_pdb = os.path.join(output_dir, f"mutant_{mutation}.pdb")
        io.set_structure(mutated_structure)
        io.save(output_pdb)
        print(f"Mutant PDB saved: {output_pdb}")

if __name__ == "__main__":
    # Wild-type structure to read, and the folder that receives the mutant files
    wild_pdb = "wild.pdb"
    output_dir = "mutant_pdbs"

    # Point mutations in <wild type><position><mutant> notation (e.g., G12A)
    mutations = [
        "G12A", "G12D", "G12R", "G12V", "A146T",
        "A59G", "D33E", "G13D", "Q61L", "V14I",
    ]

    # Build the mutant PDB files
    generate_mutant_pdbs(input_pdb_file=wild_pdb, mutations=mutations, output_dir=output_dir)



Mutated G12 to A12
Mutant PDB saved: mutant_pdbs/mutant_G12A.pdb
Mutated G12 to D12
Mutant PDB saved: mutant_pdbs/mutant_G12D.pdb
Mutated G12 to R12
Mutant PDB saved: mutant_pdbs/mutant_G12R.pdb
Mutated G12 to V12
Mutant PDB saved: mutant_pdbs/mutant_G12V.pdb
Mutated A146 to T146
Mutant PDB saved: mutant_pdbs/mutant_A146T.pdb
Mutated A59 to G59
Mutant PDB saved: mutant_pdbs/mutant_A59G.pdb
Mutated D33 to E33
Mutant PDB saved: mutant_pdbs/mutant_D33E.pdb
Mutated G13 to D13
Mutant PDB saved: mutant_pdbs/mutant_G13D.pdb
Mutated Q61 to L61
Mutant PDB saved: mutant_pdbs/mutant_Q61L.pdb
Mutated V14 to I14
Mutant PDB saved: mutant_pdbs/mutant_V14I.pdb


In [None]:
import shutil
import os

def zip_directory(directory_path, output_zip):
    """Pack the contents of ``directory_path`` into ``<output_zip>.zip``.

    Args:
        directory_path: Directory whose contents go into the archive.
        output_zip: Archive path without the ``.zip`` extension.
    """
    shutil.make_archive(base_name=output_zip, format='zip', root_dir=directory_path)
    summary = f"Directory {directory_path} zipped to {output_zip}.zip"
    print(summary)

if __name__ == "__main__":
    # Archive base name (the ".zip" extension is appended by zip_directory)
    output_zip = "mutant_pdbs"
    # Folder holding the generated mutant PDB files
    directory_path = "mutant_pdbs"

    zip_directory(directory_path=directory_path, output_zip=output_zip)

Directory mutant_pdbs zipped to mutant_pdbs.zip


**Detailed explanation of the script:**

### Overview
The script automates the mutation of amino acid residues in a protein structure. It reads an input PDB file, applies specified mutations, and saves the mutated structures as new PDB files.

### Key Components

1. **Imports and Amino Acid Mapping**
   ```python
   from Bio import PDB
   import os

   one_to_three_dict = {
       "A": "ALA", "R": "ARG", "N": "ASN", "D": "ASP", "C": "CYS",
       "E": "GLU", "Q": "GLN", "G": "GLY", "H": "HIS", "I": "ILE",
       "L": "LEU", "K": "LYS", "M": "MET", "F": "PHE", "P": "PRO",
       "S": "SER", "T": "THR", "W": "TRP", "Y": "TYR", "V": "VAL",
       "U": "SEC",  # Selenocysteine
       "O": "PYL"   # Pyrrolysine
   }
   ```
   - **Imports**: The script uses the `Bio.PDB` module from Biopython for handling PDB files and `os` for file operations.
   - **Amino Acid Mapping**: A dictionary maps one-letter amino acid codes to three-letter codes.

2. **Mutation Function**
   ```python
   def mutate_residue(structure, mutation):
       wt_residue = mutation[0]  # Wild-type amino acid
       mut_residue = mutation[-1]  # Mutant amino acid
       res_id = int(mutation[1:-1])  # Residue number

       model = structure[0]  # Considering the first model of the structure
       for chain in model:
           for residue in chain:
               if residue.get_id()[1] == res_id:
                   if residue.get_resname() != one_to_three_dict[wt_residue]:
                       print(f"Residue at position {res_id} is not {wt_residue}. Skipping mutation.")
                       return structure

                   residue_index = chain.child_list.index(residue)
                   chain.detach_child(residue.get_id())
                   mutant_residue = PDB.Residue.Residue((' ', res_id, ' '), one_to_three_dict[mut_residue], '')

                   for atom in residue:
                       if mut_residue == 'G' and atom.get_name() == 'CB':
                           continue
                       if mut_residue == 'P' and atom.get_name() == 'H':
                           continue
                       if mut_residue == 'C' and atom.get_name() == 'HG':
                           continue
                       if mut_residue == 'H' and atom.get_name() in ['HD1', 'HE2']:
                           continue

                       new_atom = PDB.Atom.Atom(atom.get_name(), atom.get_coord(), atom.get_bfactor(), atom.get_occupancy(), atom.get_altloc(), atom.get_fullname(), res_id, element=atom.element)
                       mutant_residue.add(new_atom)

                   chain.child_list.insert(residue_index, mutant_residue)
                   print(f"Mutated {wt_residue}{res_id} to {mut_residue}{res_id}")
                   break
       return structure
   ```
   - **Function Definition**: `mutate_residue` takes a structure and a mutation string (e.g., "G12A").
   - **Parsing Mutation**: Extracts the wild-type residue, mutant residue, and residue number from the mutation string.
   - **Model and Chain**: Considers the first model of the structure and iterates through chains and residues.
   - **Residue Matching**: Checks if the residue at the specified position matches the wild-type residue.
   - **Mutation Process**:
     - Stores the index of the original residue.
     - Removes the original residue and creates a new mutant residue.
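     - Copies atoms from the wild-type residue into the new residue, skipping atoms that do not belong on the target residue (for example, CB when mutating to glycine, or the backbone amide H when mutating to proline).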
     - Inserts the new residue at the correct position in the chain.

3. **Generating Mutant PDB Files**
   ```python
   def generate_mutant_pdbs(input_pdb_file, mutations, output_dir):
       parser = PDB.PDBParser(QUIET=True)
       structure = parser.get_structure('input', input_pdb_file)

       if not os.path.exists(output_dir):
           os.makedirs(output_dir)

       for mutation in mutations:
           structure = parser.get_structure('input', input_pdb_file)
           mutated_structure = mutate_residue(structure, mutation)

           output_pdb = os.path.join(output_dir, f"mutant_{mutation}.pdb")
           io = PDB.PDBIO()
           io.set_structure(mutated_structure)
           io.save(output_pdb)
           print(f"Mutant PDB saved: {output_pdb}")
   ```
   - **Function Definition**: `generate_mutant_pdbs` takes an input PDB file, a list of mutations, and an output directory.
   - **PDB Parsing**: Loads the input structure using `PDBParser`.
   - **Output Directory**: Creates the output directory if it doesn't exist.
   - **Mutation Loop**: Iterates through the list of mutations:
     - Reloads the structure for each mutation to avoid cumulative changes.
     - Applies the mutation using `mutate_residue`.
     - Saves the mutated structure to a new PDB file.

4. **Main Execution**
   ```python
   if __name__ == "__main__":
       mutations = ["G12A", "G12D", "G12R", "G12V", "A146T", "A59G", "D33E", "G13D", "Q61L", "V14I"]
       wild_pdb = "wild.pdb"
       output_dir = "mutant_pdbs"
       generate_mutant_pdbs(wild_pdb, mutations, output_dir)
   ```
   - **Mutation List**: Specifies the list of mutations to apply.
   - **Input and Output**: Defines the input PDB file and output directory.
   - **Generate Mutants**: Calls `generate_mutant_pdbs` to create the mutated PDB files.

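The generated files are named `mutant_<mutation>.pdb` and are placed in the output directory. Note that the script does not build new side-chain atoms: each mutant residue is assembled from atoms copied from the wild-type residue, so treat the output files as starting models and rebuild or refine the mutated side chain with a dedicated modelling tool before further analysis.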