In [None]:
pip install biopython

Collecting biopython
  Downloading biopython-1.84-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Downloading biopython-1.84-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.2/3.2 MB[0m [31m12.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: biopython
Successfully installed biopython-1.84


In [None]:
pip install --upgrade tqdm



In [None]:
from Bio.PDB import PDBParser, Selection, NeighborSearch
from Bio.PDB.Polypeptide import is_aa
from tqdm import tqdm
import torch

In [None]:
# Install PyMOL from the system package manager via an IPython `!` shell escape.
# The recorded apt log lists python3-pymol among the extra packages; presumably that
# is what provides the `pymol2` module imported in the next cell — TODO confirm.
!apt-get install -y pymol

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
The following additional packages will be installed:
  apbs apbs-data freeglut3 libapbs3 libevdev2 libglu1-mesa libgudev-1.0-0 libinput-bin libinput10
  libmaloc1 libmd4c0 libmtdev1 libqt5core5a libqt5dbus5 libqt5designer5 libqt5gui5 libqt5help5
  libqt5network5 libqt5opengl5 libqt5printsupport5 libqt5sql5 libqt5sql5-sqlite libqt5svg5
  libqt5test5 libqt5widgets5 libqt5xml5 libwacom-bin libwacom-common libwacom9 libxcb-icccm4
  libxcb-image0 libxcb-keysyms1 libxcb-render-util0 libxcb-util1 libxcb-xinerama0 libxcb-xinput0
  libxcb-xkb1 libxkbcommon-x11-0 pymol-data python3-numpy python3-opengl python3-pymol
  python3-pyqt5 python3-pyqt5.qtopengl python3-pyqt5.sip qt5-gtk-platformtheme qttranslations5-l10n
Suggested packages:
  qt5-image-formats-plugins qtwayland5 python-numpy-doc python3-pytest libgle3
The following NEW packages will be installed:
  apbs apbs-data freeglut3 libapbs3 libevdev

In [None]:
# Mutate every glycine of a structure to alanine with PyMOL's mutagenesis wizard.
import pymol2

# Input structure and destination of the mutated copy
pdb_file_path = "/content/2zffg.pdb"
output_file_path = "/content/2zffg_alanine.pdb"

# Create an isolated PyMOL session (closed automatically when the block exits)
with pymol2.PyMOL() as pymol:
    cmd = pymol.cmd
    cmd.reinitialize()

    # Load the structure file
    cmd.load(pdb_file_path)

    # get_model() returns atoms, not residues. Restrict the selection to CA atoms and
    # de-duplicate on (chain, resi) so that each glycine is mutated exactly once
    # (the previous per-atom loop re-ran the wizard for every atom of a residue).
    glycine_sites = list(dict.fromkeys(
        (atom.chain, atom.resi)
        for atom in cmd.get_model("resn GLY and name CA").atom
    ))

    # Open the wizard once and reuse it for every site
    cmd.wizard("mutagenesis")
    cmd.refresh_wizard()

    for chain, residue_num in glycine_sites:
        # Selection in the format "resi X and chain Y"; the chain clause is dropped
        # for structures without chain identifiers, where it would be malformed.
        selection_str = f"resi {residue_num}"
        if chain:
            selection_str += f" and chain {chain}"

        wizard = cmd.get_wizard()
        wizard.do_select(selection_str)
        wizard.set_mode("ALA")
        wizard.apply()
        # No cmd.delete() here: `delete` removes named objects/selections, not atoms,
        # and the wizard already replaces the residue in place.

    # Close the wizard so none of its temporary state is left in the session
    cmd.set_wizard()

    # Save the mutated structure
    cmd.save(output_file_path)


In [None]:
# Final_all (3His)
import pandas as pd
import numpy as np
from Bio.PDB import PDBParser
import itertools
import os

# Input structure and the files written by each filtering stage
pdb_file = "/content/OmpF_dimer_alanine.pdb"
output_file_distances = "/content/OmpF_dimer_alanine_132_distances_tight.xlsx"
output_file_ratio = "/content/OmpF_dimer_alanine_132_ratio_tight.xlsx"
output_file_angles = "/content/OmpF_dimer_alanine_132_angles_tight.xlsx"
output_file_vector_angles_final_filter = "/content/OmpF_dimer_alanine_132_filter_tight.xlsx"
pymol_script_file = "/content/OmpF_dimer_alanine_132_output_tight.pml"

# Parse the structure (parser warnings silenced) and work on its first model
parser = PDBParser(QUIET=True)
structure = parser.get_structure('protein', pdb_file)
model = structure[0]

# Keep only residues whose id starts with a blank field (printed as `het= ` in the
# residue repr), i.e. drop entries flagged as hetero residues.
residues = []
for candidate in model.get_residues():
    if candidate.get_id()[0] == ' ':
        residues.append(candidate)

# Function to calculate angle between residue vectors
def calculate_angle(residue1, residue2):
    """Return the angle in degrees between the CA->CB vectors of two residues.

    Each residue must support ``has_id(name)`` and ``residue[name].coord``.
    A residue without a CB atom contributes a zero vector, in which case the
    angle is undefined and ``np.nan`` is returned.
    """
    def _ca_to_cb(residue):
        # Zero vector marks "no side-chain direction available"
        if not residue.has_id('CB'):
            return np.zeros(3)
        return residue['CB'].coord - residue['CA'].coord

    direction_1 = _ca_to_cb(residue1)
    direction_2 = _ca_to_cb(residue2)

    inner = np.dot(direction_1, direction_2)
    scale = np.linalg.norm(direction_1) * np.linalg.norm(direction_2)
    if scale == 0:
        return np.nan

    # Clip guards arccos against rounding just outside [-1, 1]
    return np.degrees(np.arccos(np.clip(inner / scale, -1.0, 1.0)))

# Filter combinations: keep every residue triplet that contains the anchor residue.
# Matching is by residue number only, so the anchor is matched in every chain.
anchor_residue_number = 83
combinations = itertools.combinations(residues, 3)
selected_combinations = [
    comb for comb in combinations
    if any(res.get_id()[1] == anchor_residue_number for res in comb)
]

# Distance ranges: accepted CA-CA (alpha) and CB-CB (beta) separations,
# in the units of the PDB coordinates
alpha_distance_range = (5.6, 10.4)
beta_distance_range = (5.7, 9.3)

# Accepted CA-CA / CB-CB distance ratio for every residue pair
ratio_range = (0.9, 1.2)

# Angle threshold ranges (degrees) for the CA->CB vector angle of each residue pair
angle_threshold_1_2_range = (15, 110)
angle_threshold_1_3_range = (15, 110)
angle_threshold_2_3_range = (15, 110)

# Step 1: Distance Filter
filtered_data_distances = []

for combination in selected_combinations:
    alpha_distances, beta_distances = [], []

    try:
        for res1, res2 in itertools.combinations(combination, 2):
            # A pair can only be measured when both residues have CA and CB atoms
            if not (res1.has_id('CA') and res2.has_id('CA')):
                continue
            if not (res1.has_id('CB') and res2.has_id('CB')):
                continue

            alpha_distance = np.linalg.norm(res1['CA'].coord - res2['CA'].coord)
            beta_distance = np.linalg.norm(res1['CB'].coord - res2['CB'].coord)

            if (alpha_distance_range[0] <= alpha_distance <= alpha_distance_range[1] and
                    beta_distance_range[0] <= beta_distance <= beta_distance_range[1]):
                alpha_distances.append(alpha_distance)
                beta_distances.append(beta_distance)

        # A triplet passes only when all three of its pairs are in range
        if len(alpha_distances) >= 3 and len(beta_distances) >= 3:
            record = {'PDB_ID': pdb_file, 'Combination': combination}
            for position, member in enumerate(combination, start=1):
                full_id = member.get_full_id()
                record[f'Coord_chain_id_number{position}'] = full_id[2]
                record[f'Coord_residue_number{position}'] = full_id[3][1]
                record[f'Coord_residue_name{position}'] = member.get_resname()
                record[f'Coord_atom_name{position}'] = 'CA'
            for position in range(3):
                record[f'Alpha Distance {position + 1}'] = alpha_distances[position]
                record[f'Beta Distance {position + 1}'] = beta_distances[position]
            filtered_data_distances.append(record)

    except KeyError as e:
        print(f"Error processing combination {combination}: {e}")

# Create DataFrame for distances
column_order = [
    'PDB_ID',
    'Combination',
    'Coord_chain_id_number1', 'Coord_residue_number1', 'Coord_residue_name1', 'Coord_atom_name1',
    'Coord_chain_id_number2', 'Coord_residue_number2', 'Coord_residue_name2', 'Coord_atom_name2',
    'Coord_chain_id_number3', 'Coord_residue_number3', 'Coord_residue_name3', 'Coord_atom_name3',
    'Alpha Distance 1', 'Alpha Distance 2', 'Alpha Distance 3',
    'Beta Distance 1', 'Beta Distance 2', 'Beta Distance 3'
]

# Passing `columns=` keeps the expected columns even when no triplet survived,
# where selecting df[column_order] on a column-less frame would raise KeyError.
df_distances = pd.DataFrame(filtered_data_distances, columns=column_order)
df_distances.to_excel(output_file_distances, index=False)

# Step 2: Ratio Filter
filtered_data_ratio = []

for idx, row in df_distances.iterrows():
    ratios = [row[f'Alpha Distance {n}'] / row[f'Beta Distance {n}'] for n in (1, 2, 3)]

    # Keep the triplet only if every pair's ratio lies within ratio_range
    if all(ratio_range[0] <= ratio <= ratio_range[1] for ratio in ratios):
        filtered_data_ratio.append(row)

# Create DataFrame for ratio
df_ratio = pd.DataFrame(filtered_data_ratio, columns=column_order)
df_ratio.to_excel(output_file_ratio, index=False)

# Step 3: Angle Filter
filtered_data_angles = []

for idx, row in df_ratio.iterrows():
    combination = row['Combination']
    angle_1_2 = calculate_angle(combination[0], combination[1])
    angle_1_3 = calculate_angle(combination[0], combination[2])
    angle_2_3 = calculate_angle(combination[1], combination[2])

    # NaN angles (a residue without CB) fail every comparison and are dropped
    if (angle_threshold_1_2_range[0] <= angle_1_2 <= angle_threshold_1_2_range[1] and
            angle_threshold_1_3_range[0] <= angle_1_3 <= angle_threshold_1_3_range[1] and
            angle_threshold_2_3_range[0] <= angle_2_3 <= angle_threshold_2_3_range[1]):
        row['Angle_1_2'] = angle_1_2
        row['Angle_1_3'] = angle_1_3
        row['Angle_2_3'] = angle_2_3
        filtered_data_angles.append(row)

# Create DataFrame for angles
df_angles = pd.DataFrame(
    filtered_data_angles,
    columns=column_order + ['Angle_1_2', 'Angle_1_3', 'Angle_2_3'],
)
df_angles.to_excel(output_file_angles, index=False)

# Step 4: Vector Angle Filter
def calculate_vector_angle(vector1, vector2):
    """Return the angle in degrees between two vectors.

    Returns ``np.nan`` when either vector has zero length, since the angle is
    then undefined.
    """
    inner = np.dot(vector1, vector2)
    scale = np.linalg.norm(vector1) * np.linalg.norm(vector2)
    if scale == 0:
        return np.nan
    # Clip guards arccos against rounding just outside [-1, 1]
    return np.degrees(np.arccos(np.clip(inner / scale, -1.0, 1.0)))

def process_row(row):
    """Compute the CA-CA versus CB-CB vector angles for one candidate triplet.

    For each residue pair (1-2, 1-3, 2-3) of ``row['Combination']`` the angle in
    degrees between the CA->CA vector and the CB->CB vector is computed with
    ``calculate_vector_angle``. A residue without a CB atom falls back to its CA atom.

    Returns:
        A list of three values in pair order. An entry is ``None`` when its pair
        raised ``KeyError``; the whole list is ``[None, None, None]`` when the PDB
        file is missing or the row has no 'Combination' entry.
    """
    pdb_file_path = pdb_file
    if not os.path.isfile(pdb_file_path):
        print(f"PDB file not found: {pdb_file_path}")
        return [None, None, None]

    # The residues stored in the row already carry their coordinates, so the
    # structure is not parsed again here (the former per-row parse was never used).
    try:
        residues = row['Combination']
        print(f"Residues loaded: {residues}")

        # Initialize the list of angles
        vector_angles = []

        # Pairs to calculate vector angles between
        pairs = [(0, 1), (0, 2), (1, 2)]

        for i, j in pairs:
            try:
                CA1 = residues[i]['CA']
                CA2 = residues[j]['CA']
                CB1 = residues[i]['CB'] if 'CB' in residues[i] else CA1
                CB2 = residues[j]['CB'] if 'CB' in residues[j] else CA2

                vector_CA = CA2.coord - CA1.coord
                vector_CB = CB2.coord - CB1.coord

                angle = calculate_vector_angle(vector_CA, vector_CB)
                vector_angles.append(angle)
            except KeyError as e:
                print(f"KeyError for residues {residues[i]} and {residues[j]}: {e}")
                vector_angles.append(None)

        return vector_angles
    except KeyError as e:
        print(f"KeyError: {e}")
        return [None, None, None]

# Apply the function to each row and store the results.
# Built row by row so that an empty df_angles still yields the three columns.
vector_angle_columns = ['Vector_Angle_1_2', 'Vector_Angle_1_3', 'Vector_Angle_2_3']
vector_angle_results = pd.DataFrame(
    [process_row(row) for _, row in df_angles.iterrows()],
    index=df_angles.index,
    columns=vector_angle_columns,
)
df_angles[vector_angle_columns] = vector_angle_results

# Set angle threshold ranges for vector angles (exclusive bounds, degrees)
angle_thresholds = {
    'Vector_Angle_1_2': (0, 15),
    'Vector_Angle_1_3': (0, 15),
    'Vector_Angle_2_3': (0, 15)
}

# Create filter columns based on angle thresholds; missing angles (None/NaN) fail
for col, (low, high) in angle_thresholds.items():
    values = pd.to_numeric(df_angles[col], errors='coerce')
    df_angles[f'{col}_Filter'] = (values > low) & (values < high)

# Combine filters
df_angles['Vector_Angle_Filter'] = df_angles[[f'{col}_Filter' for col in angle_thresholds.keys()]].all(axis=1)

# Final filter application based on vector angles.
# .copy() makes the result an independent frame, so adding a column below does not
# trigger pandas' SettingWithCopyWarning.
df_final_filter = df_angles.loc[df_angles['Vector_Angle_Filter'].astype(bool)].copy()

# Save the final filtered DataFrame to an Excel file
df_final_filter.to_excel(output_file_vector_angles_final_filter, index=False)

# Generate PyMOL script file
pymol_script_commands = []

# Adding a sequential combination number starting from 1
df_final_filter['Combination_Number'] = range(1, len(df_final_filter) + 1)

# PyMOL names a loaded object after the file's base name without extension; the
# selection macro /object//chain/resi needs that name, not the file path.
pymol_object_name = os.path.splitext(os.path.basename(pdb_file))[0]

for index, row in df_final_filter.iterrows():
    combination = row['Combination']
    chain1, res1 = combination[0].get_full_id()[2], combination[0].get_full_id()[3][1]
    chain2, res2 = combination[1].get_full_id()[2], combination[1].get_full_id()[3][1]
    chain3, res3 = combination[2].get_full_id()[2], combination[2].get_full_id()[3][1]

    selection_name = f"obj{row['Combination_Number']:02d}"
    pymol_script_commands.append(f"select {selection_name}, (chain {chain1} and resi {res1}) or (chain {chain2} and resi {res2}) or (chain {chain3} and resi {res3})")
    pymol_script_commands.append(f"create {selection_name}_residue1, /{pymol_object_name}//{chain1}/{res1}")
    pymol_script_commands.append(f"create {selection_name}_residue2, /{pymol_object_name}//{chain2}/{res2}")
    pymol_script_commands.append(f"create {selection_name}_residue3, /{pymol_object_name}//{chain3}/{res3}")

# Save PyMOL commands to file (the structure must already be loaded in PyMOL when the script runs)
with open(pymol_script_file, 'w') as f:
    f.write("# PyMOL script for visualizing filtered residue combinations\n\n")
    for command in pymol_script_commands:
        f.write(command + '\n')

print(f"\nPyMOL script saved to {pymol_script_file}")

Residues loaded: (<Residue ALA het=  resseq=41 icode= >, <Residue GLN het=  resseq=60 icode= >, <Residue LEU het=  resseq=83 icode= >)
Residues loaded: (<Residue ALA het=  resseq=41 icode= >, <Residue LEU het=  resseq=83 icode= >, <Residue ALA het=  resseq=99 icode= >)
Residues loaded: (<Residue GLN het=  resseq=60 icode= >, <Residue LEU het=  resseq=83 icode= >, <Residue PHE het=  resseq=85 icode= >)
Residues loaded: (<Residue GLN het=  resseq=60 icode= >, <Residue LEU het=  resseq=83 icode= >, <Residue ALA het=  resseq=99 icode= >)
Residues loaded: (<Residue GLN het=  resseq=60 icode= >, <Residue LEU het=  resseq=83 icode= >, <Residue ALA het=  resseq=99 icode= >)

PyMOL script saved to /content/OmpF_dimer_alanine_132_output_tight.pml


In [None]:
# Final_all (2His/1Glu)
import pandas as pd
import numpy as np
from Bio.PDB import PDBParser
import itertools
import os

# Input structure and the files written by each filtering stage
pdb_file = "/content/3iq6_alanine.pdb"
output_file_distances = "/content/3iq6_ala_89_distances_tight.xlsx"
output_file_ratio = "/content/3iq6_ala_89_ratio_tight.xlsx"
output_file_angles = "/content/3iq6_ala_89_angles_tight.xlsx"
output_file_vector_angles_final_filter = "/content/3iq6_ala_89_filter_tight.xlsx"
# File name fixed from "..._utput_tight.pml" to match the naming used by the other runs
pymol_script_file = "/content/3iq6_ala_89_output_tight.pml"

# Parse the structure (parser warnings silenced) and work on its first model
parser = PDBParser(QUIET=True)
structure = parser.get_structure('protein', pdb_file)
model = structure[0]

# Keep only residues whose id starts with a blank field (printed as `het= ` in the
# residue repr), i.e. drop entries flagged as hetero residues.
residues = [residue for residue in model.get_residues() if residue.get_id()[0] == ' ']

# Function to calculate angle between residue vectors
def calculate_angle(residue1, residue2):
    """Return the angle in degrees between the CA->CB vectors of two residues.

    Each residue must support ``has_id(name)`` and ``residue[name].coord``.
    A residue without a CB atom contributes a zero vector, in which case the
    angle is undefined and ``np.nan`` is returned.
    """
    def _ca_to_cb(residue):
        # Zero vector marks "no side-chain direction available"
        if not residue.has_id('CB'):
            return np.zeros(3)
        return residue['CB'].coord - residue['CA'].coord

    direction_1 = _ca_to_cb(residue1)
    direction_2 = _ca_to_cb(residue2)

    inner = np.dot(direction_1, direction_2)
    scale = np.linalg.norm(direction_1) * np.linalg.norm(direction_2)
    if scale == 0:
        return np.nan

    # Clip guards arccos against rounding just outside [-1, 1]
    return np.degrees(np.arccos(np.clip(inner / scale, -1.0, 1.0)))

# Filter combinations: keep every residue triplet that contains the anchor residue.
# Matching is by residue number only, so the anchor is matched in every chain.
anchor_residue_number = 89
combinations = itertools.combinations(residues, 3)
selected_combinations = [
    comb for comb in combinations
    if any(res.get_id()[1] == anchor_residue_number for res in comb)
]

# Distance ranges: accepted CA-CA (alpha) and CB-CB (beta) separations,
# in the units of the PDB coordinates
alpha_distance_range = (3.8, 13)
beta_distance_range = (4.1, 10.9)

# Accepted CA-CA / CB-CB distance ratio for every residue pair
ratio_range = (0.9, 1.3)

# Angle threshold ranges (degrees) for the CA->CB vector angle of each residue pair
angle_threshold_1_2_range = (1.5, 125)
angle_threshold_1_3_range = (1.5, 125)
angle_threshold_2_3_range = (1.5, 125)

# Step 1: Distance Filter
filtered_data_distances = []

for combination in selected_combinations:
    alpha_distances, beta_distances = [], []

    try:
        for res1, res2 in itertools.combinations(combination, 2):
            # A pair can only be measured when both residues have CA and CB atoms
            if not (res1.has_id('CA') and res2.has_id('CA')):
                continue
            if not (res1.has_id('CB') and res2.has_id('CB')):
                continue

            alpha_distance = np.linalg.norm(res1['CA'].coord - res2['CA'].coord)
            beta_distance = np.linalg.norm(res1['CB'].coord - res2['CB'].coord)

            if (alpha_distance_range[0] <= alpha_distance <= alpha_distance_range[1] and
                    beta_distance_range[0] <= beta_distance <= beta_distance_range[1]):
                alpha_distances.append(alpha_distance)
                beta_distances.append(beta_distance)

        # A triplet passes only when all three of its pairs are in range
        if len(alpha_distances) >= 3 and len(beta_distances) >= 3:
            record = {'PDB_ID': pdb_file, 'Combination': combination}
            for position, member in enumerate(combination, start=1):
                full_id = member.get_full_id()
                record[f'Coord_chain_id_number{position}'] = full_id[2]
                record[f'Coord_residue_number{position}'] = full_id[3][1]
                record[f'Coord_residue_name{position}'] = member.get_resname()
                record[f'Coord_atom_name{position}'] = 'CA'
            for position in range(3):
                record[f'Alpha Distance {position + 1}'] = alpha_distances[position]
                record[f'Beta Distance {position + 1}'] = beta_distances[position]
            filtered_data_distances.append(record)

    except KeyError as e:
        print(f"Error processing combination {combination}: {e}")

# Create DataFrame for distances
column_order = [
    'PDB_ID',
    'Combination',
    'Coord_chain_id_number1', 'Coord_residue_number1', 'Coord_residue_name1', 'Coord_atom_name1',
    'Coord_chain_id_number2', 'Coord_residue_number2', 'Coord_residue_name2', 'Coord_atom_name2',
    'Coord_chain_id_number3', 'Coord_residue_number3', 'Coord_residue_name3', 'Coord_atom_name3',
    'Alpha Distance 1', 'Alpha Distance 2', 'Alpha Distance 3',
    'Beta Distance 1', 'Beta Distance 2', 'Beta Distance 3'
]

# Passing `columns=` keeps the expected columns even when no triplet survived,
# where selecting df[column_order] on a column-less frame would raise KeyError.
df_distances = pd.DataFrame(filtered_data_distances, columns=column_order)
df_distances.to_excel(output_file_distances, index=False)

# Step 2: Ratio Filter
filtered_data_ratio = []

for idx, row in df_distances.iterrows():
    ratios = [row[f'Alpha Distance {n}'] / row[f'Beta Distance {n}'] for n in (1, 2, 3)]

    # Keep the triplet only if every pair's ratio lies within ratio_range
    if all(ratio_range[0] <= ratio <= ratio_range[1] for ratio in ratios):
        filtered_data_ratio.append(row)

# Create DataFrame for ratio
df_ratio = pd.DataFrame(filtered_data_ratio, columns=column_order)
df_ratio.to_excel(output_file_ratio, index=False)

# Step 3: Angle Filter
filtered_data_angles = []

for idx, row in df_ratio.iterrows():
    combination = row['Combination']
    angle_1_2 = calculate_angle(combination[0], combination[1])
    angle_1_3 = calculate_angle(combination[0], combination[2])
    angle_2_3 = calculate_angle(combination[1], combination[2])

    # NaN angles (a residue without CB) fail every comparison and are dropped
    if (angle_threshold_1_2_range[0] <= angle_1_2 <= angle_threshold_1_2_range[1] and
            angle_threshold_1_3_range[0] <= angle_1_3 <= angle_threshold_1_3_range[1] and
            angle_threshold_2_3_range[0] <= angle_2_3 <= angle_threshold_2_3_range[1]):
        row['Angle_1_2'] = angle_1_2
        row['Angle_1_3'] = angle_1_3
        row['Angle_2_3'] = angle_2_3
        filtered_data_angles.append(row)

# Create DataFrame for angles
df_angles = pd.DataFrame(
    filtered_data_angles,
    columns=column_order + ['Angle_1_2', 'Angle_1_3', 'Angle_2_3'],
)
df_angles.to_excel(output_file_angles, index=False)

# Step 4: Vector Angle Filter
def calculate_vector_angle(vector1, vector2):
    """Return the angle in degrees between two vectors.

    Returns ``np.nan`` when either vector has zero length, since the angle is
    then undefined.
    """
    inner = np.dot(vector1, vector2)
    scale = np.linalg.norm(vector1) * np.linalg.norm(vector2)
    if scale == 0:
        return np.nan
    # Clip guards arccos against rounding just outside [-1, 1]
    return np.degrees(np.arccos(np.clip(inner / scale, -1.0, 1.0)))

def process_row(row):
    """Compute the CA-CA versus CB-CB vector angles for one candidate triplet.

    For each residue pair (1-2, 1-3, 2-3) of ``row['Combination']`` the angle in
    degrees between the CA->CA vector and the CB->CB vector is computed with
    ``calculate_vector_angle``. A residue without a CB atom falls back to its CA atom.

    Returns:
        A list of three values in pair order. An entry is ``None`` when its pair
        raised ``KeyError``; the whole list is ``[None, None, None]`` when the PDB
        file is missing or the row has no 'Combination' entry.
    """
    pdb_file_path = pdb_file
    if not os.path.isfile(pdb_file_path):
        print(f"PDB file not found: {pdb_file_path}")
        return [None, None, None]

    # The residues stored in the row already carry their coordinates, so the
    # structure is not parsed again here (the former per-row parse was never used).
    try:
        residues = row['Combination']
        print(f"Residues loaded: {residues}")

        # Initialize the list of angles
        vector_angles = []

        # Pairs to calculate vector angles between
        pairs = [(0, 1), (0, 2), (1, 2)]

        for i, j in pairs:
            try:
                CA1 = residues[i]['CA']
                CA2 = residues[j]['CA']
                CB1 = residues[i]['CB'] if 'CB' in residues[i] else CA1
                CB2 = residues[j]['CB'] if 'CB' in residues[j] else CA2

                vector_CA = CA2.coord - CA1.coord
                vector_CB = CB2.coord - CB1.coord

                angle = calculate_vector_angle(vector_CA, vector_CB)
                vector_angles.append(angle)
            except KeyError as e:
                print(f"KeyError for residues {residues[i]} and {residues[j]}: {e}")
                vector_angles.append(None)

        return vector_angles
    except KeyError as e:
        print(f"KeyError: {e}")
        return [None, None, None]

# Apply the function to each row and store the results.
# Built row by row so that an empty df_angles still yields the three columns.
vector_angle_columns = ['Vector_Angle_1_2', 'Vector_Angle_1_3', 'Vector_Angle_2_3']
vector_angle_results = pd.DataFrame(
    [process_row(row) for _, row in df_angles.iterrows()],
    index=df_angles.index,
    columns=vector_angle_columns,
)
df_angles[vector_angle_columns] = vector_angle_results

# Set angle threshold ranges for vector angles (exclusive bounds, degrees)
angle_thresholds = {
    'Vector_Angle_1_2': (0, 20),
    'Vector_Angle_1_3': (0, 20),
    'Vector_Angle_2_3': (0, 20)
}

# Create filter columns based on angle thresholds; missing angles (None/NaN) fail
for col, (low, high) in angle_thresholds.items():
    values = pd.to_numeric(df_angles[col], errors='coerce')
    df_angles[f'{col}_Filter'] = (values > low) & (values < high)

# Combine filters
df_angles['Vector_Angle_Filter'] = df_angles[[f'{col}_Filter' for col in angle_thresholds.keys()]].all(axis=1)

# Final filter application based on vector angles.
# .copy() makes the result an independent frame, so adding a column below does not
# trigger the SettingWithCopyWarning recorded in this cell's earlier output.
df_final_filter = df_angles.loc[df_angles['Vector_Angle_Filter'].astype(bool)].copy()

# Save the final filtered DataFrame to an Excel file
df_final_filter.to_excel(output_file_vector_angles_final_filter, index=False)

# Generate PyMOL script file
pymol_script_commands = []

# Adding a sequential combination number starting from 1
df_final_filter['Combination_Number'] = range(1, len(df_final_filter) + 1)

# PyMOL names a loaded object after the file's base name without extension; the
# selection macro /object//chain/resi needs that name, not the file path.
pymol_object_name = os.path.splitext(os.path.basename(pdb_file))[0]

for index, row in df_final_filter.iterrows():
    combination = row['Combination']
    chain1, res1 = combination[0].get_full_id()[2], combination[0].get_full_id()[3][1]
    chain2, res2 = combination[1].get_full_id()[2], combination[1].get_full_id()[3][1]
    chain3, res3 = combination[2].get_full_id()[2], combination[2].get_full_id()[3][1]

    selection_name = f"obj{row['Combination_Number']:02d}"
    pymol_script_commands.append(f"select {selection_name}, (chain {chain1} and resi {res1}) or (chain {chain2} and resi {res2}) or (chain {chain3} and resi {res3})")
    pymol_script_commands.append(f"create {selection_name}_residue1, /{pymol_object_name}//{chain1}/{res1}")
    pymol_script_commands.append(f"create {selection_name}_residue2, /{pymol_object_name}//{chain2}/{res2}")
    pymol_script_commands.append(f"create {selection_name}_residue3, /{pymol_object_name}//{chain3}/{res3}")

# Save PyMOL commands to file (the structure must already be loaded in PyMOL when the script runs)
with open(pymol_script_file, 'w') as f:
    f.write("# PyMOL script for visualizing filtered residue combinations\n\n")
    for command in pymol_script_commands:
        f.write(command + '\n')

print(f"\nPyMOL script saved to {pymol_script_file}")

Residues loaded: (<Residue ILE het=  resseq=17 icode= >, <Residue ALA het=  resseq=89 icode= >, <Residue GLU het=  resseq=92 icode= >)
Residues loaded: (<Residue ASP het=  resseq=60 icode= >, <Residue HIS het=  resseq=63 icode= >, <Residue ALA het=  resseq=89 icode= >)
Residues loaded: (<Residue ASP het=  resseq=60 icode= >, <Residue ALA het=  resseq=64 icode= >, <Residue ALA het=  resseq=89 icode= >)
Residues loaded: (<Residue ASP het=  resseq=60 icode= >, <Residue ALA het=  resseq=100 icode= >, <Residue ALA het=  resseq=89 icode= >)
Residues loaded: (<Residue ASP het=  resseq=60 icode= >, <Residue LYS het=  resseq=104 icode= >, <Residue ALA het=  resseq=89 icode= >)
Residues loaded: (<Residue ASP het=  resseq=60 icode= >, <Residue LEU het=  resseq=78 icode= >, <Residue ALA het=  resseq=89 icode= >)
Residues loaded: (<Residue ASP het=  resseq=60 icode= >, <Residue LYS het=  resseq=85 icode= >, <Residue ALA het=  resseq=89 icode= >)
Residues loaded: (<Residue HIS het=  resseq=63 icode=

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_final_filter['Combination_Number'] = range(1, len(df_final_filter) + 1)


In [None]:
# Final_all (2His/1Asp)
import pandas as pd
import numpy as np
from Bio.PDB import PDBParser
import itertools
import os

# Input structure and the files written by each filtering stage
pdb_file = "/content/1EP0_alanine_dimer.pdb"
output_file_distances = "/content/1EP0_130_2His_1Asp_distances2.xlsx"
output_file_ratio = "/content/1EP0_130_2His_1Asp_ratio2.xlsx"
output_file_angles = "/content/1EP0_130_2His_1Asp_angles2.xlsx"
output_file_vector_angles_final_filter = "/content/1EP0_130_2His_1Asp_final_filter2.xlsx"
pymol_script_file = "/content/1EP0_130_2His_1Asp_output2.pml"

# Parse the structure (parser warnings silenced) and work on its first model
parser = PDBParser(QUIET=True)
structure = parser.get_structure('protein', pdb_file)
model = structure[0]

# Keep only residues whose id starts with a blank field, i.e. drop entries
# flagged as hetero residues.
residues = []
for candidate in model.get_residues():
    if candidate.get_id()[0] == ' ':
        residues.append(candidate)

# Function to calculate angle between residue vectors
def calculate_angle(residue1, residue2):
    """Return the angle in degrees between the CA->CB vectors of two residues.

    Each residue must support ``has_id(name)`` and ``residue[name].coord``.
    A residue without a CB atom contributes a zero vector, in which case the
    angle is undefined and ``np.nan`` is returned.
    """
    def _ca_to_cb(residue):
        # Zero vector marks "no side-chain direction available"
        if not residue.has_id('CB'):
            return np.zeros(3)
        return residue['CB'].coord - residue['CA'].coord

    direction_1 = _ca_to_cb(residue1)
    direction_2 = _ca_to_cb(residue2)

    inner = np.dot(direction_1, direction_2)
    scale = np.linalg.norm(direction_1) * np.linalg.norm(direction_2)
    if scale == 0:
        return np.nan

    # Clip guards arccos against rounding just outside [-1, 1]
    return np.degrees(np.arccos(np.clip(inner / scale, -1.0, 1.0)))

# Filter combinations: keep every residue triplet that contains the anchor residue.
# Matching is by residue number only, so the anchor is matched in every chain.
anchor_residue_number = 130
combinations = itertools.combinations(residues, 3)
selected_combinations = [
    comb for comb in combinations
    if any(res.get_id()[1] == anchor_residue_number for res in comb)
]

# Distance ranges: accepted CA-CA (alpha) and CB-CB (beta) separations,
# in the units of the PDB coordinates
alpha_distance_range = (5.5, 10.0)
beta_distance_range = (5.3, 8.9)

# Accepted CA-CA / CB-CB distance ratio for every residue pair
ratio_range = (0.8, 1.4)

# Angle threshold ranges (degrees) for the CA->CB vector angle of each residue pair
angle_threshold_1_2_range = (2, 135)
angle_threshold_1_3_range = (2, 135)
angle_threshold_2_3_range = (2, 135)

# Step 1: Distance Filter
filtered_data_distances = []

for combination in selected_combinations:
    alpha_distances, beta_distances = [], []

    try:
        for res1, res2 in itertools.combinations(combination, 2):
            # A pair can only be measured when both residues have CA and CB atoms
            if not (res1.has_id('CA') and res2.has_id('CA')):
                continue
            if not (res1.has_id('CB') and res2.has_id('CB')):
                continue

            alpha_distance = np.linalg.norm(res1['CA'].coord - res2['CA'].coord)
            beta_distance = np.linalg.norm(res1['CB'].coord - res2['CB'].coord)

            if (alpha_distance_range[0] <= alpha_distance <= alpha_distance_range[1] and
                    beta_distance_range[0] <= beta_distance <= beta_distance_range[1]):
                alpha_distances.append(alpha_distance)
                beta_distances.append(beta_distance)

        # A triplet passes only when all three of its pairs are in range
        if len(alpha_distances) >= 3 and len(beta_distances) >= 3:
            record = {'PDB_ID': pdb_file, 'Combination': combination}
            for position, member in enumerate(combination, start=1):
                full_id = member.get_full_id()
                record[f'Coord_chain_id_number{position}'] = full_id[2]
                record[f'Coord_residue_number{position}'] = full_id[3][1]
                record[f'Coord_residue_name{position}'] = member.get_resname()
                record[f'Coord_atom_name{position}'] = 'CA'
            for position in range(3):
                record[f'Alpha Distance {position + 1}'] = alpha_distances[position]
                record[f'Beta Distance {position + 1}'] = beta_distances[position]
            filtered_data_distances.append(record)

    except KeyError as e:
        print(f"Error processing combination {combination}: {e}")

# Create DataFrame for distances
column_order = [
    'PDB_ID',
    'Combination',
    'Coord_chain_id_number1', 'Coord_residue_number1', 'Coord_residue_name1', 'Coord_atom_name1',
    'Coord_chain_id_number2', 'Coord_residue_number2', 'Coord_residue_name2', 'Coord_atom_name2',
    'Coord_chain_id_number3', 'Coord_residue_number3', 'Coord_residue_name3', 'Coord_atom_name3',
    'Alpha Distance 1', 'Alpha Distance 2', 'Alpha Distance 3',
    'Beta Distance 1', 'Beta Distance 2', 'Beta Distance 3'
]

# Passing `columns=` keeps the expected columns even when no triplet survived,
# where selecting df[column_order] on a column-less frame would raise KeyError.
df_distances = pd.DataFrame(filtered_data_distances, columns=column_order)
df_distances.to_excel(output_file_distances, index=False)

# Step 2: Ratio Filter
filtered_data_ratio = []

for idx, row in df_distances.iterrows():
    ratios = [row[f'Alpha Distance {n}'] / row[f'Beta Distance {n}'] for n in (1, 2, 3)]

    # Keep the triplet only if every pair's ratio lies within ratio_range
    if all(ratio_range[0] <= ratio <= ratio_range[1] for ratio in ratios):
        filtered_data_ratio.append(row)

# Create DataFrame for ratio
df_ratio = pd.DataFrame(filtered_data_ratio, columns=column_order)
df_ratio.to_excel(output_file_ratio, index=False)

# Step 3: Angle Filter
filtered_data_angles = []

for idx, row in df_ratio.iterrows():
    combination = row['Combination']
    angle_1_2 = calculate_angle(combination[0], combination[1])
    angle_1_3 = calculate_angle(combination[0], combination[2])
    angle_2_3 = calculate_angle(combination[1], combination[2])

    # NaN angles (a residue without CB) fail every comparison and are dropped
    if (angle_threshold_1_2_range[0] <= angle_1_2 <= angle_threshold_1_2_range[1] and
            angle_threshold_1_3_range[0] <= angle_1_3 <= angle_threshold_1_3_range[1] and
            angle_threshold_2_3_range[0] <= angle_2_3 <= angle_threshold_2_3_range[1]):
        row['Angle_1_2'] = angle_1_2
        row['Angle_1_3'] = angle_1_3
        row['Angle_2_3'] = angle_2_3
        filtered_data_angles.append(row)

# Create DataFrame for angles
df_angles = pd.DataFrame(
    filtered_data_angles,
    columns=column_order + ['Angle_1_2', 'Angle_1_3', 'Angle_2_3'],
)
df_angles.to_excel(output_file_angles, index=False)

# Step 4: Vector Angle Filter
def calculate_vector_angle(vector1, vector2):
    """Return the angle, in degrees, between two vectors.

    Returns NaN when the product of the two vector norms is zero (i.e. at
    least one vector has zero length), since the angle is undefined then.
    """
    dot = np.dot(vector1, vector2)
    norm_product = np.linalg.norm(vector1) * np.linalg.norm(vector2)
    if norm_product == 0:
        return np.nan
    # Clip to [-1, 1] so rounding error cannot push arccos out of its domain.
    cos_theta = np.clip(dot / norm_product, -1.0, 1.0)
    return np.degrees(np.arccos(cos_theta))

def process_row(row):
    """Compute the CA-vector vs. CB-vector angle for each residue pair of a row.

    For the pairs (0, 1), (0, 2) and (1, 2) of ``row['Combination']`` the
    angle between the CA->CA vector and the CB->CB vector is computed with
    ``calculate_vector_angle``. A residue without a CB atom falls back to
    its CA atom.

    Args:
        row: a mapping/Series with a 'Combination' entry holding three
            residue objects that support ``residue['CA']`` / ``'CB' in residue``.

    Returns:
        A list of three angles in degrees, in pair order (0, 1), (0, 2),
        (1, 2). An entry is None when an atom lookup raised KeyError.
        ``[None, None, None]`` is returned when the PDB file is missing or
        the row has no 'Combination' entry.
    """
    if not os.path.isfile(pdb_file):
        print(f"PDB file not found: {pdb_file}")
        return [None, None, None]

    # The residue objects stored in the row already carry their atoms and
    # coordinates, so the structure is deliberately NOT re-parsed here. The
    # previous version re-read the whole PDB file for every row and never
    # used the parsed result.
    try:
        residues = row['Combination']
    except KeyError as e:
        print(f"KeyError: {e}")
        return [None, None, None]

    print(f"Residues loaded: {residues}")

    vector_angles = []
    for i, j in [(0, 1), (0, 2), (1, 2)]:
        try:
            CA1 = residues[i]['CA']
            CA2 = residues[j]['CA']
            CB1 = residues[i]['CB'] if 'CB' in residues[i] else CA1
            CB2 = residues[j]['CB'] if 'CB' in residues[j] else CA2

            vector_CA = CA2.coord - CA1.coord
            vector_CB = CB2.coord - CB1.coord

            vector_angles.append(calculate_vector_angle(vector_CA, vector_CB))
        except KeyError as e:
            print(f"KeyError for residues {residues[i]} and {residues[j]}: {e}")
            vector_angles.append(None)

    return vector_angles

# Apply the function to each row and store the results.
# The result frame is built explicitly (same index, named columns) so the
# column assignment below also works when df_angles has no rows.
vector_angle_columns = ['Vector_Angle_1_2', 'Vector_Angle_1_3', 'Vector_Angle_2_3']
vector_angle_results = pd.DataFrame(
    [process_row(row) for row_label, row in df_angles.iterrows()],
    index=df_angles.index,
    columns=vector_angle_columns,
)
df_angles[vector_angle_columns] = vector_angle_results

# Set angle threshold ranges for vector angles
angle_thresholds = {
    'Vector_Angle_1_2': (0, 15),
    'Vector_Angle_1_3': (0, 15),
    'Vector_Angle_2_3': (0, 15)
}

# Create filter columns based on angle thresholds.
# Bounds are exclusive. Missing angles (None/NaN) become NaN, and every
# comparison with NaN is False, so those rows fail the filter.
for col, (low, high) in angle_thresholds.items():
    angle_values = pd.to_numeric(df_angles[col], errors='coerce')
    df_angles[f'{col}_Filter'] = (angle_values > low) & (angle_values < high)

# Combine filters
df_angles['Vector_Angle_Filter'] = df_angles[[f'{col}_Filter' for col in angle_thresholds.keys()]].all(axis=1)

# Final filter application based on vector angles.
# .copy() makes the result an independent frame, so adding columns to it
# later does not trigger SettingWithCopyWarning.
df_final_filter = df_angles[df_angles['Vector_Angle_Filter']].copy()

# Save the final filtered DataFrame to an Excel file
df_final_filter.to_excel(output_file_vector_angles_final_filter, index=False)

# Generate PyMOL script file
pymol_script_commands = []

# Adding a sequential combination number starting from 1.
# assign() returns a new frame, which avoids the SettingWithCopyWarning
# raised when a column is set on a filtered slice.
df_final_filter = df_final_filter.assign(
    Combination_Number=range(1, len(df_final_filter) + 1)
)

# PyMOL names a loaded object after the file's base name without extension,
# and the selection macro is /object/segi/chain/resi. Using the full file
# path there yields an invalid macro, so derive the object name instead.
pymol_object_name = os.path.splitext(os.path.basename(pdb_file))[0]

for index, row in df_final_filter.iterrows():
    combination = row['Combination']
    chain1, res1 = combination[0].get_full_id()[2], combination[0].get_full_id()[3][1]
    chain2, res2 = combination[1].get_full_id()[2], combination[1].get_full_id()[3][1]
    chain3, res3 = combination[2].get_full_id()[2], combination[2].get_full_id()[3][1]

    selection_name = f"obj{row['Combination_Number']:02d}"
    pymol_script_commands.append(f"select {selection_name}, (chain {chain1} and resi {res1}) or (chain {chain2} and resi {res2}) or (chain {chain3} and resi {res3})")
    pymol_script_commands.append(f"create {selection_name}_residue1, /{pymol_object_name}//{chain1}/{res1}")
    pymol_script_commands.append(f"create {selection_name}_residue2, /{pymol_object_name}//{chain2}/{res2}")
    pymol_script_commands.append(f"create {selection_name}_residue3, /{pymol_object_name}//{chain3}/{res3}")

# Save PyMOL commands to file
with open(pymol_script_file, 'w') as f:
    f.write("# PyMOL script for visualizing filtered residue combinations\n\n")
    for command in pymol_script_commands:
        f.write(command + '\n')

print(f"\nPyMOL script saved to {pymol_script_file}")

Residues loaded: (<Residue ALA het=  resseq=27 icode= >, <Residue GLU het=  resseq=128 icode= >, <Residue ILE het=  resseq=130 icode= >)
Residues loaded: (<Residue GLU het=  resseq=52 icode= >, <Residue ILE het=  resseq=130 icode= >, <Residue ILE het=  resseq=78 icode= >)
Residues loaded: (<Residue VAL het=  resseq=77 icode= >, <Residue ALA het=  resseq=80 icode= >, <Residue ILE het=  resseq=130 icode= >)
Residues loaded: (<Residue VAL het=  resseq=77 icode= >, <Residue ASP het=  resseq=106 icode= >, <Residue ILE het=  resseq=130 icode= >)
Residues loaded: (<Residue VAL het=  resseq=77 icode= >, <Residue ILE het=  resseq=130 icode= >, <Residue ASN het=  resseq=132 icode= >)
Residues loaded: (<Residue ILE het=  resseq=78 icode= >, <Residue GLU het=  resseq=52 icode= >, <Residue ILE het=  resseq=130 icode= >)
Residues loaded: (<Residue ILE het=  resseq=78 icode= >, <Residue ILE het=  resseq=130 icode= >, <Residue ASN het=  resseq=132 icode= >)
Residues loaded: (<Residue ALA het=  resseq=

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_final_filter['Combination_Number'] = range(1, len(df_final_filter) + 1)


In [None]:
# Final_all (really the last one, this is the end: 2His/1Cys)
import pandas as pd
import numpy as np
from Bio.PDB import PDBParser
import itertools
import os

# File paths
# Input structure plus one Excel output per filtering stage and the final
# PyMOL script; all of them are read/written by the steps further down.
pdb_file = "/content/1EP0_alanine_dimer.pdb"
output_file_distances = "/content/1EP0_2His_1Cys_132_distances.xlsx"
output_file_ratio = "/content/1EP0_2His_1Cys_132_ratio.xlsx"
output_file_angles = "/content/1EP0_2His_1Cys_132_angles.xlsx"
output_file_vector_angles_final_filter = "/content/1EP0_2His_1Cys_132_filter.xlsx"
# NOTE(review): this name says "3His_130" while every other output of this
# cell says "2His_1Cys_132" - looks like a copy-paste leftover that could
# overwrite another run's script; confirm the intended file name.
pymol_script_file = "/content/1EP0_3His_130_output.pml"

# PDB Parser setup
# QUIET=True suppresses the parser's construction warnings.
parser = PDBParser(QUIET=True)
structure = parser.get_structure('protein', pdb_file)
# Only the first model of the structure is used.
model = structure[0]
# Keep residues whose hetero-flag (first field of the residue id) is blank,
# i.e. drop hetero residues and waters.
residues = [residue for residue in model.get_residues() if residue.get_id()[0] == ' ']

# Function to calculate angle between residue vectors
def calculate_angle(residue1, residue2):
    """Return the angle, in degrees, between the CA->CB vectors of two residues.

    A residue without a CB atom contributes a zero vector, in which case the
    angle is undefined and NaN is returned.
    """
    def _ca_to_cb(residue):
        # Vector from the CA atom to the CB atom, or zeros when CB is absent.
        if residue.has_id('CB'):
            return residue['CB'].coord - residue['CA'].coord
        return np.zeros(3)

    side_vector1 = _ca_to_cb(residue1)
    side_vector2 = _ca_to_cb(residue2)
    dot = np.dot(side_vector1, side_vector2)
    norm_product = np.linalg.norm(side_vector1) * np.linalg.norm(side_vector2)
    if norm_product == 0:
        return np.nan
    # Clip to [-1, 1] so rounding error cannot push arccos out of its domain.
    cos_theta = np.clip(dot / norm_product, -1.0, 1.0)
    return np.degrees(np.arccos(cos_theta))

# Filter combinations
# Generate every 3-residue combination, then keep only those that contain a
# residue whose sequence number is 132.
combinations = itertools.combinations(residues, 3)
selected_combinations = [
    triad for triad in combinations
    if any(member.get_id()[1] == 132 for member in triad)
]

# Distance ranges
alpha_distance_range = (5.2, 10.1)
beta_distance_range = (3.8, 8.9)

# Angle threshold ranges
angle_threshold_1_2_range = (0, 130)
angle_threshold_1_3_range = (0, 130)
angle_threshold_2_3_range = (0, 130)

# Step 1: Distance Filter
# A combination is recorded only when all three of its residue pairs have
# both CA-CA and CB-CB distances inside the ranges above.
filtered_data_distances = []

for idx, combination in enumerate(selected_combinations):
    alpha_distances = []
    beta_distances = []

    try:
        for res1, res2 in itertools.combinations(combination, 2):
            # A pair is skipped unless both residues have a CA atom ...
            if not (res1.has_id('CA') and res2.has_id('CA')):
                continue
            ca1 = res1['CA'].coord
            ca2 = res2['CA'].coord
            alpha_distance = np.linalg.norm(ca1 - ca2)

            # ... and a CB atom.
            if not (res1.has_id('CB') and res2.has_id('CB')):
                continue
            cb1 = res1['CB'].coord
            cb2 = res2['CB'].coord
            beta_distance = np.linalg.norm(cb1 - cb2)

            alpha_in_range = alpha_distance_range[0] <= alpha_distance <= alpha_distance_range[1]
            beta_in_range = beta_distance_range[0] <= beta_distance <= beta_distance_range[1]
            if alpha_in_range and beta_in_range:
                alpha_distances.append(alpha_distance)
                beta_distances.append(beta_distance)

        # Three accepted pairs means every pair of the triad passed.
        if len(alpha_distances) >= 3 and len(beta_distances) >= 3:
            first, second, third = combination
            first_id = first.get_full_id()
            second_id = second.get_full_id()
            third_id = third.get_full_id()
            filtered_data_distances.append({
                'PDB_ID': pdb_file,
                'Combination': combination,
                'Coord_chain_id_number1': first_id[2],
                'Coord_residue_number1': first_id[3][1],
                'Coord_residue_name1': first.get_resname(),
                'Coord_atom_name1': 'CA',
                'Coord_chain_id_number2': second_id[2],
                'Coord_residue_number2': second_id[3][1],
                'Coord_residue_name2': second.get_resname(),
                'Coord_atom_name2': 'CA',
                'Coord_chain_id_number3': third_id[2],
                'Coord_residue_number3': third_id[3][1],
                'Coord_residue_name3': third.get_resname(),
                'Coord_atom_name3': 'CA',
                'Alpha Distance 1': alpha_distances[0],
                'Alpha Distance 2': alpha_distances[1],
                'Alpha Distance 3': alpha_distances[2],
                'Beta Distance 1': beta_distances[0],
                'Beta Distance 2': beta_distances[1],
                'Beta Distance 3': beta_distances[2]
            })

    except KeyError as e:
        print(f"Error processing combination {combination}: {e}")
# Create DataFrame for distances
column_order = [
    'PDB_ID',
    'Combination',
    'Coord_chain_id_number1', 'Coord_residue_number1', 'Coord_residue_name1', 'Coord_atom_name1',
    'Coord_chain_id_number2', 'Coord_residue_number2', 'Coord_residue_name2', 'Coord_atom_name2',
    'Coord_chain_id_number3', 'Coord_residue_number3', 'Coord_residue_name3', 'Coord_atom_name3',
    'Alpha Distance 1', 'Alpha Distance 2', 'Alpha Distance 3',
    'Beta Distance 1', 'Beta Distance 2', 'Beta Distance 3'
]

# Passing `columns=` fixes the schema and column order up front, so an empty
# result (no combination passed the distance filter) yields an empty frame
# with the expected columns instead of a KeyError on column selection.
df_distances = pd.DataFrame(filtered_data_distances, columns=column_order)
df_distances.to_excel(output_file_distances, index=False)

# Step 2: Ratio Filter
# A combination is kept only when, for each of its three residue pairs, the
# ratio alpha distance / beta distance lies inside `ratio_range` (inclusive).
ratio_range = (0.9, 1.4)
alpha_columns = ['Alpha Distance 1', 'Alpha Distance 2', 'Alpha Distance 3']
beta_columns = ['Beta Distance 1', 'Beta Distance 2', 'Beta Distance 3']

# Element-wise ratios for all rows at once; shape is (n_rows, 3).
distance_ratios = (
    df_distances[alpha_columns].to_numpy(dtype=float)
    / df_distances[beta_columns].to_numpy(dtype=float)
)
ratio_mask = (
    (distance_ratios >= ratio_range[0]) & (distance_ratios <= ratio_range[1])
).all(axis=1)

# Create DataFrame for ratio
df_ratio = df_distances.loc[ratio_mask, column_order]
df_ratio.to_excel(output_file_ratio, index=False)

# Step 3: Angle Filter
# For each surviving combination, compute the angle between the CA->CB
# vectors of every residue pair and keep the row only if all three angles
# fall inside their (inclusive) threshold ranges. A NaN angle fails every
# comparison, so such rows are dropped.
angle_columns = ['Angle_1_2', 'Angle_1_3', 'Angle_2_3']
filtered_data_angles = []

for idx, row in df_ratio.iterrows():
    combination = row['Combination']
    angle_1_2 = calculate_angle(combination[0], combination[1])
    angle_1_3 = calculate_angle(combination[0], combination[2])
    angle_2_3 = calculate_angle(combination[1], combination[2])

    if (angle_threshold_1_2_range[0] <= angle_1_2 <= angle_threshold_1_2_range[1] and
            angle_threshold_1_3_range[0] <= angle_1_3 <= angle_threshold_1_3_range[1] and
            angle_threshold_2_3_range[0] <= angle_2_3 <= angle_threshold_2_3_range[1]):
        # `row` is a per-iteration copy, so adding entries does not touch df_ratio.
        row['Angle_1_2'] = angle_1_2
        row['Angle_1_3'] = angle_1_3
        row['Angle_2_3'] = angle_2_3
        filtered_data_angles.append(row)

# Create DataFrame for angles
# `columns=` keeps the schema even when nothing passed the filter; selecting
# the columns afterwards would raise KeyError on an empty frame.
df_angles = pd.DataFrame(filtered_data_angles, columns=column_order + angle_columns)
df_angles.to_excel(output_file_angles, index=False)

# Step 4: Vector Angle Filter
def calculate_vector_angle(vector1, vector2):
    """Return the angle, in degrees, between two vectors.

    Returns NaN when the product of the two vector norms is zero (i.e. at
    least one vector has zero length), since the angle is undefined then.
    """
    dot = np.dot(vector1, vector2)
    norm_product = np.linalg.norm(vector1) * np.linalg.norm(vector2)
    if norm_product == 0:
        return np.nan
    # Clip to [-1, 1] so rounding error cannot push arccos out of its domain.
    cos_theta = np.clip(dot / norm_product, -1.0, 1.0)
    return np.degrees(np.arccos(cos_theta))

def process_row(row):
    """Compute the CA-vector vs. CB-vector angle for each residue pair of a row.

    For the pairs (0, 1), (0, 2) and (1, 2) of ``row['Combination']`` the
    angle between the CA->CA vector and the CB->CB vector is computed with
    ``calculate_vector_angle``. A residue without a CB atom falls back to
    its CA atom.

    Args:
        row: a mapping/Series with a 'Combination' entry holding three
            residue objects that support ``residue['CA']`` / ``'CB' in residue``.

    Returns:
        A list of three angles in degrees, in pair order (0, 1), (0, 2),
        (1, 2). An entry is None when an atom lookup raised KeyError.
        ``[None, None, None]`` is returned when the PDB file is missing or
        the row has no 'Combination' entry.
    """
    if not os.path.isfile(pdb_file):
        print(f"PDB file not found: {pdb_file}")
        return [None, None, None]

    # The residue objects stored in the row already carry their atoms and
    # coordinates, so the structure is deliberately NOT re-parsed here. The
    # previous version re-read the whole PDB file for every row and never
    # used the parsed result.
    try:
        residues = row['Combination']
    except KeyError as e:
        print(f"KeyError: {e}")
        return [None, None, None]

    print(f"Residues loaded: {residues}")

    vector_angles = []
    for i, j in [(0, 1), (0, 2), (1, 2)]:
        try:
            CA1 = residues[i]['CA']
            CA2 = residues[j]['CA']
            CB1 = residues[i]['CB'] if 'CB' in residues[i] else CA1
            CB2 = residues[j]['CB'] if 'CB' in residues[j] else CA2

            vector_CA = CA2.coord - CA1.coord
            vector_CB = CB2.coord - CB1.coord

            vector_angles.append(calculate_vector_angle(vector_CA, vector_CB))
        except KeyError as e:
            print(f"KeyError for residues {residues[i]} and {residues[j]}: {e}")
            vector_angles.append(None)

    return vector_angles

# Apply the function to each row and store the results.
# The result frame is built explicitly (same index, named columns) so the
# column assignment below also works when df_angles has no rows.
vector_angle_columns = ['Vector_Angle_1_2', 'Vector_Angle_1_3', 'Vector_Angle_2_3']
vector_angle_results = pd.DataFrame(
    [process_row(row) for row_label, row in df_angles.iterrows()],
    index=df_angles.index,
    columns=vector_angle_columns,
)
df_angles[vector_angle_columns] = vector_angle_results

# Set angle threshold ranges for vector angles
angle_thresholds = {
    'Vector_Angle_1_2': (0, 20),
    'Vector_Angle_1_3': (0, 20),
    'Vector_Angle_2_3': (0, 20)
}

# Create filter columns based on angle thresholds.
# Bounds are exclusive. Missing angles (None/NaN) become NaN, and every
# comparison with NaN is False, so those rows fail the filter.
for col, (low, high) in angle_thresholds.items():
    angle_values = pd.to_numeric(df_angles[col], errors='coerce')
    df_angles[f'{col}_Filter'] = (angle_values > low) & (angle_values < high)

# Combine filters
df_angles['Vector_Angle_Filter'] = df_angles[[f'{col}_Filter' for col in angle_thresholds.keys()]].all(axis=1)

# Final filter application based on vector angles.
# .copy() makes the result an independent frame, so adding columns to it
# later does not trigger SettingWithCopyWarning.
df_final_filter = df_angles[df_angles['Vector_Angle_Filter']].copy()

# Save the final filtered DataFrame to an Excel file
df_final_filter.to_excel(output_file_vector_angles_final_filter, index=False)

# Generate PyMOL script file
pymol_script_commands = []

# Adding a sequential combination number starting from 1.
# assign() returns a new frame, which avoids the SettingWithCopyWarning
# raised when a column is set on a filtered slice.
df_final_filter = df_final_filter.assign(
    Combination_Number=range(1, len(df_final_filter) + 1)
)

# PyMOL names a loaded object after the file's base name without extension,
# and the selection macro is /object/segi/chain/resi. Using the full file
# path there yields an invalid macro, so derive the object name instead.
pymol_object_name = os.path.splitext(os.path.basename(pdb_file))[0]

for index, row in df_final_filter.iterrows():
    combination = row['Combination']
    chain1, res1 = combination[0].get_full_id()[2], combination[0].get_full_id()[3][1]
    chain2, res2 = combination[1].get_full_id()[2], combination[1].get_full_id()[3][1]
    chain3, res3 = combination[2].get_full_id()[2], combination[2].get_full_id()[3][1]

    selection_name = f"obj{row['Combination_Number']:02d}"
    pymol_script_commands.append(f"select {selection_name}, (chain {chain1} and resi {res1}) or (chain {chain2} and resi {res2}) or (chain {chain3} and resi {res3})")
    pymol_script_commands.append(f"create {selection_name}_residue1, /{pymol_object_name}//{chain1}/{res1}")
    pymol_script_commands.append(f"create {selection_name}_residue2, /{pymol_object_name}//{chain2}/{res2}")
    pymol_script_commands.append(f"create {selection_name}_residue3, /{pymol_object_name}//{chain3}/{res3}")

# Save PyMOL commands to file
with open(pymol_script_file, 'w') as f:
    f.write("# PyMOL script for visualizing filtered residue combinations\n\n")
    for command in pymol_script_commands:
        f.write(command + '\n')

print(f"\nPyMOL script saved to {pymol_script_file}")

Residues loaded: (<Residue THR het=  resseq=32 icode= >, <Residue ASP het=  resseq=50 icode= >, <Residue ASN het=  resseq=132 icode= >)
Residues loaded: (<Residue ASP het=  resseq=50 icode= >, <Residue ASN het=  resseq=132 icode= >, <Residue THR het=  resseq=32 icode= >)
Residues loaded: (<Residue LEU het=  resseq=74 icode= >, <Residue ARG het=  resseq=76 icode= >, <Residue ASN het=  resseq=132 icode= >)
Residues loaded: (<Residue ARG het=  resseq=76 icode= >, <Residue ILE het=  resseq=78 icode= >, <Residue ASN het=  resseq=132 icode= >)
Residues loaded: (<Residue ARG het=  resseq=76 icode= >, <Residue ASN het=  resseq=132 icode= >, <Residue LYS het=  resseq=134 icode= >)
Residues loaded: (<Residue ILE het=  resseq=78 icode= >, <Residue ILE het=  resseq=130 icode= >, <Residue ASN het=  resseq=132 icode= >)
Residues loaded: (<Residue ILE het=  resseq=78 icode= >, <Residue ILE het=  resseq=130 icode= >, <Residue ASN het=  resseq=132 icode= >)
Residues loaded: (<Residue ILE het=  resseq=1

In [None]:
# Final_all (really the last one, this is the end: 3His): No specification
import pandas as pd
import numpy as np
from Bio.PDB import PDBParser
import itertools
import os

# File paths
# Input structure plus one Excel output per filtering stage and the final
# PyMOL script; all of them are read/written by the steps further down.
pdb_file = "/content/3ljm.pdb"
output_file_distances = "/content/3ljm_distances_tight3.xlsx"
output_file_ratio = "/content/3ljm_ratio_tight3.xlsx"
output_file_angles = "/content/3ljm_angles_tight3.xlsx"
output_file_vector_angles_final_filter = "/content/3ljm_filter_tight3.xlsx"
pymol_script_file = "/content/3ljm_output_tight3.pml"

# PDB Parser setup
# QUIET=True suppresses the parser's construction warnings.
parser = PDBParser(QUIET=True)
structure = parser.get_structure('protein', pdb_file)
# Only the first model of the structure is used.
model = structure[0]
# Keep residues whose hetero-flag (first field of the residue id) is blank,
# i.e. drop hetero residues and waters.
residues = [residue for residue in model.get_residues() if residue.get_id()[0] == ' ']

# Function to calculate angle between residue vectors
def calculate_angle(residue1, residue2):
    """Return the angle, in degrees, between the CA->CB vectors of two residues.

    A residue without a CB atom contributes a zero vector, in which case the
    angle is undefined and NaN is returned.
    """
    def _ca_to_cb(residue):
        # Vector from the CA atom to the CB atom, or zeros when CB is absent.
        if residue.has_id('CB'):
            return residue['CB'].coord - residue['CA'].coord
        return np.zeros(3)

    side_vector1 = _ca_to_cb(residue1)
    side_vector2 = _ca_to_cb(residue2)
    dot = np.dot(side_vector1, side_vector2)
    norm_product = np.linalg.norm(side_vector1) * np.linalg.norm(side_vector2)
    if norm_product == 0:
        return np.nan
    # Clip to [-1, 1] so rounding error cannot push arccos out of its domain.
    cos_theta = np.clip(dot / norm_product, -1.0, 1.0)
    return np.degrees(np.arccos(cos_theta))

# Filter combinations
# No residue restriction here: every 3-residue combination is a candidate.
combinations = itertools.combinations(residues, 3)
selected_combinations = list(combinations)

# Distance ranges
alpha_distance_range = (5.6, 10.4)
beta_distance_range = (5.7, 9.3)

# Angle threshold ranges
angle_threshold_1_2_range = (15, 110)
angle_threshold_1_3_range = (15, 110)
angle_threshold_2_3_range = (15, 110)

# Step 1: Distance Filter
# A combination is recorded only when all three of its residue pairs have
# both CA-CA and CB-CB distances inside the ranges above.
filtered_data_distances = []

for idx, combination in enumerate(selected_combinations):
    alpha_distances = []
    beta_distances = []

    try:
        for res1, res2 in itertools.combinations(combination, 2):
            # A pair is skipped unless both residues have a CA atom ...
            if not (res1.has_id('CA') and res2.has_id('CA')):
                continue
            ca1 = res1['CA'].coord
            ca2 = res2['CA'].coord
            alpha_distance = np.linalg.norm(ca1 - ca2)

            # ... and a CB atom.
            if not (res1.has_id('CB') and res2.has_id('CB')):
                continue
            cb1 = res1['CB'].coord
            cb2 = res2['CB'].coord
            beta_distance = np.linalg.norm(cb1 - cb2)

            alpha_in_range = alpha_distance_range[0] <= alpha_distance <= alpha_distance_range[1]
            beta_in_range = beta_distance_range[0] <= beta_distance <= beta_distance_range[1]
            if alpha_in_range and beta_in_range:
                alpha_distances.append(alpha_distance)
                beta_distances.append(beta_distance)

        # Three accepted pairs means every pair of the triad passed.
        if len(alpha_distances) >= 3 and len(beta_distances) >= 3:
            first, second, third = combination
            first_id = first.get_full_id()
            second_id = second.get_full_id()
            third_id = third.get_full_id()
            filtered_data_distances.append({
                'PDB_ID': pdb_file,
                'Combination': combination,
                'Coord_chain_id_number1': first_id[2],
                'Coord_residue_number1': first_id[3][1],
                'Coord_residue_name1': first.get_resname(),
                'Coord_atom_name1': 'CA',
                'Coord_chain_id_number2': second_id[2],
                'Coord_residue_number2': second_id[3][1],
                'Coord_residue_name2': second.get_resname(),
                'Coord_atom_name2': 'CA',
                'Coord_chain_id_number3': third_id[2],
                'Coord_residue_number3': third_id[3][1],
                'Coord_residue_name3': third.get_resname(),
                'Coord_atom_name3': 'CA',
                'Alpha Distance 1': alpha_distances[0],
                'Alpha Distance 2': alpha_distances[1],
                'Alpha Distance 3': alpha_distances[2],
                'Beta Distance 1': beta_distances[0],
                'Beta Distance 2': beta_distances[1],
                'Beta Distance 3': beta_distances[2]
            })

    except KeyError as e:
        print(f"Error processing combination {combination}: {e}")

# Create DataFrame for distances
column_order = [
    'PDB_ID',
    'Combination',
    'Coord_chain_id_number1', 'Coord_residue_number1', 'Coord_residue_name1', 'Coord_atom_name1',
    'Coord_chain_id_number2', 'Coord_residue_number2', 'Coord_residue_name2', 'Coord_atom_name2',
    'Coord_chain_id_number3', 'Coord_residue_number3', 'Coord_residue_name3', 'Coord_atom_name3',
    'Alpha Distance 1', 'Alpha Distance 2', 'Alpha Distance 3',
    'Beta Distance 1', 'Beta Distance 2', 'Beta Distance 3'
]

# Passing `columns=` fixes the schema and column order up front, so an empty
# result (no combination passed the distance filter) yields an empty frame
# with the expected columns instead of a KeyError on column selection.
df_distances = pd.DataFrame(filtered_data_distances, columns=column_order)
df_distances.to_excel(output_file_distances, index=False)

# Step 2: Ratio Filter
# A combination is kept only when, for each of its three residue pairs, the
# ratio alpha distance / beta distance lies inside `ratio_range` (inclusive).
ratio_range = (0.9, 1.2)
alpha_columns = ['Alpha Distance 1', 'Alpha Distance 2', 'Alpha Distance 3']
beta_columns = ['Beta Distance 1', 'Beta Distance 2', 'Beta Distance 3']

# Element-wise ratios for all rows at once; shape is (n_rows, 3).
distance_ratios = (
    df_distances[alpha_columns].to_numpy(dtype=float)
    / df_distances[beta_columns].to_numpy(dtype=float)
)
ratio_mask = (
    (distance_ratios >= ratio_range[0]) & (distance_ratios <= ratio_range[1])
).all(axis=1)

# Create DataFrame for ratio
df_ratio = df_distances.loc[ratio_mask, column_order]
df_ratio.to_excel(output_file_ratio, index=False)

# Step 3: Angle Filter
# For each surviving combination, compute the angle between the CA->CB
# vectors of every residue pair and keep the row only if all three angles
# fall inside their (inclusive) threshold ranges. A NaN angle fails every
# comparison, so such rows are dropped.
angle_columns = ['Angle_1_2', 'Angle_1_3', 'Angle_2_3']
filtered_data_angles = []

for idx, row in df_ratio.iterrows():
    combination = row['Combination']
    angle_1_2 = calculate_angle(combination[0], combination[1])
    angle_1_3 = calculate_angle(combination[0], combination[2])
    angle_2_3 = calculate_angle(combination[1], combination[2])

    if (angle_threshold_1_2_range[0] <= angle_1_2 <= angle_threshold_1_2_range[1] and
            angle_threshold_1_3_range[0] <= angle_1_3 <= angle_threshold_1_3_range[1] and
            angle_threshold_2_3_range[0] <= angle_2_3 <= angle_threshold_2_3_range[1]):
        # `row` is a per-iteration copy, so adding entries does not touch df_ratio.
        row['Angle_1_2'] = angle_1_2
        row['Angle_1_3'] = angle_1_3
        row['Angle_2_3'] = angle_2_3
        filtered_data_angles.append(row)

# Create DataFrame for angles
# `columns=` keeps the schema even when nothing passed the filter; selecting
# the columns afterwards would raise KeyError on an empty frame.
df_angles = pd.DataFrame(filtered_data_angles, columns=column_order + angle_columns)
df_angles.to_excel(output_file_angles, index=False)

# Step 4: Vector Angle Filter
def calculate_vector_angle(vector1, vector2):
    """Return the angle, in degrees, between two vectors.

    Returns NaN when the product of the two vector norms is zero (i.e. at
    least one vector has zero length), since the angle is undefined then.
    """
    dot = np.dot(vector1, vector2)
    norm_product = np.linalg.norm(vector1) * np.linalg.norm(vector2)
    if norm_product == 0:
        return np.nan
    # Clip to [-1, 1] so rounding error cannot push arccos out of its domain.
    cos_theta = np.clip(dot / norm_product, -1.0, 1.0)
    return np.degrees(np.arccos(cos_theta))

def process_row(row):
    """Compute the CA-vector vs. CB-vector angle for each residue pair of a row.

    For the pairs (0, 1), (0, 2) and (1, 2) of ``row['Combination']`` the
    angle between the CA->CA vector and the CB->CB vector is computed with
    ``calculate_vector_angle``. A residue without a CB atom falls back to
    its CA atom.

    Args:
        row: a mapping/Series with a 'Combination' entry holding three
            residue objects that support ``residue['CA']`` / ``'CB' in residue``.

    Returns:
        A list of three angles in degrees, in pair order (0, 1), (0, 2),
        (1, 2). An entry is None when an atom lookup raised KeyError.
        ``[None, None, None]`` is returned when the PDB file is missing or
        the row has no 'Combination' entry.
    """
    if not os.path.isfile(pdb_file):
        print(f"PDB file not found: {pdb_file}")
        return [None, None, None]

    # The residue objects stored in the row already carry their atoms and
    # coordinates, so the structure is deliberately NOT re-parsed here. The
    # previous version re-read the whole PDB file for every row and never
    # used the parsed result.
    try:
        residues = row['Combination']
    except KeyError as e:
        print(f"KeyError: {e}")
        return [None, None, None]

    print(f"Residues loaded: {residues}")

    vector_angles = []
    for i, j in [(0, 1), (0, 2), (1, 2)]:
        try:
            CA1 = residues[i]['CA']
            CA2 = residues[j]['CA']
            CB1 = residues[i]['CB'] if 'CB' in residues[i] else CA1
            CB2 = residues[j]['CB'] if 'CB' in residues[j] else CA2

            vector_CA = CA2.coord - CA1.coord
            vector_CB = CB2.coord - CB1.coord

            vector_angles.append(calculate_vector_angle(vector_CA, vector_CB))
        except KeyError as e:
            print(f"KeyError for residues {residues[i]} and {residues[j]}: {e}")
            vector_angles.append(None)

    return vector_angles

# Apply the function to each row and store the results.
# The result frame is built explicitly (same index, named columns) so the
# column assignment below also works when df_angles has no rows.
vector_angle_columns = ['Vector_Angle_1_2', 'Vector_Angle_1_3', 'Vector_Angle_2_3']
vector_angle_results = pd.DataFrame(
    [process_row(row) for row_label, row in df_angles.iterrows()],
    index=df_angles.index,
    columns=vector_angle_columns,
)
df_angles[vector_angle_columns] = vector_angle_results

# Set angle threshold ranges for vector angles
angle_thresholds = {
    'Vector_Angle_1_2': (0, 20),
    'Vector_Angle_1_3': (0, 20),
    'Vector_Angle_2_3': (0, 20)
}

# Create filter columns based on angle thresholds.
# Bounds are exclusive. Missing angles (None/NaN) become NaN, and every
# comparison with NaN is False, so those rows fail the filter.
for col, (low, high) in angle_thresholds.items():
    angle_values = pd.to_numeric(df_angles[col], errors='coerce')
    df_angles[f'{col}_Filter'] = (angle_values > low) & (angle_values < high)

# Combine filters
df_angles['Vector_Angle_Filter'] = df_angles[[f'{col}_Filter' for col in angle_thresholds.keys()]].all(axis=1)

# Final filter application based on vector angles.
# .copy() makes the result an independent frame, so adding columns to it
# later does not trigger SettingWithCopyWarning.
df_final_filter = df_angles[df_angles['Vector_Angle_Filter']].copy()

# Save the final filtered DataFrame to an Excel file
df_final_filter.to_excel(output_file_vector_angles_final_filter, index=False)

# Generate PyMOL script file
pymol_script_commands = []

# Adding a sequential combination number starting from 1.
# assign() returns a new frame, which avoids the SettingWithCopyWarning
# raised when a column is set on a filtered slice.
df_final_filter = df_final_filter.assign(
    Combination_Number=range(1, len(df_final_filter) + 1)
)

# PyMOL names a loaded object after the file's base name without extension,
# and the selection macro is /object/segi/chain/resi. Using the full file
# path there yields an invalid macro, so derive the object name instead.
pymol_object_name = os.path.splitext(os.path.basename(pdb_file))[0]

for index, row in df_final_filter.iterrows():
    combination = row['Combination']
    chain1, res1 = combination[0].get_full_id()[2], combination[0].get_full_id()[3][1]
    chain2, res2 = combination[1].get_full_id()[2], combination[1].get_full_id()[3][1]
    chain3, res3 = combination[2].get_full_id()[2], combination[2].get_full_id()[3][1]

    selection_name = f"obj{row['Combination_Number']:02d}"
    pymol_script_commands.append(f"select {selection_name}, (chain {chain1} and resi {res1}) or (chain {chain2} and resi {res2}) or (chain {chain3} and resi {res3})")
    pymol_script_commands.append(f"create {selection_name}_residue1, /{pymol_object_name}//{chain1}/{res1}")
    pymol_script_commands.append(f"create {selection_name}_residue2, /{pymol_object_name}//{chain2}/{res2}")
    pymol_script_commands.append(f"create {selection_name}_residue3, /{pymol_object_name}//{chain3}/{res3}")

# Save PyMOL commands to file
with open(pymol_script_file, 'w') as f:
    f.write("# PyMOL script for visualizing filtered residue combinations\n\n")
    for command in pymol_script_commands:
        f.write(command + '\n')

print(f"\nPyMOL script saved to {pymol_script_file}")

Residues loaded: (<Residue GLU het=  resseq=1 icode= >, <Residue LEU het=  resseq=5 icode= >, <Residue GLU het=  resseq=6 icode= >)
Residues loaded: (<Residue TRP het=  resseq=2 icode= >, <Residue GLU het=  resseq=6 icode= >, <Residue LEU het=  resseq=5 icode= >)
Residues loaded: (<Residue TRP het=  resseq=2 icode= >, <Residue TRP het=  resseq=2 icode= >, <Residue TRP het=  resseq=2 icode= >)
Residues loaded: (<Residue LEU het=  resseq=5 icode= >, <Residue TRP het=  resseq=2 icode= >, <Residue GLU het=  resseq=6 icode= >)
Residues loaded: (<Residue GLU het=  resseq=6 icode= >, <Residue ALA het=  resseq=10 icode= >, <Residue LYS het=  resseq=8 icode= >)
Residues loaded: (<Residue GLU het=  resseq=6 icode= >, <Residue GLU het=  resseq=1 icode= >, <Residue LEU het=  resseq=5 icode= >)
Residues loaded: (<Residue LYS het=  resseq=8 icode= >, <Residue GLU het=  resseq=6 icode= >, <Residue ALA het=  resseq=10 icode= >)
Residues loaded: (<Residue CYS het=  resseq=9 icode= >, <Residue CYS het= 