In [None]:
#2D_Final
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from Bio.PDB import PDBParser, is_aa
from math import acos, degrees

def calculate_distance(coord1, coord2):
    """Return the Euclidean distance between two coordinate arrays.

    Both arguments must support element-wise subtraction (e.g. NumPy arrays).
    """
    displacement = coord1 - coord2
    return np.linalg.norm(displacement)

def calculate_angle(coord1, coord2, coord3):
    """Return the angle, in degrees, at ``coord2`` between ``coord1`` and ``coord3``."""
    # Arms of the angle, both pointing away from the vertex coord2.
    arm_a = coord1 - coord2
    arm_b = coord3 - coord2
    norm_product = np.linalg.norm(arm_a) * np.linalg.norm(arm_b)
    # Rounding can push the cosine slightly outside [-1, 1]; clamp before acos.
    cos_theta = np.clip(np.dot(arm_a, arm_b) / norm_product, -1.0, 1.0)
    return degrees(acos(cos_theta))

def extract_zn_coordinates(pdb_directory, pdb_entry, metal_chain_id, metal_residue_number,
                           metal_resname='ZN', metal_element='ZN'):
    """Return the coordinates of a metal atom from ``<pdb_directory>/<pdb_entry>.pdb``.

    Only ``structure[0]`` (the first model) is searched.

    Args:
        pdb_directory: Directory containing the PDB files.
        pdb_entry: Entry identifier; the file name is ``f"{pdb_entry}.pdb"``.
        metal_chain_id: Chain identifier the metal residue must belong to.
        metal_residue_number: Value compared against ``residue.id[1]``.
        metal_resname: Residue name to match. Defaults to ``'ZN'``.
        metal_element: Atom element to match. Defaults to ``'ZN'``.

    Returns:
        The ``coord`` of the first matching atom, or ``None`` when the file is
        missing or nothing matches.
    """
    pdb_file = os.path.join(pdb_directory, f"{pdb_entry}.pdb")

    # Check for the file before building a parser so a missing entry exits early.
    if not os.path.exists(pdb_file):
        print(f"PDB file for {pdb_entry} not found in the directory.")
        return None

    structure = PDBParser(QUIET=True).get_structure(pdb_entry, pdb_file)

    for chain in structure[0]:
        if chain.id != metal_chain_id:
            continue
        for residue in chain:
            if residue.id[1] != metal_residue_number or residue.resname != metal_resname:
                continue
            for atom in residue:
                if atom.element == metal_element:
                    return atom.coord
    return None

def extract_coordinates(structure, chain_id, residue_number, atom_name):
    """Look up one atom's coordinates in ``structure[0]``.

    Scans chains whose ``id`` equals ``chain_id`` for a residue whose
    ``id[1]`` equals ``residue_number``, that passes ``is_aa`` and contains
    ``atom_name``. Returns that atom's ``coord``, or ``None`` if no such
    residue/atom is found.
    """
    for chain in structure[0]:
        if chain.id != chain_id:
            continue
        for residue in chain:
            # Number check first so is_aa is only evaluated for candidate residues.
            wanted_residue = residue.id[1] == residue_number and is_aa(residue)
            if wanted_residue and atom_name in residue:
                return residue[atom_name].coord
    return None

def _find_metal_coord(structure, chain_id, residue_number, resname, element):
    """Return the coord of the first matching metal atom in ``structure[0]``, or None."""
    for chain in structure[0]:
        if chain.id != chain_id:
            continue
        for residue in chain:
            if residue.id[1] != residue_number or residue.resname != resname:
                continue
            for atom in residue:
                if atom.element == element:
                    return atom.coord
    return None


def process_pdb_file(pdb_directory, pdb_entry, metal_chain_id, metal_residue_number, residues_info,
                     metal_resname='ZN', metal_element='ZN'):
    """Measure CA/CB-to-metal distances and CA-metal-CB angles for one entry.

    Args:
        pdb_directory: Directory containing the PDB files.
        pdb_entry: Entry identifier; the file name is ``f"{pdb_entry}.pdb"``.
        metal_chain_id: Chain identifier of the metal residue.
        metal_residue_number: Value compared against the metal's ``residue.id[1]``.
        residues_info: Iterable of dicts with ``'chain_id'`` and
            ``'residue_number'`` keys, one per coordinating residue.
        metal_resname: Residue name of the metal. Defaults to ``'ZN'``.
        metal_element: Element of the metal atom. Defaults to ``'ZN'``.

    Returns:
        A dict with ``'Entry ID'`` plus three CA distances, three CB distances
        and three angles, or ``None`` when the file or metal is missing or when
        fewer/more than three residues yielded both a CA and a CB atom.
    """
    pdb_file = os.path.join(pdb_directory, f"{pdb_entry}.pdb")

    if not os.path.exists(pdb_file):
        print(f"PDB file for {pdb_entry} not found in the directory.")
        return None

    structure = PDBParser(QUIET=True).get_structure(pdb_entry, pdb_file)

    # Search the structure parsed above instead of re-reading and re-parsing
    # the same file just to locate the metal.
    zn_coords = _find_metal_coord(structure, metal_chain_id, metal_residue_number,
                                  metal_resname, metal_element)
    if zn_coords is None:
        print(f"{metal_element.capitalize()} atom not found in {pdb_entry}.")
        return None

    ca_zn_distances = []
    cb_zn_distances = []
    ca_zn_cb_angles = []

    for residue_info in residues_info:
        chain_id = residue_info['chain_id']
        residue_number = residue_info['residue_number']
        ca_coords = extract_coordinates(structure, chain_id, residue_number, 'CA')
        cb_coords = extract_coordinates(structure, chain_id, residue_number, 'CB')

        if ca_coords is None or cb_coords is None:
            print(f"CA or CB atom not found for residue {residue_number} in chain {chain_id} of {pdb_entry}.")
            continue

        ca_zn_distances.append(calculate_distance(ca_coords, zn_coords))
        cb_zn_distances.append(calculate_distance(cb_coords, zn_coords))
        # Angle with the metal as the middle (vertex) argument.
        ca_zn_cb_angles.append(calculate_angle(ca_coords, zn_coords, cb_coords))

    # The three lists grow together, so one length check covers all of them.
    if len(ca_zn_distances) != 3:
        return None

    return {
        'Entry ID': pdb_entry,
        'Calpha_Zn_Dist1': ca_zn_distances[0],
        'Calpha_Zn_Dist2': ca_zn_distances[1],
        'Calpha_Zn_Dist3': ca_zn_distances[2],
        'Cbeta_Zn_Dist1': cb_zn_distances[0],
        'Cbeta_Zn_Dist2': cb_zn_distances[1],
        'Cbeta_Zn_Dist3': cb_zn_distances[2],
        'CA-Zn-CB Angle_1': ca_zn_cb_angles[0],
        'CA-Zn-CB Angle_2': ca_zn_cb_angles[1],
        'CA-Zn-CB Angle_3': ca_zn_cb_angles[2]
    }

def calculate_and_trim_probability_map(df, calpha_cols, cbeta_cols, calpha_bins, cbeta_bins, npy_file='prob_map.npy', excel_file='precomputed_prob_map.xlsx'):
    """Build a normalized 2D histogram of paired distances and save it.

    Args:
        df: DataFrame holding the distance columns.
        calpha_cols: Column names whose values are pooled for the first axis.
        cbeta_cols: Column names whose values are pooled for the second axis;
            pooled in the same order so values pair up positionally.
        calpha_bins: Bin edges for the first axis.
        cbeta_bins: Bin edges for the second axis.
        npy_file: Path the raw probability array is saved to with ``np.save``.
        excel_file: Path the labelled table is written to with ``to_excel``.

    Returns:
        DataFrame whose first column ``'Unnamed: 0'`` holds the lower edge of
        each first-axis bin and whose remaining columns are labelled with the
        lower edge of each second-axis bin.
    """
    # Concatenate all Calpha and Cbeta distances into single series
    all_calpha_distances = pd.concat([df[col] for col in calpha_cols])
    all_cbeta_distances = pd.concat([df[col] for col in cbeta_cols])

    # Create a 2D histogram to count occurrences in each bin
    hist, calpha_bins_edges, cbeta_bins_edges = np.histogram2d(
        all_calpha_distances, all_cbeta_distances, bins=[calpha_bins, cbeta_bins]
    )

    # Normalize to probabilities. If nothing fell inside the bins, dividing by
    # the zero total would fill the map with NaN, so keep an all-zero map.
    total = hist.sum()
    if total > 0:
        prob_map = hist / total
    else:
        print("Warning: no distance pairs fell inside the bins; probability map is all zeros.")
        prob_map = np.zeros_like(hist)

    # Label rows/columns with the lower edge of each bin; empty bins are kept.
    prob_map_df = pd.DataFrame(prob_map, index=calpha_bins_edges[:-1], columns=cbeta_bins_edges[:-1])

    # Move the row labels into a leading column named 'Unnamed: 0'.
    formatted_prob_map_df = prob_map_df.reset_index()
    formatted_prob_map_df.columns = ['Unnamed: 0'] + list(formatted_prob_map_df.columns[1:])

    # Save the trimmed probability map to an Excel file
    formatted_prob_map_df.to_excel(excel_file, index=False)
    print(f"Trimmed probability map saved to {excel_file}")

    # Save the probability map as a .npy file for later use
    np.save(npy_file, prob_map)
    print(f"Probability map saved as {npy_file}")

    return formatted_prob_map_df

def main():
    """Measure metal-site geometry for every spreadsheet row and build the 2D map.

    Reads the site list from Excel, runs ``process_pdb_file`` on each row,
    writes the per-entry distances and angles to Excel, then bins the CA/CB
    distances into a probability map via ``calculate_and_trim_probability_map``.
    """
    pdb_directory = 'L:/Zn-installer_rawdata/241020_Zn_Final/'  # Update this path to the directory where PDB files are stored
    excel_file_path = 'D:/241113_Metal_Intaller_Data_Final/ZN/2His_1Glu/2His_1Glu.xlsx'

    # Load the Excel sheet data
    sheet_data = pd.read_excel(excel_file_path, sheet_name='Sheet1')

    all_results = []

    # Only the row contents are needed, not the row index.
    for _, row in sheet_data.iterrows():
        residues_info = [
            {'chain_id': row[f'Chain ID{slot}'], 'residue_number': row[f'Residue_number{slot}']}
            for slot in (1, 2, 3)
        ]

        # Process each PDB file
        result = process_pdb_file(
            pdb_directory,
            row['Entry ID'],
            row['Metal Chain ID'],
            row['Metal Residue number'],
            residues_info,
        )
        if result is not None:
            all_results.append(result)

    # Convert results to DataFrame and save
    results_df = pd.DataFrame(all_results)
    print(results_df)
    results_df.to_excel('D:/241113_Metal_Intaller_Data_Final/ZN/2His_1Glu/2His_1Glu_distance_angle.xlsx', index=False)

    # With no successful entries the frame has no columns, so selecting the
    # distance columns below would raise KeyError; stop with a clear message.
    if results_df.empty:
        print("No entries produced a complete set of measurements; skipping the probability map.")
        return

    # Calculate and save the probability map
    calpha_cols = ['Calpha_Zn_Dist1', 'Calpha_Zn_Dist2', 'Calpha_Zn_Dist3']
    cbeta_cols = ['Cbeta_Zn_Dist1', 'Cbeta_Zn_Dist2', 'Cbeta_Zn_Dist3']
    calpha_bins = np.arange(2.9, 7.6, 0.2).tolist()
    cbeta_bins = np.arange(3.1, 6.3, 0.2).tolist()
    npy_file = 'D:/241113_Metal_Intaller_Data_Final/ZN/2His_1Glu/2His_1Glu_distance_distance_angle_0.2.npy'
    excel_file = 'D:/241113_Metal_Intaller_Data_Final/ZN/2His_1Glu/2His_1Glu_distance_distance_angle_0.2.xlsx'
    calculate_and_trim_probability_map(results_df, calpha_cols, cbeta_cols, calpha_bins, cbeta_bins, npy_file, excel_file)


# Run the pipeline only when executed as a script, not when imported.
if __name__ == "__main__":
    main()

In [None]:
#2D_Final
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from Bio.PDB import PDBParser, is_aa
from math import acos, degrees

def calculate_distance(coord1, coord2):
    """Return the Euclidean distance between two coordinate arrays.

    Both arguments must support element-wise subtraction (e.g. NumPy arrays).
    """
    displacement = coord1 - coord2
    return np.linalg.norm(displacement)

def calculate_angle(coord1, coord2, coord3):
    """Return the angle, in degrees, at ``coord2`` between ``coord1`` and ``coord3``."""
    # Arms of the angle, both pointing away from the vertex coord2.
    arm_a = coord1 - coord2
    arm_b = coord3 - coord2
    norm_product = np.linalg.norm(arm_a) * np.linalg.norm(arm_b)
    # Rounding can push the cosine slightly outside [-1, 1]; clamp before acos.
    cos_theta = np.clip(np.dot(arm_a, arm_b) / norm_product, -1.0, 1.0)
    return degrees(acos(cos_theta))

def extract_zn_coordinates(pdb_directory, pdb_entry, metal_chain_id, metal_residue_number,
                           metal_resname='ZN', metal_element='ZN'):
    """Return the coordinates of a metal atom from ``<pdb_directory>/<pdb_entry>.pdb``.

    Only ``structure[0]`` (the first model) is searched.

    Args:
        pdb_directory: Directory containing the PDB files.
        pdb_entry: Entry identifier; the file name is ``f"{pdb_entry}.pdb"``.
        metal_chain_id: Chain identifier the metal residue must belong to.
        metal_residue_number: Value compared against ``residue.id[1]``.
        metal_resname: Residue name to match. Defaults to ``'ZN'``; pass the
            appropriate name when the structures hold a different metal.
        metal_element: Atom element to match. Defaults to ``'ZN'``.

    Returns:
        The ``coord`` of the first matching atom, or ``None`` when the file is
        missing or nothing matches.
    """
    pdb_file = os.path.join(pdb_directory, f"{pdb_entry}.pdb")

    # Check for the file before building a parser so a missing entry exits early.
    if not os.path.exists(pdb_file):
        print(f"PDB file for {pdb_entry} not found in the directory.")
        return None

    structure = PDBParser(QUIET=True).get_structure(pdb_entry, pdb_file)

    for chain in structure[0]:
        if chain.id != metal_chain_id:
            continue
        for residue in chain:
            if residue.id[1] != metal_residue_number or residue.resname != metal_resname:
                continue
            for atom in residue:
                if atom.element == metal_element:
                    return atom.coord
    return None

def extract_coordinates(structure, chain_id, residue_number, atom_name):
    """Look up one atom's coordinates in ``structure[0]``.

    Scans chains whose ``id`` equals ``chain_id`` for a residue whose
    ``id[1]`` equals ``residue_number``, that passes ``is_aa`` and contains
    ``atom_name``. Returns that atom's ``coord``, or ``None`` if no such
    residue/atom is found.
    """
    for chain in structure[0]:
        if chain.id != chain_id:
            continue
        for residue in chain:
            # Number check first so is_aa is only evaluated for candidate residues.
            wanted_residue = residue.id[1] == residue_number and is_aa(residue)
            if wanted_residue and atom_name in residue:
                return residue[atom_name].coord
    return None

def _find_metal_coord(structure, chain_id, residue_number, resname, element):
    """Return the coord of the first matching metal atom in ``structure[0]``, or None."""
    for chain in structure[0]:
        if chain.id != chain_id:
            continue
        for residue in chain:
            if residue.id[1] != residue_number or residue.resname != resname:
                continue
            for atom in residue:
                if atom.element == element:
                    return atom.coord
    return None


def process_pdb_file(pdb_directory, pdb_entry, metal_chain_id, metal_residue_number, residues_info,
                     metal_resname='ZN', metal_element='ZN'):
    """Measure CA/CB-to-metal distances and CA-metal-CB angles for one entry.

    Args:
        pdb_directory: Directory containing the PDB files.
        pdb_entry: Entry identifier; the file name is ``f"{pdb_entry}.pdb"``.
        metal_chain_id: Chain identifier of the metal residue.
        metal_residue_number: Value compared against the metal's ``residue.id[1]``.
        residues_info: Iterable of dicts with ``'chain_id'`` and
            ``'residue_number'`` keys, one per coordinating residue.
        metal_resname: Residue name of the metal. Defaults to ``'ZN'``.
        metal_element: Element of the metal atom. Defaults to ``'ZN'``.

    Returns:
        A dict with ``'Entry ID'`` plus three CA distances, three CB distances
        and three angles, or ``None`` when the file or metal is missing or when
        fewer/more than three residues yielded both a CA and a CB atom.
    """
    pdb_file = os.path.join(pdb_directory, f"{pdb_entry}.pdb")

    if not os.path.exists(pdb_file):
        print(f"PDB file for {pdb_entry} not found in the directory.")
        return None

    structure = PDBParser(QUIET=True).get_structure(pdb_entry, pdb_file)

    # Search the structure parsed above instead of re-reading and re-parsing
    # the same file just to locate the metal.
    zn_coords = _find_metal_coord(structure, metal_chain_id, metal_residue_number,
                                  metal_resname, metal_element)
    if zn_coords is None:
        print(f"{metal_element.capitalize()} atom not found in {pdb_entry}.")
        return None

    ca_zn_distances = []
    cb_zn_distances = []
    ca_zn_cb_angles = []

    for residue_info in residues_info:
        chain_id = residue_info['chain_id']
        residue_number = residue_info['residue_number']
        ca_coords = extract_coordinates(structure, chain_id, residue_number, 'CA')
        cb_coords = extract_coordinates(structure, chain_id, residue_number, 'CB')

        if ca_coords is None or cb_coords is None:
            print(f"CA or CB atom not found for residue {residue_number} in chain {chain_id} of {pdb_entry}.")
            continue

        ca_zn_distances.append(calculate_distance(ca_coords, zn_coords))
        cb_zn_distances.append(calculate_distance(cb_coords, zn_coords))
        # Angle with the metal as the middle (vertex) argument.
        ca_zn_cb_angles.append(calculate_angle(ca_coords, zn_coords, cb_coords))

    # The three lists grow together, so one length check covers all of them.
    if len(ca_zn_distances) != 3:
        return None

    return {
        'Entry ID': pdb_entry,
        'Calpha_Zn_Dist1': ca_zn_distances[0],
        'Calpha_Zn_Dist2': ca_zn_distances[1],
        'Calpha_Zn_Dist3': ca_zn_distances[2],
        'Cbeta_Zn_Dist1': cb_zn_distances[0],
        'Cbeta_Zn_Dist2': cb_zn_distances[1],
        'Cbeta_Zn_Dist3': cb_zn_distances[2],
        'CA-Zn-CB Angle_1': ca_zn_cb_angles[0],
        'CA-Zn-CB Angle_2': ca_zn_cb_angles[1],
        'CA-Zn-CB Angle_3': ca_zn_cb_angles[2]
    }

def calculate_and_trim_probability_map(df, calpha_cols, cbeta_cols, calpha_bins, cbeta_bins, npy_file='prob_map.npy', excel_file='precomputed_prob_map.xlsx'):
    """Build a normalized 2D histogram of paired distances and save it.

    Args:
        df: DataFrame holding the distance columns.
        calpha_cols: Column names whose values are pooled for the first axis.
        cbeta_cols: Column names whose values are pooled for the second axis;
            pooled in the same order so values pair up positionally.
        calpha_bins: Bin edges for the first axis.
        cbeta_bins: Bin edges for the second axis.
        npy_file: Path the raw probability array is saved to with ``np.save``.
        excel_file: Path the labelled table is written to with ``to_excel``.

    Returns:
        DataFrame whose first column ``'Unnamed: 0'`` holds the lower edge of
        each first-axis bin and whose remaining columns are labelled with the
        lower edge of each second-axis bin.
    """
    # Concatenate all Calpha and Cbeta distances into single series
    all_calpha_distances = pd.concat([df[col] for col in calpha_cols])
    all_cbeta_distances = pd.concat([df[col] for col in cbeta_cols])

    # Create a 2D histogram to count occurrences in each bin
    hist, calpha_bins_edges, cbeta_bins_edges = np.histogram2d(
        all_calpha_distances, all_cbeta_distances, bins=[calpha_bins, cbeta_bins]
    )

    # Normalize to probabilities. If nothing fell inside the bins, dividing by
    # the zero total would fill the map with NaN, so keep an all-zero map.
    total = hist.sum()
    if total > 0:
        prob_map = hist / total
    else:
        print("Warning: no distance pairs fell inside the bins; probability map is all zeros.")
        prob_map = np.zeros_like(hist)

    # Label rows/columns with the lower edge of each bin; empty bins are kept.
    prob_map_df = pd.DataFrame(prob_map, index=calpha_bins_edges[:-1], columns=cbeta_bins_edges[:-1])

    # Move the row labels into a leading column named 'Unnamed: 0'.
    formatted_prob_map_df = prob_map_df.reset_index()
    formatted_prob_map_df.columns = ['Unnamed: 0'] + list(formatted_prob_map_df.columns[1:])

    # Save the trimmed probability map to an Excel file
    formatted_prob_map_df.to_excel(excel_file, index=False)
    print(f"Trimmed probability map saved to {excel_file}")

    # Save the probability map as a .npy file for later use
    np.save(npy_file, prob_map)
    print(f"Probability map saved as {npy_file}")

    return formatted_prob_map_df

def main():
    """Measure metal-site geometry for every spreadsheet row and build the 2D map.

    Reads the site list from Excel, runs ``process_pdb_file`` on each row,
    writes the per-entry distances and angles to Excel, then bins the CA/CB
    distances into a probability map via ``calculate_and_trim_probability_map``.
    """
    # NOTE(review): these paths point at Cu data, while the metal lookup used
    # by process_pdb_file matches residue name/element 'ZN' only — confirm the
    # structure files label the metal accordingly, otherwise every entry is
    # reported as "Zn atom not found".
    pdb_directory = 'L:/Zn-installer_rawdata/241111_Cu_Final/Final_3'  # Update this path to the directory where PDB files are stored
    excel_file_path = 'D:/241113_Metal_Intaller_Data_Final/CU/3His/3His_Final.xlsx'

    # Load the Excel sheet data
    sheet_data = pd.read_excel(excel_file_path, sheet_name='Sheet1')

    all_results = []

    # Only the row contents are needed, not the row index.
    for _, row in sheet_data.iterrows():
        residues_info = [
            {'chain_id': row[f'Chain ID{slot}'], 'residue_number': row[f'Residue_number{slot}']}
            for slot in (1, 2, 3)
        ]

        # Process each PDB file
        result = process_pdb_file(
            pdb_directory,
            row['Entry ID'],
            row['Metal Chain ID'],
            row['Metal Residue number'],
            residues_info,
        )
        if result is not None:
            all_results.append(result)

    # Convert results to DataFrame and save
    results_df = pd.DataFrame(all_results)
    print(results_df)
    results_df.to_excel('D:/241113_Metal_Intaller_Data_Final/CU/3His/3His_distance_angle.xlsx', index=False)

    # With no successful entries the frame has no columns, so selecting the
    # distance columns below would raise KeyError; stop with a clear message.
    if results_df.empty:
        print("No entries produced a complete set of measurements; skipping the probability map.")
        return

    # Calculate and save the probability map
    calpha_cols = ['Calpha_Zn_Dist1', 'Calpha_Zn_Dist2', 'Calpha_Zn_Dist3']
    cbeta_cols = ['Cbeta_Zn_Dist1', 'Cbeta_Zn_Dist2', 'Cbeta_Zn_Dist3']
    calpha_bins = np.arange(3.0, 6.7, 0.2).tolist()
    cbeta_bins = np.arange(2.9, 5.9, 0.2).tolist()
    npy_file = 'D:/241113_Metal_Intaller_Data_Final/CU/3His/3His_distance_distance_angle_0.2.npy'
    excel_file = 'D:/241113_Metal_Intaller_Data_Final/CU/3His/3His_distance_distance_angle_0.2.xlsx'
    calculate_and_trim_probability_map(results_df, calpha_cols, cbeta_cols, calpha_bins, cbeta_bins, npy_file, excel_file)


# Run the pipeline only when executed as a script, not when imported.
if __name__ == "__main__":
    main()


In [None]:
#3D_Final

import os
import numpy as np
import pandas as pd
from Bio.PDB import PDBParser, is_aa
from math import acos, degrees

# Helper functions to calculate distances and angles
def calculate_distance(coord1, coord2):
    """Return the Euclidean distance between two coordinate arrays.

    Both arguments must support element-wise subtraction (e.g. NumPy arrays).
    """
    displacement = coord1 - coord2
    return np.linalg.norm(displacement)

def calculate_angle(coord1, coord2, coord3):
    """Return the angle, in degrees, at ``coord2`` between ``coord1`` and ``coord3``."""
    # Arms of the angle, both pointing away from the vertex coord2.
    arm_a = coord1 - coord2
    arm_b = coord3 - coord2
    norm_product = np.linalg.norm(arm_a) * np.linalg.norm(arm_b)
    # Rounding can push the cosine slightly outside [-1, 1]; clamp before acos.
    cos_theta = np.clip(np.dot(arm_a, arm_b) / norm_product, -1.0, 1.0)
    return degrees(acos(cos_theta))

# Load Zn coordinates
def extract_zn_coordinates(pdb_directory, pdb_entry, metal_chain_id, metal_residue_number,
                           metal_resname='ZN', metal_element='ZN'):
    """Return the coordinates of a metal atom from ``<pdb_directory>/<pdb_entry>.pdb``.

    Only ``structure[0]`` (the first model) is searched.

    Args:
        pdb_directory: Directory containing the PDB files.
        pdb_entry: Entry identifier; the file name is ``f"{pdb_entry}.pdb"``.
        metal_chain_id: Chain identifier the metal residue must belong to.
        metal_residue_number: Value compared against ``residue.id[1]``.
        metal_resname: Residue name to match. Defaults to ``'ZN'``.
        metal_element: Atom element to match. Defaults to ``'ZN'``.

    Returns:
        The ``coord`` of the first matching atom, or ``None`` when the file is
        missing or nothing matches.
    """
    pdb_file = os.path.join(pdb_directory, f"{pdb_entry}.pdb")

    # Check for the file before building a parser so a missing entry exits early.
    if not os.path.exists(pdb_file):
        print(f"PDB file for {pdb_entry} not found.")
        return None

    structure = PDBParser(QUIET=True).get_structure(pdb_entry, pdb_file)

    for chain in structure[0]:
        if chain.id != metal_chain_id:
            continue
        for residue in chain:
            if residue.id[1] != metal_residue_number or residue.resname != metal_resname:
                continue
            for atom in residue:
                if atom.element == metal_element:
                    return atom.coord
    return None

# Load atom coordinates
def extract_coordinates(structure, chain_id, residue_number, atom_name):
    """Look up one atom's coordinates in ``structure[0]``.

    Scans chains whose ``id`` equals ``chain_id`` for a residue whose
    ``id[1]`` equals ``residue_number``, that passes ``is_aa`` and contains
    ``atom_name``. Returns that atom's ``coord``, or ``None`` if no such
    residue/atom is found.
    """
    for chain in structure[0]:
        if chain.id != chain_id:
            continue
        for residue in chain:
            # Number check first so is_aa is only evaluated for candidate residues.
            wanted_residue = residue.id[1] == residue_number and is_aa(residue)
            if wanted_residue and atom_name in residue:
                return residue[atom_name].coord
    return None

# Function to create the 3D density map
def calculate_3d_density_map_from_data(df, calpha_cols, cbeta_cols, angle_cols, calpha_bins, cbeta_bins, angle_bins, npy_file='3d_prob_map.npy', excel_file='3d_precomputed_prob_map.xlsx'):
    """Build a normalized 3D histogram of (distance, distance, angle) and save it.

    Args:
        df: DataFrame holding the measurement columns.
        calpha_cols: Column names pooled for the first axis.
        cbeta_cols: Column names pooled for the second axis.
        angle_cols: Column names pooled for the third axis. The three lists
            are pooled in order so values pair up positionally.
        calpha_bins: Bin edges for the first axis.
        cbeta_bins: Bin edges for the second axis.
        angle_bins: Bin edges for the third axis.
        npy_file: Path the 3D probability array is saved to with ``np.save``.
        excel_file: Path the long-format table is written to with ``to_excel``.

    Returns:
        Long-format DataFrame with one row per bin: the three bin centers
        (``'Calpha_Zn_Dist'``, ``'Cbeta_Zn_Dist'``, ``'CA-Zn-CB_Angle'``) and
        ``'Probability'``. Returns ``None`` if the pooled series differ in length.
    """
    # Concatenate all CA-Zn distances, CB-Zn distances, and angles
    all_calpha_distances = pd.concat([df[col] for col in calpha_cols], ignore_index=True)
    all_cbeta_distances = pd.concat([df[col] for col in cbeta_cols], ignore_index=True)
    all_angles = pd.concat([df[col] for col in angle_cols], ignore_index=True)

    # Samples are paired by position, so the three series must line up.
    if not (len(all_calpha_distances) == len(all_cbeta_distances) == len(all_angles)):
        print("Data series lengths for distances and angles do not match.")
        return None

    # Create a 3D histogram
    hist, edges = np.histogramdd(
        (all_calpha_distances, all_cbeta_distances, all_angles),
        bins=[calpha_bins, cbeta_bins, angle_bins]
    )

    # Normalize to probabilities. If nothing fell inside the bins, dividing by
    # the zero total would fill the map with NaN, so keep an all-zero map.
    total = hist.sum()
    if total > 0:
        prob_map = hist / total
    else:
        print("Warning: no samples fell inside the bins; probability map is all zeros.")
        prob_map = np.zeros_like(hist)

    # Bin centers per axis; their product order matches the C-order flatten below.
    calpha_centers, cbeta_centers, angle_centers = (
        0.5 * (axis_edges[:-1] + axis_edges[1:]) for axis_edges in edges
    )
    index = pd.MultiIndex.from_product(
        [calpha_centers, cbeta_centers, angle_centers],
        names=['Calpha_Zn_Dist', 'Cbeta_Zn_Dist', 'CA-Zn-CB_Angle']
    )
    prob_map_df = pd.DataFrame(prob_map.flatten(), index=index, columns=['Probability']).reset_index()

    # Save the DataFrame to an Excel file
    prob_map_df.to_excel(excel_file, index=False)
    print(f"3D probability map saved to {excel_file}")

    # Save the 3D probability map as a .npy file for further use
    np.save(npy_file, prob_map)
    print(f"3D probability map saved as {npy_file}")

    return prob_map_df

# Define bin edges for the two distance axes and the angle axis.
# NOTE(review): np.arange with a non-integer step can gain or lose the final
# edge through floating-point rounding; verify the edge counts if the exact
# map shape matters.
calpha_bins = np.arange(3.0, 6.7, 0.2).tolist()
cbeta_bins = np.arange(2.9, 5.9, 0.2).tolist()
# NOTE(review): an earlier comment here said "0–30 with 5° increments", which
# does not match the code below — confirm the intended range and step.
angle_bins = np.arange(0, 29, 0.5).tolist()  # edges from 0 to 28.5 in steps of 0.5

# Load calculated distances and angles from the Excel file
calculated_data_path = 'D:/241113_Metal_Intaller_Data_Final/CU/3His/3His_distance_angle.xlsx'  # Update this path as needed
calculated_df = pd.read_excel(calculated_data_path)

# Columns with calculated data (three columns are pooled per histogram axis)
calpha_cols = ['Calpha_Zn_Dist1', 'Calpha_Zn_Dist2', 'Calpha_Zn_Dist3']
cbeta_cols = ['Cbeta_Zn_Dist1', 'Cbeta_Zn_Dist2', 'Cbeta_Zn_Dist3']
angle_cols = ['CA-Zn-CB Angle_1', 'CA-Zn-CB Angle_2', 'CA-Zn-CB Angle_3']

# Output files for the probability map
npy_file = 'D:/241113_Metal_Intaller_Data_Final/CU/3His/3His_3d_prob_map_adjusted.npy'
excel_file = 'D:/241113_Metal_Intaller_Data_Final/CU/3His/3His_0.2_distance_angle_0.5.xlsx'

# Generate and save the 3D probability map (runs at import/cell execution time)
calculate_3d_density_map_from_data(
    calculated_df, calpha_cols, cbeta_cols, angle_cols,
    calpha_bins, cbeta_bins, angle_bins,
    npy_file=npy_file,
    excel_file=excel_file
)
