In [13]:
import Bio.PDB as PDB
import os
import math

In [5]:
# Map each entry in the PDB/ directory to its relative path, keyed by the part
# of the file name before the first "." (e.g. "6URC.pdb" -> "6URC").
files = {name.split(".")[0]: f"PDB/{name}" for name in os.listdir("PDB")}

In [32]:
# Parse the ATOM records of every file listed in `files` into
# (x, y, z, raw_line) tuples, stored under the same key as in `files`.
coordinates = {}
for i in files:
    with open(files[i], "r") as pdb:
        coords = []
        for line in pdb:
            if line.startswith("ATOM"):
                # PDB is a fixed-width format: x, y and z occupy columns
                # 31-38, 39-46 and 47-54 (1-based), i.e. the 0-based slices
                # [30:38], [38:46] and [46:54]. Starting each slice one
                # character later would cut off the leading sign/digit of
                # full-width values such as "-123.456".
                x = line[30:38].strip()
                y = line[38:46].strip()
                z = line[46:54].strip()
                # Skip records whose coordinate fields are blank
                if x and y and z:
                    # Keep the raw record text alongside the parsed numbers
                    coords.append((float(x), float(y), float(z), line))
        coordinates[i] = coords

In [33]:
def calculate_sasa(coordinates, probe_radius=1.4):
    """Select, for every protein, the atoms that receive a positive proximity score.

    NOTE: despite the name, this is not a sampled-sphere (Shrake-Rupley)
    solvent-accessible-surface computation. For each atom it sums, over every
    other atom closer than ``2 * probe_radius``, the term
    ``(1 - t + t**3) * 4 * pi * probe_radius**2`` with ``t = d / (2 * probe_radius)``.
    Every such term is positive, so the ``> 0`` filter keeps exactly the atoms
    that have at least one other atom within ``2 * probe_radius``.

    Args:
        coordinates: Mapping of protein name to a sequence of atoms. Each atom
            must be indexable with x, y, z at positions 0, 1 and 2; any
            further entries are ignored.
        probe_radius: Probe radius in angstroms (default 1.4, the value that
            was previously hard-coded).

    Returns:
        dict mapping each protein name to a list of ``(x, y, z)`` tuples for
        the selected atoms, in their original order.

    Raises:
        ValueError: If ``probe_radius`` is not strictly positive.
    """
    if probe_radius <= 0:
        raise ValueError("probe_radius must be positive")

    # Atoms further apart than this do not contribute to each other's score
    cutoff = 2 * probe_radius

    def _proximity_score(atoms, atom_idx):
        # Sum the contribution of every other atom closer than the cutoff
        centre = atoms[atom_idx]
        score = 0
        for j, other in enumerate(atoms):
            if j == atom_idx:
                continue
            d = math.sqrt(
                (centre[0] - other[0]) ** 2
                + (centre[1] - other[1]) ** 2
                + (centre[2] - other[2]) ** 2
            )
            if d < cutoff:
                t = d / probe_radius / 2
                score += (1 - t + t ** 3) * 4 * math.pi * probe_radius ** 2
        return score

    sasa_dict = {}
    for key, atoms in coordinates.items():
        # Keep only the Cartesian part of each selected atom
        sasa_dict[key] = [
            (atom[0], atom[1], atom[2])
            for idx, atom in enumerate(atoms)
            if _proximity_score(atoms, idx) > 0
        ]

    return sasa_dict

In [35]:
def simple_calculate_sasa(coordinates, probe_radius=1.4):
    """Return the atoms of one protein that receive a positive proximity score.

    NOTE: despite the name, this is not a sampled-sphere (Shrake-Rupley)
    solvent-accessible-surface computation. For each atom it sums, over every
    other atom closer than ``2 * probe_radius``, the term
    ``(1 - t + t**3) * 4 * pi * probe_radius**2`` with ``t = d / (2 * probe_radius)``.
    Every such term is positive, so the ``> 0`` filter keeps exactly the atoms
    that have at least one other atom within ``2 * probe_radius``.

    Args:
        coordinates: Sequence of atoms for a single protein. Each atom must be
            indexable with x, y, z at positions 0, 1 and 2; any further
            entries (e.g. the raw PDB line) are carried through untouched.
        probe_radius: Probe radius in angstroms (default 1.4, the value that
            was previously hard-coded).

    Returns:
        list of the selected entries of ``coordinates`` (the original objects,
        not copies), in their original order.

    Raises:
        ValueError: If ``probe_radius`` is not strictly positive.
    """
    if probe_radius <= 0:
        raise ValueError("probe_radius must be positive")

    # Atoms further apart than this do not contribute to each other's score
    cutoff = 2 * probe_radius

    def _proximity_score(atom_idx):
        # Sum the contribution of every other atom closer than the cutoff
        centre = coordinates[atom_idx]
        score = 0
        for j, other in enumerate(coordinates):
            if j == atom_idx:
                continue
            d = math.sqrt(
                (centre[0] - other[0]) ** 2
                + (centre[1] - other[1]) ** 2
                + (centre[2] - other[2]) ** 2
            )
            if d < cutoff:
                t = d / probe_radius / 2
                score += (1 - t + t ** 3) * 4 * math.pi * probe_radius ** 2
        return score

    return [
        atom
        for idx, atom in enumerate(coordinates)
        if _proximity_score(idx) > 0
    ]


In [50]:
# Select the atoms of structure 6URC first, so that a failure in the selection
# cannot leave behind a truncated output file.
atoms = simple_calculate_sasa(coordinates['6URC'])

# Mode "w" (not "a"): re-running this cell replaces test.pdb instead of
# appending another copy of every selected atom to it.
with open("test.pdb", "w") as file:
    # Index 3 of each atom tuple holds the original PDB record text
    file.writelines(atom[3] for atom in atoms)

In [29]:
# Display the keys (structure IDs) currently present in `coordinates`
coordinates.keys()

dict_keys(['6URC', '2O8M', '4OVN', '1XCT', '6W39', '2N5S', '1IVO', '1NB7', '4UIP', '2EB3', '4JU7', '6DI0', '5FBO', '5HGI', '3KQU', '4R3P', '4OTF', '5WB7', '1K2P', '4JTW', '5TRJ', '7R60', '6DI1', '1NB4', '4OK3', '3FQQ', '2KAV', '5J9Z', '1P9M', '6S9C', '1ZH1', '4JJS', '5WB8', '4OKS', '5VFI', '3QGF', '4NI9', '2IL6', '5PZM', '4YHF', '4A92', '6NFH', '6S9B', '4TY8', '4GMC', '3KQH', '1RGQ', '3W2P', '4R3R', '6LUD', '6XE4', '4JU1', '6KPC', '3W2O', '2DXS', '3GNV', '6MVO', '1XCQ', '6AUB', '6S8A', '6SHC', '5ZZ4', '4OK5', '5PZK', '4KRO', '7KXP', '4J4L', '6HV0', '2XNI', '6W3A', '5PZN', '7KXO', '3B2V', '6BIK', '3KEE', '6DI9', '6JXT', '3Q0Z', '6W3K', '7KXN', '5TGZ', '3QGD', '1IL6', '5KUP', '7KXM', '4JU4', '6KPF', '7BMK', '3KQL', '4IZ0', '4JVQ', '2ITU', '6MNY', '4OOW', '6NFI', '4NI7', '5TRK', '6X3N', '6O8I', '3KQN', '2P59', '6JWL', '4LI5', '3BEL', '6S9D', '6BKW', '4Z7H', '3FRZ', '2ITZ', '4J08', '3MWV', '2ITV', '4ZLZ', '4NLD', '5Y25', '2XHU', '3QWQ', '5TWN', '3KQK', '3OCS', '5FBN', '4OJQ', '6BKE', '4J0A