In [8]:
#Import libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from scipy.spatial import distance


# 1.- Read the protein data and keep only the alpha-carbon backbone.
# The file is parsed as whitespace-delimited text without a header row, so the
# columns are addressed by position. Column 2 is expected to hold the atom name
# (NOTE(review): confirm against the layout of 1q6c.txt); only rows whose atom
# name is 'CA' are kept.
# `sep=r"\s+"` is the documented replacement for `delim_whitespace=True`, which
# is deprecated since pandas 2.2.
protein = pd.read_csv("1q6c.txt", sep=r"\s+", header=None)
backbone = protein[protein[2].isin(['CA'])]


def _fibonacci_sphere(n):
    """Return an (n, 3) array of n roughly evenly spaced points on the unit sphere."""
    idx = np.arange(n)
    offset = 2.0 / n
    golden_angle = np.pi * (3.0 - np.sqrt(5.0))

    # y takes the midpoints of n equal slices of [-1, 1]; each slice contributes
    # one point on the circle of radius sqrt(1 - y^2) at that height.
    y = ((idx * offset) - 1) + (offset / 2)
    ring_radius = np.sqrt(1 - y ** 2)
    phi = ((idx + 1) % n) * golden_angle

    return np.column_stack((np.cos(phi) * ring_radius, y, np.sin(phi) * ring_radius))


def _euler_curve(coords, start, normal, sweep):
    """Euler characteristic of the sub-level set for every plane position in `sweep`."""
    # The projection of every atom onto the normal does not depend on the plane
    # position, so it is computed once per direction.
    projections = coords.dot(normal)

    # Plane k passes through start + sweep[k] * normal; an atom x lies below it
    # when (x - plane_point) . normal < 0, i.e. x . normal < plane_point . normal.
    plane_points = sweep[:, None] * normal + start
    thresholds = plane_points.dot(normal)
    below = projections[None, :] < thresholds[:, None]  # (len(sweep), n_atoms)

    # Consecutive rows of `coords` are treated as joined by an edge, so
    # points - edges equals the number of maximal runs of consecutive selected
    # atoms. A run starts at atom 0 if it is selected, or wherever a selected
    # atom follows an unselected one.
    run_starts = below[:, 1:] & ~below[:, :-1]
    return below[:, 0] + np.count_nonzero(run_starts, axis=1)


def compute_multiple_ecc(backbone, n, n_steps=100, coord_cols=(6, 7, 8)):
    """Compute Euler characteristic curves (ECC) of a backbone along n directions.

    Steps:
      1. Build an axis-aligned box around the atoms, padded by 1 on every side
         so that no face touches an atom.
      2. Take the sphere centred on the box whose radius is the distance from
         the centre to a box vertex, so the sphere contains the whole box.
      3. Place `n` Fibonacci-sphere points on it. For each point Q and box
         centre P, use v = (P - Q) * 2 as the sweep vector, which spans the
         sphere's diameter.
      4. Slide a plane with normal v from Q to Q + v in `n_steps` evenly spaced
         positions. At each position count the atoms below the plane minus the
         edges between consecutive atoms below the plane.

    Parameters
    ----------
    backbone : pandas.DataFrame
        One row per atom, in chain order. Rows adjacent in the frame are
        treated as connected.
    n : int
        Number of directions (Fibonacci-sphere points). Must be >= 1.
    n_steps : int, optional
        Number of plane positions per direction (default 100).
    coord_cols : sequence, optional
        Labels of the x, y and z columns of `backbone` (default 6, 7, 8).

    Returns
    -------
    numpy.ndarray
        1-D integer array of length ``n * n_steps``: the curves of all
        directions concatenated in direction order.

    Raises
    ------
    ValueError
        If `n` or `n_steps` is not a positive integer, or `backbone` has no rows.
    """
    if not isinstance(n, (int, np.integer)) or n < 1:
        raise ValueError("n must be a positive integer, got {!r}".format(n))
    if not isinstance(n_steps, (int, np.integer)) or n_steps < 1:
        raise ValueError("n_steps must be a positive integer, got {!r}".format(n_steps))

    coords = backbone[list(coord_cols)].to_numpy(dtype=float)
    if coords.shape[0] == 0:
        raise ValueError("backbone contains no atoms")

    # Padded bounding box: +/- 1 keeps the faces away from the atoms.
    upper = coords.max(axis=0) + 1
    lower = coords.min(axis=0) - 1

    # The centre of an axis-aligned box is the midpoint of its extremes; the
    # distance from the centre to any vertex is the enclosing sphere's radius.
    box_center = (upper + lower) / 2.0
    radius = np.linalg.norm(upper - box_center)

    # Starting points on the enclosing sphere and the matching sweep vectors.
    starts = _fibonacci_sphere(n) * radius + box_center
    normals = (box_center - starts) * 2

    sweep = np.linspace(0, 1, n_steps)
    curves = [
        _euler_curve(coords, start, normal, sweep)
        for start, normal in zip(starts, normals)
    ]

    return np.concatenate(curves)


In [12]:
# Compute the concatenated Euler characteristic curves of the alpha-carbon
# backbone for 10 Fibonacci-sphere directions (100 plane positions each, so
# the displayed array has 1000 entries).
compute_multiple_ecc(backbone,10)

array([ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  2,  4,  6,  5,  9,
       11, 10,  7, 11, 10, 10, 16, 16, 16, 17, 14, 18, 20, 18, 15, 16, 12,
       14, 14, 13, 14, 13, 13, 11, 12, 14, 14, 10,  8,  8,  6,  7,  8,  9,
        5,  2,  2,  2,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
        1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  1,  3,  6,  4,  5,  5,  5,  7,  7,  9,  7,  7,  7,  7,  9,
        9, 11, 11, 12,  9, 12, 11,  9, 11, 10, 11, 10, 10, 11, 13, 12, 16,
       12, 15, 12, 14, 12, 16, 17, 15, 13, 11, 10,  8,  4,  4,  4,  3,  2,
        1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
        1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  1,  1,  1,
        2,  3,  3,  5,  7