In [1]:
import numpy as np
import pandas as pd

In [2]:
# load the torsion-angle table from disk and preview its first five rows
df = pd.read_csv("rope-ptp1b.csv")
df.head(n=5)

Unnamed: 0.1,Unnamed: 0,t1:C-N-CA-C,t1:CA-N-C-CA,t1:CB-CA-N-C,t1:CD2-CG-CB-CA,t1:CE-SD-CG-CB,t1:CE1-ND1-CG-CB,t1:CE1-ND1-CG-CD2,t1:CG-CB-CA-C,t1:CG-CD2-NE2-CE1,...,t300:OE1-CD-CG-CB,t300:OE2-CD-CG-CB,t301:C-N-CA-C,t301:CB-CA-N-C,t301:CD-CG-CB-CA,t301:CG-CB-CA-C,t301:N-CA-CB-CG,t301:O-C-CA-CB,t301:O-C-CA-N,Unnamed: 4017
0,PTP1B-y1957_chainA_A,,,,,,,,,,...,,,,,,,,,,
1,PTP1B-y1943_chainA_A,,-160.985,,,58.9103,,,-99.9424,,...,,,,,,,,,,
2,PTP1B-y1938_chainA_A,,-179.781,,,-72.8591,,,64.0251,,...,,,,,,,,,,
3,PTP1B-y1933_chainA_A,,-187.694,,,181.364,,,62.8303,,...,,,,,,,,,,
4,PTP1B-y1922_chainA_A,,,,,,,,,,...,,,,,,,,,,


In [3]:
# keep only the backbone dihedrals (phi, psi and omega); every other torsion is dropped
backbone_torsions = {"C-N-CA-C", "N-CA-C-N", "CA-N-C-CA"}

# torsion labels look like "t<resnum>:<atom quartet>"; compare the quartet exactly so
# that a label which merely *contains* one of the patterns (for example a quartet
# ending in "...-CB") cannot slip through, as it would with a substring test
names = np.array(
    ["Unnamed: 0"]
    + [k for k in df.columns if k.split(":", 1)[-1] in backbone_torsions]
)

# selecting columns already yields a new frame, so the wide source table does not need
# to be copied first; rename without inplace so `df` is never touched and the cell can
# be re-run safely
bb = df[names].rename(columns={"Unnamed: 0": "Name"})

# show the resulting (rows, columns)
bb.shape

(365, 902)

In [4]:
# keep only the torsions of residues inside resnum_bounds (both ends inclusive),
# i.e. labels from "t7:..." up to "t279:..."
resnum_bounds = (7, 279)
first_res, last_res = resnum_bounds


def _residue_number(label):
    """Return the residue number encoded in a "t<resnum>:<atoms>" column label."""
    return int(label.split(":")[0][1:])


# parse each label exactly once; the first column is skipped because it is not a torsion
in_bounds = [
    (label, resnum)
    for label, resnum in ((c, _residue_number(c)) for c in bb.columns[1:])
    if first_res <= resnum <= last_res
]
get_columns = np.array([label for label, _ in in_bounds], dtype=str)

# filtering step: a column belonging to one of the two boundary residues is dropped
# when it contains any NaN; columns of interior residues are always kept for
# reconstruction
filtering = [
    not (resnum in resnum_bounds and np.any(bb[label].isna()))
    for label, resnum in in_bounds
]

# filter the DataFrame according to the mask from above; the explicit bool dtype keeps
# the indexing valid even when no column falls inside the bounds
keep = np.array(filtering, dtype=bool)
bb = bb[[bb.columns[0], *get_columns[keep]]]
bb.shape

(365, 817)

In [5]:
# keep only the rows (chains) in which every column has a value
complete_rows = bb.notna().all(axis="columns")
bb = bb.loc[complete_rows]
bb.shape

(347, 817)

In [6]:
# write the filtered table to disk; the row index is written too (the to_csv default),
# so it shows up as an unnamed leading column when the file is read back
bb.to_csv("filtered-ptp1b.csv", index=True)