In [1]:
from Bio.PDB import PDBParser
from src.ChainUnit import ChainUnit
import os
import pandas as pd
import numpy as np

# Global NumPy display settings for this notebook: every float is rendered
# with three fixed decimals, and rows may be up to 100 characters wide.
np.set_printoptions(
    formatter={"float": "{0:0.3f}".format},
    linewidth=100,
    suppress=True,
)

In [2]:
# Identifier of the entry to load. It names both the input file and the
# parsed structure, so it is defined once to keep the two from drifting apart.
PDB_ID = "3UDG"
pdb_path = "data/processed_pdbs/{}.pdb".format(PDB_ID)

# Parse the PDB file into a Bio.PDB structure object used by the cells below.
parser = PDBParser()
structure = parser.get_structure(PDB_ID, pdb_path)



In [3]:
# Select chain "A" from model 0 of the parsed structure.
chain = structure[0]["A"]

# Wrap the chain's first residue in a ChainUnit and print its `coms` mapping.
# The built-in next() pulls the first item from the residue iterator; calling
# the `__next__` dunder directly is equivalent but non-idiomatic.
res = next(chain.get_residues())
unit = ChainUnit(res)
print(unit.coms)

{'ca_coords': [-2.455 1.216 -13.840], 'sidechain_com': [-3.823 3.243 -9.949]}


In [4]:
# Inspect the `fidelity` attribute of the ChainUnit built from the first residue.
# NOTE(review): what `fidelity` checks is defined in src/ChainUnit, not visible here.
unit.fidelity

True

In [5]:
# Show the unit's `coms` mapping as the cell result; per the recorded output it
# holds the entries 'ca_coords' and 'sidechain_com'.
unit.coms

{'ca_coords': [-2.455 1.216 -13.840], 'sidechain_com': [-3.823 3.243 -9.949]}

In [6]:
# Convert the unit to a plain dict; per the recorded output it carries
# 'resname', 'id', 'unit_type' plus the two entries from `coms`.
unit.to_dict()

{'resname': 'ARG',
 'id': 3,
 'unit_type': 'type_2',
 'ca_coords': [-2.455 1.216 -13.840],
 'sidechain_com': [-3.823 3.243 -9.949]}

In [7]:
from src.Chain import Chain

In [8]:
# Wrap the Bio.PDB chain selected earlier (chain "A") in the project's Chain
# class and preview the first rows of its `df` table.
# NOTE(review): this cell depends on the notebook-level name `chain`; any later
# cell that rebinds `chain` changes what a re-run of this cell produces.
chain1 = Chain(chain)
print(chain1.df.head())

  resname  id unit_type                ca_coords           sidechain_com
0     ARG   3    type_2   [-2.455 1.216 -13.840]   [-3.823 3.243 -9.949]
1     GLY   4    type_1  [-0.123 -0.589 -11.553]                    None
2     MET   5    type_2   [-0.004 -2.524 -8.344]  [-2.114 -4.600 -6.635]
3     ASN   6    type_1    [3.231 -3.630 -6.904]                    None
4     HIS   7    type_2    [3.802 -5.462 -3.595]   [1.020 -4.071 -2.956]


In [9]:
# Display the chain's "bb" distances (a NumPy array in the recorded output).
# NOTE(review): presumably distances between consecutive backbone (CA) positions,
# given the ~3.8 values in the output — confirm against src/Chain.
chain1.get_bb_distances()

array([3.732, 3.749, 3.710, 3.825, 3.726, 3.749, 3.701, 3.795, 3.767, 3.828, 3.782, 3.801, 3.819,
       3.816, 3.765, 3.815, 3.796, 3.816, 3.900, 3.817, 3.879, 3.812, 3.843, 3.860, 3.838, 3.845,
       3.752, 3.792, 3.758, 3.741, 3.748, 3.763, 3.688, 3.770, 3.776, 3.798, 3.841, 3.808, 3.827,
       4.168, 3.832, 3.805, 3.811, 3.850, 3.815, 3.833, 3.799, 3.758, 3.751, 3.802, 3.810, 3.823,
       3.806, 3.762, 3.824, 3.932, 3.774, 3.785, 3.817, 3.844, 3.802, 3.829, 3.841, 3.824, 3.790,
       3.808, 3.747, 3.840, 3.780, 3.806, 3.763, 3.687, 3.772, 3.768, 3.793, 3.753, 3.821, 3.830,
       3.862, 3.788, 3.883, 3.845, 3.792, 3.944, 3.814, 3.786, 3.842, 3.810, 3.766, 3.821, 3.790,
       3.769, 3.830, 3.838, 3.798, 3.826, 3.810, 3.751, 3.800, 3.811, 3.762, 3.860, 3.756, 3.839,
       3.866, 3.706, 3.804, 3.748, 3.785, 3.850, 3.858, 3.845, 3.808, 3.844, 3.811, 3.761, 3.736,
       3.805, 3.784, 3.793, 3.793, 3.802, 3.718, 3.732, 3.791, 3.779, 3.734, 3.748, 3.771, 3.805,
       3.797, 3.851,

In [10]:
# Fetch the chain's "bs" distance table.
# NOTE(review): presumably backbone-to-sidechain distances — confirm against src/Chain.
df_bs = chain1.get_bs_distances()
# Cast the "distance" column to float64 before summarising it. This assigns
# back into `df_bs` itself, so the frame returned above is modified in place.
# NOTE(review): presumably the column arrives with a non-float dtype — confirm.
df_bs["distance"] = df_bs["distance"].astype(np.float64)
# Summary statistics (count, mean, std, quartiles) of the distances.
df_bs["distance"].describe()

count    86.000000
mean      3.518401
std       0.649517
min       2.054712
25%       3.169668
50%       3.449373
75%       3.880037
max       4.804745
Name: distance, dtype: float64

In [11]:
# Compute and display the chain's "bbb" angles.
# NOTE(review): despite the `df_` prefix, the recorded output is a NumPy array,
# not a DataFrame.
df_bbb = chain1.get_bbb_angles()
df_bbb

array([142.562, 120.917, 127.499, 135.007, 125.746, 121.101, 134.010, 109.975, 144.909, 129.664,
       100.425, 87.347, 139.590, 117.843, 121.655, 131.774, 116.764, 140.495, 132.088, 126.554,
       88.865, 86.893, 93.030, 112.675, 117.244, 121.553, 114.431, 132.179, 137.423, 109.369,
       114.674, 137.924, 151.677, 125.249, 124.177, 95.362, 105.497, 99.583, 68.763, 102.062,
       130.697, 104.581, 111.074, 123.064, 139.036, 119.410, 131.737, 105.474, 121.615, 134.477,
       118.078, 133.755, 127.796, 94.085, 85.167, 87.898, 92.625, 87.414, 94.170, 89.366, 87.199,
       89.681, 98.853, 117.686, 138.718, 107.470, 99.557, 111.429, 117.704, 141.821, 116.273,
       129.269, 110.897, 149.192, 136.124, 110.592, 118.765, 115.022, 129.630, 121.429, 143.222,
       112.589, 115.679, 85.486, 88.573, 129.212, 137.436, 119.401, 131.348, 138.045, 141.121,
       115.140, 136.276, 120.409, 109.097, 119.179, 86.014, 142.251, 124.988, 130.476, 121.786,
       116.778, 94.656, 127.451, 105.467, 

In [12]:
# Compute and display the chain's "sbb" angles; per the recorded output this is
# a table with one `resname` and one `angle` value per row.
df_sbb = chain1.get_sbb_angles()
df_sbb

Unnamed: 0,resname,angle
0,ARG,83.129474
1,MET,99.386493
2,HIS,144.573243
3,TYR,156.759283
4,ILE,130.257182
...,...,...
80,GLU,103.112214
81,ARG,135.314411
82,GLU,112.281932
83,ARG,94.201194


In [13]:
# Compute and display the chain's "bbs" angles; per the recorded output this is
# a table with one `resname` and one `angle` value per row.
df_bbs = chain1.get_bbs_angles()
df_bbs

Unnamed: 0,resname,angle
0,GLY,136.210745
1,ASN,80.419748
2,VAL,82.079066
3,LEU,118.721166
4,ALA,103.794029
...,...,...
80,THR,88.278482
81,VAL,142.443493
82,THR,129.749094
83,VAL,99.273487


In [14]:
# Compute and display the chain's "bbbb" dihedrals.
# NOTE(review): despite the `df_` prefix, the recorded output is a NumPy array,
# not a DataFrame.
df_bbbb = chain1.get_bbbb_dihedrals()
df_bbbb

array([174.414, 176.819, 156.150, 176.441, 170.352, 177.711, 170.305, 119.744, 88.622, 103.328,
       150.102, 22.310, 38.720, 125.327, 141.373, 149.188, 165.686, 131.226, 137.227, 94.698,
       53.324, 108.511, 156.027, 124.544, 169.524, 163.276, 168.233, 163.010, 132.702, 163.870,
       179.573, 140.399, 113.273, 149.842, 137.066, 159.780, 131.154, 73.523, 98.914, 137.896,
       136.898, 172.606, 161.998, 178.624, 161.441, 176.093, 132.591, 166.869, 167.262, 163.484,
       154.296, 5.244, 13.252, 58.777, 63.745, 41.851, 53.231, 48.515, 50.566, 63.859, 66.685,
       143.159, 172.898, 156.141, 70.064, 26.779, 131.607, 130.591, 175.539, 132.883, 164.806,
       168.265, 127.204, 109.396, 97.963, 158.291, 155.527, 172.408, 169.787, 153.450, 105.090,
       128.365, 118.488, 57.660, 99.231, 82.405, 72.745, 175.942, 179.013, 171.917, 134.546,
       164.843, 90.650, 165.596, 144.033, 174.675, 8.793, 173.677, 179.343, 116.878, 119.915,
       140.249, 65.505, 111.435, 147.197, 95.024,

In [15]:
# Compute and display the chain's "sbbs" dihedrals.
# NOTE(review): despite the `df_` prefix, the recorded output is a NumPy array,
# not a DataFrame.
df_sbbs = chain1.get_sbbs_dihedrals()
df_sbbs

array([164.663, 167.969, 151.664, 120.246, 177.360, 105.402, 40.222, 18.172, 38.283, 84.803,
       176.705, 164.839, 135.334, 174.341, 161.812, 168.640, 152.916, 76.476, 114.493, 135.525,
       86.123, 132.578, 105.967, 56.996, 149.177, 168.505, 136.955, 156.266, 165.153, 136.727,
       39.831, 150.005, 8.940, 16.429, 162.606, 166.769])

In [16]:
# Compute and display the chain's "sbbb" dihedrals; per the recorded output this
# is a table with one `resname` and one `dihedral` value per row.
df_sbbb = chain1.get_sbbb_dihedrals()
df_sbbb

Unnamed: 0,resname,dihedral
0,ARG,49.410929
1,MET,21.029216
2,HIS,3.434827
3,TYR,6.789016
4,ILE,72.357181
...,...,...
80,GLU,168.878247
81,ARG,78.275748
82,GLU,28.601990
83,ARG,0.404488


In [18]:
# Compute and display the chain's "bbbs" dihedrals; per the recorded output this
# is a table with one `resname` and one `dihedral` value per row.
df_bbbs = chain1.get_bbbs_dihedrals()
df_bbbs

Unnamed: 0,resname,dihedral
0,GLY,31.497602
1,ASN,23.923081
2,VAL,11.438883
3,LEU,20.206917
4,ALA,126.223752
...,...,...
80,THR,25.660358
81,VAL,58.022595
82,THR,159.751221
83,VAL,6.883259


In [17]:
# Preview the first rows of the table built for every chain in model 0.
# The loop variable is deliberately not named `chain`: that notebook-level name
# already holds chain "A" from an earlier cell, and rebinding it here would
# silently change what `Chain(chain)` produces if that cell is re-run.
for model_chain in structure[0]:
    print(Chain(model_chain).df.head())
    # print() adds its own newline, so this leaves two blank lines between chains.
    print("\n")


  resname  id unit_type                ca_coords           sidechain_com
0     ARG   3    type_2   [-2.455 1.216 -13.840]   [-3.823 3.243 -9.949]
1     GLY   4    type_1  [-0.123 -0.589 -11.553]                    None
2     MET   5    type_2   [-0.004 -2.524 -8.344]  [-2.114 -4.600 -6.635]
3     ASN   6    type_1    [3.231 -3.630 -6.904]                    None
4     HIS   7    type_2    [3.802 -5.462 -3.595]   [1.020 -4.071 -2.956]


  resname  id unit_type                ca_coords            sidechain_com
0     GLY   4    type_1  [27.988 -53.306 31.392]                     None
1     MET   5    type_2  [26.410 -51.939 28.155]  [23.782 -52.619 26.048]
2     ASN   6    type_1  [27.313 -48.616 26.598]                     None
3     HIS   7    type_2  [26.298 -47.375 23.128]  [26.249 -50.502 22.392]
4     VAL   8    type_1  [26.078 -43.942 21.493]                     None


  resname  id unit_type                ca_coords            sidechain_com
0     ARG   3    type_2   [34.013 -52.25

In [None]:
from src.Structure import Structure

# Build the project's Structure wrapper around the parsed Bio.PDB structure.
# NOTE(review): the first argument is presumably the structure's ID/label —
# confirm against src/Structure.
structure1 = Structure("3UDG", structure)

In [None]:
# Print the wrapper's text summary; per the recorded output it lists the number
# of chains, then each chain's type and unit count.
print(structure1)

Number of chains: 9
00. Chain type: protein
Number of units: 211
 01. Chain type: protein
Number of units: 214
 02. Chain type: protein
Number of units: 214
 03. Chain type: ssdna
Number of units: 5
 04. Chain type: ssdna
Number of units: 5
 05. Chain type: ssdna
Number of units: 4
 06. Chain type: ssdna
Number of units: 5
 07. Chain type: ssdna
Number of units: 3
 08. Chain type: ssdna
Number of units: 3



In [None]:
# NOTE(review): presumably writes a markdown report of the structure and, with
# show=True, also displays it — the output location and format are defined in
# src/Structure and are not visible here; confirm before relying on this cell.
structure1.save_as_markdown(show=True)

In [None]:
from src.Point import Point
import numpy as np

In [None]:
# Wrap a three-component NumPy array in the project's Point class and print it.
# The recorded output shows the values rendered with three decimals.
p = Point(np.array([0.213124312, 0, 0]))
print(p)

[0.213 0.000 0.000]


In [None]:
# Print the summary (chain type and number of units) of every chain held by the
# Structure wrapper. The loop variable avoids the name `chain`, which an earlier
# cell uses for the Bio.PDB chain "A"; reusing it here would leave that name
# pointing at a different kind of object after the loop.
for structure_chain in structure1.chains:
    print(structure_chain)
    

Chain type: protein
Number of units: 211

Chain type: protein
Number of units: 214

Chain type: protein
Number of units: 214

Chain type: ssdna
Number of units: 5

Chain type: ssdna
Number of units: 5

Chain type: ssdna
Number of units: 4

Chain type: ssdna
Number of units: 5

Chain type: ssdna
Number of units: 3

Chain type: ssdna
Number of units: 3



In [20]:
# Walk over consecutive pairs of units in the first chain of the Structure
# wrapper and read each pair's CA coordinates from `coms`.
# NOTE(review): `chain1` is rebound here; an earlier cell assigned it from
# Chain(chain), so cells using `chain1` now depend on execution order.
# NOTE(review): ca_1 / ca_2 are assigned but never used, so the loop currently
# has no visible effect; the recorded output (unit name/ID/type pairs) appears
# to come from an earlier version of this cell that printed the units.
chain1 = structure1.chains[0]
for i in range(len(chain1.units)-1):
    ca_1 = chain1.units[i].coms["ca_coords"]
    ca_2 = chain1.units[i+1].coms["ca_coords"]

    

    

Name of the unit: ARG
ID of the unit: 3
Type of the unit: type_2
 Name of the unit: GLY
ID of the unit: 4
Type of the unit: type_1

Name of the unit: GLY
ID of the unit: 4
Type of the unit: type_1
 Name of the unit: MET
ID of the unit: 5
Type of the unit: type_2

Name of the unit: MET
ID of the unit: 5
Type of the unit: type_2
 Name of the unit: ASN
ID of the unit: 6
Type of the unit: type_1

Name of the unit: ASN
ID of the unit: 6
Type of the unit: type_1
 Name of the unit: HIS
ID of the unit: 7
Type of the unit: type_2

Name of the unit: HIS
ID of the unit: 7
Type of the unit: type_2
 Name of the unit: VAL
ID of the unit: 8
Type of the unit: type_1

Name of the unit: VAL
ID of the unit: 8
Type of the unit: type_1
 Name of the unit: TYR
ID of the unit: 9
Type of the unit: type_2

Name of the unit: TYR
ID of the unit: 9
Type of the unit: type_2
 Name of the unit: LEU
ID of the unit: 10
Type of the unit: type_1

Name of the unit: LEU
ID of the unit: 10
Type of the unit: type_1
 Name of 