# Check of the NOE quantitative reliability

In [1]:
import pandas as pd
import numpy as np

from functions import *

In [2]:
# PDB entry IDs that can be selected for this check.
pdb_ids = [
    '2LEA',
    '2K52',
    '2LTM',
    '2KD0',
    '2LF2',
]
# Entry analysed in this run: the first ID of the list above.
pdb_id = pdb_ids[0]

## Reading the individual 3D $^{15}N$-NOESY peak lists

In [3]:
path = f'~/Sparky/Lists/{pdb_id}.list' # set correctly

# Read the whitespace-delimited peak list; the first row holds the column names.
# The separator is a raw string so that `\s` reaches the regex engine unchanged
# instead of being parsed as an (invalid) Python string escape, which raises a
# SyntaxWarning on recent Python versions.
df = pd.read_csv(path, header=0, index_col=None, sep=r'\s+')
# Project helper from `functions`; the displayed result carries the derived
# columns (noe_res, inter, resnum, ...) in addition to the raw ones.
df = tidy_list(df)

# Why do we have negative NOEs?
# Anyway, the phase is not important for this analysis, just remove the sign
df['height'] = np.abs(df.height)

df

Unnamed: 0,res,noe,N,Hn,H,height,noe_res,inter,resnum,noe_resnum,res_diff,atom_type,atom_type_pos
0,S1,H,116.240,8.111,8.111,1571,S1,False,1,1,0,H,H_i
1,S1,HA,116.240,8.111,4.389,1756,S1,False,1,1,0,HA,HA_i
2,S1,HB2,116.240,8.111,3.750,2457,S1,False,1,1,0,HB,HB_i
3,S1,HB3,116.240,8.111,3.750,2457,S1,False,1,1,0,HB,HB_i
4,S1,Y2H,116.240,8.111,8.062,554,Y2,True,1,2,-1,H,H_i+1
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1789,S100,H99HB3,123.154,8.119,3.245,1350,S100,False,100,100,0,H,H_i
1790,S100,H,123.154,8.119,8.119,69967,S100,False,100,100,0,H,H_i
1791,S100,HA,123.154,8.119,4.258,2956,S100,False,100,100,0,HA,HA_i
1792,S100,HB2,123.154,8.119,3.849,3318,S100,False,100,100,0,HB,HB_i


Removing the side-chain nitrogen peaks (residue labels containing ND or NE)

In [4]:
# Keep only the rows whose residue label contains neither 'ND' nor 'NE'.
df = df[~df.res.str.contains('ND') & ~df.res.str.contains('NE')]

Removing the backbone-to-side-chain contacts (HG, HD and HE protons); they are either too intense or overlapped

In [5]:
# Keep only the NOEs whose label mentions none of the HG, HD or HE protons.
df = df[
    ~df.noe.str.contains('HG')
    & ~df.noe.str.contains('HD')
    & ~df.noe.str.contains('HE')
]

Removing the diagonals

In [6]:
# Diagonal peaks have identical values in the Hn and H columns; drop them.
df = df[df.Hn != df.H]

In [7]:
# Sanity check: (rows, columns) left after the three filtering steps above.
df.shape

(1161, 13)

# Analysis

In [8]:
# Show the filtered peak table that all analyses below operate on.
# NOTE(review): in the output, labels such as 'H99HB2' appear with noe_res S100,
# inter False and atom_type H — looks like a mis-parse upstream (tidy_list?); confirm.
df

Unnamed: 0,res,noe,N,Hn,H,height,noe_res,inter,resnum,noe_resnum,res_diff,atom_type,atom_type_pos
1,S1,HA,116.240,8.111,4.389,1756,S1,False,1,1,0,HA,HA_i
2,S1,HB2,116.240,8.111,3.750,2457,S1,False,1,1,0,HB,HB_i
3,S1,HB3,116.240,8.111,3.750,2457,S1,False,1,1,0,HB,HB_i
4,S1,Y2H,116.240,8.111,8.062,554,Y2,True,1,2,-1,H,H_i+1
5,Y2,S1H,121.776,8.062,8.111,8140,S1,True,2,1,1,H,H_i-1
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1788,S100,H99HB2,123.154,8.119,3.168,1705,S100,False,100,100,0,H,H_i
1789,S100,H99HB3,123.154,8.119,3.245,1350,S100,False,100,100,0,H,H_i
1791,S100,HA,123.154,8.119,4.258,2956,S100,False,100,100,0,HA,HA_i
1792,S100,HB2,123.154,8.119,3.849,3318,S100,False,100,100,0,HB,HB_i


In [9]:
# Project helper from `functions`. Judging by the output, it tabulates per atom
# type how often its NOE is the 1st / 2nd / 3rd highest or lower — presumably
# ranked per residue; verify against the helper's implementation.
get_atom_rank_matrix(df, exclude_sc=True)

Unnamed: 0_level_0,1st highest,2nd highest,3rd highest,Lower
Atom name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
HA_i,30,38,20,5
HA_i-1,41,14,10,25
H_i-1,7,15,18,43


In [10]:
# Project helper from `functions`. The output lists a count per atom type —
# presumably how often that atom type gives the strongest NOE; verify in the helper.
get_atoms_w_strongest_noes(df)

Unnamed: 0_level_0,count
Atom type,Unnamed: 1_level_1
HA_i-1,35
HB_i,23
HA_i,9
HB_i-1,6
H_i+1,4
H_i-1,4
H_i-2,2
H_i+2,2
HA_i-2,2
HA_i-3,2


## $H^{i-k}_{\alpha}$

How many $H^{i-k}_{\alpha}$s are stronger than $H^{i}_{\alpha}$?

> Expected from theory: none, because every $H^{i-k}_{\alpha}$ is farther away from the magnetization source ($H^N$) than $H^{i}_{\alpha}$ is.

In [11]:
# Split the HA contacts by the `inter` flag:
# intra -> flag is False, inter -> flag is True.
df_intra_Ha = df.loc[df.noe.str.contains('HA') & ~df.inter]
df_inter_Ha = df.loc[df.noe.str.contains('HA') & df.inter]

In [12]:
# Count the cases that contradict the expectation: intra-residue HA is passed as
# the supposedly strong set, inter-residue HA as the supposedly weak one.
# The number shown below the cell is presumably printed by the helper itself,
# since an assignment produces no cell output — confirm in `functions`.
n_anomalies = get_n_anomalies(df_strong=df_intra_Ha,
                              df_weak=df_inter_Ha)

45


For 2LEA, there are 45 cases where an inter-residue $H_{\alpha}$ NOE is stronger than the residue's own (intra-residue) $H_{\alpha}$ NOE peak!

In [13]:
# For HAs only: per-residue table of intra- vs inter-residue HA peak heights
# (columns height_intra, height_inter, noe_resnum in the output below).
compare_strongest_noes(df_intra_Ha, df_inter_Ha)

Unnamed: 0_level_0,height_intra,height_inter,noe_resnum
resnum,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,1756,0,0
2,5887,13351,1
3,11348,7299,2
4,8560,28898,3
8,14410,32052,7
...,...,...,...
92,848,631,91
92,457,631,91
93,7823,13397,92
97,2611,0,0
