# Check of the NOE quantitative reliability

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

from functions import *

# Render all matplotlib figures in this notebook at 300 dpi.
plt.rcParams['figure.dpi'] = 300

In [2]:
# Candidate entries (presumably PDB IDs of the proteins with peak lists — confirm).
pdb_ids = ['2LEA', '2K52', '2LTM', '2KD0', '2LF2']
# The entry analysed in this run; it is used to build the peak-list path and the plot title.
pdb_id = pdb_ids[0]

## Reading the individual 3D $^{15}N$-NOESY peak lists

In [3]:
path = f'~/Sparky/Lists/{pdb_id}.list' # set correctly

# Reading the data: the list is a whitespace-delimited table with a header row.
# The separator is a raw string so that `\s` reaches the regex engine verbatim
# instead of being parsed as an (invalid) Python string escape.
df = pd.read_csv(path, header=0, index_col=None, sep=r'\s+')
df = tidy_list(df)

# Why do we have negative NOEs? 
# Anyway, the phase is not important for this analysis, just remove the sign
df['height'] = np.abs(df.height)

df

Unnamed: 0,res,noe,N,Hn,H,height,noe_res,inter,resnum,noe_resnum,res_diff,atom_type,atom_type_pos
0,S1,H,116.240,8.111,8.111,1571,S1,False,1,1,0,H,H_i
1,S1,HA,116.240,8.111,4.389,1756,S1,False,1,1,0,HA,HA_i
2,S1,HB2,116.240,8.111,3.750,2457,S1,False,1,1,0,HB,HB_i
3,S1,HB3,116.240,8.111,3.750,2457,S1,False,1,1,0,HB,HB_i
4,S1,Y2H,116.240,8.111,8.062,554,Y2,True,1,2,-1,H,H_i+1
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1789,S100,H99HB3,123.154,8.119,3.245,1350,S100,False,100,100,0,H,H_i
1790,S100,H,123.154,8.119,8.119,69967,S100,False,100,100,0,H,H_i
1791,S100,HA,123.154,8.119,4.258,2956,S100,False,100,100,0,HA,HA_i
1792,S100,HB2,123.154,8.119,3.849,3318,S100,False,100,100,0,HB,HB_i


Removing the side-chains

In [4]:
# Drop every row whose `res` label contains 'ND' or 'NE'.
is_sidechain_res = df['res'].str.contains('ND|NE')
df = df.loc[~is_sidechain_res]

Remove the backbone-to-sidechain contacts (they are too intense or just overlapped)

In [5]:
# Drop every row whose `noe` label contains 'HG', 'HD' or 'HE'.
is_sidechain_noe = df['noe'].str.contains('H[GDE]')
df = df.loc[~is_sidechain_noe]

Removing the diagonals

In [6]:
# A diagonal peak has identical shifts in the `Hn` and `H` columns; keep the rest.
off_diagonal = df['Hn'] != df['H']
df = df.loc[off_diagonal]

In [7]:
# (rows, columns) of the peak list after the three filtering steps above.
df.shape

(1161, 13)

# Analysis

In [8]:
# Rank table computed by the helper from `functions` on the filtered peak list.
# NOTE(review): exclude_sc presumably means "exclude side chains" — confirm in functions.py.
get_atom_rank_matrix(df, exclude_sc=True)

Unnamed: 0_level_0,1st highest,2nd highest,3rd highest,4th or lower
Atom name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
HA_i,30,38,20,5
HA_i-1,41,14,10,25
H_i-1,7,15,18,43


In [9]:
# Helper from `functions`; presumably counts, per atom type, how often it gives
# the strongest NOE of a spin system — confirm in functions.py.
get_atoms_w_strongest_noes(df)

Unnamed: 0_level_0,count
Atom type,Unnamed: 1_level_1
HA_i-1,35
HB_i,23
HA_i,9
HB_i-1,6
H_i+1,4
H_i-1,4
H_i-2,2
H_i+2,2
HA_i-2,2
HA_i-3,2


> It would also be useful to see, for each of HA_i, HA_i-1 and H_i-1, the following numbers:

> - average relative intensity with respect to the spin system's maximum intensity
> - minimum relative intensity
> - the frequencies you computed above but considering only HN protons
> - the frequencies you computed above but considering only HA protons

Intensities only among HN peaks

In [17]:
# Restrict to rows whose atom_type is "H" before ranking.
# exclude_sc is not passed here: the frame is already filtered for amide protons.
is_hn = df['atom_type'] == "H"
df_hn = df.loc[is_hn]
get_atom_rank_matrix(df_hn)

Unnamed: 0_level_0,1st highest,2nd highest,3rd highest,4th or lower
Atom name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
H_i-1,44,29,6,4


In [18]:
# Restrict to rows whose atom_type is "HA" before ranking.
is_ha_type = df['atom_type'] == "HA"
df_ha = df.loc[is_ha_type]
get_atom_rank_matrix(df_ha)

Unnamed: 0_level_0,1st highest,2nd highest,3rd highest,4th or lower
Atom name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
HA_i,43,34,14,2
HA_i-1,44,25,19,2


### Intensity distribution

In [None]:
# Histogram of all remaining peak heights, 200 bins.
df['height'].plot.hist(bins=200)

In [None]:
# Legend labels (mathtext) for the three contact types compared in this plot.
atom_labels = {'H_i-1': '$H_{i-1}$', 'HA_i-1': '$H^A_{i-1}$', 'HA_i': '$H^A_{i}$'}

# Work on an explicit copy and assign the results back: in-place `replace`/`rename`
# on a slice of `df` raises SettingWithCopyWarning and, with pandas copy-on-write,
# silently leaves the labels unchanged.
df_bb = df.loc[df.atom_type_pos.isin(list(atom_labels))].copy()
df_bb['atom_type_pos'] = df_bb['atom_type_pos'].replace(atom_labels)
df_bb = df_bb.rename(columns={'atom_type_pos': 'Atom type'})

# One step histogram + KDE per contact type; common_norm=False normalises each
# group on its own.
ax = sns.histplot(df_bb, x='height', hue='Atom type',
                  element="step",
                  kde=True, common_norm=False,
                  palette=sns.blend_palette(['red', 'blue', 'orange'], n_colors=3),
                  line_kws={"linewidth": 3},
                  )
sns.despine(left=True, bottom=True)
ax.set_xlabel("NOE peak height")
# Hide the y axis of the plot.
ax.get_yaxis().set_visible(False)
ax.set_title(f"Intensity distribution of NOE peaks in 3D 15N NOESY of {pdb_id}");

In [None]:
# Number of distinct `res` labels left after filtering (displayed as a 1-tuple).
df.res.unique().shape

### Average intensities in the spin systems

## $H^{i-k}_{\alpha}$

How many $H^{i-k}_{\alpha}$s are stronger than $H^{i}_{\alpha}$?

>Expected from theory: none, because any $H^{i-k}_{\alpha}$ is further away from the magnetization source (the $H^N$) than $H^{i}_{\alpha}$ is.

In [None]:
# Rows whose NOE label contains 'HA', split by the boolean `inter` column.
has_ha = df['noe'].str.contains('HA')
is_inter = df['inter']
df_intra_Ha = df[~is_inter & has_ha]
df_inter_Ha = df[is_inter & has_ha]

In [None]:
# Helper from `functions`: the intra-residue HA peaks are passed as the expected-strong
# set and the inter-residue HA peaks as the expected-weak set.
# NOTE(review): presumably returns the number of cases where a "weak" peak
# exceeds the "strong" one — confirm in functions.py.
n_anomalies = get_n_anomalies(df_strong=df_intra_Ha,
                              df_weak=df_inter_Ha)

For 2LEA, there are 45 cases where an inter-residual NOE is stronger than the very own NOE peak!

In [None]:
# For HAs only: helper from `functions`, called with the intra-residue and
# inter-residue HA peak tables built above.
compare_strongest_noes(df_intra_Ha, df_inter_Ha)