# Processing energies

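Here we process the extracted total energies to obtain the interaction energies (in eV, relative to glycine) of each amino acid with the Mg, Ca and Ba cations: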

In [1]:
# imports
import pandas as pd

In [2]:
# Read the extracted total energies (one row per 'Mainfile' entry).
total_energies_csv = 'data/total_energies.csv'
df = pd.read_csv(total_energies_csv)
df

Unnamed: 0,Mainfile,Formula,Entry_ID,Upload_ID,Total_energy
0,AminoAcids/glu/Dipeptide/Ba/conf_0047/FHIaims.out,BaC8H13N2O4,---sV-GhvU9x9aLIxYIaSaRRMYb7,OhCdHFfSTLu8g6CGBISk4w,-244828.848890
1,AminoAcids/argH/Dipeptide/bare/conf_0855/FHIai...,C9H20N5O2,--6kkazavrym5kv8a9t9i9xFsVUY,MTSFEw9oS1GRiY0LVMgOYw,-21201.159951
2,AminoAcids/lysH/Dipeptide/bare/conf_0528/FHIai...,C9H20N3O2,--DqMR6pHAuouHktSJGR58iTBYmm,SXdit6b0RtibOeWIk3dulw,-18219.475813
3,AminoAcids/lys/Dipeptide/bare/conf_1802/FHIaim...,C9H19N3O2,--GsPYTZbOQDIzkGgdrzmr_40vc4,hSvqxrr0RjOhzwYU0stsOg,-18209.078055
4,AminoAcids/met/Dipeptide/Ca/conf_0159/FHIaims.out,C8CaH16N2O2S,--PlSeBOD6yCTDHBSdgPE5cEdzof,oHhmKWH2RHyyq7I2_Zdtkg,-45055.995082
...,...,...,...,...,...
23235,AminoAcids/lysH/Dipeptide/Mg/conf_0180/FHIaims...,C9H20MgN3O2,zyGRZ-2zfy1iCrFASB2RxClsAswM,3mF4CEoTReGHEdeR4sFfnw,-23655.925188
23236,AminoAcids/lysH/Dipeptide/Ba/conf_0189/FHIaims...,BaC9H20N3O2,zyY1Y-fXObAwli-dLx4uzH3hzuef,ZnZLnzQUTbiqfI_XO3s4Aw,-243352.333824
23237,AminoAcids/lysH/Dipeptide/Ba/conf_0539/FHIaims...,BaC9H20N3O2,zy_65vIS9xvjArhxdHziwln8IIr4,ZnZLnzQUTbiqfI_XO3s4Aw,-243351.595025
23238,AminoAcids/arg/Dipeptide/bare/conf_2884/FHIaim...,C9H19N5O2,zymyALFYVstb5DXBA3b6I5I2MBOI,N-T_ymXOSk-DXMIjvGyFrQ,-21190.209803


In [3]:
# The amino-acid and cation labels are encoded in the 'Mainfile' path as the
# 2nd and 4th '/'-separated fields; pull them out and keep only the columns
# needed downstream.
def _path_field(position):
    """Build an extractor returning the `position`-th '/'-separated field of a path."""
    return lambda path: path.split('/')[position]


df = df.assign(
    Aminoacid=df['Mainfile'].apply(_path_field(1)),
    Cation=df['Mainfile'].apply(_path_field(3)),
)[['Aminoacid', 'Cation', 'Total_energy']]
df

Unnamed: 0,Aminoacid,Cation,Total_energy
0,glu,Ba,-244828.848890
1,argH,bare,-21201.159951
2,lysH,bare,-18219.475813
3,lys,bare,-18209.078055
4,met,Ca,-45055.995082
...,...,...,...
23235,lysH,Mg,-23655.925188
23236,lysH,Ba,-243352.333824
23237,lysH,Ba,-243351.595025
23238,arg,bare,-21190.209803


In [4]:
# Keep the lowest value of every remaining column for each
# (amino acid, cation) pair; the group keys stay as regular columns.
system_keys = ['Aminoacid', 'Cation']
edf = df.groupby(system_keys, as_index=False).min()
edf

Unnamed: 0,Aminoacid,Cation,Total_energy
0,ala,Ba,-238632.421019
1,ala,Ca,-32039.458764
2,ala,Mg,-18935.682552
3,ala,bare,-13494.836125
4,arg,Ba,-246330.134028
...,...,...,...
99,tyr,bare,-21829.375306
100,val,Ba,-240771.381368
101,val,Ca,-34178.447579
102,val,Mg,-21074.870157


In [5]:
# Reshape to wide form: one row per amino acid, one energy column per cation.
edf = (
    edf
    .pivot(index='Aminoacid', columns='Cation', values='Total_energy')
    .rename_axis(columns=None)  # drop the leftover 'Cation' label on the column axis
    .reset_index()
)
edf

Unnamed: 0,Aminoacid,Ba,Ca,Mg,bare
0,ala,-238632.421019,-32039.458764,-18935.682552,-13494.836125
1,arg,-246330.134028,-39737.682035,-26634.983857,-21191.050328
2,argH,-246335.375896,-39742.307368,-26638.421192,-21202.194401
3,asn,-243225.964309,-36633.343923,-23530.092416,-18087.583754
4,asp,-243759.839553,-37167.38148,-24064.153242,-18614.739237
5,aspH,-243767.207694,-37174.548699,-24071.247717,-18629.212924
6,cys,-249511.174067,-42918.418437,-29815.044755,-24373.456279
7,gln,-244295.781748,-37703.200037,-24600.047854,-19156.975091
8,glu,-244829.487512,-38237.074533,-25133.90934,-19684.239885
9,gluH,-244837.015187,-38244.254787,-25141.055706,-19698.630019


In [6]:
# Merge the 'hisD' and 'hisE' rows into a single 'his' row that carries, for
# every energy column, the lower of the two values.
is_his_variant = edf['Aminoacid'].isin(['hisD', 'hisE'])
energy_columns = edf.columns[1:]  # everything after the leading 'Aminoacid' column
his_row = edf.loc[is_his_variant, energy_columns].min().to_dict()
his_row['Aminoacid'] = 'his'

# Replace the two variant rows with the merged one and restore alphabetical order.
without_variants = edf.loc[~is_his_variant]
edf = pd.concat([without_variants, pd.DataFrame([his_row])], ignore_index=True)
edf = edf.sort_values('Aminoacid', ignore_index=True)
edf

Unnamed: 0,Aminoacid,Ba,Ca,Mg,bare
0,ala,-238632.421019,-32039.458764,-18935.682552,-13494.836125
1,arg,-246330.134028,-39737.682035,-26634.983857,-21191.050328
2,argH,-246335.375896,-39742.307368,-26638.421192,-21202.194401
3,asn,-243225.964309,-36633.343923,-23530.092416,-18087.583754
4,asp,-243759.839553,-37167.38148,-24064.153242,-18614.739237
5,aspH,-243767.207694,-37174.548699,-24071.247717,-18629.212924
6,cys,-249511.174067,-42918.418437,-29815.044755,-24373.456279
7,gln,-244295.781748,-37703.200037,-24600.047854,-19156.975091
8,glu,-244829.487512,-38237.074533,-25133.90934,-19684.239885
9,gluH,-244837.015187,-38244.254787,-25141.055706,-19698.630019


In [7]:
# Interaction energy per cation: (energy with cation) - (bare energy),
# then shifted so that the 'gly' row is exactly zero.
is_gly = edf['Aminoacid'] == 'gly'
relative_energies = {}
for metal in ('Mg', 'Ca', 'Ba'):
    binding = edf[metal] - edf['bare']
    gly_reference = binding[is_gly].values[0]
    relative_energies[f'E_{metal}'] = binding - gly_reference

# Keep only the amino-acid label and the three relative-energy columns.
edf = edf[['Aminoacid']].assign(**relative_energies)
edf

Unnamed: 0,Aminoacid,E_Mg,E_Ca,E_Ba
0,ala,-0.138234,-0.118715,-0.100234
1,arg,-3.225336,-2.127782,-1.59904
2,argH,4.481403,4.390957,4.303165
3,asn,-1.800469,-1.256246,-0.895895
4,asp,-8.705812,-8.138319,-7.615657
5,aspH,-1.3266,-0.831851,-0.510109
6,cys,-0.880283,-0.458234,-0.233128
7,gln,-2.36457,-1.721022,-1.321998
8,glu,-8.961262,-8.330724,-7.762967
9,gluH,-1.717493,-1.120843,-0.900507


In [8]:
# Separate the systems listed as ionic sidechains from the rest; both frames
# get a fresh 0..n-1 index.
ions = ['lysH', 'argH', 'hisH', 'asp', 'glu']
is_ionic = edf['Aminoacid'].isin(ions)
charged = edf[is_ionic].reset_index(drop=True)
edf = edf[~is_ionic].reset_index(drop=True)
len(edf), len(charged)

(20, 5)

In [9]:
# remove 'H' from aminoacid names
# After this step both frames use the same base amino-acid names, so rows in
# `charged` can be matched to rows in `edf` by name.
# regex=False pins literal matching: the default of `regex` differs between
# pandas versions, and a plain substring removal is all that is wanted here.
edf = edf.assign(Aminoacid=edf['Aminoacid'].str.replace('H', '', regex=False))

# Same renaming for the ionic systems, whose energy columns additionally get
# an '_ion' suffix to keep them distinct from the columns in `edf`.
charged = (
    charged
    .assign(Aminoacid=charged['Aminoacid'].str.replace('H', '', regex=False))
    .rename(columns={f'E_{M}': f'E_{M}_ion' for M in ('Mg', 'Ca', 'Ba')})
)
charged

Unnamed: 0,Aminoacid,E_Mg_ion,E_Ca_ion,E_Ba_ion
0,arg,4.481403,4.390957,4.303165
1,asp,-8.705812,-8.138319,-7.615657
2,glu,-8.961262,-8.330724,-7.762967
3,his,5.551417,5.389953,5.226539
4,lys,4.78439,4.69265,4.603275


In [10]:
# add columns for ionized sidechains
# Every E_<M>_ion column equals the matching E_<M> column, except for amino
# acids that have a row in `charged`, where the value from `charged` is used.
metals = ('Mg', 'Ca', 'Ba')
cols = [f'E_{M}_ion' for M in metals]

# Look the charged values up by amino-acid name rather than by row position:
# a positional assignment is only correct while `charged` and the matching
# rows of `edf` happen to be in the same order, and it would also require
# overwriting the index of `charged`.
charged_by_aa = charged.set_index('Aminoacid')[cols]
assert charged_by_aa.index.is_unique, 'duplicate amino acids in `charged`'

# substitute values for ionised AAs
idxs = edf['Aminoacid'].isin(charged_by_aa.index)
for M in metals:
    ion_values = edf['Aminoacid'].map(charged_by_aa[f'E_{M}_ion'])
    # Rows without a charged counterpart fall back to the E_<M> value.
    edf[f'E_{M}_ion'] = ion_values.where(idxs, edf[f'E_{M}'])
edf

Unnamed: 0,Aminoacid,E_Mg,E_Ca,E_Ba,E_Mg_ion,E_Ca_ion,E_Ba_ion
0,ala,-0.138234,-0.118715,-0.100234,-0.138234,-0.118715,-0.100234
1,arg,-3.225336,-2.127782,-1.59904,4.481403,4.390957,4.303165
2,asn,-1.800469,-1.256246,-0.895895,-1.800469,-1.256246,-0.895895
3,asp,-1.3266,-0.831851,-0.510109,-8.705812,-8.138319,-7.615657
4,cys,-0.880283,-0.458234,-0.233128,-0.880283,-0.458234,-0.233128
5,gln,-2.36457,-1.721022,-1.321998,-2.36457,-1.721022,-1.321998
6,glu,-1.717493,-1.120843,-0.900507,-8.961262,-8.330724,-7.762967
7,gly,0.0,0.0,0.0,0.0,0.0,0.0
8,his,-2.353829,-1.601587,-1.211088,5.551417,5.389953,5.226539
9,ile,-0.63826,-0.347956,-0.270345,-0.63826,-0.347956,-0.270345


In [11]:
# Three-letter -> one-letter amino-acid codes.
AAs = {
    'ala': 'A', 'arg': 'R', 'asn': 'N', 'asp': 'D', 'cys': 'C',
    'gln': 'Q', 'glu': 'E', 'gly': 'G', 'his': 'H', 'ile': 'I',
    'leu': 'L', 'lys': 'K', 'met': 'M', 'phe': 'F', 'pro': 'P',
    'ser': 'S', 'thr': 'T', 'trp': 'W', 'tyr': 'Y', 'val': 'V',
}
edf['Aminoacid'] = edf['Aminoacid'].replace(AAs)

# Descriptive headers, assigned by position: the amino-acid column first,
# then the three plain energy columns, then the three charged-sidechain ones.
metals = ('Mg', 'Ca', 'Ba')
plain_labels = [f'{M} interaction energy, eV' for M in metals]
charged_labels = [f'{M} interaction energy, charged sidechains, eV' for M in metals]
edf.columns = ['Aminoacid', *plain_labels, *charged_labels]
edf

Unnamed: 0,Aminoacid,"Mg interaction energy, eV","Ca interaction energy, eV","Ba interaction energy, eV","Mg interaction energy, charged sidechains, eV","Ca interaction energy, charged sidechains, eV","Ba interaction energy, charged sidechains, eV"
0,A,-0.138234,-0.118715,-0.100234,-0.138234,-0.118715,-0.100234
1,R,-3.225336,-2.127782,-1.59904,4.481403,4.390957,4.303165
2,N,-1.800469,-1.256246,-0.895895,-1.800469,-1.256246,-0.895895
3,D,-1.3266,-0.831851,-0.510109,-8.705812,-8.138319,-7.615657
4,C,-0.880283,-0.458234,-0.233128,-0.880283,-0.458234,-0.233128
5,Q,-2.36457,-1.721022,-1.321998,-2.36457,-1.721022,-1.321998
6,E,-1.717493,-1.120843,-0.900507,-8.961262,-8.330724,-7.762967
7,G,0.0,0.0,0.0,0.0,0.0,0.0
8,H,-2.353829,-1.601587,-1.211088,5.551417,5.389953,5.226539
9,I,-0.63826,-0.347956,-0.270345,-0.63826,-0.347956,-0.270345


In [12]:
# Write the final table to disk; the positional row index is not stored.
energies_csv = 'data/energies.csv'
edf.to_csv(energies_csv, index=False)