In [28]:
import os
import MDAnalysis as mda
from ase.io import read
import numpy as np

In [29]:
# ---------------------------------------------------------------------------
# Static text fragments used to assemble NEF / NMR-STAR style output files.
# They are plain constants: nothing is interpolated into them.
# ---------------------------------------------------------------------------

# File header plus the metadata saveframe.
blueprint_meta_data = """data_peptide_project

  save_nef_nmr_meta_data

      _nef_nmr_meta_data.sf_category      nef_nmr_meta_data
      _nef_nmr_meta_data.sf_framecode     nef_nmr_meta_data
      _nef_nmr_meta_data.format_name      nmr_exchange_format
      _nef_nmr_meta_data.format_version   1.1
      _nef_nmr_meta_data.program_name     AnalysisAssign
      _nef_nmr_meta_data.program_version  3.0.1.1
      _nef_nmr_meta_data.creation_date    2023-06-09T20:53:22.311917
      _nef_nmr_meta_data.uuid             AnalysisAssign-2020-06-14T20:53:22.311917-1956756036
  save_


"""

# Opening of the molecular-system saveframe, up to the sequence loop tags.
blueprint_molecular_system_start = """save_nef_molecular_system

   _nef_molecular_system.sf_category   nef_molecular_system
   _nef_molecular_system.sf_framecode  nef_molecular_system

   loop_
      _nef_sequence.index
      _nef_sequence.chain_code
      _nef_sequence.sequence_code
      _nef_sequence.residue_name
      _nef_sequence.linking
      _nef_sequence.residue_variant
      _nef_sequence.ccpn_comment

"""

# Closes the sequence loop and the molecular-system saveframe.
blueprint_molecular_system_end = """   stop_
save_


"""

# Closes the chemical-shift loop and its saveframe.
blueprint_cs_end = """   stop_
save_
"""

In [30]:
# --- Input / output locations -------------------------------------------
# pdb_filename = "/storage_common/angiod/NMR/Federica/pdb/2i46.pdb"
pdb_filename = "/storage_common/cardev/2023/10-SP1-TPP/MD-LONE-WILD/Danie/clusters.pdb"
filename = "results/nmr_rmsf_shiftx2_bead_types/mace-v2-production/ds_0__clusters__evaluation.xyz"
out_folder = "peaks/all" # "nmr_peaks"
id_offset = 0

# Number of frames to take from the start of `filename`.
n_frames = 153

# Parse the file a single time and slice out the frames, instead of calling
# read() once per frame (each call would re-open and re-scan the file).
frames = read(filename, index=slice(0, n_frames))
if len(frames) != n_frames:
    # Keep failing loudly when the file is shorter than expected, as a
    # per-frame read with an out-of-range index would.
    raise IndexError(
        f"expected {n_frames} frames in {filename!r}, found {len(frames)}"
    )

# Read predicted CS: the per-atom values are taken from the "energies" array
# of every frame.
# NOTE: `index` is intentionally the loop variable — it stays bound to the
# last frame index after the loop, and a later cell reads it when building
# the entry ID and the output file name.
per_frame_ppm = []
for index, atoms in enumerate(frames):
    ppm = atoms.arrays["energies"]
    per_frame_ppm.append(ppm)

# One row per frame.
all_ppm = np.stack(per_frame_ppm, axis=0)

In [31]:
# Read one weight per data line from 'clusters.xvg'.
# Every data line must hold exactly two whitespace-separated fields; the
# second one is the weight (the first is ignored).
weights = []
with open('clusters.xvg', 'r') as f:
    for line in f:
        line = line.strip()
        # Skip blank lines and xvg header lines ('#' comments, '@' plot
        # directives) instead of crashing while unpacking them.
        if not line or line[0] in "#@":
            continue
        _, w = line.split()
        weights.append(float(w))
weights = np.array(weights)

In [27]:
# Build an NMR-STAR chemical-shift file from the predicted shifts in
# `all_ppm`, keeping only backbone 'N' and 'H' atoms, and write it into
# `out_folder`.

### TAKE MEAN ###
# ppm = all_ppm.mean(axis=0)
# Weighted average over the frame axis, one weight per frame.
ppm = np.sum(all_ppm * weights[:, None], axis=0) / np.sum(weights)
#################

### TAKE Ith ###
# index = 4
# ppm = all_ppm[index]
#################

# NOTE(review): this spread is an unweighted standard deviation over frames,
# while `ppm` above is a weighted mean — confirm that mix is intended.
ppm_error = all_ppm.std(axis=0)

# Read PDB
u = mda.Universe(pdb_filename)
sel = u.select_atoms("protein") # and chainid A

# NOTE(review): `index` is only assigned in this cell when the "TAKE Ith"
# lines are enabled; otherwise it must already exist in the kernel namespace
# (e.g. left over from the frame-loading loop). It drives both the entry ID
# and the output file name.
ID = index + id_offset

blueprint_cs_start = f"""data_{ID}

save_assigned_chemical_shifts_1
   _Assigned_chem_shift_list.Sf_category                   assigned_chemical_shifts
   _Assigned_chem_shift_list.Sf_framecode                  assigned_chemical_shifts_1
   _Assigned_chem_shift_list.Entry_ID                      {ID}
   _Assigned_chem_shift_list.ID                            1
   _Assigned_chem_shift_list.Name                          BIR1-cIAP1
   _Assigned_chem_shift_list.Sample_condition_list_ID      1
   _Assigned_chem_shift_list.Sample_condition_list_label   $sample_conditions_1
   _Assigned_chem_shift_list.Chem_shift_reference_ID       1
   _Assigned_chem_shift_list.Chem_shift_reference_label    $chem_shift_reference_1
   _Assigned_chem_shift_list.Chem_shift_1H_err             0.02
   _Assigned_chem_shift_list.Chem_shift_13C_err            0.2
   _Assigned_chem_shift_list.Chem_shift_15N_err            0.2
   _Assigned_chem_shift_list.Chem_shift_31P_err            .
   _Assigned_chem_shift_list.Chem_shift_2H_err             .
   _Assigned_chem_shift_list.Chem_shift_19F_err            .
   _Assigned_chem_shift_list.Error_derivation_method       .
   _Assigned_chem_shift_list.Details                       .
   _Assigned_chem_shift_list.Text_data_format              .
   _Assigned_chem_shift_list.Text_data                     .
   
   
   loop_
      _Atom_chem_shift.ID
      _Atom_chem_shift.Assembly_atom_ID
      _Atom_chem_shift.Entity_assembly_ID
      _Atom_chem_shift.Entity_assembly_asym_ID
      _Atom_chem_shift.Entity_ID
      _Atom_chem_shift.Comp_index_ID
      _Atom_chem_shift.Seq_ID
      _Atom_chem_shift.Comp_ID
      _Atom_chem_shift.Atom_ID
      _Atom_chem_shift.Atom_type
      _Atom_chem_shift.Atom_isotope_number
      _Atom_chem_shift.Val
      _Atom_chem_shift.Val_err
      _Atom_chem_shift.Assign_fig_of_merit
      _Atom_chem_shift.Ambiguity_code
      _Atom_chem_shift.Ambiguity_set_ID
      _Atom_chem_shift.Occupancy
      _Atom_chem_shift.Resonance_ID
      _Atom_chem_shift.Auth_entity_assembly_ID
      _Atom_chem_shift.Auth_asym_ID
      _Atom_chem_shift.Auth_seq_ID
      _Atom_chem_shift.Auth_comp_ID
      _Atom_chem_shift.Auth_atom_ID
      _Atom_chem_shift.Details
      _Atom_chem_shift.Entry_ID
      _Atom_chem_shift.Assigned_chem_shift_list_ID

"""

# Value written in the Atom_isotope_number column for each element symbol.
# NOTE(review): the 'O' and 'S' entries (8, 16) equal those elements' atomic
# numbers; they are never reached by the N/H-only loop below — confirm them
# before emitting other nuclei.
element_to_isotope = {
    'H': 1,
    'N': 15,
    'C': 13,
    'O': 8,
    'S': 16,
}
cc = 0  # running row ID, counts emitted rows only

# Rows are collected in a list and joined once, rather than growing one big
# string inside the loop.
rows = []
# Atoms, shifts and errors are paired positionally; zip() stops at the
# shortest of the three sequences.
for atom, val, val_err in zip(sel.atoms, ppm, ppm_error):
    if atom.name not in ('N', 'H'):
        continue
    # if atom.resname != "LEU" or atom.name not in ['N', 'H']:
    #     continue
    # if atom.resnum != 183:
    #     continue
    # if val_err > 3.:
    #     continue
    cc += 1
    fields = [
        f"      {cc:<6}",                               # ID
        ".   ",                                         # Assembly_atom_ID
        "1   ",                                         # Entity_assembly_ID
        ".   ",                                         # Entity_assembly_asym_ID
        "1   ",                                         # Entity_ID
        f"{atom.resid:<6}",                             # Comp_index_ID
        f"{atom.resid:<6}",                             # Seq_ID
        f"{atom.resname:<6}",                           # Comp_ID
        f"{atom.name:<5}",                              # Atom_ID
        f"{atom.element:<4}",                           # Atom_type
        f"{element_to_isotope[atom.element]:<5}",       # Atom_isotope_number
        f"{val:<10.3f}",                                # Val
        f"{val_err:<8.3f}",                             # Val_err
        ".   ",                                         # Assign_fig_of_merit
        "1   ",                                         # Ambiguity_code
        ".   ",                                         # Ambiguity_set_ID
        ".   ",                                         # Occupancy
        ".   ",                                         # Resonance_ID
        ".   ",                                         # Auth_entity_assembly_ID
        ".   ",                                         # Auth_asym_ID
        # NOTE(review): filled with atom.resindex whereas Seq_ID uses
        # atom.resid — confirm that is intended.
        f"{atom.resindex:<6}",                          # Auth_seq_ID
        f"{atom.resname:<6}",                           # Auth_comp_ID
        f"{atom.name:<5}",                              # Auth_atom_ID
        ".   ",                                         # Details
        f"{ID:<8}",                                     # Entry_ID
        "1",                                            # Assigned_chem_shift_list_ID
    ]
    row = "".join(fields) + "\n"
    rows.append(row)

text = blueprint_cs_start + "".join(rows) + blueprint_cs_end

# Make sure the destination exists so the write does not fail on a fresh
# checkout.
os.makedirs(out_folder, exist_ok=True)
with open(os.path.join(out_folder, f"2i46_vince_production_{index}.str"), "w") as f:
    f.write(text)

In [None]:
"""save_nef_chemical_shift_list_bmr1234

   _nef_chemical_shift_list.sf_category   nef_chemical_shift_list
   _nef_chemical_shift_list.sf_framecode  nef_chemical_shift_list_bmr1234
   _nef_chemical_shift_list.ccpn_serial   1
   _nef_chemical_shift_list.ccpn_comment  ''
   
   
   loop_
      _nef_chemical_shift.chain_code
      _nef_chemical_shift.sequence_code
      _nef_chemical_shift.residue_name
      _nef_chemical_shift.atom_name
      _nef_chemical_shift.value
      _nef_chemical_shift.value_uncertainty
      _nef_chemical_shift.element
      _nef_chemical_shift.isotope_number
      _nef_chemical_shift.ccpn_figure_of_merit
      _nef_chemical_shift.ccpn_comment

"""

In [None]:
# ---------------------------------------------------------------------------
# DISABLED DRAFT (kept for reference, nothing in this cell runs).
# Sketch of an alternative export: metadata header, then one sequence row per
# residue of `sel`, then one chemical-shift row per atom paired with `ppm`.
# ---------------------------------------------------------------------------
# text = blueprint_meta_data
# text += blueprint_molecular_system_start

# cc = 0
# for res in sel.residues:
#     cc += 1
#     position = 'start' if cc == 1 else 'end' if cc == sel.n_residues else 'middle'
#     text += f"      {cc:<4}{res.atoms[0].chainID:<3}{cc:<4}{res.resname:<5}{position:<8}{'.':<3}.\n"

# text += blueprint_molecular_system_end
# text += blueprint_cs_start

# for atom, val in zip(sel.atoms, ppm):
#     cc += 1
#     row = f"      {atom.chainID:<4}"
#     row += f"{atom.resid:<5}"
#     row += f"{atom.resname:<5}"
#     row += f"{atom.name:<7}"
#     row += f"{val:<13.3f}"
#     row += "0   "
#     row += f"{atom.element:<3}"
#     row += f"{element_to_isotope[atom.element]:<4}"
#     row += "1  "
#     row += "."
#     row += "\n"
#     text += row

Read ShiftX2 predictions

In [None]:
# Build an NMR-STAR chemical-shift table (entry 9990) from the values in
# "s2x_0.csv", emitting one row for every atom of `sel` that has a matching
# "<resnum>_<resname>_<atomname>" entry in that table.
import pandas as pd
import numpy as np

blueprint_cs_start_shiftx2 = """data_9990

save_assigned_chemical_shifts_1
   _Assigned_chem_shift_list.Sf_category                   assigned_chemical_shifts
   _Assigned_chem_shift_list.Sf_framecode                  assigned_chemical_shifts_1
   _Assigned_chem_shift_list.Entry_ID                      9990
   _Assigned_chem_shift_list.ID                            1
   _Assigned_chem_shift_list.Name                          BIR1-cIAP1
   _Assigned_chem_shift_list.Sample_condition_list_ID      1
   _Assigned_chem_shift_list.Sample_condition_list_label   $sample_conditions_1
   _Assigned_chem_shift_list.Chem_shift_reference_ID       1
   _Assigned_chem_shift_list.Chem_shift_reference_label    $chem_shift_reference_1
   _Assigned_chem_shift_list.Chem_shift_1H_err             0.02
   _Assigned_chem_shift_list.Chem_shift_13C_err            0.2
   _Assigned_chem_shift_list.Chem_shift_15N_err            0.2
   _Assigned_chem_shift_list.Chem_shift_31P_err            .
   _Assigned_chem_shift_list.Chem_shift_2H_err             .
   _Assigned_chem_shift_list.Chem_shift_19F_err            .
   _Assigned_chem_shift_list.Error_derivation_method       .
   _Assigned_chem_shift_list.Details                       .
   _Assigned_chem_shift_list.Text_data_format              .
   _Assigned_chem_shift_list.Text_data                     .
   
   
   loop_
      _Atom_chem_shift.ID
      _Atom_chem_shift.Assembly_atom_ID
      _Atom_chem_shift.Entity_assembly_ID
      _Atom_chem_shift.Entity_assembly_asym_ID
      _Atom_chem_shift.Entity_ID
      _Atom_chem_shift.Comp_index_ID
      _Atom_chem_shift.Seq_ID
      _Atom_chem_shift.Comp_ID
      _Atom_chem_shift.Atom_ID
      _Atom_chem_shift.Atom_type
      _Atom_chem_shift.Atom_isotope_number
      _Atom_chem_shift.Val
      _Atom_chem_shift.Val_err
      _Atom_chem_shift.Assign_fig_of_merit
      _Atom_chem_shift.Ambiguity_code
      _Atom_chem_shift.Ambiguity_set_ID
      _Atom_chem_shift.Occupancy
      _Atom_chem_shift.Resonance_ID
      _Atom_chem_shift.Auth_entity_assembly_ID
      _Atom_chem_shift.Auth_asym_ID
      _Atom_chem_shift.Auth_seq_ID
      _Atom_chem_shift.Auth_comp_ID
      _Atom_chem_shift.Auth_atom_ID
      _Atom_chem_shift.Details
      _Atom_chem_shift.Entry_ID
      _Atom_chem_shift.Assigned_chem_shift_list_ID

"""

# Whitespace-separated table without a header row. `sep=r"\s+"` is the
# replacement for the deprecated `delim_whitespace=True`.
df = pd.read_csv("s2x_0.csv", sep=r"\s+", names=["ChainID", "resnum", "resname", "atomname", "val", 'uncertainty'])
ppm_shiftx2 = df["val"].values
shiftx2_names = (df["resnum"].astype(str) + '_' + df["resname"] + '_' + df["atomname"]).values

# Index the table once by its "<resnum>_<resname>_<atomname>" key so each
# atom is resolved with a dictionary lookup instead of scanning the whole
# array twice. Keys that occur more than once are remembered so that looking
# one up still fails instead of silently picking a row.
shift_by_name = {}
ambiguous_names = set()
for key, value in zip(shiftx2_names, ppm_shiftx2):
    if key in shift_by_name:
        ambiguous_names.add(key)
    shift_by_name[key] = value

# Value written in the Atom_isotope_number column for each element symbol.
# NOTE(review): the 'O' and 'S' entries (8, 16) equal those elements' atomic
# numbers — confirm them before relying on rows for those nuclei.
element_to_isotope = {
    'H': 1,
    'N': 15,
    'C': 13,
    'O': 8,
    'S': 16,
}
cc = 0  # running row ID, counts emitted rows only

rows = []
# `sel` is the protein atom selection created in another cell.
for atom in sel.atoms:
    name = f"{atom.resnum}_{atom.resname}_{atom.name}"
    if name not in shift_by_name:
        continue
    if name in ambiguous_names:
        raise ValueError(f"more than one entry for {name!r} in s2x_0.csv")
    cc += 1
    fields = [
        f"      {cc:<6}",                               # ID
        ".   ",                                         # Assembly_atom_ID
        "1   ",                                         # Entity_assembly_ID
        ".   ",                                         # Entity_assembly_asym_ID
        "1   ",                                         # Entity_ID
        f"{atom.resid:<6}",                             # Comp_index_ID
        f"{atom.resid:<6}",                             # Seq_ID
        f"{atom.resname:<6}",                           # Comp_ID
        f"{atom.name:<5}",                              # Atom_ID
        f"{atom.element:<4}",                           # Atom_type
        f"{element_to_isotope[atom.element]:<5}",       # Atom_isotope_number
        f"{shift_by_name[name]:<10.3f}",                # Val
        f"{0.300:<8.3f}",                               # Val_err (fixed value)
        ".   ",                                         # Assign_fig_of_merit
        "1   ",                                         # Ambiguity_code
        ".   ",                                         # Ambiguity_set_ID
        ".   ",                                         # Occupancy
        ".   ",                                         # Resonance_ID
        ".   ",                                         # Auth_entity_assembly_ID
        ".   ",                                         # Auth_asym_ID
        # NOTE(review): filled with atom.resindex whereas Seq_ID uses
        # atom.resid — confirm that is intended.
        f"{atom.resindex:<6}",                          # Auth_seq_ID
        f"{atom.resname:<6}",                           # Auth_comp_ID
        f"{atom.name:<5}",                              # Auth_atom_ID
        ".   ",                                         # Details
        f"{9990:<8}",                                   # Entry_ID
        "1",                                            # Assigned_chem_shift_list_ID
    ]
    row = "".join(fields) + "\n"
    rows.append(row)

text = blueprint_cs_start_shiftx2 + "".join(rows) + blueprint_cs_end

In [None]:
# Persist the assembled chemical-shift text next to the notebook.
with open("s2x_0.str", "w") as str_file:
    str_file.write(text)

In [None]:
# Load the structure again and keep only the atoms matched by the "protein"
# selection keyword.
u = mda.Universe(pdb_filename)
sel = u.atoms.select_atoms("protein")

In [15]:
# Build an NMR-STAR chemical-shift table for entry `offset + id_` from the
# values in f"s2x_{id_}.csv", restricted to the 'N' and 'H' atoms of LEU 183.
import pandas as pd
import numpy as np
offset = 10000
id_ = 4
i = offset + id_
blueprint_cs_start_shiftx2 = f"""data_{i}

save_assigned_chemical_shifts_1
   _Assigned_chem_shift_list.Sf_category                   assigned_chemical_shifts
   _Assigned_chem_shift_list.Sf_framecode                  assigned_chemical_shifts_1
   _Assigned_chem_shift_list.Entry_ID                      {i}
   _Assigned_chem_shift_list.ID                            1
   _Assigned_chem_shift_list.Name                          BIR1-cIAP1
   _Assigned_chem_shift_list.Sample_condition_list_ID      1
   _Assigned_chem_shift_list.Sample_condition_list_label   $sample_conditions_1
   _Assigned_chem_shift_list.Chem_shift_reference_ID       1
   _Assigned_chem_shift_list.Chem_shift_reference_label    $chem_shift_reference_1
   _Assigned_chem_shift_list.Chem_shift_1H_err             0.02
   _Assigned_chem_shift_list.Chem_shift_13C_err            0.2
   _Assigned_chem_shift_list.Chem_shift_15N_err            0.2
   _Assigned_chem_shift_list.Chem_shift_31P_err            .
   _Assigned_chem_shift_list.Chem_shift_2H_err             .
   _Assigned_chem_shift_list.Chem_shift_19F_err            .
   _Assigned_chem_shift_list.Error_derivation_method       .
   _Assigned_chem_shift_list.Details                       .
   _Assigned_chem_shift_list.Text_data_format              .
   _Assigned_chem_shift_list.Text_data                     .
   
   
   loop_
      _Atom_chem_shift.ID
      _Atom_chem_shift.Assembly_atom_ID
      _Atom_chem_shift.Entity_assembly_ID
      _Atom_chem_shift.Entity_assembly_asym_ID
      _Atom_chem_shift.Entity_ID
      _Atom_chem_shift.Comp_index_ID
      _Atom_chem_shift.Seq_ID
      _Atom_chem_shift.Comp_ID
      _Atom_chem_shift.Atom_ID
      _Atom_chem_shift.Atom_type
      _Atom_chem_shift.Atom_isotope_number
      _Atom_chem_shift.Val
      _Atom_chem_shift.Val_err
      _Atom_chem_shift.Assign_fig_of_merit
      _Atom_chem_shift.Ambiguity_code
      _Atom_chem_shift.Ambiguity_set_ID
      _Atom_chem_shift.Occupancy
      _Atom_chem_shift.Resonance_ID
      _Atom_chem_shift.Auth_entity_assembly_ID
      _Atom_chem_shift.Auth_asym_ID
      _Atom_chem_shift.Auth_seq_ID
      _Atom_chem_shift.Auth_comp_ID
      _Atom_chem_shift.Auth_atom_ID
      _Atom_chem_shift.Details
      _Atom_chem_shift.Entry_ID
      _Atom_chem_shift.Assigned_chem_shift_list_ID

"""

# Comma-separated table; its first line is skipped and the columns are named
# explicitly.
df = pd.read_csv(f"s2x_{id_}.csv", sep=',', names=["resnum", "resname", "atomname", "val"], skiprows=1, header=None)
ppm_shiftx2 = df["val"].values
# shiftx2_names = (df["resnum"].astype(str) + '_' + df["resname"] + '_' + df["atomname"]).values
shiftx2_names = (df["resnum"].astype(str) + '_' + df["atomname"]).values

# Index the table once by its "<resnum>_<atomname>" key so each atom is
# resolved with a dictionary lookup instead of scanning the whole array
# twice. Keys that occur more than once are remembered so that looking one up
# still fails instead of silently picking a row.
shift_by_name = {}
ambiguous_names = set()
for key, value in zip(shiftx2_names, ppm_shiftx2):
    if key in shift_by_name:
        ambiguous_names.add(key)
    shift_by_name[key] = value

# Value written in the Atom_isotope_number column for each element symbol.
# NOTE(review): the 'O' and 'S' entries (8, 16) equal those elements' atomic
# numbers; they are never reached by the N/H-only loop below — confirm them
# before emitting other nuclei.
element_to_isotope = {
    'H': 1,
    'N': 15,
    'C': 13,
    'O': 8,
    'S': 16,
}
cc = 0  # running row ID, counts emitted rows only

rows = []
# `sel` is the protein atom selection created in another cell.
for atom in sel.atoms:
    # name = f"{atom.resnum}_{atom.resname}_{atom.name}"
    name = f"{atom.resnum}_{atom.name}"
    if name not in shift_by_name:
        continue
    # Keep only the backbone N/H of leucine 183.
    if atom.resname != "LEU" or atom.name not in ('N', 'H') or atom.resnum != 183:
        continue
    if name in ambiguous_names:
        raise ValueError(f"more than one entry for {name!r} in s2x_{id_}.csv")
    cc += 1
    fields = [
        f"      {cc:<6}",                               # ID
        ".   ",                                         # Assembly_atom_ID
        "1   ",                                         # Entity_assembly_ID
        ".   ",                                         # Entity_assembly_asym_ID
        "1   ",                                         # Entity_ID
        f"{atom.resid:<6}",                             # Comp_index_ID
        f"{atom.resid:<6}",                             # Seq_ID
        f"{atom.resname:<6}",                           # Comp_ID
        f"{atom.name:<5}",                              # Atom_ID
        f"{atom.element:<4}",                           # Atom_type
        f"{element_to_isotope[atom.element]:<5}",       # Atom_isotope_number
        f"{shift_by_name[name]:<10.3f}",                # Val
        f"{0.300:<8.3f}",                               # Val_err (fixed value)
        ".   ",                                         # Assign_fig_of_merit
        "1   ",                                         # Ambiguity_code
        ".   ",                                         # Ambiguity_set_ID
        ".   ",                                         # Occupancy
        ".   ",                                         # Resonance_ID
        ".   ",                                         # Auth_entity_assembly_ID
        ".   ",                                         # Auth_asym_ID
        # NOTE(review): filled with atom.resindex whereas Seq_ID uses
        # atom.resid — confirm that is intended.
        f"{atom.resindex:<6}",                          # Auth_seq_ID
        f"{atom.resname:<6}",                           # Auth_comp_ID
        f"{atom.name:<5}",                              # Auth_atom_ID
        ".   ",                                         # Details
        f"{i:<8}",                                      # Entry_ID
        "1",                                            # Assigned_chem_shift_list_ID
    ]
    row = "".join(fields) + "\n"
    rows.append(row)

text = blueprint_cs_start_shiftx2 + "".join(rows) + blueprint_cs_end

In [16]:
# Write the assembled chemical-shift text into `out_folder`, creating the
# folder first so the write does not fail when it is missing.
os.makedirs(out_folder, exist_ok=True)
with open(os.path.join(out_folder, f"s2x_{id_}.str"), "w") as f:
    f.write(text)