# Coarse-graining at amino acid level of mAb all-atom structure

In [8]:
# Go in the working directory

try:
    workdir
except NameError:
    workdir=%pwd
else:
    %cd -q $workdir

%cd -q $workdir


In [3]:
#Import all packages needed
import mdtraj as md
import numpy as np
import os

# Steps:

1. Fix the original structure with PDBFixer.
2. Manually create the N-terminal (NTR) and C-terminal (CTR) residues for each chain.
3. Coarse-grain the structure at the amino-acid level and rename the cysteines involved in disulfide bonds so that they are not treated as titratable during simulations.

# 3. CG at amino acid level

In [None]:
### Load the all-atom structure (with terminals) and report its size ###
# NOTE(review): the path below is absolute and user-specific; it will not
# resolve on another machine.
traj = md.load_pdb('/Users/isabelvinterbladh/Downloads/1BAW.pdb')

# Select every atom and build the matching topology subset.
sel = traj.topology.select('all')
top = traj.topology.subset(sel)

# Blank lines around the two messages keep the cell output readable.
summary = f'The pdb structure contains {traj.n_atoms} atoms, {traj.n_residues} residues, and {traj.n_chains} chains!'
print()
print(summary)
print()
print("Searching cysteine involved in disulfide bonds ...")
print()
def load_coarsegrain(file):
    """Coarse-grain one all-atom PDB entry to one bead per residue and save it as XYZ.

    The structure is read from ``../../pdb/aa/pdb{file}.ent.gz`` and the
    result is written to ``xyz-files/{file}.xyz``. Only the first frame of
    the structure is used. Each residue becomes one line holding the residue
    name and the mass-weighted centre of its atoms.

    The output follows the XYZ layout: bead count, one (empty) comment line,
    then one line per bead. The comment line was previously omitted, which
    makes standard XYZ readers swallow the first residue.

    Parameters
    ----------
    file : str
        Identifier inserted into both the input and the output file name.

    Raises
    ------
    ValueError
        If a residue has zero total mass, so no mass centre can be defined.
    """
    traj = md.load_pdb(f'../../pdb/aa/pdb{file}.ent.gz')
    frame_xyz = traj.xyz[0]

    fname = f'xyz-files/{file}.xyz'
    # The context manager closes the file even if a residue fails below.
    with open(fname, 'w') as xyz_file:
        xyz_file.write(str(traj.top.n_residues) + '\n')
        xyz_file.write('\n')  # XYZ comment line
        for res in traj.top.residues:
            weighted_sum = np.zeros(3)  # sum of mass * position
            total_mass = 0.0            # residue weight
            for atom in res.atoms:
                weighted_sum = weighted_sum + atom.element.mass * frame_xyz[atom.index]
                total_mass = total_mass + atom.element.mass
            if total_mass == 0:
                raise ValueError(
                    f'Residue {res.name} has zero total mass; cannot compute its mass center.'
                )
            # mdtraj reports coordinates in nanometres; the factor 10 converts
            # them to angstrom.
            center = weighted_sum / total_mass * 10

            xyz_file.write('{0:4} {1:8.3f} {2:8.3f} {3:8.3f}\n'
                           .format(res.name, center[0], center[1], center[2]))
    

NameError: name 'traj' is not defined

In [36]:
# List the entries of the all-atom PDB directory (names only, without the
# directory prefix).
# NOTE(review): `dir` shadows the Python built-in of the same name for the
# rest of the session.
dir = '../../pdb/aa/'
files = os.listdir(dir)



In [7]:
# Convert every PDB file in `hamelryck_structures` to a coarse-grained XYZ
# file under `new_files/`, named after characters 3-6 of the input file name.
dir = 'hamelryck_structures'
files = os.listdir(dir)

# Make sure the output directory exists before anything is written into it.
os.makedirs('new_files', exist_ok=True)

for file in files:
    if file.endswith('.pdb'):
        # os.listdir returns bare entry names, so the directory has to be
        # prepended; otherwise the file is looked up in the current directory.
        create_xyzfile(os.path.join(dir, file), f"new_files/{file[3:7]}.xyz")

In [6]:
def _residue_mass_center(res, frame_xyz):
    """Return the mass-weighted centre of ``res`` for one frame, scaled by 10.

    ``frame_xyz`` is indexed with ``atom.index`` and must hold one position
    per atom. mdtraj reports coordinates in nanometres, so the factor 10
    yields angstrom.
    """
    weighted_sum = np.zeros(3)  # sum of mass * position
    total_mass = 0.0            # residue weight
    for atom in res.atoms:
        weighted_sum = weighted_sum + atom.element.mass * frame_xyz[atom.index]
        total_mass = total_mass + atom.element.mass
    if total_mass == 0:
        raise ValueError(
            f'Residue {res.name} has zero total mass; cannot compute its mass center.'
        )
    return weighted_sum / total_mass * 10


def _write_residue_beads(xyz_file, residues, frame_xyz, amino_acids, announce=False):
    """Write one XYZ line per residue in ``residues`` whose name is in ``amino_acids``."""
    for res in residues:
        if res.name not in amino_acids:
            continue
        center = _residue_mass_center(res, frame_xyz)
        if announce:
            print(f'Wrote coordinates for residue {res.name}')
        xyz_file.write('{0:4} {1:8.3f} {2:8.3f} {3:8.3f}\n'.format(res.name, center[0], center[1], center[2]))


def CGfunc(traj, file_xyz, multi_model=False):
    """Coarse-grain a structure to one bead per amino acid and write it as XYZ.

    Every residue whose name is one of the 20 standard three-letter amino-acid
    codes is replaced by a single bead at the mass-weighted centre of its
    atoms. Residues with any other name are skipped. The output file holds
    the bead count, an empty comment line, and one line per bead with the
    residue name and its three coordinates.

    Parameters
    ----------
    traj : mdtraj.Trajectory
        Structure to coarse-grain. Only ``topology``, ``xyz`` and ``n_frames``
        are used.
    file_xyz : str
        Path of the XYZ file to create (overwritten if it exists).
    multi_model : bool, optional
        If False (default) only the first frame is written. If True every
        frame is processed and all beads are appended to the same XYZ block,
        whose header is the total bead count over all frames.

    Raises
    ------
    ValueError
        If an amino-acid residue has zero total mass.

    Notes
    -----
    Progress messages are printed while counting and writing. The per-residue
    "Wrote coordinates" message is only printed for multi-chain structures
    when ``multi_model`` is False.
    """
    amino_acids = frozenset((
        'ALA', 'ARG', 'ASN', 'ASP', 'CYS', 'GLN', 'GLU', 'GLY', 'HIS', 'ILE',
        'LEU', 'LYS', 'MET', 'PHE', 'PRO', 'SER', 'THR', 'TRP', 'TYR', 'VAL',
    ))
    topology = traj.topology
    n_chains = topology.n_chains
    multi_chain = n_chains > 1

    def count_amino_acids(residues):
        return sum(1 for res in residues if res.name in amino_acids)

    # --- Count the beads up front: the XYZ header needs the total. ---
    num_amino_acids = 0
    if multi_model:
        print(f'The structure contains {traj.n_frames} models.')
        for i in range(traj.n_frames):
            if multi_chain:
                print(f'Processing model {i+1} with {n_chains} chains.')
            num_amino_acids += count_amino_acids(topology.residues)
    elif multi_chain:
        print(f'The structure contains {n_chains} chains.')
        for chain in topology.chains:
            num = count_amino_acids(chain.residues)
            print(f'Chain {chain.index} contains {num} amino acids.')
            num_amino_acids += num
    else:
        num_amino_acids = count_amino_acids(topology.residues)
    print(f'The structure contains {num_amino_acids} amino acids.')

    # --- Write the beads. ---
    frame_indices = range(traj.n_frames) if multi_model else (0,)
    # The context manager closes the file even if a residue fails.
    with open(file_xyz, 'w') as xyz_file:
        xyz_file.write(str(num_amino_acids) + '\n')
        xyz_file.write('\n')  # XYZ comment line
        for i in frame_indices:
            if multi_model:
                print(f'Processing model {i+1} with {n_chains} chains.')
            frame_xyz = traj.xyz[i]
            if multi_chain:
                for chain in topology.chains:
                    print(f'Processing chain {chain.index} with {chain.n_residues} residues.')
                    _write_residue_beads(xyz_file, chain.residues, frame_xyz,
                                         amino_acids, announce=not multi_model)
            else:
                # topology.residues is a generator and has no len(); use the
                # topology's residue count instead.
                print(f'Processing single chain with {topology.n_residues} residues.')
                _write_residue_beads(xyz_file, topology.residues, frame_xyz, amino_acids)
    
def create_xyzfile(pdb, file_xyz, multi_model=False):
    """Load a PDB file and write its coarse-grained XYZ representation.

    Parameters
    ----------
    pdb : str
        Path of the PDB file to load.
    file_xyz : str
        Path of the XYZ file to write.
    multi_model : bool, optional
        Forwarded unchanged to ``CGfunc``.
    """
    structure = md.load_pdb(f'{pdb}')
    CGfunc(structure, file_xyz, multi_model=multi_model)
    

In [12]:
%cd $workdir
files = os.listdir('../../experimental_data/')
for file in files:
    if file.endswith('_fit1_model1.pdb'):
        data = file.split('_fit1_model1.pdb')[0]
        create_xyzfile(f'../../experimental_data/{data}_fit1_model1.pdb', f'{data}.xyz')
        

/Users/isabelvinterbladh/Documents/HALRIC/SAXSpy/SAXSpy/cg_structures
The structure contains 2 chains.
Chain 0 contains 76 amino acids.
Chain 1 contains 0 amino acids.
The structure contains 76 amino acids.
Processing chain 0 with 76 residues.
Wrote coordinates for residue MET
Wrote coordinates for residue GLN
Wrote coordinates for residue ILE
Wrote coordinates for residue PHE
Wrote coordinates for residue VAL
Wrote coordinates for residue LYS
Wrote coordinates for residue THR
Wrote coordinates for residue LEU
Wrote coordinates for residue THR
Wrote coordinates for residue GLY
Wrote coordinates for residue LYS
Wrote coordinates for residue THR
Wrote coordinates for residue ILE
Wrote coordinates for residue THR
Wrote coordinates for residue LEU
Wrote coordinates for residue GLU
Wrote coordinates for residue VAL
Wrote coordinates for residue GLU
Wrote coordinates for residue PRO
Wrote coordinates for residue SER
Wrote coordinates for residue ASP
Wrote coordinates for residue THR
Wrote co

In [60]:
# Load the all-atom structure that the next cell coarse-grains.
# NOTE(review): the path is absolute and user-specific; it will not resolve
# on another machine.
traj = md.load_pdb('/Users/isabelvinterbladh/Downloads/1_7ktz_A_1-7ktz_A_2.pdb')

In [63]:
# Coarse-grain `traj` and write the beads to exp1_cgn.xyz in the current
# directory. multi_model is left at its default (False), so only the first
# frame is used.
CGfunc(traj, 'exp1_cgn.xyz')

The structure contains 4 chains.
Chain 0 contains 241 amino acids.
Chain 1 contains 0 amino acids.
Chain 2 contains 241 amino acids.
Chain 3 contains 0 amino acids.
The structure contains 482 amino acids.
Processing chain 0 with 241 residues.
Wrote coordinates for residue CYS
Wrote coordinates for residue GLY
Wrote coordinates for residue VAL
Wrote coordinates for residue PRO
Wrote coordinates for residue ALA
Wrote coordinates for residue ILE
Wrote coordinates for residue GLN
Wrote coordinates for residue PRO
Wrote coordinates for residue VAL
Wrote coordinates for residue LEU
Wrote coordinates for residue SER
Wrote coordinates for residue GLY
Wrote coordinates for residue LEU
Wrote coordinates for residue SER
Wrote coordinates for residue ARG
Wrote coordinates for residue ILE
Wrote coordinates for residue VAL
Wrote coordinates for residue ASN
Wrote coordinates for residue GLY
Wrote coordinates for residue GLU
Wrote coordinates for residue GLU
Wrote coordinates for residue ALA
Wrote coo