In [3]:
import pandas

In [4]:
def parsePDB(pdb_file):
    """Parse the ATOM/HETATM records of a PDB file into a pandas DataFrame.

    Every field is sliced from a fixed character range of the record line.

    Parameters
    ----------
    pdb_file : str or path-like
        Path of the PDB file to read.

    Returns
    -------
    pandas.DataFrame
        One row per ATOM/HETATM record with the columns, in this order:
        recname, serial, name, altLoc, resname, chainID, resSeq, iCode,
        x, y, z, occupancy, tempFactor, element, charge.
        A file without any ATOM/HETATM record yields an empty frame that
        still carries these columns.

    Raises
    ------
    ValueError
        If serial, resSeq, x, y, z, or a non-blank occupancy/tempFactor
        field cannot be converted to a number.
    """
    columns = ['recname', 'serial', 'name', 'altLoc',
               'resname', 'chainID', 'resSeq', 'iCode',
               'x', 'y', 'z', 'occupancy', 'tempFactor',
               'element', 'charge']

    def pdb_line(line):
        """Slice one ATOM/HETATM record line into a field dictionary."""
        # Drop the line terminator first: on records shorter than 80 columns
        # it would otherwise end up inside the element/charge slices.
        line = line.rstrip('\r\n')
        occupancy = line[54:60]
        temp_factor = line[60:66]
        return dict(recname=str(line[0:6]).strip(),  # record name
                    serial=int(line[6:11]),          # atom serial number
                    name=str(line[12:16]).strip(),   # atom name
                    altLoc=str(line[16:17]),         # alternate location indicator
                    resname=str(line[17:20]).strip(),
                    chainID=str(line[21:22]),
                    resSeq=int(line[22:26]),         # residue sequence number
                    iCode=str(line[26:27]),          # code for insertion of residues
                    x=float(line[30:38]),
                    y=float(line[38:46]),
                    z=float(line[46:54]),
                    # Blank occupancy falls back to 1.0 (original author's
                    # note: Plumed RMSD needs 1.0).
                    occupancy=1.0 if occupancy.strip() == '' else float(occupancy),
                    # Blank temperature factor falls back to 1.0 as well.
                    tempFactor=1.0 if temp_factor.strip() == '' else float(temp_factor),
                    element=str(line[76:78]),        # element symbol, right-justified
                    charge=str(line[78:80]))         # charge on the atom, right-justified

    lines = []
    with open(pdb_file, 'r') as pdb:
        for line in pdb:
            if len(line) > 6 and line[:6] in ('ATOM  ', 'HETATM'):
                lines.append(pdb_line(line))

    # Passing `columns` fixes the column order and keeps the schema even when
    # no atom record was found (plain column selection would raise KeyError).
    return pandas.DataFrame(lines, columns=columns)

In [5]:
# Read every ATOM/HETATM record of the 1A34 structure into a DataFrame.
test1 = parsePDB(pdb_file='/mnt/f/trash/20201014_pdb_rna/all_rna/1A34.pdb')

In [13]:
# Keep only the rows whose residue name is one of A, U, C or G.
test2 = test1.loc[test1['resname'].isin(['A', 'U', 'C', 'G'])]

In [15]:
# Relabel HETATM records as ATOM in the 'recname' column (returns a new frame).
# NOTE(review): the later test4 cell is built from test2, not test3, so this
# relabelling does not carry into test4/test5/test6 — confirm that is intended.
test3 = test2.replace(to_replace={'recname': {'HETATM': 'ATOM'}})
print(test3)

recname  serial  name altLoc resname chainID  resSeq iCode       x  \
2309    ATOM    2311   OP3              A       B       1        12.339   
2310    ATOM    2312     P              A       B       1        13.774   
2311    ATOM    2313   OP1              A       B       1        13.967   
2312    ATOM    2314   OP2              A       B       1        13.788   
2313    ATOM    2315   O5'              A       B       1        14.872   
...      ...     ...   ...    ...     ...     ...     ...   ...     ...   
2965    ATOM    2969   H4'              U       B      11         3.311   
2966    ATOM    2970   H3'              U       B      11         4.823   
2967    ATOM    2971  HO3'              U       B      11         4.375   
2968    ATOM    2972  HO2'              U       B      11         3.546   
2969    ATOM    2973   H1'              U       B      11         1.650   

           y       z  occupancy  tempFactor element charge  
2309  38.322  15.489       0.50      220.88

In [19]:
# Preview only: show test3 with a fresh 0-based index (the old row labels
# appear as an 'index' column). The result is not assigned, so test3 is unchanged.
test3.reset_index()

Unnamed: 0,index,recname,serial,name,altLoc,resname,chainID,resSeq,iCode,x,y,z,occupancy,tempFactor,element,charge
0,2309,ATOM,2311,OP3,,A,B,1,,12.339,38.322,15.489,0.50,220.88,O,
1,2310,ATOM,2312,P,,A,B,1,,13.774,39.026,15.251,0.50,220.71,P,
2,2311,ATOM,2313,OP1,,A,B,1,,13.967,39.419,13.808,0.50,220.58,O,
3,2312,ATOM,2314,OP2,,A,B,1,,13.788,40.109,16.300,0.50,220.88,O,
4,2313,ATOM,2315,O5',,A,B,1,,14.872,37.934,15.708,0.50,218.41,O,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
651,2965,ATOM,2969,H4',,U,B,11,,3.311,50.716,34.658,0.42,50.36,H,
652,2966,ATOM,2970,H3',,U,B,11,,4.823,48.648,33.622,0.42,50.36,H,
653,2967,ATOM,2971,HO3',,U,B,11,,4.375,49.717,35.886,0.42,50.36,H,
654,2968,ATOM,2972,HO2',,U,B,11,,3.546,46.686,35.695,0.42,50.36,H,


In [31]:
# Move the old row labels into an 'index' column, then renumber 'serial'
# consecutively starting at 1 based on the new 0-based row position.
# NOTE(review): this starts from test2, so the HETATM -> ATOM relabelling
# stored in test3 is not included here — confirm that is intended.
test4 = test2.reset_index().assign(serial=lambda frame: frame.index + 1)

In [32]:
# Display test4 to inspect the renumbered 'serial' column.
test4

Unnamed: 0,index,recname,serial,name,altLoc,resname,chainID,resSeq,iCode,x,y,z,occupancy,tempFactor,element,charge
0,2309,ATOM,1,OP3,,A,B,1,,12.339,38.322,15.489,0.50,220.88,O,
1,2310,ATOM,2,P,,A,B,1,,13.774,39.026,15.251,0.50,220.71,P,
2,2311,ATOM,3,OP1,,A,B,1,,13.967,39.419,13.808,0.50,220.58,O,
3,2312,ATOM,4,OP2,,A,B,1,,13.788,40.109,16.300,0.50,220.88,O,
4,2313,ATOM,5,O5',,A,B,1,,14.872,37.934,15.708,0.50,218.41,O,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
651,2965,HETATM,652,H4',,U,B,11,,3.311,50.716,34.658,0.42,50.36,H,
652,2966,HETATM,653,H3',,U,B,11,,4.823,48.648,33.622,0.42,50.36,H,
653,2967,HETATM,654,HO3',,U,B,11,,4.375,49.717,35.886,0.42,50.36,H,
654,2968,HETATM,655,HO2',,U,B,11,,3.546,46.686,35.695,0.42,50.36,H,


In [33]:
# Discard the 'index' column that reset_index() added to test4.
test5 = test4.drop('index', axis='columns')

In [34]:
# Display test5 to confirm the 'index' column has been removed.
test5

Unnamed: 0,recname,serial,name,altLoc,resname,chainID,resSeq,iCode,x,y,z,occupancy,tempFactor,element,charge
0,ATOM,1,OP3,,A,B,1,,12.339,38.322,15.489,0.50,220.88,O,
1,ATOM,2,P,,A,B,1,,13.774,39.026,15.251,0.50,220.71,P,
2,ATOM,3,OP1,,A,B,1,,13.967,39.419,13.808,0.50,220.58,O,
3,ATOM,4,OP2,,A,B,1,,13.788,40.109,16.300,0.50,220.88,O,
4,ATOM,5,O5',,A,B,1,,14.872,37.934,15.708,0.50,218.41,O,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
651,HETATM,652,H4',,U,B,11,,3.311,50.716,34.658,0.42,50.36,H,
652,HETATM,653,H3',,U,B,11,,4.823,48.648,33.622,0.42,50.36,H,
653,HETATM,654,HO3',,U,B,11,,4.375,49.717,35.886,0.42,50.36,H,
654,HETATM,655,HO2',,U,B,11,,3.546,46.686,35.695,0.42,50.36,H,


In [44]:
# Keep every row whose record name is not HETATM.
# NOTE(review): 'serial' was renumbered before this filter, so dropping rows
# here may leave gaps in the numbering — confirm that is acceptable downstream.
test6 = test5.loc[test5['recname'].ne('HETATM')]

In [46]:
# Write the filtered table to CSV without the DataFrame index column.
test6.to_csv('/mnt/e/test.csv', index=False)