-
Notifications
You must be signed in to change notification settings - Fork 0
/
mtx_to_star.py
executable file
·150 lines (135 loc) · 7.56 KB
/
mtx_to_star.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
#!/nfs/home/bbasanta/anaconda2/envs/py37/bin/python
import numpy as np
import json
from scipy.spatial.transform import Rotation as R
import pandas as pd
import argparse
import sys
def interpret(s):
    """Return the JSON-decoded value of *s*, or *s* unchanged if it is not valid JSON."""
    try:
        value = json.loads(s)
    except ValueError:
        # Not JSON (e.g. a bare token like a filename) -- keep the raw string.
        value = s
    return value
def read_star(fname):
    """Parse a RELION *.star file into optics-group and particle DataFrames.

    Handles both pre-3.1 files (a single ``data_`` table, no version header)
    and >= 3.1 files (``# version`` headers introducing a ``data_optics``
    section followed by a ``data_particles`` section).

    Parameters
    ----------
    fname : str
        Path to the *.star file.

    Returns
    -------
    (pandas.DataFrame, pandas.DataFrame, bool)
        Optics-group table (empty for pre-3.1 files), particle table, and a
        flag that is True when the file is RELION >= 3.1 style.
    """
    # Section counter: 0 = before any table, 1 = optics table, 2 = particle table.
    version_n = 0
    opticgr_cols = []
    opticgr_entries = []
    part_cols = []
    part_entries = []
    rln_31 = False
    with open(fname, 'r') as file:
        # rstrip('\n') instead of slicing off the last character: slicing
        # would corrupt the final line of a file with no trailing newline.
        lines = [i.rstrip('\n') for i in file.readlines()]
    loop_ = False
    for line in lines:
        split_line = line.split()
        if version_n == 0 and line.startswith('data_'):
            # Old-style file: the particle table starts immediately.
            print('RELION version < 3.1 star file detected.')
            version_n = 2
        if line.startswith('# version'):
            print('RELION version >= 3.1 star file detected.')
            rln_31 = True
            version_n += 1  # first header -> optics section, second -> particles
            loop_ = False
        if line.startswith('loop_'):
            loop_ = True
            continue
        if version_n == 1 and len(split_line) > 0:
            if line.startswith('_rln'):
                opticgr_cols.append(split_line[0])
            elif loop_:
                opticgr_entries.append(split_line)
        if version_n == 2 and len(split_line) > 0:
            if line.startswith('_rln'):
                part_cols.append(split_line[0])
            elif loop_:
                part_entries.append(split_line)
    # Optics rows are keyed by their first column (the optics group name);
    # particle rows are keyed by their position in the file.
    opt_dict = {i[0]: {j: interpret(k) for j, k in zip(opticgr_cols, i)}
                for i in opticgr_entries}
    opt_gr_df = pd.DataFrame.from_dict(opt_dict, orient='index', columns=opticgr_cols)
    part_dict = {n: {j: interpret(k) for j, k in zip(part_cols, i)}
                 for n, i in enumerate(part_entries)}
    part_df = pd.DataFrame.from_dict(part_dict, orient='index', columns=part_cols)
    print('Read %d particle entries from RELION star file.' % (len(part_df.index)))
    return opt_gr_df, part_df, rln_31
def write_star(fname, opt_df, par_df, is_rln_31):
    """Write optics-group and particle DataFrames out as a RELION *.star file.

    Parameters
    ----------
    fname : str
        Output path.
    opt_df : pandas.DataFrame
        Optics-group table (only written when *is_rln_31* is True).
    par_df : pandas.DataFrame
        Particle table.
    is_rln_31 : bool
        True -> emit '# version 30001' data_optics/data_particles sections;
        False -> emit a single old-style 'data_' table.
    """
    # 'with' guarantees the handle is flushed/closed even if a write raises.
    with open(fname, 'w') as fout:
        if is_rln_31:
            fout.write('\n# version 30001\n\ndata_optics\n\nloop_\n')
            # Column labels are numbered '#1', '#2', ... per the star format.
            for n, col in enumerate(opt_df.columns):
                fout.write('%s #%d\n' % (col, n + 1))
            fout.write(opt_df.to_string(header=False, index=False, index_names=False) + '\n')
            fout.write('\n\n# version 30001\n\ndata_particles\n\nloop_\n')
        else:
            fout.write('\ndata_\n\nloop_\n')
        for n, col in enumerate(par_df.columns):
            fout.write('%s #%d\n' % (col, n + 1))
        if not is_rln_31:
            fout.write('\n')
        fout.write(par_df.to_string(header=False, index=False, index_names=False) + '\n')
def reconstitute_dict_from_file(infile):
    """Rebuild the particle->matrix dict written by chim_session_to_mtx.py.

    Each non-empty line of *infile* is a JSON object; all objects are merged
    into a single dict (later lines overwrite duplicate keys).

    Parameters
    ----------
    infile : str
        Path to the chimera matrix dump.

    Returns
    -------
    dict
        Mapping of particle name -> transformation matrix (nested lists).
    """
    output_dict = {}
    # 'with' closes the file even on a parse error; iterating the handle
    # avoids materializing the whole file first.
    with open(infile, 'r') as f:
        for raw in f:
            line = raw.strip()
            if not line:
                continue  # tolerate blank/trailing lines instead of crashing in json.loads
            output_dict.update(json.loads(line))
    print('Read %d particle entries from chimera session matrices.' % (len(output_dict)))
    return output_dict
def get_name_dict(infile):
    """Read a two-column CSV mapping star-file particle names to Chimera names.

    Each line has the form ``star_name,chimera_name``; columns beyond the
    second are ignored, matching the original behavior.

    Parameters
    ----------
    infile : str
        Path to the equivalence *.csv file.

    Returns
    -------
    dict
        Mapping of star-file particle basename -> Chimera session name.
    """
    output_dict = {}
    with open(infile, 'r') as f:
        for raw in f:
            line = raw.strip()
            if not line:
                continue  # skip blank lines instead of raising IndexError on split
            split_line = line.split(',')
            output_dict[split_line[0]] = split_line[1]
    print('Read %d particle entries from a provided *.csv file with particle name equivalences' % (len(output_dict)))
    return output_dict
def get_eulers_and_translation(mtx, angpix):
    """Convert a 4x4 transformation matrix to [rot, tilt, psi, x, y, z].

    The 3x3 upper-left block is decomposed into intrinsic ZYZ Euler angles
    in degrees; the last column's top three entries (the translation) are
    scaled by *angpix*.
    """
    eulers = R.from_matrix(mtx[:3, :3]).as_euler('ZYZ', degrees=True)
    shift = mtx[:3, -1] * angpix
    rot, tilt, psi = eulers
    x, y, z = shift
    return [rot, tilt, psi, x, y, z]
if __name__ == "__main__":
    # CLI entry point: map per-particle Chimera fitting matrices onto a
    # template RELION star file so refinement can start from the fitted poses.
    argparser = argparse.ArgumentParser(description='Use transformation matrices generated by chim_session_to_mtx.py \
to populate a template relion star file Euler angles and rotation origins.')
    argparser.add_argument('--template_star',required=True, type=str,help='RELION *.star file to use as template.')
    argparser.add_argument('--chimera_mtx_output',required=True, type=str,help='Output file from chim_session_to_mtx.py')
    argparser.add_argument('--rln_particle_particle_csv',required=False,type=str,help='A "coma separated values" file denoting equivalences between the naming of particles in the *.star file and chimera sessions. E.g., if a particle is named "Particles/Tomograms/tomogram101/tomogram101_subtomo000004.mrc" in the star file, and "tomo1_4.mrc" in the Chimersa session, the *.csv file should have a line with the following format: "tomogram101_subtomo000004.mrc,tomo1_4.mrc". There should be one pair per line.')
    argparser.add_argument('--angpix',required=False, type=float ,help='Particle pixel size.')
    argparser.add_argument('--output_name',type=str,help='Name for output *.star file')
    args = argparser.parse_args()

    # Load the template star file and the particle->matrix dict dumped
    # from the Chimera session.
    opt_df, par_df, is_rln_31 = read_star(args.template_star)
    recons_mtx_dict = reconstitute_dict_from_file(args.chimera_mtx_output)

    # By default assume star-file basenames equal the Chimera particle
    # names; an optional CSV replaces that identity mapping entirely.
    particle_name_dict = { i:i for i in recons_mtx_dict.keys() }
    if args.rln_particle_particle_csv: particle_name_dict = get_name_dict(args.rln_particle_particle_csv)
    names = [ i for i,j in particle_name_dict.items() ]

    # Keep only the star-file rows whose _rlnImageName basename (text after
    # the last '/') appears in the name mapping.
    new_part_df_only_entries = par_df[ [ p in names for p in [ n.split('/')[-1] for n in par_df["_rlnImageName"] ] ] ]
    print( "%d particles successfully mapped to a star file entry."%( len(new_part_df_only_entries.index) ) )

    # RELION >= 3.1 stores origins in Angstroms (_rlnOriginXAngst), so the
    # pixel-size flag becomes mandatory to convert Chimera's pixel shifts.
    in_angs = False
    if ('_rlnOriginXAngst' in new_part_df_only_entries.columns) or is_rln_31:
        in_angs = True
        print('rlnOriginXAngst detected in star file particle records, translation will be provided in Angstroms.')
        if not args.angpix:
            sys.exit('Origin is required to be in Angstroms, but particle pixel size was not provided. Exiting without producing new star file.')

    # Convert each mapped particle's matrix into [rot, tilt, psi, x, y, z];
    # row order follows the filtered dataframe so columns can be assigned back.
    container = []
    for particle in [ n.split('/')[-1] for n in new_part_df_only_entries["_rlnImageName"] ]:
        angpix = 1.0  # scale factor of 1 keeps shifts in pixels when Angstroms aren't required
        if in_angs: angpix = args.angpix
        try: chim_ses_name = particle_name_dict[particle]
        except KeyError: sys.exit('Particle %s is not in the recognized name in equivalence file or Chimera session.'%particle)
        try: mtx = np.array(recons_mtx_dict[chim_ses_name])
        except KeyError: sys.exit('Particle %s does not have a Chimera matrix! Exiting.'%chim_ses_name)
        print('%s processed OK'%particle)
        container.append(get_eulers_and_translation(mtx,angpix))
    container = np.array(container)

    # Overwrite the pose columns with the Chimera-derived values; .assign
    # returns a new frame, so rebind each time.
    new_part_df_only_entries = new_part_df_only_entries.assign(_rlnAngleRot=container[:,0])
    new_part_df_only_entries = new_part_df_only_entries.assign(_rlnAngleTilt=container[:,1])
    new_part_df_only_entries = new_part_df_only_entries.assign(_rlnAnglePsi=container[:,2])
    if in_angs:
        # Angstrom-valued origin columns (RELION >= 3.1 convention).
        new_part_df_only_entries = new_part_df_only_entries.assign(_rlnOriginXAngst=container[:,3])
        new_part_df_only_entries = new_part_df_only_entries.assign(_rlnOriginYAngst=container[:,4])
        new_part_df_only_entries = new_part_df_only_entries.assign(_rlnOriginZAngst=container[:,5])
    else:
        # Pixel-valued origin columns (pre-3.1 convention).
        new_part_df_only_entries = new_part_df_only_entries.assign(_rlnOriginX=container[:,3])
        new_part_df_only_entries = new_part_df_only_entries.assign(_rlnOriginY=container[:,4])
        new_part_df_only_entries = new_part_df_only_entries.assign(_rlnOriginZ=container[:,5])

    # Default output name: template basename with extension swapped for
    # '.init_from_Chimera.star'; --output_name overrides it.
    outname = '.'.join(args.template_star.split('/')[-1].split('.')[:-1])+'.init_from_Chimera.star'
    if args.output_name: outname = args.output_name
    write_star(outname,opt_df,new_part_df_only_entries,is_rln_31)