forked from DustinMorado/EPU_group_AFIS
-
Notifications
You must be signed in to change notification settings - Fork 0
/
get_exp_id_from_star.py
executable file
·219 lines (181 loc) · 6.03 KB
/
get_exp_id_from_star.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Apply optics groups from Relion .star file to Cryosparc .cs file."""
import argparse
import re
import sys

import numpy as np
import tqdm

try:
    import pyem
except ImportError:
    # Passing the message to sys.exit() prints it on stderr and terminates
    # with a non-zero status, so shells and pipelines see the failure.
    sys.exit(
        "Couldn't import pyem, try activating pyem Conda environment:"
        "\n\n$ conda activate pyem\n"
    )
def _match_filename(pattern, path, source):
    """Return the portion of ``path`` matched by the compiled ``pattern``.

    Raises a ValueError naming ``source`` (the file ``path`` was read from)
    when the pattern does not match at all.
    """
    found = pattern.search(path)
    if found is None:
        raise ValueError(
            "Filename {0!r} from {1} does not match the filename pattern "
            "{2!r}".format(path, source, pattern.pattern)
        )
    return found.group()


def exposure_groups_from_star(
    starfnam,
    csfile,
    outputfilename,
    fnam_regex_template="FoilHole_[0-9]*_Data_[0-9]*_[0-9]*_[0-9]*",
    backupcs=True,
    verbose=True,
):
    """Apply groups in star file to those in a Cryosparc cs file.

    This is accomplished by matching movie filenames between the star
    and cs files. Items of the cs file that have no counterpart in the
    star file are all assigned to one extra group numbered one above the
    largest optics group found in the star file.

    Parameters
    ----------
    starfnam : string
        Filename of the .star file from which micrograph optics groups
        will be taken
    csfile : string
        Filename of the cryosparc .cs file to which optics groups will
        be applied to matching filenames
    outputfilename : string or None
        Filename of the output .cs file, if None csfile is overwritten
    fnam_regex_template : string, optional
        Regular expression template for matching of filenames, this
        helps avoid errors in matching due to file extensions (eg.
        doseweighted, eer, etc) that might be added in the processing
        pipeline
    backupcs : bool, optional
        If True copy csfile to csfile + ".bak" before anything is
        written
    verbose : bool, optional
        Option for verbose command line output

    Raises
    ------
    ValueError
        If the cs file contains none of the exposure group id fields,
        none of the known filename fields, or if a filename in either
        file does not match fnam_regex_template
    """
    # Compile once, the pattern is applied to every filename of both files
    pattern = re.compile(fnam_regex_template)

    # Parse star file, getting group and micrograph information
    dfs = pyem.star.parse_star(starfnam)

    # Plain list so that the lookups further down are positional and line
    # up with the positions produced by enumerate()
    star_groups = list(dfs[pyem.star.Relion.OPTICSGROUP])

    # Get total number of optics groups
    maxgroup = max(star_groups)

    # Save a backup copy of the input cs file
    if backupcs:
        import shutil

        backup = csfile + ".bak"
        prompt = ("\nCreating backup of {0}, {1}. Use --nobackup flag "
                  "to turn this off\n").format(
            csfile, backup
        )
        if verbose:
            print(prompt)
        shutil.copyfile(csfile, backup)

    # Load cryosparc cs_file
    cs_items = np.load(csfile)

    # Reduce the star file movie names to the part matched by the template
    star_micrographs = [
        _match_filename(pattern, x, starfnam)
        for x in dfs[pyem.star.Relion.MICROGRAPHMOVIE_NAME]
    ]

    # Position of the first occurrence of every star file name, this
    # replaces a scan of the whole list for the usual exact match
    first_index = {}
    for idx, name in enumerate(star_micrographs):
        first_index.setdefault(name, idx)

    field_names = cs_items.dtype.names

    # Cs parameters to change
    targets = ["mscope_params/exp_group_id", "ctf/exp_group_id"]

    # Find which targets exist inside cs file (kept in the order above)
    presenttargets = [target for target in targets if target in field_names]
    if not presenttargets:
        raise ValueError(
            ("Couldn't find any of the following parameters in {0}:\n"
             "\n{1}\n\n try a different cs file that contains exposure"
             " group ids?").format(
                csfile, "\n".join(targets)
            )
        )

    # Possible tags under which the micrograph filename is stored
    tags = ["movie_blob/path", "blob/path", "location/micrograph_path"]

    # Use the first tag that is present and fail early if there is none,
    # rather than failing later with an obscure field lookup error
    tag = next((t for t in tags if t in field_names), None)
    if tag is None:
        raise ValueError(
            ("Couldn't find any of the following filename fields in "
             "{0}:\n\n{1}\n").format(
                csfile, "\n".join(tags)
            )
        )

    errorstring = "Couldn't find {0} from {1} in {2}"

    if verbose:
        print(
            "Applying optics groups from {0} to items in {1}\n".format(
                starfnam, csfile
            )
        )

    # Iterate over items in cs file, assignments to an item are written
    # into cs_items
    for micrograph in tqdm.tqdm(cs_items, desc="items"):
        micrograph_path = micrograph[tag]
        if isinstance(micrograph_path, bytes):
            micrograph_path = micrograph_path.decode(encoding="utf-8")
        micrograph_fnam = _match_filename(pattern, micrograph_path, csfile)

        # Prefer an exact match, otherwise fall back to the first star
        # file name that is contained in the cs file name
        ii = first_index.get(micrograph_fnam)
        if ii is None:
            ii = next(
                (
                    idx
                    for idx, s in enumerate(star_micrographs)
                    if s in micrograph_fnam
                ),
                None,
            )

        if ii is None:
            # Micrograph from cs file does not exist in star file, these
            # are lumped into a final group with id maxgroup + 1
            if verbose:
                print(errorstring.format(micrograph_fnam, csfile, starfnam))
            group_id = maxgroup + 1
        else:
            group_id = star_groups[ii]

        # Apply group id to cryosparc params
        for target in presenttargets:
            micrograph[target] = group_id

    # Save new cs file
    # Default is to overwrite existing cs file, otherwise write to
    # user inputted cs file. Writing through an open file object keeps
    # np.save from appending a ".npy" extension to the filename.
    destination = csfile if outputfilename is None else outputfilename
    with open(destination, "wb") as out:
        np.save(out, cs_items)
if __name__ == "__main__":
    # Command line interface: every option is described once in the table
    # below and registered in that order (the order shown by --help)
    cli = argparse.ArgumentParser(
        description="Update Cryosparc exposure IDs in a .cs file from"
        " Relion micrographs .star file",
        epilog="Writted by Hamish Brown 01.11.2022",
    )
    option_table = (
        (
            ("--csout", "-o"),
            {
                "type": str,
                "required": False,
                "default": None,
                "help": "Output cs file, default is to overwrite input cs file",
            },
        ),
        (
            ("--nobackup", "-n"),
            {
                "required": False,
                "action": "store_true",
                "help": "Do not backup the .cs file",
            },
        ),
        (
            ("--verbose", "-v"),
            {
                "required": False,
                "action": "store_true",
                "help": "Verbose output",
            },
        ),
        (
            ("--star", "-i"),
            {
                "type": str,
                "required": True,
                "help": "Path to micrographs .star file [REQUIRED]",
            },
        ),
        (
            ("--cs", "-i2"),
            {
                "type": str,
                "required": True,
                "help": "Input cryosparc cs file [REQUIRED]",
            },
        ),
    )
    for flags, settings in option_table:
        cli.add_argument(*flags, **settings)

    options = cli.parse_args()

    # A backup is made unless --nobackup was given
    keep_backup = not options.nobackup
    exposure_groups_from_star(
        options.star,
        options.cs,
        options.csout,
        backupcs=keep_backup,
        verbose=options.verbose,
    )