Skip to content

Commit

Permalink
rewrite InstCatTrimmer class to save memory ingesting Run2.0i instance catalogs
Browse files Browse the repository at this point in the history
  • Loading branch information
jchiang87 committed Sep 11, 2018
1 parent d9018ba commit 2bf5c5b
Show file tree
Hide file tree
Showing 7 changed files with 158 additions and 102 deletions.
27 changes: 19 additions & 8 deletions python/desc/imsim/ImageSimulator.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,25 +72,31 @@ def __init__(self, instcat, psf, numRows=None, config=None, seed=267,
Logging level ('DEBUG', 'INFO', 'WARN', 'ERROR', 'CRITICAL').
"""
self.config = read_config(config)
self.log_level = log_level
self.logger = get_logger(self.log_level, name='ImageSimulator')
self.create_centroid_file = create_centroid_file
self.psf = psf
self.outdir = outdir
if sensor_list is None:
sensor_list = self._get_all_sensors()
self.logger.debug("parsing instance catalog for %d sensors",
len(sensor_list))
self.obs_md, self.phot_params, sources \
= parsePhoSimInstanceFile(instcat, numRows=numRows)
self.gs_obj_arr = sources[0]
= parsePhoSimInstanceFile(instcat, sensor_list, numRows=numRows)
self.gs_obj_dict = sources[1]
self.camera_wrapper = LSSTCameraWrapper()
self.apply_sensor_model = apply_sensor_model
self._make_gs_interpreters(seed, sensor_list, file_id)
self.log_level = log_level
self.logger = get_logger(self.log_level, name='ImageSimulator')
if not self.gs_obj_arr:
self.logger.warn("No object entries in %s", instcat)

def _make_gs_interpreters(self, seed, sensor_list, file_id):
"""
Create a separate GalSimInterpreter for each sensor so that they
can be run in parallel and maintain separate checkpoint files.
Create a separate GalSimInterpreter for each sensor so that
they can be run in parallel and maintain separate checkpoint
files.
Also extract GsObjectLists from gs_obj_dict for only the
sensors in sensor_list so that the memory in the underlying
InstCatTrimmer object in gs_obj_dict can be recovered.
TODO: Find a good way to pass a different seed to each
gs_interpreter or have them share the random number generator.
Expand Down Expand Up @@ -139,6 +145,11 @@ def _make_gs_interpreters(self, seed, sensor_list, file_id):
os.path.join(self.outdir,
self.config['persistence']['centroid_prefix'])

def _get_all_sensors(self):
    """
    Get a list of all of the science sensors.

    Every detector in the camera whose type is neither WAVEFRONT nor
    GUIDER is included.

    Returns
    -------
    list
        The detector names, as returned by det.getName().
    """
    # __init__ calls this method (when sensor_list is None) before it
    # assigns self.camera_wrapper, so fall back to a temporary wrapper
    # instead of failing with an AttributeError.
    camera_wrapper = getattr(self, 'camera_wrapper', None)
    if camera_wrapper is None:
        camera_wrapper = LSSTCameraWrapper()
    return [det.getName() for det in camera_wrapper.camera
            if det.getType() not in (WAVEFRONT, GUIDER)]

@staticmethod
def checkpoint_file(file_id, det_name):
"""
Expand Down
14 changes: 8 additions & 6 deletions python/desc/imsim/imSim.py
Original file line number Diff line number Diff line change
Expand Up @@ -499,7 +499,7 @@ def phosim_obs_metadata(phosim_commands):
return obs_md


def parsePhoSimInstanceFile(fileName, numRows=None):
def parsePhoSimInstanceFile(fileName, sensor_list, numRows=None):
"""
Read a PhoSim instance catalog into a Pandas dataFrame. Then use
the information that was read-in to build and return a command
Expand All @@ -509,6 +509,8 @@ def parsePhoSimInstanceFile(fileName, numRows=None):
----------
fileName : str
The instance catalog filename.
sensor_list: list
List of sensors for which to extract object lists.
numRows : int, optional
The number of rows to read from the instance catalog.
If None (the default), then all of the rows will be read in.
Expand All @@ -524,14 +526,14 @@ def parsePhoSimInstanceFile(fileName, numRows=None):
commands = metadata_from_file(fileName)
obs_metadata = phosim_obs_metadata(commands)
phot_params = photometricParameters(commands)
instcats = InstCatTrimmer(fileName, numRows=numRows)
gs_object_arr = GsObjectList(instcats.object_lines, instcats.obs_md,
phot_params, instcats.instcat_file)
gs_object_dict = GsObjectDict(instcats, phot_params)
instcats = InstCatTrimmer(fileName, sensor_list, numRows=numRows)
gs_object_dict = {detname: GsObjectList(instcats[detname], instcats.obs_md,
phot_params, instcats.instcat_file)
for detname in sensor_list}

return PhoSimInstanceCatalogContents(obs_metadata,
phot_params,
(gs_object_arr, gs_object_dict))
([], gs_object_dict))


class GsObjectDict:
Expand Down
195 changes: 115 additions & 80 deletions python/desc/imsim/trim.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
"""
Function to apply chip-centered acceptance cones on instance catalogs.
"""
import os
import numpy as np
import lsst.sims.coordUtils
from lsst.sims.utils import _angularSeparation
Expand All @@ -27,81 +26,36 @@ def degrees_separation(ra0, dec0, ra, dec):
return np.degrees(_angularSeparation(np.radians(ra0), np.radians(dec0),
np.radians(ra), np.radians(dec)))

class InstCatTrimmer:
class Disaggregator:
""".
Class to disaggregate instance catalog object lines into per chip
acceptance cones.
"""
Class to trim instance catalogs for acceptance cones centered
on CCDs in the LSST focalplane.
Attributes
----------
instcat_file: str
Instance catalog filename.
command_lines: list
PhoSim command entries.
object_lines: list
PhoSim object entries.
obs_md: ObservationMetadata
Observation metadata for the visit.
minsource: int
Minimum number of sersic objects to require for a sensor-visit
to be simulated.
"""
def __init__(self, instcat, numRows=None):
def __init__(self, object_lines, trimmer):
"""
Parameters
----------
instcat: str
Path to input instance catalog. The file can have includeobj
entries.
numRows: int [None]
Number of rows to read from the instance catalog. If None,
then read all rows.
object_lines: list
list of object entries from an instance catalog.
trimmer: InstCatTrimmer
An instance of the InstCatTrimmer class to provide
visit-level metadata.
"""
self.instcat_file = instcat

# Use .fopen to read in the command and object lines from the
# instance catalog.
with desc.imsim.fopen(instcat, mode='rt') as input_:
if numRows is None:
lines = [x for x in input_ if not x.startswith('#')]
else:
lines = [x for _, x in zip(range(numRows), input_)
if not x.startswith('#')]

# Extract the phosim commands and create the
# ObservationMetadata object.
self.command_lines = []
phosim_commands = dict()
for line in lines:
if line.startswith('object'):
break
tokens = line.strip().split()
phosim_commands[tokens[0]] = float(tokens[1])
self.command_lines.append(line)
try:
self.minsource = phosim_commands['minsource']
except KeyError:
self.minsource = None

phosim_commands['bandpass'] = 'ugrizy'[int(phosim_commands['filter'])]
self.obs_md = desc.imsim.phosim_obs_metadata(phosim_commands)

# Save the object lines separately.
self.object_lines = lines[len(self.command_lines):]
self.object_lines = object_lines
self.trimmer = trimmer

# Extract the ra, dec values for each object.
self._ra = np.zeros(len(self.object_lines), dtype=np.float)
self._dec = np.zeros(len(self.object_lines), dtype=np.float)
self._sersic = np.zeros(len(self.object_lines), dtype=np.int)
self._magnorm = np.zeros(len(self.object_lines), dtype=np.float)
for i, line in enumerate(self.object_lines):
self._ra = np.zeros(len(object_lines), dtype=np.float)
self._dec = np.zeros(len(object_lines), dtype=np.float)
self._sersic = np.zeros(len(object_lines), dtype=np.int)
self._magnorm = np.zeros(len(object_lines), dtype=np.float)
for i, line in enumerate(object_lines):
tokens = line.strip().split()
self._ra[i] = np.float(tokens[2])
self._dec[i] = np.float(tokens[3])
if 'sersic2d' in line:
self._sersic[i] = 1
self._magnorm[i] = np.float(tokens[4])

self._camera = desc.imsim.get_obs_lsstSim_camera()

def compute_chip_center(self, chip_name):
Expand All @@ -121,7 +75,7 @@ def compute_chip_center(self, chip_name):
center_x, center_y = desc.imsim.get_chip_center(chip_name, self._camera)
return lsst.sims.coordUtils.raDecFromPixelCoords(
xPix=center_x, yPix=center_y, chipName=chip_name,
camera=self._camera, obs_metadata=self.obs_md, epoch=2000.0,
camera=self._camera, obs_metadata=self.trimmer.obs_md, epoch=2000.0,
includeDistortion=True)

def get_object_entries(self, chip_name, radius=0.18, sort_magnorm=True):
Expand Down Expand Up @@ -152,8 +106,8 @@ def get_object_entries(self, chip_name, radius=0.18, sort_magnorm=True):
seps = degrees_separation(ra0, dec0, self._ra, self._dec)
index = np.where(seps < radius)

if (self.minsource is not None and
sum(self._sersic[index]) < self.minsource):
if (self.trimmer.minsource is not None and
sum(self._sersic[index]) < self.trimmer.minsource):
# Apply the minsource criterion.
return []

Expand All @@ -166,22 +120,103 @@ def get_object_entries(self, chip_name, radius=0.18, sort_magnorm=True):

return selected

def write_instcat(self, chip_name, outfile, radius=0.18):
"""
Write an instance catalog with entries centered on the desired
CCD.
class InstCatTrimmer(dict):
"""
Subclass of dict to provide trimmed instance catalogs for
acceptance cones centered on CCDs in the LSST focalplane.
Attributes
----------
instcat_file: str
Instance catalog filename.
obs_md: ObservationMetadata
Observation metadata for the visit.
minsource: int
Minimum number of sersic objects to require for a sensor-visit
to be simulated.
"""
def __init__(self, instcat, sensor_list, chunk_size=int(1e5),
radius=0.18, numRows=None):
"""
Parameters
----------
chip_name: str
Name of the CCD, e.g., "R:2,2 S:1,1".
outfile: str
Name of the output instance catalog file.
instcat: str
Path to input instance catalog. The file can have includeobj
entries.
sensor_list: list
List of sensors, e.g., "R:2,2 S:1,1", for which to provide
object lists.
chunk_size: int [int(1e5)]
Number of lines to read in at a time from the instance catalogs
to avoid excess memory usage.
radius: float [0.18]
Radius, in degrees, of the acceptance cone.
Radius in degrees for the acceptance cone to use for each
sensor.
numRows: int [None]
Maximum number of rows to read in from the instance catalog.
"""
super(InstCatTrimmer, self).__init__()
self.instcat_file = instcat
self._read_commands()
self._process_objects(sensor_list, chunk_size, radius=radius,
numRows=numRows)

def _process_objects(self, sensor_list, chunk_size, radius=0.18,
                     numRows=None):
    """
    Loop over chunks of lines from the instance catalog
    and disaggregate the entries into the separate object lists
    for each sensor using the Disaggregator class to apply the
    acceptance cone cut centered on each sensor.

    Parameters
    ----------
    sensor_list: list
        Sensor names for which to accumulate object lists.  Each
        name becomes a key of this dict with a list value.
    chunk_size: int
        Maximum number of catalog lines to read per chunk.  Must be
        at least 1.
    radius: float [0.18]
        Acceptance cone radius passed to
        Disaggregator.get_object_entries.
    numRows: int [None]
        Maximum number of lines to read from the instance catalog.
        If None, read the whole file.

    Raises
    ------
    ValueError
        If chunk_size is less than 1.

    Notes
    -----
    NOTE(review): Disaggregator.get_object_entries applies the
    minsource criterion to the lines it was constructed with, so
    here the cut is evaluated per chunk rather than over the full
    catalog -- confirm that this is intended.
    """
    import itertools

    if chunk_size < 1:
        raise ValueError("chunk_size must be a positive integer")

    self.update({sensor: [] for sensor in sensor_list})
    with desc.imsim.fopen(self.instcat_file, mode='rt') as fd:
        # Cap the total number of lines at numRows (if given).  Reading
        # until the iterator is exhausted, instead of counting up to a
        # precomputed line total, avoids a second pass over the file
        # and cannot spin forever when numRows exceeds the file length.
        lines = iter(fd)
        if numRows is not None:
            lines = itertools.islice(lines, numRows)
        while True:
            chunk = list(itertools.islice(lines, chunk_size))
            if not chunk:
                break
            object_lines = [line for line in chunk
                            if line.startswith('object')]
            if not object_lines:
                # Nothing to disaggregate (e.g., a chunk containing only
                # command or comment lines).
                continue
            disaggregator = Disaggregator(object_lines, self)
            for sensor in self:
                obj_list = disaggregator.get_object_entries(sensor,
                                                            radius=radius)
                self[sensor].extend(obj_list)

def _get_num_lines(self):
"""
Get the total number of lines in the instance catalog.
This is needed for the exit condition in the _process_objects
method.
"""
with open(outfile, 'w') as output:
for line in self.command_lines:
output.write(line)
for line in self.get_object_entries(chip_name, radius=radius):
output.write(line)
num_lines = 0
with desc.imsim.fopen(self.instcat_file, mode='rt') as fd:
for _ in fd:
num_lines += 1
return num_lines

def _read_commands(self):
    """
    Read in the commands from the instance catalog.

    Lines are scanned from the top of the file until the first line
    starting with 'object' (or a hard limit on the number of lines)
    is reached; lines starting with '#' are ignored.  Sets the
    command_lines, minsource, and obs_md attributes.
    """
    # There should be fewer than 50 command lines, but put a hard
    # limit to avoid suspect catalogs.
    max_lines = 50
    self.command_lines = []
    commands = {}
    with desc.imsim.fopen(self.instcat_file, mode='rt') as catalog:
        for _, entry in zip(range(max_lines), catalog):
            if entry.startswith('object'):
                # Object entries follow the commands, so stop here.
                break
            if not entry.startswith('#'):
                self.command_lines.append(entry)
                fields = entry.strip().split()
                # Command values are stored as floats keyed by name.
                commands[fields[0]] = float(fields[1])
    # minsource is optional; None signals that it was not given.
    self.minsource = commands.get('minsource')
    # The 'filter' value indexes into the band names.
    band_index = int(commands['filter'])
    commands['bandpass'] = 'ugrizy'[band_index]
    self.obs_md = desc.imsim.phosim_obs_metadata(commands)
16 changes: 12 additions & 4 deletions tests/test_instcat_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import shutil
import numpy as np
import desc.imsim
from lsst.afw.cameraGeom import WAVEFRONT, GUIDER
from lsst.sims.utils import _pupilCoordsFromRaDec
from lsst.sims.utils import altAzPaFromRaDec
from lsst.sims.utils import angularSeparation
Expand All @@ -17,6 +18,7 @@
from lsst.sims.photUtils import Sed, BandpassDict
from lsst.sims.photUtils import Bandpass, PhotometricParameters
from lsst.sims.coordUtils import chipNameFromPupilCoordsLSST
from lsst.sims.GalSimInterface import LSSTCameraWrapper


class InstanceCatalogParserTestCase(unittest.TestCase):
Expand Down Expand Up @@ -65,7 +67,7 @@ def test_required_commands_error(self):
output_file.write(line)

with self.assertRaises(desc.imsim.PhosimInstanceCatalogParseError) as ee:
results = desc.imsim.parsePhoSimInstanceFile(dummy_catalog)
results = desc.imsim.parsePhoSimInstanceFile(dummy_catalog, ())
self.assertIn("Required commands", ee.exception.args[0])
if os.path.isfile(dummy_catalog):
os.remove(dummy_catalog)
Expand Down Expand Up @@ -395,9 +397,15 @@ def test_validate_phosim_object_list(self):
"Test the validation of the rows of the phoSimObjects DataFrame."
cat_file = os.path.join(os.environ['IMSIM_DIR'], 'tests', 'tiny_instcat.txt')

camera = LSSTCameraWrapper().camera
sensors = [det.getName() for det in camera
if det.getType() not in (WAVEFRONT, GUIDER)]
with warnings.catch_warnings(record=True) as wa:
instcat_contents = desc.imsim.parsePhoSimInstanceFile(cat_file)
[x for x in instcat_contents.sources[0]]
instcat_contents \
= desc.imsim.parsePhoSimInstanceFile(cat_file, sensors)
my_objs = set()
for sensor in sensors:
[my_objs.add(x) for x in instcat_contents.sources[1][sensor]]
self.assertGreater(len(wa), 0)

# we must detect which warning is the warning we are actually
Expand Down Expand Up @@ -426,7 +434,7 @@ def test_validate_phosim_object_list(self):
self.assertIn('1 had semi_major_axis', message)
self.assertIn('1 had n_points', message)

self.assertEqual(len(instcat_contents.sources[0]), 18)
self.assertEqual(len(my_objs), 35)
for obj in instcat_contents.sources[0]:
self.assertNotIn(obj.uniqueId, bad_unique_ids)

Expand Down
2 changes: 1 addition & 1 deletion tests/test_psf.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ def test_save_and_load_psf(self):
"""
instcat = os.path.join(os.environ['IMSIM_DIR'], 'tests',
'tiny_instcat.txt')
obs_md, _, _ = desc.imsim.parsePhoSimInstanceFile(instcat)
obs_md, _, _ = desc.imsim.parsePhoSimInstanceFile(instcat, ())
for psf_name in ("DoubleGaussian", "Kolmogorov", "Atmospheric"):
psf = desc.imsim.make_psf(psf_name, obs_md, screen_scale=6.4)
psf_file = os.path.join(self.test_dir, '{}.pkl'.format(psf_name))
Expand Down
Loading

0 comments on commit 2bf5c5b

Please sign in to comment.