In [1]:
import os
import pickle
import random
import sys
from collections import Counter
from copy import deepcopy
from glob import glob
from pprint import pprint

import galsim
import matplotlib
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
from hydra import initialize, compose
from matplotlib import colors
from tqdm import tqdm

# Load the hydra config that holds the machine-specific directory paths.
# Two candidate config locations are tried: a local `config` directory first,
# then `../../config` as a fallback.
try:
    with initialize(version_base=None, config_path='config'):
        config = compose(config_name='config.yaml')  # overrides=['machine=uzay']
except Exception:
    # `except Exception` (instead of a bare `except`) lets KeyboardInterrupt and
    # SystemExit propagate rather than silently triggering the fallback path.
    with initialize(version_base=None, config_path='../../config'):
        config = compose(config_name='config.yaml')  # overrides=['machine=uzay']

# read each machine-specific directory from the loaded config, one per line
array_dir = config.machine.array_dir
data_dir = config.machine.data_dir
figure_dir = config.machine.figure_dir
pickle_dir = config.machine.pickle_dir
repo_dir = config.machine.repo_dir

# make the repository importable so the local `mejiro` modules can be found
repo_already_importable = repo_dir in sys.path
if not repo_already_importable:
    sys.path.append(repo_dir)

# apply the matplotlib style sheet shipped inside the repository
style_sheet = f'{repo_dir}/mejiro/mplstyle/science.mplstyle'
plt.style.use(style_sheet)

from mejiro.lenses.test import SampleStrongLens
from mejiro.plots import diagnostic_plot, plot, plot_util, overplot
from mejiro.analysis import stats
from mejiro.utils import util
from mejiro.helpers import gs
from mejiro.lenses import lens_util

Please consider updating pysiaf, e.g. pip install --upgrade pysiaf or conda update pysiaf


# The solution: `np.random.choice` defaults to `replace=True`, i.e., the same system can be selected multiple times; pass `replace=False` to sample each system at most once

In [2]:
pipeline_params = util.hydra_to_dict(config.pipeline)

# debugging = pipeline_params['debugging']
debugging = False  # TODO TEMP

# when debugging, `_dev` is appended to the configured pipeline directory
pipeline_dir = f'{config.machine.pipeline_dir}_dev' if debugging else config.machine.pipeline_dir

# fetch the detectable lenses from the chosen pipeline directory
detectable_lenses = lens_util.get_detectable_lenses(
    pipeline_dir,
    with_subhalos=True,
    verbose=True,
    limit=1000,
    exposure=True,
)

100%|██████████| 1000/1000 [00:36<00:00, 27.67it/s]


In [3]:
# Collect every UID that occurs more than once among the detectable lenses.
uids = [lens.uid for lens in detectable_lenses]

# A single Counter pass tallies all UIDs at once; calling `uids.count(uid)` for
# every element would rescan the whole list each time (quadratic in len(uids)).
duplicate_uids = {uid for uid, n_seen in Counter(uids).items() if n_seen > 1}

if duplicate_uids:
    pprint(f"Duplicate UIDs found: {duplicate_uids}")
else:
    pprint("No duplicate UIDs found.")

'No duplicate UIDs found.'


In [4]:
# Keep every lens whose UID is shared with at least one other detectable lens.
# The tally is built directly from `detectable_lenses`, so this cell does not
# rely on a `uids` list left over from another cell, and the membership test is
# a dict lookup instead of a full `list.count` scan per lens.
uid_counts = Counter(lens.uid for lens in detectable_lenses)
duplicate_lenses = [lens for lens in detectable_lenses if uid_counts[lens.uid] > 1]
print(f"Number of duplicate lenses: {len(duplicate_lenses)}")

Number of duplicate lenses: 0


In [5]:
# pick out every detectable lens carrying the UID under investigation
sample_duplicates = list(
    filter(lambda candidate: candidate.uid == '00004050', detectable_lenses)
)
print(f"Number of sample duplicates: {len(sample_duplicates)}")

Number of sample duplicates: 0


In [7]:
# NOTE(review): left disabled — in the run recorded above `sample_duplicates` came back
# empty, so indexing `[0]` would raise an IndexError. Re-enable only once it is non-empty.
# print(sample_duplicates[0].detector)

In [None]:
# find the pickle(s) for lens 4050 under any subdirectory of pipeline step 03
files = glob(os.path.join(pipeline_dir, '03', '**', f'lens_{str(4050).zfill(8)}.pkl'))
files.sort()
pprint(files)

['/data/bwedig/mejiro/pipeline/03/sca05/lens_00004050.pkl']


In [None]:
# count every lens pickle found under the subdirectories of pipeline step 03
lens_pickle_pattern = os.path.join(pipeline_dir, '03', '**', f'lens_*.pkl')
lens_pickle_count = len(glob(lens_pickle_pattern))
print(f"Number of files: {lens_pickle_count}")

Number of files: 14706


In [None]:
# Sanity check: the UID stored on each pickled lens should equal the UID encoded
# in its filename (`lens_<uid>.pkl`). Any disagreement is reported.
for f in tqdm(glob(os.path.join(pipeline_dir, '03', '**', 'lens_*.pkl'))):
    # os.path.basename isolates the filename without assuming '/' is the path separator
    uid_from_file = os.path.basename(f).split('_')[-1].split('.')[0]
    lens = util.unpickle(f)
    if lens.uid != uid_from_file:
        # tqdm.write emits the message without corrupting the active progress bar
        tqdm.write(f"UID mismatch: {lens.uid} != {uid_from_file}")

100%|██████████| 14706/14706 [09:24<00:00, 26.07it/s] 
