In [1]:
from pzph1dot1 import *

  from tqdm.autonotebook import tqdm


In [3]:
    try:
        data_path = os.environ['PZPH1_DATA_PATH']
        print(format_message(f'Found PZPH1_DATA_PATH env variable: {data_path}'))

    except KeyError:
        data_path = '/data/SRGz/pzph1/'
        print(format_message(
            f'Not found PZPH1_DATA_PATH env variable. Using default path: {data_path}'))

    models_path = os.path.join(data_path, 'models')

    models_series = {
        'x0': {
            'path': os.path.join(models_path, 'x0'),
            'models': {
                # 15: 'sdssdr16_QSO+GALAXY-train_QSO_XbalancedGALAXY-sdss_unwise-wo_3XMM_XXLN_S82X_LH-w_VHzQs-v2-asinhmag_features',  # there is not sdss wise in getaroundr
                19: 'psdr2+wise_deacls8tr_QSO+GALAXY-train_QSO_XbalancedGALAXY-sdss_unwise-wo_3XMM_XXLN_S82X_LH-w_VHzQs-v2-asinhmag_features',
                21: 'psdr2+all_deacls8tr_QSO+GALAXY-train_QSO_XbalancedGALAXY-sdss_unwise-wo_3XMM_XXLN_S82X_LH-w_VHzQs-v2-asinhmag_features',
                22: 'deacls8tr_QSO+GALAXY-train_QSO_XbalancedGALAXY-sdss_unwise-wo_3XMM_XXLN_S82X_LH-w_VHzQs-v2-asinhmag_features',
                35: 'sdssdr16+psdr2+all_deacls8tr_QSO+GALAXY-train_QSO_XbalancedGALAXY-sdss_unwise-wo_3XMM_XXLN_S82X_LH-w_VHzQs-v2-asinhmag_features',
            },
            'config': {
                'perturb': 0,
                'ebv_accounting': False,
            },
        },
        'x0pswf': {
            'path': os.path.join(models_path, 'x0'),
            'models': {
                19: 'psdr2+wise_deacls8tr_QSO+GALAXY-train_QSO_XbalancedGALAXY-sdss_unwise-wo_3XMM_XXLN_S82X_LH-w_VHzQs-v2-asinhmag_features',
                21: 'psdr2+all_deacls8tr_QSO+GALAXY-train_QSO_XbalancedGALAXY-sdss_unwise-wo_3XMM_XXLN_S82X_LH-w_VHzQs-v2-asinhmag_features',
                22: 'deacls8tr_QSO+GALAXY-train_QSO_XbalancedGALAXY-sdss_unwise-wo_3XMM_XXLN_S82X_LH-w_VHzQs-v2-asinhmag_features',
                35: 'sdssdr16+psdr2+all_deacls8tr_QSO+GALAXY-train_QSO_XbalancedGALAXY-sdss_unwise-wo_3XMM_XXLN_S82X_LH-w_VHzQs-v2-asinhmag_features',
            },
            'config': {
                'perturb': 0,
                'ebv_accounting': False,
                'use_wise_forced': True,
            },
        },
        "x1": {
            "path": os.path.join(models_path, 'x1'),
            "models": {
                "18": "sdssdr16+wise_deacls8tr_QSO+GALAXY_20201212141009",
                "19": "psdr2+wise_deacls8tr_QSO+GALAXY_20201212135046",
                "20": "sdssdr16+all_deacls8tr_QSO+GALAXY_20201212143658",
                "21": "psdr2+all_deacls8tr_QSO+GALAXY_20201212142333",
                "22": "deacls8tr_QSO+GALAXY_20201212135641",
                "34": "sdssdr16+psdr2+wise_deacls8tr_QSO+GALAXY_20201212131454",
                "35": "sdssdr16+psdr2+all_deacls8tr_QSO+GALAXY_20201212133711"
            },
            "config": {
                "perturb": 8,
                "ebv_accounting": True
            }
        },
        "x1a": {
            "path": os.path.join(models_path, 'x1'),
            "models": {
                "18": "sdssdr16+wise_deacls8tr_QSO+GALAXY_20201212141009",
                "19": "psdr2+wise_deacls8tr_QSO+GALAXY_20201212135046",
                "20": "sdssdr16+all_deacls8tr_QSO+GALAXY_20201212143658",
                "21": "psdr2+all_deacls8tr_QSO+GALAXY_20201212142333",
                "22": "deacls8tr_QSO+GALAXY_20201212135641",
                "34": "sdssdr16+psdr2+wise_deacls8tr_QSO+GALAXY_20201212131454",
                "35": "sdssdr16+psdr2+all_deacls8tr_QSO+GALAXY_20201212133711"
            },
            "config": {
                "perturb": 0,
                "ebv_accounting": True
            }
        },
        'gal0': {
            'path': os.path.join(models_path, 'gal0'),
            'models': {
                # 15: 'sdssdr16_GALAXY-train_GALAXY_million-sdss_unwise-wo_XXLN_S82X_LH-asinhmag_features',
                19: 'psdr2+wise_deacls8tr_GALAXY-train_GALAXY_million-sdss_unwise-wo_XXLN_S82X_LH-asinhmag_features',
                21: 'psdr2+all_deacls8tr_GALAXY-train_GALAXY_million-sdss_unwise-wo_XXLN_S82X_LH-asinhmag_features',
                22: 'deacls8tr_GALAXY-train_GALAXY_million-sdss_unwise-wo_XXLN_S82X_LH-asinhmag_features',
                # 34: 'sdssdr16+psdr2+wise_deacls8tr_QSO+GALAXY_20201004092833',
                35: 'sdssdr16+psdr2+all_deacls8tr_GALAXY-train_GALAXY_million-sdss_unwise-wo_XXLN_S82X_LH-asinhmag_features',
            },
            'config': {
                'perturb': 7,
                'ebv_accounting': False,
            }
        },
        'gal0pswf': {
            'path': os.path.join(models_path, 'gal0'),
            'models': {
                # 15: 'sdssdr16_GALAXY-train_GALAXY_million-sdss_unwise-wo_XXLN_S82X_LH-asinhmag_features',
                19: 'psdr2+wise_deacls8tr_GALAXY-train_GALAXY_million-sdss_unwise-wo_XXLN_S82X_LH-asinhmag_features',
                21: 'psdr2+all_deacls8tr_GALAXY-train_GALAXY_million-sdss_unwise-wo_XXLN_S82X_LH-asinhmag_features',
                22: 'deacls8tr_GALAXY-train_GALAXY_million-sdss_unwise-wo_XXLN_S82X_LH-asinhmag_features',
                # 34: 'sdssdr16+psdr2+wise_deacls8tr_QSO+GALAXY_20201004092833',
                35: 'sdssdr16+psdr2+all_deacls8tr_GALAXY-train_GALAXY_million-sdss_unwise-wo_XXLN_S82X_LH-asinhmag_features',
            },
            'config': {
                'perturb': 7,
                'ebv_accounting': False,
                'use_wise_forced': True,
            }
        }
    }

===== Not found PZPH1_DATA_PATH env variable. Using default path: /data/SRGz/pzph1/ =====


In [4]:
args = parse_cli_args(args= '--outputDir ./output/ \
--xrayCatalog ../data/3weak.gz_pkl --primaryRadius 1 \
--baseCatalog ps \
--njobs 24 \
--xrayRaCol ra --xrayDecCol dec \
--chunkSize 100000'.replace('\n', '').split(' '))

assert args.baseCatalog in ['ps', 'ls', 'sdss', 'gaiaedr3'], 'Other catalogs not implemented yet'
assert args.psEdition in ['ps2oldfluxradecbest', 'ps2fluxbest']
# assert not args.useWiseForced, 'Wise forced not implemented yet'

if args.baseCatalog == "ls":
    args.baseRaCol = 'ra'
    args.baseDecCol = 'dec'
elif args.baseCatalog == "ps":
    args.baseRaCol = "raBest"
    args.baseDecCol = "decBest"
elif args.baseCatalog == "sdss":
    args.baseRaCol = 'ra'
    args.baseDecCol = 'dec'
elif args.baseCatalog == "gaiaedr3":
    args.baseRaCol = 'ra'
    args.baseDecCol = 'dec'

get_flags_data_path(check=True)
print(args)

if args.featuresTransformModule is not None and args.featuresTransformName is not None:
    user_defined_features_transformation = _import_user_defined_features_transformation(
        args.featuresTransformModule, args.featuresTransformName
    )
else:
    user_defined_features_transformation = lambda x: x

if args.coldStart:
    try:
        shutil.rmtree(os.path.join(args.outputDir))
    except FileNotFoundError:
        pass



Namespace(assembledDataset=None, baseCatalog='ps', baseDecCol='decBest', baseRaCol='raBest', chunkSize=100000, coldStart=False, customModels=None, featuresTransformModule=None, featuresTransformName=None, getaroundrPath='/home/horungev/Catalogs/SRG/crossmatch/getaroundr.py', keepModelsInMemory=False, ls=None, lsOn=None, modelsIds=None, modelsSeries='x0pswf', njobs=24, outputDir='./output/', predictOn=None, primaryRadius=1.0, ps=None, psEdition='ps2fluxbest', psFluxesManually=False, psFluxesPath=None, psOn=None, sdss=None, sdssOn=None, secondaryRadius=1.0, useWiseForced=False, xrayCatalog='../data/3weak.gz_pkl', xrayDecCol='dec', xrayHealpixId=None, xrayRaCol='ra')


In [5]:
os.makedirs(args.outputDir, exist_ok=True)
ps_objids = []
buf_path = os.path.join(args.outputDir, 'buf')
os.makedirs(buf_path, exist_ok=True)
files2predict = []
if args.predictOn is None:
    print('args.predictOn is NoneXZZZZZZZZzzzczxvhfdjtrkhhgkjfdgjdfgjfdgjdfgugdfhgughdufghjfghjg')
    catalog_kws = dict(
        xray_data_path=args.xrayCatalog,
        xray_radec_cols=(args.xrayRaCol, args.xrayDecCol),
        base_catalog=args.baseCatalog,
        base_radec_cols=(args.baseRaCol, args.baseDecCol),
        sdss_path=args.sdss, ps_path=args.ps, ls_path=args.ls,
        sdss_on=args.sdssOn, ps_on=args.psOn, ls_on=args.lsOn,
        assembled_dataset_path=args.assembledDataset, output_dir=buf_path,
        primary_radius=args.primaryRadius,
        secondary_rasius=args.secondaryRadius, njobs=args.njobs,
        getaroundr_path=args.getaroundrPath,
        # cj_user_id=args.cjUserID, cj_password=args.cjPassword,
        ps_fluxes_manually=args.psFluxesManually, ps_fluxes=None,
        user_defined_features_transformation=user_defined_features_transformation,
        panstarrs_catalog_to_use_cause_my_bullshit_code_and_noone_to_download_the_entire_panstarrs_properly_once_and_forall=args.psEdition,
    )

    data_path = os.path.join(args.outputDir, 'data')
    data_written_file = os.path.join(data_path, "DATA_WRITTEN_FILE.txt")
    if not os.path.isfile(data_written_file):
        os.makedirs(data_path, exist_ok=True)

        if args.xrayCatalog is not None:
            if os.path.isdir(args.xrayCatalog):
                iterator = [{'xray': file} for file in
                            glob.glob(os.path.join(args.xrayCatalog, '*'))]
            elif args.xrayHealpixId is not None:
                iterator = list(
                    split_data(xray=Catalog.read_table(args.xrayCatalog),
                               xray_hp_id_col=args.xrayHealpixId)
                )
            else:
                iterator = list(
                    split_data(xray=Catalog.read_table(args.xrayCatalog),
                               chunksize=args.chunkSize))

        else:
            iterator = list(split_data(sdss=Catalog.read_table(
                args.sdss) if args.sdss is not None else None,
                                       ps=Catalog.read_table(
                                           args.ps) if args.ps is not None else None,
                                       ls=Catalog.read_table(
                                           args.ls) if args.ls is not None else None,
                                       base_catalog=args.baseCatalog,
                                       sdss_on=args.sdssOn,
                                       ps_on=args.psOn, ls_on=args.lsOn,
                                       chunksize=args.chunkSize))

        for i, chunk in tqdm.tqdm(enumerate(iterator), total=len(iterator),
                                  desc='Preparing data'):
            if 'xray' in chunk and isinstance(chunk['xray'], str):
                fname = os.path.basename(
                    os.path.splitext(chunk['xray'])[0])
            elif 'xray' in chunk and 'fnum' in chunk:
                fname = 'part-{}'.format(chunk['fnum'])
            else:
                fname = 'part-{:05d}'.format(i)

            for k in ['xray', 'sdss', 'ps', 'ls']:
                try:
                    chunk_data = chunk[k]
                except KeyError:
                    continue

                chunk_dst_path = os.path.join(data_path,
                                              f'{fname}.{k}.gz_pkl')
                chunk_data.to_pickle(chunk_dst_path, compression='gzip',
                                     protocol=4)

        with open(data_written_file, 'w'):
            pass

    chunks_files = defaultdict(dict)
    for file in glob.glob(os.path.join(data_path, '*')):
        parsed_filename = re.findall(r'^(.*)\.(.*)\.gz_pkl$',
                                     os.path.basename(file))
        print(parsed_filename)
        if parsed_filename:
            fname, chunk_type = parsed_filename[0]
            chunks_files[fname][chunk_type] = file

    for i, (fname, chunk) in tqdm.tqdm(enumerate(chunks_files.items()),
                                       total=len(chunks_files)):
        print(fname)
        # chunk_number = re.findall("^part-(\d*)$", fname)
        # print(chunk_number)
        # if not len(chunk_number):
        #     raise Exception("Wrong file name: {}".format(fname))
        # else:
        #     chunk_number = int(chunk_number[0])
        #     if chunk_number in [1]:
        #         continue

        dst_path = os.path.join(buf_path,
                                f'{fname}.features.gz_pkl')  # TODO nice names format

        if file_exists(dst_path):
            files2predict.append(dst_path)
            continue

        catalog_kws_to_chunk_types = {
            'xray_data_path': 'xray', 'sdss_path': 'sdss', 'ps_path': 'ps',
            'ls_path': 'ls'
        }
        for kw, chunk_type in catalog_kws_to_chunk_types.items():
            try:
                catalog_kws[kw] = chunk[chunk_type]
            except KeyError:
                catalog_kws[kw] = None

        if args.psFluxesPath:
            ps_fluxes = pd.read_csv(args.psFluxesPath,
                                    dtype={'objID': int})
            ps_fluxes = {k: v for k, v in ps_fluxes.groupby(by='__file__')}
            catalog_kws['ps_fluxes'] = ps_fluxes

        catalog_kws['filename'] = fname
        catalog = Catalog(**catalog_kws)
        try:
            status = catalog.prepare_data()
        except Exception as e:
            if str(e) == 'Found nothing in base catalog':
                print(dst_path, fname)
                shutil.copy(
                    os.path.join(data_path, f'{fname}.xray.gz_pkl'),
                    dst_path)
                status = None
            else:
                print(e)
                raise Exception(e)

        if status == "ps_manual":
            ps_objids.append(catalog.ps_objids)
        # shutil.move(catalog.assembled_dataset_path, dst_path)
        else:
            files2predict.append(dst_path)
else:
    print('HEEEEEEEEEEEEEEY I am here')
    for file in glob.glob(
            os.path.join(args.predictOn, '*.features.gz_pkl')):
        ### !!! Copying files
        # shutil.copy(file, buf_path)
        copyfile_link(file, buf_path)
        files2predict.append(
            os.path.join(buf_path, os.path.basename(file)))

if ps_objids:
    objids_csv_path = os.path.join(args.outputDir, 'ps_objids.csv')
    pd.concat(ps_objids).to_csv(objids_csv_path, index=False)
    print("""
    Now you are to download PanSTARRS fluxes from casjobs.
    Upload generated csv and execute query with PanSTARRS_DR2 context:
        select t.__file__, m.objid,
        m.gPSFFlux, m.gPSFFluxErr, m.gKronFlux, m.gKronFluxErr,
        m.rPSFFlux, m.rPSFFluxErr, m.rKronFlux, m.rKronFluxErr,
        m.iPSFFlux, m.iPSFFluxErr, m.iKronFlux, m.iKronFluxErr,
        m.zPSFFlux, m.zPSFFluxErr, m.zKronFlux, m.zKronFluxErr,
        m.yPSFFlux, m.yPSFFluxErr, m.yKronFlux, m.yKronFluxErr

        into MyDB.<destination table>
        from MyDB.<table you created from csv> t
        left join StackObjectAttributes m on m.objid=t.objid

    Generated csv: {}
    """.format(objids_csv_path))
else:
    if args.modelsIds is not None:
        if args.customModels is not None:
            with open(args.customModels, 'r') as fin:
                custom_models_series = json.load(fin)

            models_series = {**models_series, **custom_models_series}

        print(models_series[args.modelsSeries])

        models_path = models_series[args.modelsSeries]['path']
        models = {f'{args.modelsSeries}{mid}': model for mid, model in
                  models_series[args.modelsSeries]['models'].items()
                  if int(mid) in args.modelsIds}
        config = models_series[args.modelsSeries]['config']

        files2predict = sorted(files2predict)
        print(files2predict, models_path, models)

        try:
            use_wise_forced = config['use_wise_forced']
        except KeyError:
            use_wise_forced = False

        print(format_message("Use WISE forced = "), use_wise_forced, 'or', args.useWiseForced)

args.predictOn is NoneXZZZZZZZZzzzczxvhfdjtrkhhgkjfdgjdfgjfdgjdfgugdfhgughdufghjfghjg
[('part-00000', 'xray')]
[]


100%|██████████| 1/1 [00:00<00:00, 1065.63it/s]

part-00000





In [6]:
if args.customModels is not None:
    with open(args.customModels, 'r') as fin:
        custom_models_series = json.load(fin)

    models_series = {**models_series, **custom_models_series}

print(models_series[args.modelsSeries])

models_path = models_series[args.modelsSeries]['path']
models = {f'{args.modelsSeries}{mid}': model for mid, model in
            models_series[args.modelsSeries]['models'].items()
            if int(mid) in args.modelsIds}
config = models_series[args.modelsSeries]['config']

files2predict = sorted(files2predict)
print(files2predict, models_path, models)

try:
    use_wise_forced = config['use_wise_forced']
except KeyError:
    use_wise_forced = False

print(format_message("Use WISE forced = "), use_wise_forced, 'or', args.useWiseForced)

{'path': '/data/SRGz/pzph1/models/x0', 'models': {19: 'psdr2+wise_deacls8tr_QSO+GALAXY-train_QSO_XbalancedGALAXY-sdss_unwise-wo_3XMM_XXLN_S82X_LH-w_VHzQs-v2-asinhmag_features', 21: 'psdr2+all_deacls8tr_QSO+GALAXY-train_QSO_XbalancedGALAXY-sdss_unwise-wo_3XMM_XXLN_S82X_LH-w_VHzQs-v2-asinhmag_features', 22: 'deacls8tr_QSO+GALAXY-train_QSO_XbalancedGALAXY-sdss_unwise-wo_3XMM_XXLN_S82X_LH-w_VHzQs-v2-asinhmag_features', 35: 'sdssdr16+psdr2+all_deacls8tr_QSO+GALAXY-train_QSO_XbalancedGALAXY-sdss_unwise-wo_3XMM_XXLN_S82X_LH-w_VHzQs-v2-asinhmag_features'}, 'config': {'perturb': 0, 'ebv_accounting': False, 'use_wise_forced': True}}


TypeError: argument of type 'NoneType' is not iterable

In [None]:
        predict(files2predict, models_path, models, config,
                    wise_forced=use_wise_forced or args.useWiseForced,
                    njobs=args.njobs, keep_in_memory=args.keepModelsInMemory,
                    user_defined_features_transformation=user_defined_features_transformation)

        assemble_and_analyze_results(buf_path, args.outputDir,
                                     models_series=args.modelsSeries)