# Prepare the files from Jake

A quick notebook that converts Jake's files into the format I want to work with: night-by-night files instead of per-object files.

In [1]:
import h5py as h5
import pandas as pd
import numpy as np
import time
import sys
sys.path.append("../mitigation_algorithm/")
import magnitudes

import sqlite3

In [2]:
from os import listdir
import os.path

In [3]:
import matplotlib.pyplot as plt

In [4]:
import time

In [5]:
%config InlineBackend.figure_format = "retina"

## Find the S3M objects that were deleted

In [42]:
s3m = pd.read_hdf("/epyc/projects/hybrid-sso-catalogs/catalogues/s3m_cart.h5")

In [142]:
hybrid = pd.read_hdf("/epyc/projects/hybrid-sso-catalogs/catalogues/hybrid.h5")

In [158]:
s_things = hybrid.index.str.startswith("S")
centaurs = hybrid.index.str.startswith("CEN")

In [164]:
# Position just past the last row of `hybrid` whose ID starts with "S" or "CEN".
# Raises IndexError if no row matches (the `[-1]` would index an empty array).
# NOTE(review): the name implies every row from this position onwards is an MPCORB
# object, which assumes all "S"/"CEN" rows come before them in `hybrid` — confirm.
mpcorb_starts_here = np.where(s_things | centaurs)[0][-1] + 1

In [172]:
hybrid_no_mpc = hybrid.iloc[:mpcorb_starts_here]

In [174]:
all_ind = np.concatenate((hybrid_no_mpc.index.values, s3m.index.values))

In [175]:
uni, count = np.unique(all_ind, return_counts=True)

In [176]:
# IDs that are in the S3M catalogue but not in the non-MPCORB part of the hybrid
# catalogue (per the section heading: the S3M objects that were deleted).
# A set difference is used instead of `uni[count == 1]` because the count-based test
# is a symmetric difference: it would also return IDs that occur only in the hybrid
# catalogue, and it would miss any ID that is duplicated within S3M.
# The result is a sorted array of unique IDs, as before.
deleted_s3m_ids = np.setdiff1d(s3m.index.values, hybrid_no_mpc.index.values)

In [177]:
%%time
should_be_hybrid = s3m.drop(deleted_s3m_ids, errors="ignore")
should_be_hybrid

CPU times: user 15.7 s, sys: 1.39 s, total: 17.1 s
Wall time: 16.9 s


Unnamed: 0_level_0,q,e,i,Omega,argperi,t_p,H,t_0
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
S0000001a,1.251459,0.382244,9.304721,252.063850,185.610748,54067.969964,10.315000,54466.0
S0000002a,1.286219,0.662518,18.303865,132.486568,80.463153,54670.088589,10.818000,54466.0
S0000003a,0.617193,0.645272,10.302493,67.484378,182.131418,54522.349738,11.175000,54466.0
S0000004a,0.383306,0.856419,16.988463,117.793282,242.430598,54973.510299,11.452000,54466.0
S0000005a,0.569264,0.772125,22.761089,314.323204,73.119299,54075.567352,11.678000,54466.0
...,...,...,...,...,...,...,...,...
CEN10895,20.147020,0.269259,45.440626,169.247634,238.068790,51314.861160,19.645034,59215.0
CEN10896,20.147020,0.269259,45.440626,123.036860,299.679837,59048.638961,11.600137,59215.0
CEN10897,20.147020,0.269259,45.440626,64.417778,88.149692,52290.662773,19.503685,59215.0
CEN10898,20.147020,0.269259,45.440626,26.368096,225.970031,54802.202897,17.060558,59215.0


In [193]:
np.save("delete_s3m_ids.npy", deleted_s3m_ids)

In [6]:
# Reload the ID array from the file written by the np.save call above.
# Note the name: this is `delete_s3m_ids` (not `deleted_s3m_ids`), which is the global
# that convert_jake_files reads.
# NOTE: allow_pickle=True unpickles the file's contents, and unpickling can execute
# arbitrary code — only load .npy files that you created yourself.
delete_s3m_ids = np.load("delete_s3m_ids.npy", allow_pickle=True)

# Convert files

In [7]:
# Load the initial S3M catalogue and map each of its index values to a 7-character,
# zero-padded, upper-case hexadecimal string built from the row position.
# Note that this rebinds `s3m`, replacing the s3m_cart.h5 frame loaded earlier.
s3m = pd.read_hdf("/epyc/projects/hybrid-sso-catalogs/catalogues/s3m_initial.h5")
hex_ids = np.array([format(row, "07X") for row in range(len(s3m.index))])
s3m_to_hex7 = {obj_id: hex_id for obj_id, hex_id in zip(s3m.index.values, hex_ids)}

In [28]:
# Exploratory peek at one MPCORB file: list its top-level HDF5 keys, then load the
# table stored under key "10000".
# `keys` and `df` here are notebook globals; convert_jake_files assigns its own local
# variables with the same names and does not read these.
with h5.File(os.path.join("/epyc/data3/jake_dp03/for_tom", "mpcorb_100.h5"), "r") as f:
    keys = np.array([k for k in f.keys()])
    
df = pd.read_hdf(os.path.join("/epyc/data3/jake_dp03/for_tom", "mpcorb_100.h5"), "10000")

In [39]:
def convert_jake_files(in_path="/epyc/data3/jake_dp03/for_tom",
                       out_path="/epyc/projects/neocp-predictions/output/synthetic_obs/",
                       night_zero=60795,
                       prefix="s3m",
                       id_map=None,
                       drop_ids=None):
    """Split Jake's per-object observation files into one HDF5 file per night.

    Every file in ``in_path`` whose name starts with ``prefix`` is processed, ordered by
    the integer found between the first "_" and the following "." of its name. Within a
    file, the HDF5 keys are processed in increasing numeric order. For each key the
    observation table is read, a ``night`` column is added, and the rows are written to
    ``<out_path>/night_NNNN.h5`` (``mpcorb_night_NNNN.h5`` when ``prefix == "mpcorb"``)
    under the key ``"df"`` in table format.

    WARNING: rows are *appended* to night files that already exist, so running the
    conversion twice over the same input duplicates every observation. Clear
    ``out_path`` before re-running.

    Parameters
    ----------
    in_path : str
        Directory holding the input ``<prefix>_<number>.h5`` files.
    out_path : str
        Directory that receives the per-night files; created if it does not exist.
    night_zero : int
        Subtracted from ``int(FieldMJD_TAI - 0.5)`` to get the ``night`` value.
    prefix : str
        File-name prefix to select. The value ``"mpcorb"`` switches to the MPCORB
        branch: no hex IDs are added, no objects are dropped, and ``ObjID`` is
        left-justified (padded) to 12 characters.
    id_map : mapping, optional
        ``ObjID`` -> hex ID lookup used when ``prefix != "mpcorb"``. Defaults to the
        notebook global ``s3m_to_hex7``.
    drop_ids : array-like, optional
        ``ObjID`` values whose rows are removed when ``prefix != "mpcorb"``; IDs that do
        not occur are ignored. Defaults to the notebook global ``delete_s3m_ids``.

    Raises
    ------
    KeyError
        If an ``ObjID`` in a non-MPCORB file is missing from ``id_map``.
    NameError
        If ``prefix != "mpcorb"`` and a default global is needed but undefined.
    """
    columns = ["ObjID", "FieldMJD_TAI", "AstRA(deg)",
               "AstDec(deg)", "AstrometricSigma(deg)",
               "PhotometricSigmaTrailedSource(mag)",
               "observedTrailedSourceMag", "optFilter"]
    is_mpcorb = prefix == "mpcorb"

    # Resolve the notebook-global defaults up front so that a missing global fails
    # immediately instead of after the first (slow) file read.
    if not is_mpcorb:
        if id_map is None:
            id_map = s3m_to_hex7
        if drop_ids is None:
            drop_ids = delete_s3m_ids

    os.makedirs(out_path, exist_ok=True)

    file_list = np.array([f for f in listdir(in_path) if f.startswith(prefix)])
    order = np.argsort([int(f.split("_")[1].split(".")[0]) for f in file_list])
    file_list = file_list[order]
    print(file_list)

    # loop over every Jake file
    for file in file_list:
        print(f"Starting file {file}")
        file_path = os.path.join(in_path, file)

        # get the file keys, sorted numerically rather than as strings
        with h5.File(file_path, "r") as f:
            keys = np.array(list(f.keys()))
        keys = keys[np.argsort([int(k) for k in keys])]

        # read in each key in the file
        for key in keys:
            start = time.time()
            print(f"  Reading {file}, key {key}")
            df = pd.read_hdf(file_path, key=key, columns=columns)

            print("    Converting IDs, dropping non-hybrid objects...")
            df["night"] = (df["FieldMJD_TAI"] - 0.5).astype(int) - night_zero

            if not is_mpcorb:
                # Iterate over the raw values: a per-row `.iloc[i]` lookup is far slower.
                # A missing ID still raises KeyError.
                df["hex_id"] = np.array([id_map[obj_id] for obj_id in df["ObjID"].values])

                # Drop the unwanted objects; the index round-trip leaves ObjID as the
                # first column and gives the frame a fresh 0..n-1 index.
                df = df.set_index("ObjID").drop(drop_ids, errors="ignore").reset_index()
            else:
                # NOTE(review): presumably padded so the stored string width is the same
                # for every append to a night file — confirm.
                df["ObjID"] = df["ObjID"].str.ljust(12)

            print("    Saving nights...")
            # One pass over the frame instead of re-masking it for every night;
            # sort=False keeps the nights in order of first appearance.
            for night, night_df in df.groupby("night", sort=False):
                file_name = f"night_{night:04d}.h5"
                if is_mpcorb:
                    file_name = f"{prefix}_{file_name}"
                night_file = os.path.join(out_path, file_name)

                # NOTE(review): the first write fixes the width of string columns in the
                # table; a later append containing a longer ObjID may be rejected —
                # confirm whether a `min_itemsize` is needed for the non-MPCORB files.
                if not os.path.isfile(night_file):
                    night_df.to_hdf(night_file, key="df", format="table")
                else:
                    with pd.HDFStore(night_file, "a") as store:
                        store.append('df', night_df)
                if (night % 500) == 0 and night > 0:
                    print(f"       Night {night} done")

            print(f"  Done with key {key} - took {time.time() - start:1.1f}s")

In [None]:
%%time
convert_jake_files()

Starting file s3m_64.h5
  Reading s3m_64.h5, key 20000
    Converting IDs, dropping non-hybrid objects...
    Saving nights...
       Night 1000 done
       Night 2000 done
       Night 2500 done
       Night 3000 done
       Night 3500 done
  Done with key 20000 - took 143.8s
  Reading s3m_64.h5, key 40000
    Converting IDs, dropping non-hybrid objects...
    Saving nights...
       Night 1000 done
       Night 2000 done
       Night 2500 done
       Night 3000 done
       Night 3500 done
  Done with key 40000 - took 138.9s
  Reading s3m_64.h5, key 60000
    Converting IDs, dropping non-hybrid objects...
    Saving nights...
       Night 1000 done
       Night 2000 done
       Night 2500 done
       Night 3000 done
       Night 3500 done
  Done with key 60000 - took 128.9s
  Reading s3m_64.h5, key 80000
    Converting IDs, dropping non-hybrid objects...
    Saving nights...
       Night 1000 done
       Night 2000 done
       Night 2500 done
       Night 3000 done
       Night 3500 d

       Night 2500 done
       Night 3000 done
       Night 3500 done
  Done with key 40000 - took 123.9s
  Reading s3m_70.h5, key 60000
    Converting IDs, dropping non-hybrid objects...
    Saving nights...
       Night 1000 done
       Night 2000 done
       Night 2500 done
       Night 3000 done
       Night 3500 done
  Done with key 60000 - took 112.0s
  Reading s3m_70.h5, key 80000
    Converting IDs, dropping non-hybrid objects...
    Saving nights...
       Night 1000 done
       Night 2000 done
       Night 2500 done
       Night 3000 done
       Night 3500 done
  Done with key 80000 - took 110.5s
  Reading s3m_70.h5, key 100000
    Converting IDs, dropping non-hybrid objects...
    Saving nights...
       Night 1000 done
       Night 2000 done
       Night 2500 done
       Night 3000 done
       Night 3500 done
  Done with key 100000 - took 109.0s
Starting file s3m_71.h5
  Reading s3m_71.h5, key 20000
    Converting IDs, dropping non-hybrid objects...
    Saving nights...
    

    Saving nights...
       Night 1000 done
       Night 2000 done
       Night 2500 done
       Night 3000 done
       Night 3500 done
  Done with key 80000 - took 92.4s
  Reading s3m_76.h5, key 100000
    Converting IDs, dropping non-hybrid objects...
    Saving nights...
       Night 1000 done
       Night 2000 done
       Night 2500 done
       Night 3000 done
       Night 3500 done
  Done with key 100000 - took 107.9s
Starting file s3m_77.h5
  Reading s3m_77.h5, key 20000
    Converting IDs, dropping non-hybrid objects...
    Saving nights...
       Night 1000 done
       Night 2000 done
       Night 2500 done
       Night 3000 done
       Night 3500 done
  Done with key 20000 - took 129.7s
  Reading s3m_77.h5, key 40000
    Converting IDs, dropping non-hybrid objects...
    Saving nights...
       Night 1000 done
       Night 2000 done
       Night 2500 done
       Night 3000 done
       Night 3500 done
  Done with key 40000 - took 117.0s
  Reading s3m_77.h5, key 60000
    Conver

    Converting IDs, dropping non-hybrid objects...
    Saving nights...
       Night 1000 done
       Night 2000 done
       Night 2500 done
       Night 3000 done
       Night 3500 done
  Done with key 80000 - took 164.0s
  Reading s3m_83.h5, key 100000
    Converting IDs, dropping non-hybrid objects...
    Saving nights...
       Night 1000 done
       Night 2000 done
       Night 2500 done
       Night 3000 done
       Night 3500 done
  Done with key 100000 - took 183.2s
Starting file s3m_84.h5
  Reading s3m_84.h5, key 20000
    Converting IDs, dropping non-hybrid objects...
    Saving nights...
       Night 1000 done
       Night 2000 done
       Night 2500 done
       Night 3000 done
       Night 3500 done
  Done with key 20000 - took 215.5s
  Reading s3m_84.h5, key 40000
    Converting IDs, dropping non-hybrid objects...
    Saving nights...
       Night 1000 done
       Night 2000 done
       Night 2500 done
       Night 3000 done
       Night 3500 done
  Done with key 40000 - t

In [40]:
%%time
convert_jake_files(prefix="mpcorb")

['mpcorb_0.h5' 'mpcorb_1.h5' 'mpcorb_2.h5' 'mpcorb_3.h5' 'mpcorb_4.h5'
 'mpcorb_5.h5' 'mpcorb_6.h5' 'mpcorb_7.h5' 'mpcorb_8.h5' 'mpcorb_9.h5'
 'mpcorb_10.h5' 'mpcorb_11.h5' 'mpcorb_12.h5' 'mpcorb_13.h5'
 'mpcorb_14.h5' 'mpcorb_15.h5' 'mpcorb_16.h5' 'mpcorb_17.h5'
 'mpcorb_18.h5' 'mpcorb_19.h5' 'mpcorb_20.h5' 'mpcorb_21.h5'
 'mpcorb_22.h5' 'mpcorb_23.h5' 'mpcorb_24.h5' 'mpcorb_25.h5'
 'mpcorb_26.h5' 'mpcorb_27.h5' 'mpcorb_28.h5' 'mpcorb_29.h5'
 'mpcorb_30.h5' 'mpcorb_31.h5' 'mpcorb_32.h5' 'mpcorb_33.h5'
 'mpcorb_34.h5' 'mpcorb_35.h5' 'mpcorb_36.h5' 'mpcorb_37.h5'
 'mpcorb_38.h5' 'mpcorb_39.h5' 'mpcorb_40.h5' 'mpcorb_41.h5'
 'mpcorb_42.h5' 'mpcorb_43.h5' 'mpcorb_44.h5' 'mpcorb_45.h5'
 'mpcorb_46.h5' 'mpcorb_47.h5' 'mpcorb_48.h5' 'mpcorb_49.h5'
 'mpcorb_50.h5' 'mpcorb_51.h5' 'mpcorb_52.h5' 'mpcorb_53.h5'
 'mpcorb_54.h5' 'mpcorb_55.h5' 'mpcorb_56.h5' 'mpcorb_57.h5'
 'mpcorb_58.h5' 'mpcorb_59.h5' 'mpcorb_60.h5' 'mpcorb_61.h5'
 'mpcorb_62.h5' 'mpcorb_63.h5' 'mpcorb_64.h5' 'mpcorb_65.h5'
 'mp

  Done with key 10000 - took 0.3s
Starting file mpcorb_38.h5
  Reading mpcorb_38.h5, key 10000
    Converting IDs, dropping non-hybrid objects...
    Saving nights...
  Done with key 10000 - took 0.3s
Starting file mpcorb_39.h5
  Reading mpcorb_39.h5, key 10000
    Converting IDs, dropping non-hybrid objects...
    Saving nights...
  Done with key 10000 - took 0.4s
Starting file mpcorb_40.h5
  Reading mpcorb_40.h5, key 10000
    Converting IDs, dropping non-hybrid objects...
    Saving nights...
  Done with key 10000 - took 0.3s
Starting file mpcorb_41.h5
  Reading mpcorb_41.h5, key 10000
    Converting IDs, dropping non-hybrid objects...
    Saving nights...
  Done with key 10000 - took 0.3s
Starting file mpcorb_42.h5
  Reading mpcorb_42.h5, key 10000
    Converting IDs, dropping non-hybrid objects...
    Saving nights...
  Done with key 10000 - took 0.3s
Starting file mpcorb_43.h5
  Reading mpcorb_43.h5, key 10000
    Converting IDs, dropping non-hybrid objects...
    Saving nights..

  Done with key 10000 - took 0.4s
Starting file mpcorb_88.h5
  Reading mpcorb_88.h5, key 10000
    Converting IDs, dropping non-hybrid objects...
    Saving nights...
  Done with key 10000 - took 0.3s
Starting file mpcorb_89.h5
  Reading mpcorb_89.h5, key 10000
    Converting IDs, dropping non-hybrid objects...
    Saving nights...
  Done with key 10000 - took 0.3s
Starting file mpcorb_90.h5
  Reading mpcorb_90.h5, key 10000
    Converting IDs, dropping non-hybrid objects...
    Saving nights...
  Done with key 10000 - took 0.3s
Starting file mpcorb_91.h5
  Reading mpcorb_91.h5, key 10000
    Converting IDs, dropping non-hybrid objects...
    Saving nights...
  Done with key 10000 - took 0.3s
Starting file mpcorb_92.h5
  Reading mpcorb_92.h5, key 10000
    Converting IDs, dropping non-hybrid objects...
    Saving nights...
  Done with key 10000 - took 0.4s
Starting file mpcorb_93.h5
  Reading mpcorb_93.h5, key 10000
    Converting IDs, dropping non-hybrid objects...
    Saving nights..

## Get numbers for SLURM runs
My code is crude but it works: we first need to run a single night from each file, and then run all of the remaining nights. I choose the second night in each file to make sure that night is entirely contained in a single file.

In [203]:
f2n = np.load("f2n.npy", allow_pickle=True)

In [234]:
# last night in the same file as 365, might as well run all of these
# NOTE(review): as written this is the largest value in the *last* entry of f2n, which
# does not obviously match the "same file as 365" description above — confirm which
# upper bound is intended.
MAX_NIGHT = np.max(f2n[-1])

In [235]:
# First batch: the second element of every f2n entry, restricted to values below
# MAX_NIGHT. Second batch: every other integer in [0, MAX_NIGHT).
# NOTE(review): both the `<` filter and np.arange(MAX_NIGHT) exclude MAX_NIGHT itself,
# so that night ends up in neither list — confirm this is intended.
run_first = np.array([nights[1] for nights in f2n])
run_first = run_first[run_first < MAX_NIGHT]
run_first_str = ','.join(run_first.astype(str))
run_next_str = ','.join(
    np.setdiff1d(np.arange(MAX_NIGHT), run_first).astype(str)
)

In [None]:
434,454,470,485,527,573,601,787,805,1881

In [236]:
print(run_first_str)
print()
print(run_next_str)

1,13,29,49,63,78,96,111,125,138,153,165,181,192,203,214,230,269,285,310,331,354,375,393,418,434,454,470,485,507,527,547,573,601,632,646,658,676,690,704,723,739,753,770,787,805,826,841,856,869,882,896,908,923,940,955,971,1008,1024,1037,1050,1078,1092,1115,1139,1153,1168,1187,1204,1218,1233,1248,1262,1273,1293,1305,1317,1335,1368,1383,1395,1405,1420,1436,1458,1471,1499,1516,1530,1547,1563,1579,1593,1609,1624,1639,1656,1668,1681,1699,1721,1735,1757,1774,1797,1813,1832,1858,1881,1896,1913,1941,1955,1968,1982,1996,2009,2025,2042,2073,2108,2129,2149,2178,2194,2214,2230,2249,2266,2282,2297,2311,2325,2341,2355,2367,2379,2393,2405,2427,2454,2473,2490,2502,2519,2535,2558,2574,2602,2622,2638,2652,2669,2682,2696,2711,2732,2748,2765,2778,2793,2823,2841,2859,2881,2895,2910,2925,2945,2970,2991,3011,3028,3045,3059,3079,3093,3105,3119,3149,3166,3201,3218,3235,3261,3280,3299,3315,3337,3351,3366,3380,3398,3412,3427,3439,3452,3464,3478,3494,3505,3522,3550,3571,3581

0,2,3,4,5,6,7,8,9,10,11,12,14,15,16,17,