# Convert the files from Sam

A quick notebook that converts Sam's files into the form that I want to work with:

1. Remove columns that I don't need, to reduce the data size
2. Separate into NEOs and MBAs
3. Remove anything from MPCORB (that's already been discovered)

In [29]:
import os
import sys
import time

import numpy as np
import pandas as pd

sys.path.append("../mitigation_algorithm/")
import magnitudes

In [27]:
# Detection columns that are retained when a visit file is read; every other
# column is discarded.
columns_to_keep = [
    "ObjID",
    "FieldMJD",
    "AstRA(deg)",
    "AstDec(deg)",
    "filter",
    "MaginFilter",
    "AstrometricSigma(mas)",
    "PhotometricSigma(mag)",
]

# Highest visit number to look for (loops below use MAX_VISIT + 1 as the
# exclusive upper bound).
MAX_VISIT = 204

In [41]:
%%time
# Slim each visit's detections file down to `columns_to_keep` and split the
# rows into NEO and MBA tables, written to neo/ and mba/ respectively.
detections_dir = "/data/epyc/projects/jpl_survey_sim/10yrs/v2.99_X05/detections"
for visit in range(13, MAX_VISIT + 1):
    print(f"Visit: {visit}")
    start = time.time()
    path = f"{detections_dir}/visit-{visit:03d}0000.h5"
    if not os.path.exists(path):
        # Not every visit number has a detections file; skip the missing ones.
        print("  No file found")
        continue
    df = pd.read_hdf(path)[columns_to_keep]

    # Classify by the first two characters of ObjID in a single vectorised
    # pass ("S0" -> NEO, "S1" -> MBA) instead of a Python loop over every row.
    obj_prefix = df["ObjID"].str[:2]
    neo_df = df[obj_prefix == "S0"]
    mba_df = df[obj_prefix == "S1"]

    neo_df.to_hdf(f"neo/visit-{visit:03d}0000.h5", key="df")
    mba_df.to_hdf(f"mba/visit-{visit:03d}0000.h5", key="df")

    end = time.time()
    print(f'  Completed in {end - start:1.1f}s')
    print(f'  Checked {len(df):1.2e} obs, {len(neo_df):1.2e} NEOs, {len(mba_df):1.2e} MBAs')

Visit: 13
  No file found
Visit: 14
  Completed in 15.4s
  Checked 2.86e+06 obs, 5.30e+02 NEOs, 1.84e+06 MBAs
Visit: 15
  Completed in 33.7s
  Checked 5.84e+06 obs, 1.78e+03 NEOs, 4.57e+06 MBAs
Visit: 16
  No file found
Visit: 17
  Completed in 43.4s
  Checked 6.08e+06 obs, 1.91e+03 NEOs, 4.78e+06 MBAs
Visit: 18
  Completed in 28.9s
  Checked 5.01e+06 obs, 1.40e+03 NEOs, 3.88e+06 MBAs
Visit: 19
  No file found
Visit: 20
  Completed in 30.5s
  Checked 4.70e+06 obs, 1.17e+03 NEOs, 3.57e+06 MBAs
Visit: 21
  Completed in 30.0s
  Checked 5.31e+06 obs, 1.47e+03 NEOs, 4.08e+06 MBAs
Visit: 22
  Completed in 22.6s
  Checked 3.97e+06 obs, 1.19e+03 NEOs, 2.92e+06 MBAs
Visit: 23
  Completed in 22.6s
  Checked 3.38e+06 obs, 1.18e+03 NEOs, 2.48e+06 MBAs
Visit: 24
  Completed in 30.0s
  Checked 3.70e+06 obs, 1.58e+03 NEOs, 2.86e+06 MBAs
Visit: 25
  Completed in 15.7s
  Checked 2.37e+06 obs, 1.16e+03 NEOs, 1.75e+06 MBAs
Visit: 26
  Completed in 11.7s
  Checked 1.83e+06 obs, 7.85e+02 NEOs, 1.24e+06 MBA

  Completed in 14.2s
  Checked 1.91e+06 obs, 7.25e+02 NEOs, 1.38e+06 MBAs
Visit: 130
  No file found
Visit: 131
  Completed in 16.6s
  Checked 2.49e+06 obs, 7.72e+02 NEOs, 1.73e+06 MBAs
Visit: 132
  Completed in 21.5s
  Checked 3.18e+06 obs, 8.96e+02 NEOs, 2.21e+06 MBAs
Visit: 133
  Completed in 24.5s
  Checked 3.50e+06 obs, 1.00e+03 NEOs, 2.47e+06 MBAs
Visit: 134
  No file found
Visit: 135
  Completed in 35.1s
  Checked 4.83e+06 obs, 1.34e+03 NEOs, 3.44e+06 MBAs
Visit: 136
  Completed in 18.1s
  Checked 2.82e+06 obs, 5.24e+02 NEOs, 1.79e+06 MBAs
Visit: 137
  Completed in 32.4s
  Checked 4.69e+06 obs, 1.14e+03 NEOs, 3.26e+06 MBAs
Visit: 138
  No file found
Visit: 139
  No file found
Visit: 140
  Completed in 20.5s
  Checked 3.27e+06 obs, 5.62e+02 NEOs, 2.09e+06 MBAs
Visit: 141
  Completed in 40.2s
  Checked 5.77e+06 obs, 1.67e+03 NEOs, 4.18e+06 MBAs
Visit: 142
  Completed in 24.4s
  Checked 3.82e+06 obs, 1.07e+03 NEOs, 2.67e+06 MBAs
Visit: 143
  Completed in 37.8s
  Checked 5.55e+06 ob

In [166]:
import os

# Visit numbers (0..MAX_VISIT inclusive) for which no v3.0 S1_00 detections
# file exists on disk.
bad = [
    visit
    for visit in range(MAX_VISIT + 1)
    if not os.path.exists(f"/data/epyc/projects/jpl_survey_sim/10yrs/v3.0/detections/S1_00/visit-{visit:03d}0000.h5")
]

In [167]:
# Show the visit numbers collected in `bad`; an empty array means every file was found.
np.array(bad)

array([], dtype=float64)

In [31]:
# Load the detections from visit 12 and visit 14 (restricted to
# `columns_to_keep`) so the time gap between them can be measured.
# Both frames are loaded here because the gap calculation in the next cell
# reads `after_gap`, which must exist before that cell runs on a fresh kernel.
gap_path_template = "/data/epyc/projects/jpl_survey_sim/10yrs/v2.99_X05/detections/visit-{:03d}0000.h5"
before_gap = pd.read_hdf(gap_path_template.format(12))[columns_to_keep]
after_gap = pd.read_hdf(gap_path_template.format(14))[columns_to_keep]

In [40]:
# Difference between the earliest FieldMJD in `after_gap` and the latest
# FieldMJD in `before_gap`, i.e. the length of the gap in FieldMJD units.
after_gap["FieldMJD"].min() - before_gap["FieldMJD"].max()

11.14590331538784

In [38]:
# Latest FieldMJD among the detections held in `before_gap`.
before_gap["FieldMJD"].max()

60410.07451218399

In [None]:
# Detections from the visit-14 file, restricted to `columns_to_keep`.
after_gap = pd.read_hdf(f"/data/epyc/projects/jpl_survey_sim/10yrs/v2.99_X05/detections/visit-{14:03d}0000.h5")[columns_to_keep]

# Find the S3M objects that were deleted
Okay, never mind: we're going to use the v3.0 sims, so we just need to remove any S3M objects that didn't make it into the hybrid catalogue.

In [42]:
# Load the S3M catalogue table (s3m_cart.h5) into a DataFrame.
s3m = pd.read_hdf("/epyc/projects/hybrid-sso-catalogs/catalogues/s3m_cart.h5")

In [56]:
# Load the hybrid catalogue table (hybrid_cart.h5) into a DataFrame.
hybrid = pd.read_hdf("/epyc/projects/hybrid-sso-catalogs/catalogues/hybrid_cart.h5")

In [62]:
# Load the MPCORB catalogue table (mpcorb_cart.h5) into a DataFrame.
mpcorb = pd.read_hdf("/epyc/projects/hybrid-sso-catalogs/catalogues/mpcorb_cart.h5")

In [95]:
# Number of MPCORB rows minus 101531.
# NOTE(review): 101531 is not explained anywhere in this notebook — confirm what it counts.
len(mpcorb) - 101531

1176830

In [96]:
# Keep the hybrid catalogue without its last (len(mpcorb) - 101531) rows.
# The row count is derived from `mpcorb` rather than pasted in as a literal, so
# it stays in step with the catalogue that was actually loaded.
# NOTE(review): presumably the trailing rows of `hybrid` are the MPCORB-derived
# objects, and 101531 is undocumented — confirm both.
hybrid_no_mpc = hybrid.iloc[:-(len(mpcorb) - 101531)]

In [99]:
# Inspect the index labels (object IDs) that remain in `hybrid_no_mpc`.
hybrid_no_mpc.index.values

array(['S0000001a', 'S0000002a', 'S0000004a', ..., 'CEN10897', 'CEN10898',
       'CEN10899'], dtype=object)

In [100]:
# Sorted, de-duplicated IDs that are in `hybrid_no_mpc` but not in `s3m`.
# Index.difference gives the same set as np.setdiff1d on the index values, but
# uses pandas' index machinery rather than sorting two multi-million-element
# object arrays.
diff = hybrid_no_mpc.index.difference(s3m.index).values


KeyboardInterrupt



In [101]:
# Pool the index labels of `hybrid_no_mpc` and `s3m` into one array.
all_ind = np.concatenate((hybrid_no_mpc.index.values, s3m.index.values))

In [104]:
# Sorted unique IDs in the pooled array and the number of times each occurs.
uni, count = np.unique(all_ind, return_counts=True)

In [133]:
# S3M IDs that are absent from the trimmed hybrid catalogue, as a sorted array.
# A direct set difference is used instead of "appears exactly once in the
# pooled IDs": that test would also pick up IDs found only in `hybrid_no_mpc`,
# and would miss an S3M ID that happens to be duplicated within `s3m`.
deleted_s3m_objs = s3m.index.difference(hybrid_no_mpc.index).values

In [141]:
# Tally the entries of `deleted_s3m_objs` whose first two characters are "SL".
x = sum(1 for obj_id in deleted_s3m_objs if obj_id[:2] == "SL")

In [145]:
# Exploratory check: MPCORB row count minus the number of deleted IDs minus
# the "SL" tally `x`.
len(mpcorb) - len(deleted_s3m_objs) - x

107370

In [171]:
# Display the S3M catalogue frame.
s3m

Unnamed: 0,id,x,y,z,vx,vy,vz,t_0,H,g
S0000001a,0.0,-1.497621,-2.219998,-0.121434,0.005838,-0.006255,0.001226,54466.0,10.315000,0.15
S0000002a,1.0,-0.867436,2.369810,-0.317870,-0.006430,-0.009964,0.003795,54466.0,10.818000,0.15
S0000003a,2.0,-0.957813,0.466564,0.193313,0.002716,-0.019129,-0.001788,54466.0,11.175000,0.15
S0000004a,3.0,-4.268592,-0.745253,1.259818,0.003616,-0.002620,-0.000604,54466.0,11.452000,0.15
S0000005a,4.0,-3.545760,-0.411177,-1.184829,-0.003123,-0.004855,-0.002361,54466.0,11.678000,0.15
...,...,...,...,...,...,...,...,...,...,...
CEN10895,14402282.0,-17.031867,14.273743,-11.013700,-0.003131,-0.000959,0.001550,59215.0,19.645034,0.15
CEN10896,14402283.0,4.305471,15.438098,-12.212285,-0.003330,0.002235,0.001598,59215.0,11.600137,0.15
CEN10897,14402284.0,-14.239609,-18.592683,4.889858,0.001227,-0.002684,-0.002301,59215.0,19.503685,0.15
CEN10898,14402285.0,9.208982,-12.435805,-15.468118,0.003680,0.001658,-0.000151,59215.0,17.060558,0.15


In [170]:
# Zero-padded, 7-character upper-case hex strings for 0 .. len(deleted_s3m_objs) - 1.
np.array([format(num, "07X") for num in range(len(deleted_s3m_objs))])

array(['S0000003a', 'S000000Fa', 'S000000Ga', ..., 'St500zTVa',
       'St500zo1a', 'St500zwTa'], dtype=object)

In [147]:
%%time
# Time how long dropping the deleted IDs takes. The result is only displayed:
# drop() returns a new frame and `s3m` itself is left unchanged.
# errors="ignore" skips any label that is not present in the index.
s3m.drop(deleted_s3m_objs, errors="ignore")

CPU times: user 5.86 s, sys: 1.52 s, total: 7.38 s
Wall time: 7.36 s


Unnamed: 0,id,x,y,z,vx,vy,vz,t_0,H,g
S0000001a,0.0,-1.497621,-2.219998,-0.121434,0.005838,-0.006255,0.001226,54466.0,10.315000,0.15
S0000002a,1.0,-0.867436,2.369810,-0.317870,-0.006430,-0.009964,0.003795,54466.0,10.818000,0.15
S0000004a,3.0,-4.268592,-0.745253,1.259818,0.003616,-0.002620,-0.000604,54466.0,11.452000,0.15
S0000005a,4.0,-3.545760,-0.411177,-1.184829,-0.003123,-0.004855,-0.002361,54466.0,11.678000,0.15
S0000006a,5.0,0.050639,2.079326,-0.588268,-0.009195,-0.000364,0.001486,54466.0,11.869000,0.15
...,...,...,...,...,...,...,...,...,...,...
CEN10895,14402282.0,-17.031867,14.273743,-11.013700,-0.003131,-0.000959,0.001550,59215.0,19.645034,0.15
CEN10896,14402283.0,4.305471,15.438098,-12.212285,-0.003330,0.002235,0.001598,59215.0,11.600137,0.15
CEN10897,14402284.0,-14.239609,-18.592683,4.889858,0.001227,-0.002684,-0.002301,59215.0,19.503685,0.15
CEN10898,14402285.0,9.208982,-12.435805,-15.468118,0.003680,0.001658,-0.000151,59215.0,17.060558,0.15


In [183]:
# Persist the deleted IDs for later use. If the array has object dtype it is
# stored via pickle, so loading it back needs np.load(..., allow_pickle=True).
# NOTE(review): the file is named "delete_s3m_ids.npy" (no trailing "d") —
# confirm that downstream readers use this exact name.
np.save("delete_s3m_ids.npy", deleted_s3m_objs)

# TODO IN THE MORNING
- Git status on epyc
- Push changes
- Pull to local
- Update with delete conditions and 3+ tracklets
- Push changes
- Pull to hyak
- Copy data over to hyak
- RUN IT

In [150]:
ls /data/epyc/projects/jpl_survey_sim/10yrs/v3.0/detections

[0m[01;34mS0[0m/     [01;34mS1_02[0m/  [01;34mS1_05[0m/  [01;34mS1_08[0m/  [01;34mS1_11[0m/  [01;34mSS[0m/   [01;34mcen-def-1[0m/  [01;34mmpcorb[0m/
[01;34mS1_00[0m/  [01;34mS1_03[0m/  [01;34mS1_06[0m/  [01;34mS1_09[0m/  [01;34mS1_12[0m/  [01;34mST[0m/   [01;34mcen-def-2[0m/
[01;34mS1_01[0m/  [01;34mS1_04[0m/  [01;34mS1_07[0m/  [01;34mS1_10[0m/  [01;34mS1_13[0m/  [01;34mSt5[0m/  [01;34mgneo[0m/


In [151]:
# Load visit 0 of the v3.0 S1_00 detections with all columns retained.
df = pd.read_hdf(f"/data/epyc/projects/jpl_survey_sim/10yrs/v3.0/detections/S1_00/visit-{0:03d}0000.h5")

<bound method DataFrame.set_index of              ObjID  FieldID      FieldMJD  AstRange(km)  AstRangeRate(km/s)  \
318      S100003La   8659.0  60229.068306  4.191751e+08           23.747380   
319      S100003La   8709.0  60229.091524  4.192228e+08           23.797479   
484      S100005Ea   6032.0  60226.010204  3.036332e+08           22.001934   
487      S100005Ea   6082.0  60226.034511  3.036795e+08           22.068440   
733      S100008ra   5391.0  60225.018639  2.938162e+08            8.496415   
...            ...      ...           ...           ...                 ...   
1915245  S1006VJ6a   3415.0  60222.293483  2.784335e+08            8.661016   
1916293  S1006W3ka   4165.0  60223.240156  2.100916e+08           10.383747   
1916296  S1006W3ka   4215.0  60223.263941  2.101130e+08           10.424331   
1917401  S1006WhNa   5676.0  60225.238710  3.216477e+08            8.703582   
1918254  S1006WxGa   9564.0  60230.089064  3.598617e+08           22.410252   

         AstRA

In [156]:
from astropy.time import Time

In [161]:
# Convert MJD 60217 to an ISO-formatted calendar date string.
Time(60217, format="mjd").iso

'2023-09-30 00:00:00.000'

In [160]:
# Smallest value of FieldMJD - 0.5 after conversion to int (astype(int)
# truncates the fractional part).
# NOTE(review): presumably the 0.5-day shift groups observations by night — confirm.
(df["FieldMJD"] - 0.5).astype(int).min()

60217

In [174]:
# Load the f2n array from the working directory.
# allow_pickle=True lets np.load unpickle Python objects, which can execute
# arbitrary code — only use it on files from a trusted source.
f2n = np.load("f2n.npy", allow_pickle=True)

In [181]:
# Display the loaded array to inspect its contents.
f2n

array([list([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]),
       list([12, 13, 14, 15, 16, 17, 18, 20, 21, 22, 23, 24, 25, 26, 28]),
       list([28, 29, 30, 31, 32, 33, 41, 42, 43, 44, 45, 46, 47, 48]),
       list([48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62]),
       list([62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77]),
       list([77, 78, 79, 80, 81, 82, 83, 84, 85, 89, 90, 91, 92, 93, 94, 95]),
       list([95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110]),
       list([110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124]),
       list([124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137]),
       list([137, 138, 139, 140, 141, 142, 144, 145, 147, 148, 149, 150, 151, 152]),
       list([152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164]),
       list([164, 165, 166, 168, 169, 172, 173, 174, 175, 176, 177, 178, 179, 180]),
       list([180, 181, 182, 183, 184, 185, 186, 1

In [187]:
# Second element (index 1) of every entry in `f2n`, gathered into one array.
run_first = np.array([entry[1] for entry in f2n])

In [198]:
# Comma-separated list of the integers in 0..372 that are not among the
# `run_first` values below 372, with the first remaining value dropped.
run_next_str = ','.join(
    str(value)
    for value in np.setdiff1d(np.arange(373), run_first[run_first < 372])[1:]
)

In [202]:
# Print the `run_first` values below 372 as a comma-separated line, then a
# blank line, then the previously built `run_next_str`.
print(','.join(map(str, run_first[run_first < 372])), "", run_next_str, sep="\n")

1,13,29,49,63,78,96,111,125,138,153,165,181,192,203,214,230,269,285,310,331,354

2,3,4,5,6,7,8,9,10,11,12,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,50,51,52,53,54,55,56,57,58,59,60,61,62,64,65,66,67,68,69,70,71,72,73,74,75,76,77,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,97,98,99,100,101,102,103,104,105,106,107,108,109,110,112,113,114,115,116,117,118,119,120,121,122,123,124,126,127,128,129,130,131,132,133,134,135,136,137,139,140,141,142,143,144,145,146,147,148,149,150,151,152,154,155,156,157,158,159,160,161,162,163,164,166,167,168,169,170,171,172,173,174,175,176,177,178,179,180,182,183,184,185,186,187,188,189,190,191,193,194,195,196,197,198,199,200,201,202,204,205,206,207,208,209,210,211,212,213,215,216,217,218,219,220,221,222,223,224,225,226,227,228,229,231,232,233,234,235,236,237,238,239,240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255,256,257,258,259,260,261,262,263,264,265,266,267,268,270,271,272,273

In [186]:
np.setdiff1d(np.arange(373), 

array([  0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,
        13,  14,  15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,
        26,  27,  28,  29,  30,  31,  32,  33,  34,  35,  36,  37,  38,
        39,  40,  41,  42,  43,  44,  45,  46,  47,  48,  49,  50,  51,
        52,  53,  54,  55,  56,  57,  58,  59,  60,  61,  62,  63,  64,
        65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,
        78,  79,  80,  81,  82,  83,  84,  85,  86,  87,  88,  89,  90,
        91,  92,  93,  94,  95,  96,  97,  98,  99, 100, 101, 102, 103,
       104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116,
       117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129,
       130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142,
       143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155,
       156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168,
       169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 18

In [180]:
# Comma-separated string of the second element (index 1) of every `f2n` entry.
','.join(str(entry[1]) for entry in f2n)

'1,13,29,49,63,78,96,111,125,138,153,165,181,192,203,214,230,269,285,310,331,354,375,393,418,434,454,470,485,507,527,547,573,601,632,646,658,676,690,704,723,739,753,770,787,805,826,841,856,869,882,896,908,923,940,955,971,1008,1024,1037,1050,1078,1092,1115,1139,1153,1168,1187,1204,1218,1233,1248,1262,1273,1293,1305,1317,1335,1368,1383,1395,1405,1420,1436,1458,1471,1499,1516,1530,1547,1563,1579,1593,1609,1624,1639,1656,1668,1681,1699,1721,1735,1757,1774,1797,1813,1832,1858,1881,1896,1913,1941,1955,1968,1982,1996,2009,2025,2042,2073,2108,2129,2149,2178,2194,2214,2230,2249,2266,2282,2297,2311,2325,2341,2355,2367,2379,2393,2405,2427,2454,2473,2490,2502,2519,2535,2558,2574,2602,2622,2638,2652,2669,2682,2696,2711,2732,2748,2765,2778,2793,2823,2841,2859,2881,2895,2910,2925,2945,2970,2991,3011,3028,3045,3059,3079,3093,3105,3119,3149,3166,3201,3218,3235,3261,3280,3299,3315,3337,3351,3366,3380,3398,3412,3427,3439,3452,3464,3478,3494,3505,3522,3550,3571,3581'

In [None]:
1,13,29,49,63,78,96,111,125,138,153,165,181,192,203,214,230,269,285,310,331,354

In [184]:
pwd

'/data/epyc/projects/neocp-predictions/current_criteria'

In [185]:
ls

Convert Sam Files.ipynb                  f2n.npy
Predictions with current criteria.ipynb  helpers.py
README.md                                lsst_neocp.py
[0m[01;34m__pycache__[0m/                             [01;34mmba[0m/
delete_s3m_ids.npy                       [01;34mneo[0m/
deleted_s3m_ids.npy                      [01;34mold_not_hybrid_data[0m/
difi_MBAs_10yrs_0000.h5                  unique_findable_mba_hex_ids.npy
difi_combiner.py                         unique_findable_mba_hex_ids_linked.npy
difi_runner.py                           unique_findable_neo_hex_ids_linked.npy
