# Convert the files from Sam

A quick notebook that converts Sam's files into a form that I want to work with:

1. Remove columns that I don't need, to reduce the data size
2. Separate into NEOs and MBAs
3. Remove anything from MPCORB (that's already been discovered)

In [29]:
import os
import sys
import time

import numpy as np
import pandas as pd

sys.path.append("../mitigation_algorithm/")
import magnitudes

In [27]:
# Inclusive upper bound on the visit numbers that the cells below iterate over.
MAX_VISIT = 204

# The only columns retained from each detections file; everything else is
# discarded when the file is read.
columns_to_keep = [
    "ObjID",
    "FieldMJD",
    "AstRA(deg)",
    "AstDec(deg)",
    "filter",
    "MaginFilter",
    "AstrometricSigma(mas)",
    "PhotometricSigma(mag)",
]

In [41]:
%%time
# Split each visit's detections file into two smaller HDF5 files:
#   - neo/visit-XXX0000.h5 : rows whose ObjID starts with "S0"
#   - mba/visit-XXX0000.h5 : rows whose ObjID starts with "S1"
# Rows with any other ObjID prefix are not written anywhere.
detections_dir = "/data/epyc/projects/jpl_survey_sim/10yrs/v2.99_X05/detections"

# Make sure the output directories exist so the writes below cannot fail on a
# missing folder.
for out_dir in ("neo", "mba"):
    os.makedirs(out_dir, exist_ok=True)

for visit in range(13, MAX_VISIT + 1):
    print(f"Visit: {visit}")
    start = time.time()
    path = f"{detections_dir}/visit-{visit:03d}0000.h5"
    if not os.path.exists(path):
        # Some visit numbers simply have no file; skip them.
        print("  No file found")
        continue
    df = pd.read_hdf(path)[columns_to_keep]

    # Vectorised prefix extraction replaces a per-row Python loop over
    # millions of observations. Converting to a NumPy array keeps the boolean
    # masks positional, so no index alignment is involved when filtering.
    prefix = df["ObjID"].str[:2].to_numpy()

    neo_df = df[prefix == "S0"]
    mba_df = df[prefix == "S1"]

    neo_df.to_hdf(f"neo/visit-{visit:03d}0000.h5", key="df")
    mba_df.to_hdf(f"mba/visit-{visit:03d}0000.h5", key="df")

    end = time.time()
    print(f'  Completed in {end - start:1.1f}s')
    print(f'  Checked {len(df):1.2e} obs, {len(neo_df):1.2e} NEOs, {len(mba_df):1.2e} MBAs')

Visit: 13
  No file found
Visit: 14
  Completed in 15.4s
  Checked 2.86e+06 obs, 5.30e+02 NEOs, 1.84e+06 MBAs
Visit: 15
  Completed in 33.7s
  Checked 5.84e+06 obs, 1.78e+03 NEOs, 4.57e+06 MBAs
Visit: 16
  No file found
Visit: 17
  Completed in 43.4s
  Checked 6.08e+06 obs, 1.91e+03 NEOs, 4.78e+06 MBAs
Visit: 18
  Completed in 28.9s
  Checked 5.01e+06 obs, 1.40e+03 NEOs, 3.88e+06 MBAs
Visit: 19
  No file found
Visit: 20
  Completed in 30.5s
  Checked 4.70e+06 obs, 1.17e+03 NEOs, 3.57e+06 MBAs
Visit: 21
  Completed in 30.0s
  Checked 5.31e+06 obs, 1.47e+03 NEOs, 4.08e+06 MBAs
Visit: 22
  Completed in 22.6s
  Checked 3.97e+06 obs, 1.19e+03 NEOs, 2.92e+06 MBAs
Visit: 23
  Completed in 22.6s
  Checked 3.38e+06 obs, 1.18e+03 NEOs, 2.48e+06 MBAs
Visit: 24
  Completed in 30.0s
  Checked 3.70e+06 obs, 1.58e+03 NEOs, 2.86e+06 MBAs
Visit: 25
  Completed in 15.7s
  Checked 2.37e+06 obs, 1.16e+03 NEOs, 1.75e+06 MBAs
Visit: 26
  Completed in 11.7s
  Checked 1.83e+06 obs, 7.85e+02 NEOs, 1.24e+06 MBA

  Completed in 14.2s
  Checked 1.91e+06 obs, 7.25e+02 NEOs, 1.38e+06 MBAs
Visit: 130
  No file found
Visit: 131
  Completed in 16.6s
  Checked 2.49e+06 obs, 7.72e+02 NEOs, 1.73e+06 MBAs
Visit: 132
  Completed in 21.5s
  Checked 3.18e+06 obs, 8.96e+02 NEOs, 2.21e+06 MBAs
Visit: 133
  Completed in 24.5s
  Checked 3.50e+06 obs, 1.00e+03 NEOs, 2.47e+06 MBAs
Visit: 134
  No file found
Visit: 135
  Completed in 35.1s
  Checked 4.83e+06 obs, 1.34e+03 NEOs, 3.44e+06 MBAs
Visit: 136
  Completed in 18.1s
  Checked 2.82e+06 obs, 5.24e+02 NEOs, 1.79e+06 MBAs
Visit: 137
  Completed in 32.4s
  Checked 4.69e+06 obs, 1.14e+03 NEOs, 3.26e+06 MBAs
Visit: 138
  No file found
Visit: 139
  No file found
Visit: 140
  Completed in 20.5s
  Checked 3.27e+06 obs, 5.62e+02 NEOs, 2.09e+06 MBAs
Visit: 141
  Completed in 40.2s
  Checked 5.77e+06 obs, 1.67e+03 NEOs, 4.18e+06 MBAs
Visit: 142
  Completed in 24.4s
  Checked 3.82e+06 obs, 1.07e+03 NEOs, 2.67e+06 MBAs
Visit: 143
  Completed in 37.8s
  Checked 5.55e+06 ob

In [34]:
import os

# Gather every visit number from 0 through MAX_VISIT (inclusive) for which no
# detections file exists on disk.
bad = [
    visit
    for visit in range(MAX_VISIT + 1)
    if not os.path.exists(f"/data/epyc/projects/jpl_survey_sim/10yrs/v2.99_X05/detections/visit-{visit:03d}0000.h5")
]

In [36]:
# Display the visit numbers that have no detections file, as a compact array.
np.array(bad)

array([ 13,  16,  19,  31,  33,  36,  40,  49,  54,  55,  56,  57,  59,
        74,  76,  78,  79,  80,  82,  83,  92,  97,  98,  99, 117, 120,
       121, 130, 134, 138, 139, 161, 177, 179, 182, 194, 198])

In [31]:
# Load the visit files on either side of the missing visit 13 so the time gap
# between them can be measured. Both frames are loaded in this cell because the
# cells that follow read `after_gap` as well as `before_gap`.
detections_dir = "/data/epyc/projects/jpl_survey_sim/10yrs/v2.99_X05/detections"
before_gap = pd.read_hdf(f"{detections_dir}/visit-{12:03d}0000.h5")[columns_to_keep]
after_gap = pd.read_hdf(f"{detections_dir}/visit-{14:03d}0000.h5")[columns_to_keep]

In [40]:
# Difference between the earliest FieldMJD in the visit-14 file and the latest
# FieldMJD in the visit-12 file (presumably in days, if FieldMJD is a Modified
# Julian Date — TODO confirm).
# NOTE: `after_gap` (the visit-14 frame) must already be loaded before this
# cell is run.
after_gap["FieldMJD"].min() - before_gap["FieldMJD"].max()

11.14590331538784

In [38]:
# Latest FieldMJD value found in the visit-12 file.
before_gap["FieldMJD"].max()

60410.07451218399

In [None]:
# Detections from the visit-14 file (the first file after the missing visit 13),
# restricted to the kept columns.
after_gap = pd.read_hdf(f"/data/epyc/projects/jpl_survey_sim/10yrs/v2.99_X05/detections/visit-{14:03d}0000.h5")[columns_to_keep]

# Find the S3M objects that were deleted
Okay, never mind: we're going to use the v3.0 sims, so we just need to remove any S3M objects that didn't make it into the hybrid catalogue.

In [42]:
# S3M catalogue; later cells use its index as the set of object ids.
s3m = pd.read_hdf("/epyc/projects/hybrid-sso-catalogs/catalogues/s3m_cart.h5")

In [56]:
# Hybrid catalogue; later cells slice off its trailing rows and compare its
# index against the S3M index.
hybrid = pd.read_hdf("/epyc/projects/hybrid-sso-catalogs/catalogues/hybrid_cart.h5")

In [62]:
# MPCORB catalogue; only its length is used in the cells below.
mpcorb = pd.read_hdf("/epyc/projects/hybrid-sso-catalogs/catalogues/mpcorb_cart.h5")

In [95]:
# Number of trailing rows that get cut from `hybrid` to build `hybrid_no_mpc`.
# NOTE(review): 101531 is an undocumented literal — TODO record where it comes from.
len(mpcorb) - 101531

1176830

In [96]:
# Drop the trailing rows of the hybrid catalogue (presumably the MPCORB-derived
# entries — TODO confirm). The row count used to be the pasted literal 1176830,
# which is len(mpcorb) - 101531 for the catalogue loaded above; computing it
# here keeps the slice tied to `mpcorb` instead of to a copied cell output.
# NOTE(review): the 101531 offset itself is undocumented — TODO confirm.
n_trailing_rows = len(mpcorb) - 101531

# A count of 0 would make `iloc[:-0]` return an empty frame, and a count beyond
# the catalogue length would be meaningless, so fail loudly in either case.
assert 0 < n_trailing_rows < len(hybrid), "unexpected number of trailing rows to remove"

hybrid_no_mpc = hybrid.iloc[:-n_trailing_rows]

In [99]:
# Inspect the object ids (the index) remaining after the trailing rows were cut.
hybrid_no_mpc.index.values

array(['S0000001a', 'S0000002a', 'S0000004a', ..., 'CEN10897', 'CEN10898',
       'CEN10899'], dtype=object)

In [100]:
# Ids present in hybrid_no_mpc but absent from s3m (np.setdiff1d(a, b) keeps the
# values of `a` that are not in `b`).
# NOTE: the recorded run of this cell ended in a KeyboardInterrupt, and `diff`
# is not used anywhere else in the notebook as shown.
diff = np.setdiff1d(hybrid_no_mpc.index.values, s3m.index.values)


KeyboardInterrupt



In [101]:
# Stack the ids from both catalogues into one array so their occurrences can be
# counted together.
all_ind = np.concatenate((hybrid_no_mpc.index.values, s3m.index.values))

In [104]:
# Sorted unique ids, and how many times each one occurs in the combined array.
uni, count = np.unique(all_ind, return_counts=True)

In [133]:
# Keep the ids that occur exactly once across the two concatenated id arrays.
# Assuming neither catalogue index contains internal duplicates (TODO confirm),
# these are the ids found in only one of hybrid_no_mpc / s3m — so ids that exist
# only in hybrid_no_mpc are included as well, not just S3M-only ones.
deleted_s3m_objs = uni[count == 1]

In [141]:
# Tally how many of the collected ids have "SL" as their first two characters.
x = sum(1 for obj_id in deleted_s3m_objs if obj_id[:2] == "SL")

In [145]:
# MPCORB row count minus the number of collected ids minus the number of
# "SL"-prefixed ids among them (`x`).
# NOTE(review): the purpose of this figure is not stated in the notebook — TODO document.
len(mpcorb) - len(deleted_s3m_objs) - x

107370

In [142]:
# Number of ids in deleted_s3m_objs whose first two characters are "SL".
x

9372

In [147]:
%%time
# Time how long it takes to drop the collected ids from the S3M catalogue.
# errors="ignore" skips ids that are not in the s3m index. The result is a new
# frame that is only displayed here; it is not assigned, so `s3m` is unchanged.
s3m.drop(deleted_s3m_objs, errors="ignore")

CPU times: user 5.86 s, sys: 1.52 s, total: 7.38 s
Wall time: 7.36 s


Unnamed: 0,id,x,y,z,vx,vy,vz,t_0,H,g
S0000001a,0.0,-1.497621,-2.219998,-0.121434,0.005838,-0.006255,0.001226,54466.0,10.315000,0.15
S0000002a,1.0,-0.867436,2.369810,-0.317870,-0.006430,-0.009964,0.003795,54466.0,10.818000,0.15
S0000004a,3.0,-4.268592,-0.745253,1.259818,0.003616,-0.002620,-0.000604,54466.0,11.452000,0.15
S0000005a,4.0,-3.545760,-0.411177,-1.184829,-0.003123,-0.004855,-0.002361,54466.0,11.678000,0.15
S0000006a,5.0,0.050639,2.079326,-0.588268,-0.009195,-0.000364,0.001486,54466.0,11.869000,0.15
...,...,...,...,...,...,...,...,...,...,...
CEN10895,14402282.0,-17.031867,14.273743,-11.013700,-0.003131,-0.000959,0.001550,59215.0,19.645034,0.15
CEN10896,14402283.0,4.305471,15.438098,-12.212285,-0.003330,0.002235,0.001598,59215.0,11.600137,0.15
CEN10897,14402284.0,-14.239609,-18.592683,4.889858,0.001227,-0.002684,-0.002301,59215.0,19.503685,0.15
CEN10898,14402285.0,9.208982,-12.435805,-15.468118,0.003680,0.001658,-0.000151,59215.0,17.060558,0.15


In [135]:
# Write the collected ids to disk in the current working directory.
# NOTE(review): if this array has object dtype (the hybrid index values shown
# earlier do), np.save stores it via pickle and np.load will need
# allow_pickle=True to read it back — TODO confirm.
np.save("deleted_s3m_ids.npy", deleted_s3m_objs)

# TODO IN THE MORNING
- Git status on epyc
- Push changes
- Pull to local
- Update with delete conditions and 3+ tracklets
- Push changes
- Pull to hyak
- Copy data over to hyak
- RUN IT