# Proof of Concept

In [41]:
import json
import os
import pprint

import numpy as np
import pandas as pd
import requests
import yaml
from pandas.io.json import json_normalize
from urllib3.exceptions import ProtocolError

## Data Acquisition

### Basic API

In [26]:
# Asterank rankings endpoint, and the query string appended to it
# (sort by score, at most 100000 rows).
api_base = 'http://www.asterank.com/api/rankings'
api_params = '?' + '&'.join(('sort_by=score', 'limit=100000'))

In [27]:
# Fetch the ranked asteroid list. The timeout keeps the cell from hanging on an
# unresponsive server, and raise_for_status() reports an HTTP error here instead
# of letting it surface as a JSON decode failure in the next cell.
rsp = requests.get(api_base + api_params, timeout=60)
rsp.raise_for_status()

In [28]:
# Decode the JSON body of the rankings response.
rsp_json = rsp.json()

In [29]:
# Build a DataFrame from the decoded rankings payload.
df = pd.DataFrame(rsp_json)

In [30]:
# Preview the first rows of the rankings table.
df.head()

Unnamed: 0,A1,A2,A3,BV,G,GM,H,H_sigma,IR,UB,...,sigma_w,spec,spec_B,spec_T,spkid,t_jup,tp,tp_cal,two_body,w
0,,-3.37188e-14,,,,,19.3,,,,...,4.1e-05,Cg,Cg,,2162173.0,5.308,2455902.0,20111210.0,,211.42585
1,,6.116535e-14,,,,,19.3,,,,...,6.6e-05,X,X,,2010302.0,5.066,2454008.0,20060930.0,,183.242704
2,,,,,,,18.2,,,,...,3.1e-05,Xe,Xe,,2004660.0,4.493,2458260.0,20180520.0,,158.016312
3,,-4.614425e-14,,0.64,-0.08,5.2e-09,20.19,0.06,-0.33,0.16,...,6e-06,B,B,,2101955.0,5.525,2455439.0,20100830.0,,66.223068
4,,,,,,,18.0,,,,...,2.6e-05,Xk,Xk,,2065803.0,4.2,2457563.0,20160620.0,,319.251589


In [31]:
# Save the rankings table to CSV without the index column.
# NOTE(review): the file name says 10000 while the request asks for limit=100000 — confirm which is intended.
df.to_csv("data/asterank_10000.csv",index=False)

### Get compositions

In [4]:
# Endpoint queried for the compositions data.
api_composition = 'http://www.asterank.com/api/compositions'

In [9]:
# Download the compositions payload. The timeout and raise_for_status() make a
# hung or failed request fail here rather than as a JSON decode error below.
rsp_composition = requests.get(api_composition, timeout=60)
rsp_composition.raise_for_status()
rsp_composition_json = rsp_composition.json()
# Build a frame from the payload and transpose it before it is saved.
df_composition = pd.DataFrame(rsp_composition_json).T

In [11]:
# Persist the composition table; the index is written as well (no index=False here).
df_composition.to_csv('data/composition.csv')

### Get Details

Asterank also provides a JPL lookup API that returns more detailed data for a single object.

In [12]:
# Lookup endpoint and an example query; the space in the query is already
# percent-encoded as %20.
# NOTE(review): api_jpl is not referenced by the cells shown here —
# get_jpl_data falls back to the same URL on its own.
api_jpl = 'http://www.asterank.com/jpl/lookup?query='
jpl_id = '1999%20JU3'

In [23]:
def get_jpl_data(id, api_url = None, fields = None, tries = 10, timeout = 5):
    """
    Get more detailed data for one object from the JPL lookup API.

    Parameters
    ----------
    id : str
        Query value appended verbatim to ``api_url``; it must already be
        URL-encoded (e.g. ``'1999%20JU3'``).
    api_url : str, optional
        Lookup URL that ``id`` is appended to. Defaults to the Asterank JPL
        lookup endpoint.
    fields : list of str, optional
        Keys to extract from the JSON response. Defaults to a fixed list of
        twelve fields (aphelion, diameter, period, albedo, ...).
    tries : int, optional
        Maximum number of request attempts before giving up (default 10).
    timeout : float, optional
        Timeout in seconds passed to each request (default 5).

    Returns
    -------
    dict
        One entry per requested field. The value is ``None`` when the field is
        absent from the response or reported as ``-1``.

    Raises
    ------
    RuntimeError
        If the server answers 401, or if every attempt failed with a network
        error or a 500 response.
    """
    if api_url is None:
        api_url = 'http://www.asterank.com/jpl/lookup?query='

    if fields is None:
        fields = ['Aphelion (AU)', "Diameter (km)", "Semi-major Axis (AU)", "GM (km^3/s^2)",
                  'Rotation (hrs)', "Inclination (deg)", "Extent (km)", "Perihelion (AU)",
                  "Density (g/cm^3)", "Period (days)", "EMOID (AU)", "Albedo"]

    last_error = None
    for _ in range(tries):
        try:
            rsp = requests.get(api_url + id, timeout=timeout)
        except requests.exceptions.RequestException as err:
            # Network-level failure (connection error, timeout, ...): retry.
            last_error = err
            continue
        if rsp.status_code == 401:
            # Retrying cannot fix an authentication problem, so fail at once.
            raise RuntimeError('401: Check your authentication!')
        if rsp.status_code == 500:
            # Server-side error: worth another attempt.
            last_error = RuntimeError(f'500: Internal Error for {id}')
            continue
        break
    else:
        # Every attempt failed; chain the last failure so the cause is visible.
        raise RuntimeError(f'Retries failed for {id}') from last_error

    rsp_json = rsp.json()

    result = {}
    for field in fields:
        value = rsp_json.get(field)
        # -1 values are mapped to None, the same as absent fields.
        result[field] = value if value != -1 else None
    return result
    
    

In [24]:
# Look up the example object with the default endpoint and field list.
get_jpl_data(jpl_id)

{'Aphelion (AU)': 1.415882602923449,
 'Diameter (km)': None,
 'Semi-major Axis (AU)': 1.189545600425906,
 'GM (km^3/s^2)': None,
 'Rotation (hrs)': 7.63,
 'Inclination (deg)': 5.884035115119038,
 'Extent (km)': None,
 'Perihelion (AU)': 0.9632085979283624,
 'Density (g/cm^3)': None,
 'Period (days)': 473.8818437203764,
 'EMOID (AU)': 0.000225189,
 'Albedo': None}

## Data Transformation

Some notes:

1. rot_per: hours per rotation

In [35]:
# Order the rankings from highest to lowest score.
df_sorted = df.sort_values('score', ascending=False)

In [36]:
# Subset of ranking columns used in the rest of the notebook.
columns_of_interest = [
    'id', 'name', 'full_name',
    'spec', 'spec_B', 'spec_T',
    'score', 'producer', 'price',
    'diameter', 'diameter_sigma',
    'a', 'e', 'dv',
    'class', 'rot_per', 'profit',
]

In [37]:
# Show the ten highest-scoring rows, restricted to the columns of interest.
df_sorted.loc[:, columns_of_interest].head(10)

Unnamed: 0,id,name,full_name,spec,spec_B,spec_T,score,producer,price,diameter,diameter_sigma,a,e,dv,class,rot_per,profit
0,a0162173,Ryugu,162173 Ryugu (1999 JU3),Cg,Cg,,289.648575,Davide Farnocchia,82756160000.0,,,1.189601,0.190226,4.663076,APO,7.627,30080460000.0
1,a0010302,,10302 (1989 ML),X,X,,262.460209,Otto Matic,13941860000.0,,,1.272462,0.13656,4.888815,AMO,19.0,4379885000.0
2,a0004660,Nereus,4660 Nereus (1982 DB),Xe,Xe,,251.239956,Otto Matic,4714379000.0,0.33,,1.48876,0.359972,4.984853,APO,15.1,1390464000.0
3,a0101955,Bennu,101955 Bennu (1999 RQ36),B,B,,240.458324,Otto Matic,669957000.0,0.492,0.02,1.126391,0.203745,5.095923,APO,4.297,185002100.0
4,a0065803,Didymos,65803 Didymos (1996 GT),Xk,Xk,,232.392954,Davide Farnocchia,62253880000.0,0.78,0.08,1.644589,0.383833,5.163731,APO,2.2593,16394710000.0
5,a0436724,,436724 (2011 UW158),Xc,,,229.935533,Otto Matic,6693105000.0,,,1.620642,0.376221,5.18949,APO,0.61073,1735398000.0
6,a0001943,Anteros,1943 Anteros (1973 EC),L,L,S,208.337164,Otto Matic,5574298000000.0,2.3,,1.430444,0.256043,5.439294,AMO,2.86923,1249361000000.0
7,a0098943,,98943 (2001 CC21),L,L,,194.9778,Otto Matic,147043100000.0,,,1.032548,0.219359,5.635712,APO,5.017,29768150000.0
8,a0007474,,7474 (1992 TC),X,X,,192.81312,Otto Matic,84008010000.0,,,1.56564,0.292275,5.647982,AMO,5.54,16781630000.0
9,a0194006,,194006 (2001 SG10),X,X,,179.392919,Otto Matic,3050146000.0,,,1.448534,0.424268,5.879627,APO,,544598800.0


In [38]:
def save_as_yaml(ast_dic, out_dir='data/yaml'):
    """
    Write one asteroid record to ``<out_dir>/<id>.yml``.

    Parameters
    ----------
    ast_dic : dict
        Record to save. Its ``'id'`` entry is used as the file name.
    out_dir : str, optional
        Directory the YAML file is written to; it is created when missing.
        Defaults to ``'data/yaml'``.
    """
    # Unwrap every NumPy scalar (not only float64) into its built-in Python
    # equivalent so the values are dumped as plain YAML scalars.
    yaml_data = {key: (val.item() if isinstance(val, np.generic) else val) for key, val in ast_dic.items()}

    # Create the target directory so a fresh checkout can run this cell.
    os.makedirs(out_dir, exist_ok=True)
    path = os.path.join(out_dir, '{}.yml'.format( str( yaml_data.get('id') ) ))

    # Set the encoding on the file handle: allow_unicode writes non-ASCII
    # characters as-is, which must not depend on the platform default encoding.
    with open(path, 'w', encoding='utf-8') as ymlfile:
        yaml.dump( yaml_data , ymlfile, default_flow_style=False, allow_unicode=True)

In [39]:
# Keep the 100 highest-scoring rows, restricted to the columns of interest.
df_100 = df_sorted.loc[:, columns_of_interest].head(100)

Save the 100 highest-scoring asteroids as individual YAML files, which serve as a text database.

In [44]:
# Write one YAML file per top-100 row. Iterating df_100 (rather than df_sorted)
# keeps only the columns of interest in each file.
for record in df_100.to_dict(orient='records'):
    save_as_yaml(record)