In [1]:
# %pip install numpy
# %pip install pandas
# %pip install tensorflow
# %pip install scikit-learn

In [2]:
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
import unicodedata

In [3]:
# Load the orbital-elements table (one row per asteroid).
orbits_df = pd.read_csv("orbits.csv", encoding = 'utf-8')

# Put every object name into Unicode NFKD (compatibility-decomposed) form so
# that names which differ only in their Unicode representation become identical
# strings before they are compared with names from other tables.
# `Series.str.normalize` applies the normalisation element-wise; it replaces a
# row-by-row `iloc` loop that built one temporary row Series per asteroid and
# treated index labels as row positions. Missing names (NaN) pass through
# unchanged instead of raising.
normalized_names = orbits_df["Object Name"].str.normalize("NFKD")

# Kept for backward compatibility with any later cell that reads it:
# maps each row label of `orbits_df` to its normalised name.
unicode_adjusted_names = normalized_names.to_dict()

orbits_df["Object Name"] = normalized_names
orbits_df.head()

Unnamed: 0,Object Name,Object Classification,Epoch (TDB),Orbit Axis (AU),Orbit Eccentricity,Orbit Inclination (deg),Perihelion Argument (deg),Node Longitude (deg),Mean Anomoly (deg),Perihelion Distance (AU),Aphelion Distance (AU),Orbital Period (yr),Minimum Orbit Intersection Distance (AU),Orbital Reference,Asteroid Magnitude
0,433 Eros,Amor Asteroid,57800,1.4579,0.2226,10.8277,178.805,304.3265,319.3111,1.1335,1.78,1.76,0.1492,598,11.16
1,719 Albert,Amor Asteroid,57800,2.6385,0.5479,11.5822,156.1409,183.9204,224.5535,1.1928,4.08,4.29,0.2004,78,15.5
2,887 Alinda,Amor Asteroid,57800,2.4787,0.5671,9.3561,350.3482,110.5444,351.373,1.0731,3.88,3.9,0.0925,188,13.4
3,1036 Ganymed,Amor Asteroid,57800,2.6628,0.5338,26.6929,132.469,215.5551,92.564,1.2413,4.08,4.35,0.3421,597,9.45
4,1221 Amor,Amor Asteroid,57800,1.9191,0.4356,11.8795,26.6572,171.3448,313.7379,1.0832,2.76,2.66,0.1068,70,17.7


In [4]:
# Read the impacts table from disk and preview its first rows.
impacts_df = pd.read_csv(
    "impacts.csv",
    encoding="utf-8",
)
impacts_df.head()

Unnamed: 0,Object Name,Period Start,Period End,Possible Impacts,Cumulative Impact Probability,Asteroid Velocity,Asteroid Magnitude,Asteroid Diameter (km),Cumulative Palermo Scale,Maximum Palermo Scale,Maximum Torino Scale
0,2006 WP1,2017,2017,1,5.2e-09,17.77,28.3,0.007,-8.31,-8.31,0
1,2013 YB,2017,2046,23,7.6e-05,8.98,31.4,0.002,-6.6,-6.96,0
2,2008 US,2017,2062,30,1.6e-05,18.33,31.4,0.002,-6.48,-6.87,0
3,2010 VR139,2017,2076,24,2e-07,4.99,26.7,0.016,-6.83,-6.95,0
4,2015 ME131,2017,2096,85,2.3e-08,19.46,19.2,0.497,-3.85,-4.3,0


In [5]:
targets_dict = {}
all_PHO = impacts_df["Object Name"]
all_asteroids = orbits_df["Object Name"]

# Index the impact-table objects once as (year, codename) pairs taken from the
# first two whitespace-separated tokens of each name, e.g.
# "2006 WP1" -> ("2006", "WP1"). Names with fewer than two tokens cannot be
# matched by this rule and are skipped rather than raising IndexError.
PHO_designations = set()
for current_PHO in all_PHO:
    PHO_tokens = current_PHO.split()
    if len(PHO_tokens) >= 2:
        PHO_designations.add((PHO_tokens[0], PHO_tokens[1]))

# Create correspondence between datasets, i.e. generate targets.
# An asteroid gets target 1 when some impact-table object's year AND codename
# both appear among the asteroid's name tokens (parentheses stripped), and 0
# otherwise. This is the same rule as a full pairwise comparison, but each
# asteroid now costs a handful of set lookups instead of a scan over every
# impact-table row (the old trailing `continue` never cut that scan short).
for asteroid_name in all_asteroids:
    current_asteroid = [
        codename.replace("(", "").replace(")", "")
        for codename in asteroid_name.split()
    ]
    is_PHO = any(
        (year, codename) in PHO_designations
        for year in current_asteroid
        for codename in current_asteroid
    )
    targets_dict[asteroid_name] = 1 if is_PHO else 0

# Series indexed by (normalised) object name, in the row order of `orbits_df`.
targets = pd.Series(targets_dict)

In [6]:
# Sanity check: print, one per line,
#   1. the number of objects in the impacts table,
#   2. the number of asteroids flagged with target 1,
#   3. the number of generated targets,
#   4. the number of asteroids in the orbits table.
# Lines 3 and 4 should agree (one target per asteroid).
sanity_counts = (
    len(all_PHO),
    sum(targets),
    len(targets),
    len(all_asteroids),
)
for sanity_count in sanity_counts:
    print(sanity_count)

683
679
15635
15635


In [7]:
# Following the 70/30 rule for training/validation.
# The shuffle is seeded so that Restart Kernel -> Run All reproduces exactly
# the same train/test rows (and therefore the same downstream results);
# without `random_state` every execution draws a different split.
SPLIT_SEED = 42
train_df, test_df = train_test_split(orbits_df, test_size=0.3, random_state=SPLIT_SEED)

train_df.head()

Unnamed: 0,Object Name,Object Classification,Epoch (TDB),Orbit Axis (AU),Orbit Eccentricity,Orbit Inclination (deg),Perihelion Argument (deg),Node Longitude (deg),Mean Anomoly (deg),Perihelion Distance (AU),Aphelion Distance (AU),Orbital Period (yr),Minimum Orbit Intersection Distance (AU),Orbital Reference,Asteroid Magnitude
7565,(2010 VQ139),Amor Asteroid,55514,1.7095,0.3908,2.7045,178.1125,231.4126,1.0544,1.0413,2.38,2.24,0.0512,7,27.12
12387,(2015 FM120),Amor Asteroid,57800,1.2079,0.1555,6.2036,133.3312,14.6413,182.0671,1.0201,1.4,1.33,0.0532,6,26.2
167,11054 (1991 FA),Amor Asteroid,57800,1.9786,0.4474,3.077,92.1098,339.2283,162.926,1.0934,2.86,2.78,0.1217,237,17.0
11845,(2014 WK368),Apollo Asteroid (Hazard),57800,2.1566,0.712,14.2912,105.1031,237.2535,269.7616,0.6211,3.69,3.17,0.0346,4,21.4
3702,(2004 JU20),Amor Asteroid,53139,1.4957,0.2759,5.6083,138.6207,52.5559,23.941,1.0831,1.91,1.83,0.0987,5,25.3


In [1]:
# Split the respective targets.
# Prerequisite: `targets`, `train_df` and `test_df` must already exist, so run
# the notebook top to bottom (a fresh kernel raises NameError here otherwise).
#
# `targets` is indexed by object name, not by row number, so each split is
# looked up explicitly by the "Object Name" label of its rows. Indexing with the
# integer `train_df.index` only worked through pandas' deprecated positional
# fallback for integer keys on a non-integer index. A name missing from
# `targets` raises KeyError instead of silently picking an unrelated row.
train_targets = targets.loc[train_df["Object Name"]]
test_targets = targets.loc[test_df["Object Name"]]


NameError: name 'targets' is not defined