In [33]:
from collections import defaultdict

import astropy as ast
import numpy as np
import pandas as pd
from astroplan.moon import moon_phase_angle
from astropy.coordinates import solar_system_ephemeris, EarthLocation, GeocentricTrueEcliptic, get_body, SkyCoord, Distance
from tqdm import tqdm_notebook
from tqdm.notebook import tqdm

### Generating Data From Celestial Bodies

In [34]:
# Names of every celestial body for which synthetic data is generated
BODY_NAMES = [
    'mercury',
    'venus',
    'mars',
    'jupiter',
    'saturn',
    'moon',
    'sun',
]

In [35]:
def get_coordinates(body):
    """Return the spherical and Cartesian coordinates of a scalar SkyCoord.

    Parameters
    ----------
    body : astropy.coordinates.SkyCoord
        A single (scalar) coordinate that carries a distance.

    Returns
    -------
    tuple of float
        ``(lon, lat, r, x, y, z)`` where ``lon`` and ``lat`` are the
        longitude-like and latitude-like angles of ``body``'s frame in
        degrees, ``r`` is the distance in AU and ``x``, ``y``, ``z`` are the
        Cartesian components in AU.
    """
    # Read the numbers straight from the representation objects rather than
    # parsing ``to_string()`` output: the string form is rounded to display
    # precision and the unit had to be guessed from its last two characters.
    spherical = body.spherical
    lon = float(spherical.lon.deg)
    lat = float(spherical.lat.deg)

    # Let the unit machinery convert to AU (covers km and any other length
    # unit) instead of dividing by an approximate hard-coded constant.
    r = float(spherical.distance.to('AU').value)

    cartesian = body.cartesian
    x = float(cartesian.x.to('AU').value)
    y = float(cartesian.y.to('AU').value)
    z = float(cartesian.z.to('AU').value)

    return (lon, lat, r, x, y, z)

In [56]:
def random_dates(start, end, n=10):
    """Draw ``n`` random timestamps, at one-second resolution, between two instants.

    ``start`` and ``end`` must expose an integer ``.value`` (the callers in
    this notebook pass pandas timestamps); it is floor-divided by 10**9 and
    the result is interpreted as seconds. Integers are drawn with
    ``np.random.randint``, so ``start`` is inclusive and ``end`` is exclusive.

    Returns the drawn instants converted with ``pd.to_datetime(..., unit='s')``,
    in the order they were drawn (not sorted).
    """
    scale = 10**9
    lower_seconds = start.value // scale
    upper_seconds = end.value // scale

    drawn_seconds = np.random.randint(lower_seconds, upper_seconds, n)
    return pd.to_datetime(drawn_seconds, unit='s')

def add_noise(coords):
    """Return a copy of a coordinate 6-tuple with independent Gaussian noise added.

    ``coords`` is ``(theta, phi, r, x, y, z)`` in (deg, deg, AU, AU, AU, AU).
    Every component is perturbed, including ``r``: the two angles get
    zero-mean noise with standard deviation 1, and ``r``, ``x``, ``y``, ``z``
    each get zero-mean noise with standard deviation 0.1. The Cartesian and
    spherical components are perturbed independently of one another.
    """
    theta, phi, r, x, y, z = coords

    angle_sigma = 1
    length_sigma = 0.1

    # Draws are made in tuple order so a seeded run stays reproducible
    noisy_theta = theta + np.random.normal(0, angle_sigma)
    noisy_phi = phi + np.random.normal(0, angle_sigma)
    noisy_r = r + np.random.normal(0, length_sigma)
    noisy_x = x + np.random.normal(0, length_sigma)
    noisy_y = y + np.random.normal(0, length_sigma)
    noisy_z = z + np.random.normal(0, length_sigma)

    return (noisy_theta, noisy_phi, noisy_r, noisy_x, noisy_y, noisy_z)

# Sampling window: five years, from 1995-01-01 up to 2000-01-01
start = pd.to_datetime('1995-01-01-00-00-00')
end = pd.to_datetime('2000-01-01-00-00-00')

# Regular daily grid over the same window
times = pd.date_range(
    start="1995-01-01-00-00-00",
    end="2000-01-01-00-00-00",
    freq='1D',
)

# 4000 random instants inside the window, sorted chronologically
times_rand = random_dates(start, end, n=4000).sort_values()

# Location is the Medicina Radio Observatory, located in Italy. Chosen for proximity to Greece
loc = EarthLocation.of_site('medicina')

# Accumulator mapping column name -> list of values, one entry per sampled time
rows = defaultdict(list)


In [57]:
# Display the sorted random sample times produced by random_dates
times_rand

DatetimeIndex(['1995-01-03 19:07:47', '1995-01-03 20:17:40',
               '1995-01-04 11:52:44', '1995-01-04 14:24:05',
               '1995-01-05 12:46:54', '1995-01-05 18:39:30',
               '1995-01-07 05:03:40', '1995-01-08 06:08:07',
               '1995-01-08 07:34:36', '1995-01-08 10:54:00',
               ...
               '1999-12-27 18:58:15', '1999-12-28 16:39:02',
               '1999-12-28 18:36:39', '1999-12-29 22:50:49',
               '1999-12-30 10:31:59', '1999-12-30 16:29:28',
               '1999-12-30 18:32:38', '1999-12-31 11:23:36',
               '1999-12-31 14:25:22', '1999-12-31 15:50:34'],
              dtype='datetime64[ns]', length=4000, freq=None)

In [58]:
# Generate coordinate data in terms of spherical Geocentric Celestial Reference System (GCRS), default for astropy
# One output row per sampled time: time, observer location, moon phase, and six noisy
# coordinate values for every body in BODY_NAMES. The result is written to CSV.

# Start from an empty accumulator so re-running this cell does not append duplicate rows
rows = defaultdict(list)

# Column suffixes, in the order get_coordinates returns its values
# (so '<body>_theta' holds the longitude-like angle and '<body>_phi' the latitude-like one)
coord_strings = ['theta', 'phi', 'r', 'x', 'y', 'z']

# The observer location is the same for every row; stringify it once
location_label = str(loc)

for timestamp in tqdm(times_rand):
    obs_time = ast.time.Time(timestamp.to_pydatetime())

    with solar_system_ephemeris.set('builtin'):
        bodies = [get_body(body_name, obs_time, loc) for body_name in BODY_NAMES]

    rows['time'].append(obs_time)
    rows['location'].append(location_label)
    rows['moon_phase'].append(moon_phase_angle(obs_time).value)

    for body_name, body in zip(BODY_NAMES, bodies):
        coordinates = add_noise(get_coordinates(body))
        for coord_name, value in zip(coord_strings, coordinates):
            rows[body_name + '_' + coord_name].append(value)

celestial_bodies = pd.DataFrame(rows)
celestial_bodies.to_csv('five_year_data_rand_4k.csv', index=False)

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=4000.0), HTML(value='')))




In [60]:
# Read back the generated CSV; its coordinates still need converting
# to the geocentric ecliptic coordinate system
data = pd.read_csv(filepath_or_buffer='five_year_data_rand_4k.csv')

In [61]:
# Convert coordinates to the standard geocentric true ecliptic coordinate system
# See https://docs.astropy.org/en/stable/api/astropy.coordinates.GeocentricTrueEcliptic.html for more documentation
#
# NOTE(review): the SkyCoord below is built without an obstime or observer location, and the
# target frame is created with its default attributes, so the per-row 'time' column is not
# used in the transformation. Confirm this is the intended approximation.

rows = defaultdict(list)

# Output column suffixes, in the order get_coordinates returns its values
coord_strings = ['lambda', 'beta', 'delta', 'x', 'y', 'z']

# The target frame is identical for every row; build it once
ecliptic_frame = GeocentricTrueEcliptic()

for name in tqdm(BODY_NAMES):
    # In the generated CSV '<body>_theta' holds the longitude-like angle and
    # '<body>_phi' the latitude-like angle, which is the order SkyCoord expects
    theta_col = data[name + '_theta']
    phi_col = data[name + '_phi']
    r_col = data[name + '_r']
    for theta, phi, r in zip(theta_col, phi_col, r_col):
        # abs(): presumably guards against the added noise pushing a small distance negative
        gcrs = SkyCoord(theta, phi, abs(r), frame='gcrs', unit=('deg', 'deg', 'AU'))
        coordinates = get_coordinates(gcrs.transform_to(ecliptic_frame))
        for coord_name, value in zip(coord_strings, coordinates):
            rows[name + '_' + coord_name].append(value)

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  """


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=7.0), HTML(value='')))




In [62]:
# Assemble the final dataset and write it to final_five_year_data_rand_4k.csv
#
# Columns taken from `rows` are the spherical and Cartesian geocentric true ecliptic
# coordinate system values as described in the writeup. Three columns are copied over
# from the generated data:
#   time       - given in yyyy-mm-dd hh:mm:ss format; becomes the index
#   location   - string form of the observer location
#                NOTE(review): the stored values look like geocentric (x, y, z) in m
#                rather than (longitude, latitude, height) - confirm
#   moon_phase - moon phase angle value
final_data = (
    pd.DataFrame(rows)
    .assign(
        time=data['time'],
        location=data['location'],
        moon_phase=data['moon_phase'],
    )
    .set_index('time')
)
final_data.to_csv('final_five_year_data_rand_4k.csv')

In [63]:
# Display the final frame (the notebook truncates rows and columns when rendering)
final_data[:]

Unnamed: 0_level_0,mercury_lambda,mercury_beta,mercury_delta,mercury_x,mercury_y,mercury_z,venus_lambda,venus_beta,venus_delta,venus_x,...,moon_y,moon_z,sun_lambda,sun_beta,sun_delta,sun_x,sun_y,sun_z,location,moon_phase
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1995-01-03 19:07:47,294.866,-0.552277,1.216875,0.511667,-1.104012,-0.011729,236.971,4.76800,0.657228,-0.356994,...,-0.035454,0.003709,283.415,0.708785,0.916671,0.212658,-0.891591,0.011340,"(4461340.48313723, 919588.07009129, 4449530.22...",2.581318
1995-01-03 20:17:40,295.015,-3.674560,1.416766,0.597855,-1.281230,-0.090799,236.296,3.14608,0.650170,-0.360239,...,-0.063127,0.008953,285.446,-0.348421,1.023460,0.272577,-0.986475,-0.006224,"(4461340.48313723, 919588.07009129, 4449530.22...",2.570380
1995-01-04 11:52:44,295.458,-2.808220,1.281689,0.550263,-1.155852,-0.062794,237.404,4.33002,0.552982,-0.297051,...,-0.021643,0.003176,283.669,-1.088400,0.813486,0.192201,-0.790304,-0.015452,"(4461340.48313723, 919588.07009129, 4449530.22...",2.425523
1995-01-04 14:24:05,296.083,-3.427370,1.361550,0.597562,-1.220702,-0.081398,236.220,3.76935,0.817890,-0.453767,...,-0.026985,0.003061,284.697,-1.189410,0.925959,0.234867,-0.895471,-0.019221,"(4461340.48313723, 919588.07009129, 4449530.22...",2.402356
1995-01-05 12:46:54,298.036,-1.809380,1.281460,0.602024,-1.130518,-0.040461,238.364,4.69275,0.623361,-0.325873,...,-0.031115,0.007527,284.352,1.168290,0.949182,0.235228,-0.919370,0.019353,"(4461340.48313723, 919588.07009129, 4449530.22...",2.200448
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1999-12-30 16:29:28,268.958,-1.639300,1.410069,-0.025633,-1.409259,-0.040338,239.104,1.77185,0.962606,-0.494043,...,-0.024805,0.005173,279.594,-0.725973,0.922299,0.153702,-0.909326,-0.011686,"(4461340.48313723, 919588.07009129, 4449530.22...",1.788980
1999-12-30 18:32:38,270.068,-1.120150,1.504713,0.001793,-1.504424,-0.029416,241.031,0.24035,1.341766,-0.649855,...,-0.063809,0.014619,278.055,0.499126,0.782508,0.109648,-0.774757,0.006817,"(4461340.48313723, 919588.07009129, 4449530.22...",1.805876
1999-12-31 11:23:36,270.426,-0.450896,1.303761,0.009686,-1.303685,-0.010260,239.909,2.47861,1.140231,-0.571155,...,-0.063243,0.007335,278.643,-1.057710,1.031319,0.154963,-1.019433,-0.019038,"(4461340.48313723, 919588.07009129, 4449530.22...",1.943367
1999-12-31 14:25:22,271.368,-0.200693,1.317498,0.031460,-1.317114,-0.004615,241.379,2.45934,1.128559,-0.540097,...,-0.013578,0.002255,279.116,0.610643,1.022048,0.161923,-1.009081,0.010893,"(4461340.48313723, 919588.07009129, 4449530.22...",1.967882
