In [3]:
import pandas as pd

# Relative path of the input CSV (resolved against the current working directory)
csv_file_path = 'updated_all_columns.csv'

# Parse the whole file into a DataFrame
data = pd.read_csv(filepath_or_buffer=csv_file_path)

# Display the leading five rows as a quick look at the columns and values
data.head(n=5)


Unnamed: 0,rowid,pl_name,hostname,pl_letter,hd_name,hip_name,tic_id,gaia_id,sy_snum,sy_pnum,...,sy_kepmagerr1,sy_kepmagerr2,sy_kepmag_reflink,pl_nnotes,st_nphot,st_nrvc,st_nspec,pl_nespec,pl_ntranspec,pl_ndispec
0,1,11 Com b,11 Com,b,HD 107383,HIP 60202,TIC 72437047,Gaia DR2 3946945413106333696,2,1,...,0.0,0.0,<a refstr=STASSUN_ET_AL__2019 href=https://ui....,2.0,1,2,0,0,0,0
1,2,11 UMi b,11 UMi,b,HD 136726,HIP 74793,TIC 230061010,Gaia DR2 1696798367260229376,1,1,...,0.0,0.0,<a refstr=STASSUN_ET_AL__2019 href=https://ui....,0.0,1,1,0,0,0,0
2,3,14 And b,14 And,b,HD 221345,HIP 116076,TIC 333225860,Gaia DR2 1920113512486282240,1,1,...,0.0,0.0,<a refstr=STASSUN_ET_AL__2019 href=https://ui....,0.0,1,1,0,0,0,0
3,4,14 Her b,14 Her,b,HD 145675,HIP 79248,TIC 219483057,Gaia DR2 1385293808145621504,1,2,...,0.0,0.0,<a refstr=STASSUN_ET_AL__2019 href=https://ui....,0.0,1,4,1,0,0,0
4,5,16 Cyg B b,16 Cyg B,b,HD 186427,HIP 96901,TIC 27533327,Gaia DR2 2135550755683407232,3,1,...,0.0,0.0,<a refstr=STASSUN_ET_AL__2019 href=https://ui....,5.0,1,4,3,0,0,0


In [4]:
import json

# Columns copied into each star-system entry of the JSON output.
# List order is the key order of the resulting dict, so it is kept as-is;
# the line breaks below only group the names by their prefix.
star_system_columns = [
    # host name, sy_* counts and cb_flag
    "hostname", "sy_snum", "sy_pnum", "sy_mnum", "cb_flag",
    # st_* columns
    "st_spectype", "st_teff", "st_rad", "st_mass", "st_met", "st_lum",
    "st_logg", "st_age", "st_dens", "st_vsin", "st_rotp", "st_radv",
    # remaining sy_* columns
    "sy_pm", "sy_pmra", "sy_pmdec", "sy_dist", "sy_plx",
    # coordinate columns
    "ra", "dec", "glat", "glon", "elat", "elon",
]

# Columns copied into each entry of a star system's "planets" list.
planet_columns = [
    # name and discovery columns
    "pl_name", "discoverymethod", "disc_facility", "disc_telescope",
    # pl_* columns (plus ttv_flag), in output order
    "pl_orbper", "pl_orbsmax", "pl_angsep", "pl_rade", "pl_bmasse",
    "pl_dens", "pl_orbeccen", "pl_insol", "pl_eqt", "pl_orbincl",
    "ttv_flag",
    "pl_imppar", "pl_trandep", "pl_trandur", "pl_ratdor", "pl_ratror",
    "pl_occdep", "pl_orbtper", "pl_orblper", "pl_rvamp",
    "pl_projobliq", "pl_trueobliq",
]

# Helpers that turn the flat table into the nested star-system/planet structure
def _json_safe(value):
    """Return ``value`` in a form that serializes to strictly valid JSON.

    Missing scalars (NaN, None, NaT, pd.NA) become ``None`` so they are
    written as ``null`` rather than the non-standard ``NaN`` token, and
    numpy scalars are unwrapped to the equivalent built-in Python type.
    """
    if pd.api.types.is_scalar(value) and pd.isna(value):
        return None
    # numpy scalars expose .item(), which yields the matching built-in value
    if hasattr(value, "item"):
        return value.item()
    return value


def generate_star_system_json(df, system_cols=None, planet_cols=None):
    """Group a flat planet table into one nested record per host star.

    Args:
        df: DataFrame with one row per planet. It must contain a
            ``"hostname"`` column plus every column named in
            ``system_cols`` and ``planet_cols``.
        system_cols: Column names copied into each star-system record.
            Defaults to the module-level ``star_system_columns``.
        planet_cols: Column names copied into each planet record.
            Defaults to the module-level ``planet_columns``.

    Returns:
        ``{"starSystems": [...]}`` where each element holds the
        ``system_cols`` values taken from the first row of that host's
        group, plus a ``"planets"`` list with one dict per row of the group.
        Groups come out in ``groupby``'s default (sorted) key order, and
        rows whose hostname is missing are left out by ``groupby``'s
        default handling of missing keys. Missing values are emitted as
        ``None``.

    Raises:
        KeyError: if ``"hostname"`` or a requested column is absent.
    """
    if system_cols is None:
        system_cols = star_system_columns
    if planet_cols is None:
        planet_cols = planet_columns
    planet_cols = list(planet_cols)

    star_systems = []
    for _, group in df.groupby("hostname"):
        # System-level values are repeated on every planet row; use the first
        first_row = group.iloc[0]
        system = {col: _json_safe(first_row[col]) for col in system_cols}

        # One dict per planet row, built column-wise instead of via iterrows()
        system["planets"] = [
            {col: _json_safe(val) for col, val in record.items()}
            for record in group[planet_cols].to_dict("records")
        ]
        star_systems.append(system)

    return {"starSystems": star_systems}

# Keep only the columns that end up in the JSON: star-system fields first,
# then planet fields
filtered_data = data.loc[:, star_system_columns + planet_columns]

# Build the nested {"starSystems": [...]} structure
star_system_json = generate_star_system_json(df=filtered_data)

# Write the structure to disk with two-space indentation; the relative path
# is resolved against the current working directory
output_file_path = 'star_systems_data.json'
with open(output_file_path, mode='w') as f:
    json.dump(star_system_json, fp=f, indent=2)

# Leave the path as the cell's last expression so the notebook displays it
output_file_path


'star_systems_data.json'