In [1]:
import pandas as pd

# Read the CSV export into a DataFrame; later cells work on `data`
file_path = 'updated_all_columns.csv'
data = pd.read_csv(filepath_or_buffer=file_path)

# Preview the leading rows to see which columns are available
data.head(n=5)


Unnamed: 0,rowid,pl_name,hostname,pl_letter,hd_name,hip_name,tic_id,gaia_id,sy_snum,sy_pnum,...,sy_kepmagerr1,sy_kepmagerr2,sy_kepmag_reflink,pl_nnotes,st_nphot,st_nrvc,st_nspec,pl_nespec,pl_ntranspec,pl_ndispec
0,1,11 Com b,11 Com,b,HD 107383,HIP 60202,TIC 72437047,Gaia DR2 3946945413106333696,2,1,...,0.0,0.0,<a refstr=STASSUN_ET_AL__2019 href=https://ui....,2.0,1,2,0,0,0,0
1,2,11 UMi b,11 UMi,b,HD 136726,HIP 74793,TIC 230061010,Gaia DR2 1696798367260229376,1,1,...,0.0,0.0,<a refstr=STASSUN_ET_AL__2019 href=https://ui....,0.0,1,1,0,0,0,0
2,3,14 And b,14 And,b,HD 221345,HIP 116076,TIC 333225860,Gaia DR2 1920113512486282240,1,1,...,0.0,0.0,<a refstr=STASSUN_ET_AL__2019 href=https://ui....,0.0,1,1,0,0,0,0
3,4,14 Her b,14 Her,b,HD 145675,HIP 79248,TIC 219483057,Gaia DR2 1385293808145621504,1,2,...,0.0,0.0,<a refstr=STASSUN_ET_AL__2019 href=https://ui....,0.0,1,4,1,0,0,0
4,5,16 Cyg B b,16 Cyg B,b,HD 186427,HIP 96901,TIC 27533327,Gaia DR2 2135550755683407232,3,1,...,0.0,0.0,<a refstr=STASSUN_ET_AL__2019 href=https://ui....,5.0,1,4,3,0,0,0


In [2]:
import numpy as np

# Derive the habitability cut-offs from the distribution of the data itself
def adjust_habitability_thresholds(data, lower_q=0.1, upper_q=0.9, density_q=0.2):
    """Compute data-driven habitability thresholds from column quantiles.

    The accepted band for semi-major axis (``pl_orbsmax``), planet radius
    (``pl_rade``) and stellar temperature (``st_teff``) is the interval between
    the ``lower_q`` and ``upper_q`` quantiles of the respective column. The
    minimum planet density (``pl_dens``) is the ``density_q`` quantile.

    Note that these cut-offs are purely statistical: they keep the central
    part of each distribution and do not guarantee any particular share of
    rows ending up flagged as habitable.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the columns ``pl_orbsmax``, ``pl_rade``, ``st_teff`` and
        ``pl_dens``. Missing values are skipped by ``Series.quantile``.
    lower_q, upper_q : float, optional
        Quantile levels (0..1) bounding the accepted band. Defaults 0.1 / 0.9.
    density_q : float, optional
        Quantile level (0..1) used as the minimum density. Default 0.2.

    Returns
    -------
    dict
        Keys ``semi_major_axis_min``, ``semi_major_axis_max``,
        ``planet_radius_min``, ``planet_radius_max``, ``stellar_temp_min``,
        ``stellar_temp_max`` and ``planet_density_min``.

    Raises
    ------
    ValueError
        If the quantile levels are outside [0, 1] or ``lower_q > upper_q``.
    """
    if not (0.0 <= lower_q <= upper_q <= 1.0):
        raise ValueError(
            f"Expected 0 <= lower_q <= upper_q <= 1, got lower_q={lower_q}, upper_q={upper_q}"
        )
    if not (0.0 <= density_q <= 1.0):
        raise ValueError(f"Expected 0 <= density_q <= 1, got density_q={density_q}")

    def _band(column):
        # Lower and upper quantile of one column, in that order
        series = data[column]
        return series.quantile(lower_q), series.quantile(upper_q)

    semi_major_axis_min, semi_major_axis_max = _band('pl_orbsmax')
    planet_radius_min, planet_radius_max = _band('pl_rade')
    stellar_temp_min, stellar_temp_max = _band('st_teff')
    # Only a lower bound is applied to density (used downstream as the "rocky" test)
    planet_density_min = data['pl_dens'].quantile(density_q)

    return {
        "semi_major_axis_min": semi_major_axis_min,
        "semi_major_axis_max": semi_major_axis_max,
        "planet_radius_min": planet_radius_min,
        "planet_radius_max": planet_radius_max,
        "stellar_temp_min": stellar_temp_min,
        "stellar_temp_max": stellar_temp_max,
        "planet_density_min": planet_density_min,
    }

# Apply the habitability logic based on the dynamic thresholds
def is_habitable_with_dynamic_thresholds(row, thresholds):
    """Decide whether one planet row satisfies every habitability criterion.

    Parameters
    ----------
    row : mapping-like (e.g. pandas.Series or dict)
        Must support ``.get``; the keys ``pl_orbsmax``, ``pl_rade``,
        ``st_teff`` and ``pl_dens`` are read.
    thresholds : dict
        Must provide ``semi_major_axis_min``/``_max``, ``planet_radius_min``/
        ``_max``, ``stellar_temp_min``/``_max`` and ``planet_density_min``.

    Returns
    -------
    bool
        ``True`` only when all four values are present and every criterion
        holds; ``False`` otherwise. Always a plain Python ``bool``.
    """
    semi_major_axis = row.get('pl_orbsmax', None)
    planet_radius = row.get('pl_rade', None)
    stellar_temp = row.get('st_teff', None)
    planet_density = row.get('pl_dens', None)

    # pandas represents missing numeric cells as NaN rather than None, so an
    # `is None` test never catches them; pd.isna covers both None and NaN.
    required_values = (semi_major_axis, planet_radius, stellar_temp, planet_density)
    if any(pd.isna(value) for value in required_values):
        return False

    # Check conditions for habitability (all bounds are inclusive)
    within_habitable_zone = (
        thresholds['semi_major_axis_min'] <= semi_major_axis <= thresholds['semi_major_axis_max']
    )
    suitable_size = (
        thresholds['planet_radius_min'] <= planet_radius <= thresholds['planet_radius_max']
    )
    suitable_stellar_temp = (
        thresholds['stellar_temp_min'] <= stellar_temp <= thresholds['stellar_temp_max']
    )
    rocky_planet = planet_density >= thresholds['planet_density_min']

    # Normalise to a built-in bool: comparisons on numpy scalars yield numpy.bool_
    return bool(within_habitable_zone and suitable_size and suitable_stellar_temp and rocky_planet)

# Derive the data-driven cut-offs once for the whole table
thresholds = adjust_habitability_thresholds(data)

# Flag each row by running the habitability check row-wise;
# `thresholds` is forwarded as the second positional argument
data['habitable_zone'] = data.apply(
    is_habitable_with_dynamic_thresholds,
    axis=1,
    args=(thresholds,),
)

# Tally the flagged rows; everything else counts as non-habitable
habitable_count = data['habitable_zone'].sum()
non_habitable_count = data.shape[0] - habitable_count

# Report both totals
print(f"Habitable planets: {habitable_count}")
print(f"Non-habitable planets: {non_habitable_count}")


Habitable planets: 2691
Non-habitable planets: 3074


In [3]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

# This cell expects `data` to be the freshly loaded DataFrame from the cells above.
# NOTE: it overwrites the listed columns of `data` in place with normalized values,
# so run it once per kernel session (Restart & Run All) rather than repeatedly.

# Planet-level columns to normalize
planet_columns = [
    'pl_orbsmax',    # semi_major_axis
    'pl_orbeccen',   # eccentricity
    'pl_orbper',     # orbital_period
    'pl_orbincl',    # inclination
    'pl_rade',       # radius
    'pl_angsep'      # angular_separation
]

# Star-system-level columns to normalize
star_system_columns = [
    'sy_dist',       # sy_dist
    'ra',            # ra
    'dec',           # dec
    'st_teff',       # st_teff
    'st_lum',        # st_lum
    'pl_imppar',     # distance_from_center
    'st_vsin'        # rotation_speed
]

# Snapshot the radii BEFORE scaling. After min-max scaling every radius lies in
# [0, 1], so the `> 1.0` size test in adjust_speed could never be true and all
# planets would take the "smaller planet" branch. The threshold is therefore
# evaluated against the original values.
# NOTE(review): presumably pl_rade is in Earth radii, making 1.0 "Earth-sized" - confirm.
raw_planet_radius = data['pl_rade'].copy()

# Create a MinMaxScaler object for normalization (each column is mapped to [0, 1])
scaler = MinMaxScaler()

# Normalize planet columns
data[planet_columns] = scaler.fit_transform(data[planet_columns])

# Normalize star system columns (the scaler is refit on this second column set)
data[star_system_columns] = scaler.fit_transform(data[star_system_columns])

# Global scale factor applied to every rotation speed
size_scale_factor = 1.5
data['st_vsin'] = data['st_vsin'] * size_scale_factor


def adjust_speed(row):
    """Scale a row's rotation speed according to the planet's size.

    Parameters
    ----------
    row : mapping-like (e.g. pandas.Series)
        Must provide ``pl_rade`` and ``st_vsin``. ``pl_rade`` has to be on the
        scale the 1.0 threshold refers to (the un-normalized radius); a
        min-max scaled radius never exceeds 1.0.

    Returns
    -------
    float
        ``st_vsin * 1.5`` when ``pl_rade > 1.0``, otherwise ``st_vsin * 0.8``.
        A missing (NaN) radius compares false and so takes the 0.8 branch.
    """
    if row['pl_rade'] > 1.0:  # Assuming 1.0 is the threshold for larger planets
        return row['st_vsin'] * 1.5  # Larger planet moves faster
    else:
        return row['st_vsin'] * 0.8  # Smaller planet moves slower


# Pair the raw radius with the normalized speed so that the size test inside
# adjust_speed is meaningful, then update the rotation speed
speed_inputs = pd.DataFrame({
    'pl_rade': raw_planet_radius,
    'st_vsin': data['st_vsin'],
})
data['st_vsin'] = speed_inputs.apply(adjust_speed, axis=1)

# Output the updated DataFrame
print(data.head())

# Optionally save the updated DataFrame to a new CSV
# data.to_csv('updated_planet_star_system_data.csv', index=False)


   rowid     pl_name  hostname pl_letter    hd_name    hip_name  \
0      1    11 Com b    11 Com         b  HD 107383   HIP 60202   
1      2    11 UMi b    11 UMi         b  HD 136726   HIP 74793   
2      3    14 And b    14 And         b  HD 221345  HIP 116076   
3      4    14 Her b    14 Her         b  HD 145675   HIP 79248   
4      5  16 Cyg B b  16 Cyg B         b  HD 186427   HIP 96901   

          tic_id                       gaia_id  sy_snum  sy_pnum  ...  \
0   TIC 72437047  Gaia DR2 3946945413106333696        2        1  ...   
1  TIC 230061010  Gaia DR2 1696798367260229376        1        1  ...   
2  TIC 333225860  Gaia DR2 1920113512486282240        1        1  ...   
3  TIC 219483057  Gaia DR2 1385293808145621504        1        2  ...   
4   TIC 27533327  Gaia DR2 2135550755683407232        3        1  ...   

   sy_kepmagerr2                                  sy_kepmag_reflink pl_nnotes  \
0            0.0  <a refstr=STASSUN_ET_AL__2019 href=https://ui....       2.0

In [4]:
import json

# Source column -> JSON field name, as used by the export below
columns_mapping = {
    'pl_name': 'pname',
    'pl_orbsmax': 'semi_major_axis',
    'pl_orbeccen': 'eccentricity',
    'pl_orbper': 'orbital_period',
    'pl_orbincl': 'inclination',
    'pl_rade': 'radius',
    'pl_angsep': 'angular_separation',
    'hostname': 'hname',
    'sy_dist': 'sy_dist',
    'ra': 'ra',
    'dec': 'dec',
    'st_teff': 'st_teff',
    'st_lum': 'st_lum',
    'pl_imppar': 'distance_from_center',
    'st_vsin': 'rotation_speed'
}

# Number of leading rows (planets) to export
planet_limit = 10

# Select the mapped columns plus the computed 'habitable_zone' flag for the first rows
relevant_columns = list(columns_mapping.keys()) + ['habitable_zone']
data_subset = data[relevant_columns].head(planet_limit)


def _json_safe(value):
    """Return ``value`` unchanged, except NaN/+-inf floats which become ``None``.

    ``json.dumps`` would otherwise write the bare tokens ``NaN``/``Infinity``,
    which are not valid JSON and are rejected by strict parsers.
    """
    if isinstance(value, float) and (value != value or abs(value) == float('inf')):
        return None
    return value


# Skeleton of the output document; star systems are filled in below
galaxy = {
    "galaxy": {
        "center_x": 0,
        "center_y": 0,
        "center_z": 0,
        "rotation_speed": 0.005
    },
    "starSystems": []
}

# hostname -> star system record (insertion order is the order of first appearance)
star_systems = {}

# Process each planet row and add it to its respective star system
for index, row in data_subset.iterrows():
    star_system_name = row['hostname']

    # Create the star system from the first row seen for this host
    if star_system_name not in star_systems:
        star_systems[star_system_name] = {
            "name": _json_safe(star_system_name),
            "sy_dist": _json_safe(row['sy_dist'] * 1000),
            "ra": _json_safe(row['ra'] * 100),
            "dec": _json_safe(row['dec'] / 100),
            "st_teff": _json_safe(row['st_teff'] * 100000),
            "st_lum": _json_safe(row['st_lum'] * 10),
            "distance_from_center": _json_safe(row['pl_imppar'] * 10000),
            "rotation_speed": _json_safe(row['st_vsin']),
            "planets": []
        }

    # Planet record; 'habitable_zone' comes from the previously computed column
    planet = {
        "pname": _json_safe(row['pl_name']),
        "semi_major_axis": _json_safe(row['pl_orbsmax'] * 10e5),  # 10e5 == 1,000,000
        "eccentricity": _json_safe(row['pl_orbeccen']),
        "orbital_period": _json_safe(row['pl_orbper']),
        "inclination": _json_safe(row['pl_orbincl']),
        "habitable_zone": bool(row['habitable_zone']),
        "radius": _json_safe(row['pl_rade']),
        "angular_separation": _json_safe(row['pl_angsep'] / 10),
        "texture": None  # Constant value as per instructions
    }

    star_systems[star_system_name]["planets"].append(planet)

# Add all star systems to the galaxy structure
galaxy["starSystems"] = list(star_systems.values())

# Generate JSON content; allow_nan=False makes any non-finite float that slipped
# through fail loudly instead of silently producing an invalid document
json_output = json.dumps(galaxy, indent=4, allow_nan=False)

# Save the JSON content to a file
json_file_path = f'system_{planet_limit}.json'
with open(json_file_path, 'w') as json_file:
    json_file.write(json_output)

# Output the path of the saved file
json_file_path


'system_10.json'

In [5]:
# import numpy as np
# import pandas as pd

# # Sample DataFrame initialization (assuming data is already loaded)
# # data = pd.read_csv('your_data.csv')  # Load your dataset here
# # For demonstration, we'll create a mock DataFrame similar to yours
# # Assuming the DataFrame has been set up as described in previous steps
# data_subset = data.head(100)

# # Normalize the first 100 rows
# # Using standardization instead of Min-Max Scaling for more complex fitting
# def normalize_column(column):
#     mean = np.mean(column)
#     std = np.std(column)
#     return (column - mean) / std

# # Apply normalization to relevant columns
# data_subset['semi_major_axis'] = normalize_column(data_subset['pl_orbsmax'])
# data_subset['eccentricity'] = normalize_column(data_subset['pl_orbeccen'])
# data_subset['orbital_period'] = normalize_column(data_subset['pl_orbper'])
# data_subset['inclination'] = normalize_column(data_subset['pl_orbincl'])
# data_subset['radius'] = normalize_column(data_subset['pl_rade'])
# data_subset['angular_separation'] = normalize_column(data_subset['pl_angsep'])

# # Update star system features
# data_subset['sy_dist'] = normalize_column(data_subset['sy_dist'])
# data_subset['ra'] = normalize_column(data_subset['ra'])
# data_subset['dec'] = normalize_column(data_subset['dec'])
# data_subset['st_teff'] = normalize_column(data_subset['st_teff'])
# data_subset['st_lum'] = normalize_column(data_subset['st_lum'])
# data_subset['distance_from_center'] = normalize_column(data_subset['pl_imppar'])
# data_subset['rotation_speed'] = normalize_column(data_subset['st_vsin'])

# # Ensure no two star systems overlap
# def adjust_sizes(data_subset, threshold=0.5):
#     for i in range(len(data_subset)):
#         for j in range(i + 1, len(data_subset)):
#             # Calculate the distance based on the normalized sizes
#             distance = np.abs(data_subset.loc[i, 'sy_dist'] - data_subset.loc[j, 'sy_dist'])
#             if distance < threshold:
#                 # Reduce the size of the larger star system
#                 if data_subset.loc[i, 'radius'] > data_subset.loc[j, 'radius']:
#                     data_subset.loc[i, 'radius'] -= (threshold - distance) / 2
#                 else:
#                     data_subset.loc[j, 'radius'] -= (threshold - distance) / 2

# # Adjust sizes to prevent overlap
# adjust_sizes(data_subset)

# # Update original data DataFrame with modified values
# data.update(data_subset)

# # Display the updated DataFrame for verification
# print(data.head(100))  # Display first 100 rows to check the updates


In [6]:
# import pandas as pd

# # Assume 'data' is your DataFrame containing the necessary columns

# # Normalization functions
# def normalize_planet_features(data):
#     data['pl_orbsmax'] = data['pl_orbsmax'] * 100  # semi_major_axis
#     data['pl_orbeccen'] = data['pl_orbeccen']  # eccentricity remains the same
#     data['pl_orbper'] = data['pl_orbper'] / 100  # orbital_period
#     data['pl_orbincl'] = data['pl_orbincl']  # inclination remains the same
#     data['pl_rade'] = data['pl_rade'] / 100  # radius
#     data['pl_angsep'] = data['pl_angsep']  # angular_separation remains the same

# def normalize_star_system_features(data):
#     data['sy_dist'] = data['sy_dist'] * 100  # sy_dist
#     data['ra'] = data['ra']  # ra remains the same
#     data['dec'] = data['dec']  # dec remains the same
#     data['st_teff'] = data['st_teff'] / 100  # st_teff
#     data['st_lum'] = data['st_lum']  # st_lum remains the same
#     data['pl_imppar'] = data['pl_imppar'] * 100  # distance_from_center
#     data['st_vsin'] = data['st_vsin'] / 100  # rotation_speed

# # Normalize planet features
# normalize_planet_features(data)

# # Normalize star system features
# normalize_star_system_features(data)

# # Output the modified DataFrame
# print(data.head())
