In [2]:
import requests
import pandas as pd
import math
import json

In [5]:
# API URL
api_url = "https://exoplanetarchive.ipac.caltech.edu/TAP/sync?query=select+*+from+pscomppars&format=json"

In [6]:
# Make get request to the API
response = requests.get(api_url)

In [8]:
# Import response into JSON file
if response.status_code == 200:
    # Parse JSON response
    data = response.json()

    # Save data to a JSON file with indentation for readability
    with open('exoplanet_data.json', 'w') as outfile:
        json.dump(data, outfile, indent=2)

    print("Data saved to exoplanet_data.json")
else:
    print("Error:", response.status_code)

Data saved to exoplanet_data.vot


In [6]:
# Check if the request was successful (status code 200?)
Planet_Name = []
Discovered_Year = []
Orbital_Period_days = []
SemiMajor_Axis_AU = []
Mass = []
Stellar_Luminosity = []
ra = []
dec = []
radius = []
density = []

if response.status_code == 200:
    # Parse JSON response
    data = response.json()
    # Test to print the first few records
    for entry in data:
        Planet_Name.append(entry.get("pl_name"))
        Discovered_Year.append(entry.get("disc_year"))
        Orbital_Period_days.append(entry.get("pl_orbper"))
        SemiMajor_Axis_AU.append(entry.get("pl_orbsmax"))
        Mass.append(entry.get("pl_bmasse"))
        radius.append(entry.get("pl_rade"))
        Stellar_Luminosity.append(entry.get("st_lum")) 
        ra.append(entry.get("ra"))  
        dec.append(entry.get("dec"))
        density.append(entry.get("pl_dens"))

else:
    print("Error:", response.status_code)


In [7]:
exoplanet_data = pd.DataFrame({'Planet Name':Planet_Name,'Discovery Year':Discovered_Year,'Orbital Period (days)':Orbital_Period_days,'SemiMajor Axis (AU)':SemiMajor_Axis_AU,'Mass (Earth Mass)':Mass,'Stellar_Luminosity':Stellar_Luminosity,'Right Ascension (RA)':ra,'Declination (Dec)':dec,'Radius':radius, 'Density': density})
#exoplanet_data = exoplanet_data.dropna()
print(len(exoplanet_data))
exoplanet_data.head(10)

5483


Unnamed: 0,Planet Name,Discovery Year,Orbital Period (days),SemiMajor Axis (AU),Mass (Earth Mass),Stellar_Luminosity,Right Ascension (RA),Declination (Dec),Radius,Density
0,OGLE-2016-BLG-1227L b,2020,,3.4,250.0,,265.597125,-33.759778,13.9,0.512
1,Kepler-276 c,2013,31.884,0.1994,16.6,-0.089,293.568197,39.036312,2.9,3.74
2,Kepler-829 b,2016,6.883376,0.0678,5.1,0.04,282.332831,42.463813,2.11,2.98
3,K2-283 b,2018,1.921036,0.0291,12.2,-0.524,13.194368,9.692918,3.52,1.54
4,Kepler-477 b,2016,11.119907,0.0911,4.94,-0.372,288.067445,42.355305,2.07,3.06
5,TOI-1260 c,2021,7.493134,0.0657,13.2,-0.889,157.144071,65.854199,2.76,3.45
6,HD 149143 b,2005,4.07182,0.053,422.7139,0.35,248.212671,2.084455,13.6,0.923
7,HD 210702 b,2007,354.1,1.148,574.63664,1.149,332.963864,16.040481,13.4,1.31
8,HIP 12961 b,2010,57.435,0.25,114.4188,-1.003,41.680075,-23.086005,13.2,0.273
9,HD 11755 b,2015,433.70001,1.09,1789.3829,2.169,29.707836,73.152137,12.8,4.69


In [8]:
exoplanet_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5483 entries, 0 to 5482
Data columns (total 10 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   Planet Name            5483 non-null   object 
 1   Discovery Year         5483 non-null   int64  
 2   Orbital Period (days)  5244 non-null   float64
 3   SemiMajor Axis (AU)    5188 non-null   float64
 4   Mass (Earth Mass)      5458 non-null   float64
 5   Stellar_Luminosity     5257 non-null   float64
 6   Right Ascension (RA)   5483 non-null   float64
 7   Declination (Dec)      5483 non-null   float64
 8   Radius                 5465 non-null   float64
 9   Density                5374 non-null   float64
dtypes: float64(8), int64(1), object(1)
memory usage: 428.5+ KB


In [9]:
exoplanet_data = exoplanet_data.dropna()
print(len(exoplanet_data))

4827


In [47]:
# Determine density range
density_values = exoplanet_data['Density']
mean_density = density_values.mean()
median_density = density_values.median()
stdev_density = density_values.std()
q1 = density_values.quantile(0.25)
q3 = density_values.quantile(0.75)
iqr = q3-q1
lowerlim = median_density-1.5*iqr
upperlim = median_density+1.5*iqr

# Check for if limit is negative
if lowerlim < 0:
    lowerlim = median_density

if upperlim < 0:
    upperlim = median_density

density_range = (lowerlim, upperlim)

print("Density Range:", density_range)
print("mean", mean_density)
print("median",median_density)
print ("stdev",stdev_density)
print("iqr",iqr)

Density Range: (2.62, 7.3900000000000015)
mean 3.9455643791174646
median 2.62
stdev 9.514964424661079
iqr 3.1800000000000006


In [53]:
# Filter for planets in the habitable zone
lum = exoplanet_data.Stellar_Luminosity
divisor_ri = 1.1
exoplanet_data['Ri (AU)'] = [math.sqrt(abs(x/divisor_ri)) for x in lum]
divisor_ro = 0.53
exoplanet_data['Ro (AU)'] = [math.sqrt(abs(x/divisor_ro)) for x in lum]
exoplanet_data['Habitable Zone'] = (exoplanet_data['Ri (AU)'] > 0.95) & (exoplanet_data['Ri (AU)'] < 1.37)

# Filter for planets with radius between 0.8 and 1.2 times the radius of Earth
exoplanet_data['Earth-like Radius'] = (exoplanet_data['Radius'] > 0.8) & (exoplanet_data['Radius'] < 1.2)

# Filter for planets with mass between 0.5 and 2.0 times the mass of Earth
exoplanet_data['Earth-like Mass'] = (exoplanet_data['Mass (Earth Mass)'] > 0.5) & (exoplanet_data['Mass (Earth Mass)'] < 2.0)

# Filter for planets with density similar to Earth's taking into account of the statistics of the dataframe
earth_density = 5.512  # g/cm³
range = 3 # g/cm³
exoplanet_data['Rocky'] = (
    (exoplanet_data['Density'] >= earth_density - range) &
    (exoplanet_data['Density'] <= earth_density + range))

# Combine all the criteria to find Earth-like planets
earthlike_planets = exoplanet_data[
    exoplanet_data['Habitable Zone'] &
    exoplanet_data['Earth-like Radius'] &
    exoplanet_data['Earth-like Mass'] &
    exoplanet_data['Rocky']
]

# Calculate the similarity score based on radius and mass (smaller score indicates more Earth-like)
earthlike_planets['Similarity Score'] = abs(earthlike_planets['Radius'] - 1.0) + abs(earthlike_planets['Mass (Earth Mass)'] - 1.0)

# Sort the Earth-like planets based on their similarity to Earth (radius and mass)
earthlike_planets = earthlike_planets.sort_values(by='Similarity Score', ascending=False)

# Display the top 5 most Earth-like planets
print(earthlike_planets[['Planet Name', 'Radius', 'Mass (Earth Mass)', 'Density', 'Similarity Score']])
#most_earthlike_planets = earthlike_planets.head(10)
#print(most_earthlike_planets[['Planet Name', 'Radius', 'Mass (Earth Mass)', 'Density', 'Similarity Score']])


            Planet Name  Radius  Mass (Earth Mass)  Density  Similarity Score
4594      Kepler-1735 b   1.194              1.840     5.94             1.034
423           GJ 3138 b   1.180              1.780     5.95             0.960
1287      Kepler-1512 b   1.180              1.760     5.89             0.940
4936      Kepler-1902 b   1.176              1.740     5.88             0.916
2400           GJ 393 b   1.170              1.710     5.87             0.880
3741       Kepler-186 f   1.170              1.710     5.87             0.880
5026         TOI-1442 b   1.170              1.710     5.87             0.880
3313           GJ 581 e   1.170              1.700     5.83             0.870
3199          GJ 3929 b   1.090              1.750     7.30             0.840
2571            K2-72 c   1.160              1.650     5.81             0.810
1055           K2-122 b   1.160              1.650     5.81             0.810
4772            K2-54 b   1.150              1.600     5.78     

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  earthlike_planets['Similarity Score'] = abs(earthlike_planets['Radius'] - 1.0) + abs(earthlike_planets['Mass (Earth Mass)'] - 1.0)


In [55]:
earthlike_planets.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 44 entries, 4594 to 292
Data columns (total 17 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   Planet Name            44 non-null     object 
 1   Discovery Year         44 non-null     int64  
 2   Orbital Period (days)  44 non-null     float64
 3   SemiMajor Axis (AU)    44 non-null     float64
 4   Mass (Earth Mass)      44 non-null     float64
 5   Stellar_Luminosity     44 non-null     float64
 6   Right Ascension (RA)   44 non-null     float64
 7   Declination (Dec)      44 non-null     float64
 8   Radius                 44 non-null     float64
 9   Density                44 non-null     float64
 10  Ri (AU)                44 non-null     float64
 11  Ro (AU)                44 non-null     float64
 12  Habitable Zone         44 non-null     bool   
 13  Earth-like Radius      44 non-null     bool   
 14  Earth-like Mass        44 non-null     bool   
 15  Rock

In [56]:
most_similar_planets = earthlike_planets[earthlike_planets['Similarity Score'] > 0.8]
print(most_similar_planets[['Planet Name', 'Radius', 'Mass (Earth Mass)', 'Density', 'Similarity Score']])

        Planet Name  Radius  Mass (Earth Mass)  Density  Similarity Score
4594  Kepler-1735 b   1.194               1.84     5.94             1.034
423       GJ 3138 b   1.180               1.78     5.95             0.960
1287  Kepler-1512 b   1.180               1.76     5.89             0.940
4936  Kepler-1902 b   1.176               1.74     5.88             0.916
2400       GJ 393 b   1.170               1.71     5.87             0.880
3741   Kepler-186 f   1.170               1.71     5.87             0.880
5026     TOI-1442 b   1.170               1.71     5.87             0.880
3313       GJ 581 e   1.170               1.70     5.83             0.870
3199      GJ 3929 b   1.090               1.75     7.30             0.840
2571        K2-72 c   1.160               1.65     5.81             0.810
1055       K2-122 b   1.160               1.65     5.81             0.810


In [57]:
# Convert earthlike dataframe into json format
earthlike_planets_json = earthlike_planets.to_json(orient='records')

# Save data to a JSON file 
with open('earthlike_planets.json', 'w') as outfile:
    json.dump(earthlike_planets_json, outfile, indent=2)