In [19]:
from io import StringIO
from urllib.parse import quote_plus

import pandas as pd
import requests

# Base URL of the TAP service's synchronous ("sync") query endpoint. It ends
# with "query=" so that the query text can be appended directly after it.
base_url = "https://exoplanetarchive.ipac.caltech.edu/TAP/sync?query="


In [20]:
# Function to check API limits
def check_api_limit(response):
    """Report how many API calls remain according to the response headers.

    Args:
        response: Object exposing a mapping-like ``headers`` attribute
            (for example a ``requests.Response``).

    Returns:
        The integer value of the ``X-RateLimit-Remaining`` header, or ``None``
        when the header is absent or its value cannot be parsed as an integer.
    """
    if 'X-RateLimit-Remaining' not in response.headers:
        return None

    raw_value = response.headers['X-RateLimit-Remaining']
    try:
        remaining = int(raw_value)
    except (TypeError, ValueError):
        # A malformed header must not abort an otherwise successful request;
        # treat the remaining quota as unknown instead of raising.
        print(f"Unrecognized X-RateLimit-Remaining header value: {raw_value!r}")
        return None

    print(f"API calls remaining: {remaining}")
    return remaining


In [21]:
# Unified function to fetch data
def fetch_data(query, format_type='csv', timeout=300):
    """Run a query against the TAP service and return the parsed result.

    Args:
        query: Query text to send. It is URL-encoded and appended to
            ``base_url``.
        format_type: Value of the ``format`` URL parameter. ``'csv'``
            responses are parsed into a DataFrame; any other value is decoded
            with ``response.json()``.
        timeout: Timeout in seconds handed to ``requests.get`` so a stalled
            connection cannot block forever.

    Returns:
        A ``pandas.DataFrame`` for ``'csv'``, the decoded JSON payload for
        other formats, or ``None`` when the request fails, the server answers
        with a non-200 status, or the rate-limit header reports that no calls
        remain.
    """
    # quote_plus still turns spaces into "+", and additionally percent-encodes
    # characters such as "&", "+", "%" and "#" that would otherwise corrupt
    # the query string.
    full_url = base_url + quote_plus(query) + f"&format={format_type}"

    try:
        response = requests.get(full_url, timeout=timeout)
    except requests.RequestException as exc:
        # Report transport failures the same way as HTTP errors below, so one
        # failed download does not abort the caller's whole loop.
        print(f"Error: request failed: {exc}")
        return None

    if response.status_code != 200:
        print(f"Error: {response.status_code}, {response.text}")
        return None

    remaining_calls = check_api_limit(response)
    # NOTE(review): a remaining count of 0 discards this response's payload
    # even though the request itself succeeded - confirm that is intended.
    if remaining_calls is not None and remaining_calls <= 0:
        print("API limit reached. No more requests can be made.")
        return None

    if format_type == 'csv':
        return pd.read_csv(StringIO(response.text), low_memory=False)
    return response.json()


In [22]:
# Queries
# Maps a short dataset name to the query text sent to the TAP service.
# main() also uses each name as the stem of the CSV file it writes.
queries = dict(
    confirmed_planets="SELECT * FROM ps WHERE default_flag = 1",
    unconfirmed_tess="SELECT * FROM toi",
    unconfirmed_koi="SELECT * FROM cumulative WHERE koi_disposition LIKE 'CANDIDATE'",
    unconfirmed_k2="SELECT * FROM k2pandc",
    stellarhosts="SELECT * FROM exo_tap.stellarhosts",
    pscomppars="SELECT * FROM exo_tap.pscomppars",
    transit_detection="SELECT * FROM exo_tap.TD",
)


In [23]:
# Main function
def main():
    """Fetch every configured query, save each result to CSV, and print summaries.

    For each entry in ``queries`` the result of ``fetch_data`` is kept under
    the entry's name. Results that are not ``None`` are written to
    ``<name>.csv``; then a ``describe()`` summary is printed for every entry,
    followed by the column list of the ``confirmed_planets`` result when it
    is available.
    """
    # Run each named query once and keep the outcome keyed by its name
    results = {label: fetch_data(adql) for label, adql in queries.items()}

    # Write every successful result to its own CSV file
    for label, frame in results.items():
        if frame is None:
            print(f"No data to save for {label}")
            continue
        frame.to_csv(f'{label}.csv', index=False)
        print(f"Data saved to {label}.csv")

    # Print a statistical summary per dataset
    for label, frame in results.items():
        print(f"\n{label.replace('_', ' ').title()} Summary:")
        if frame is None:
            print("No data.")
        else:
            print(frame.describe())

    # List the columns of the confirmed-planets result, if it was fetched
    confirmed = results.get('confirmed_planets')
    if confirmed is not None:
        print("Columns in confirmed_planets.csv:")
        print(confirmed.columns.tolist())


In [24]:
# Entry point: call main() only when this code runs as the top-level program
# (__name__ == "__main__"), not when it is imported as a module.
if __name__ == "__main__":
    main()


Data saved to confirmed_planets.csv
Data saved to unconfirmed_tess.csv
Data saved to unconfirmed_koi.csv
Data saved to unconfirmed_k2.csv
Data saved to stellarhosts.csv
Data saved to pscomppars.csv
Data saved to transit_detection.csv

Confirmed Planets Summary:
       default_flag    disc_year           ra          dec         glon  \
count        5785.0  5785.000000  5785.000000  5785.000000  5785.000000   
mean            1.0  2016.454105   235.097219    19.222305   133.186042   
std             0.0     4.671247    90.888679    36.295722    95.534254   
min             1.0  1992.000000     0.185606   -88.121111     0.029010   
25%             1.0  2014.000000   178.577997    -9.324970    74.069000   
50%             1.0  2016.000000   285.499284    39.683128    79.853620   
75%             1.0  2020.000000   293.372211    45.495327   206.472990   
max             1.0  2024.000000   359.974984    86.860343   359.996270   

              glat         elon         elat     pl_orbper  pl