In [1]:
# Import the different libraries:

import pandas as pd
import requests
import json
from bs4 import BeautifulSoup
import matplotlib as plt

In [2]:
# Pieces of the NASA Exoplanet Archive API request.
# API documentation:
# https://exoplanetarchive.ipac.caltech.edu/docs/program_interfaces.html#data

# Endpoint that the query string is appended to:
base_url = "https://exoplanetarchive.ipac.caltech.edu/cgi-bin/nstedAPI/nph-nstedAPI?"

# Character that joins the individual query parameters:
seperator = "&"

# Table to query (the exoplanets table):
exo_table = "table=exoplanets"

# Column selection: the default set here; otherwise list the columns, separated by commas:
exo_columns = "getdefaultcolumns"

# Response format (csv, txt, json, etc.):
exo_format = "format=json"






In [3]:
# Request the exoplanets table from the API and decode the JSON response body.
# The query string is the table, column and format pieces joined by the separator.
exo_url = base_url + exo_table + seperator + exo_columns + seperator + exo_format

# A timeout keeps the cell from hanging indefinitely if the server does not answer.
response = requests.get(exo_url, timeout=60)

# Fail loudly on an HTTP error status instead of trying to parse an error page as JSON.
response.raise_for_status()

exoplanets = response.json()

In [4]:
# Build a DataFrame from the decoded response (a list of dictionaries):
df = pd.DataFrame(data=exoplanets)

In [5]:
# Work on an independent (deep) copy so the originally downloaded data stays untouched:

exo_nasa = df.copy(deep=True)

In [6]:
# Persist the NASA table as a CSV file (the DataFrame index is written as the first column):
exo_nasa.to_csv(path_or_buf="data/nasa/exoplanet_nasa.csv")

In [7]:
# Load the second catalogue from its local CSV file: data/exoplanets/exoplanets_eu.csv
exo_eu = pd.read_csv(filepath_or_buffer="data/exoplanets/exoplanets_eu.csv")

In [8]:
# Dimensions of the EU table as (rows, columns).
exo_eu.shape

(4241, 98)

In [9]:
# Dimensions of the NASA table as (rows, columns).
exo_nasa.shape

(4144, 82)

In [10]:
# The EU table has more rows than the NASA table, which seems odd.
# Pull the name column out of each table so the unmatched entries can be identified:
nasa_name_list = exo_nasa["pl_name"].tolist()
eu_name_list = exo_eu["# name"].tolist()

In [11]:
# Find the names that appear in only one of the two name lists.
# Membership is tested against sets, so each lookup no longer scans the whole other list;
# the result lists keep the order and any duplicates of the list they are drawn from.
nasa_name_set = set(nasa_name_list)
eu_name_set = set(eu_name_list)

# Names in the EU list that have no exact match in the NASA list:
not_in_nasa = [name for name in eu_name_list if name not in nasa_name_set]

# Names in the NASA list that have no exact match in the EU list:
not_in_eu = [name for name in nasa_name_list if name not in eu_name_set]

In [12]:
# Number of EU names without an exact match in the NASA list.
len(not_in_nasa)

693

In [13]:
# Number of NASA names without an exact match in the EU list.
len(not_in_eu)

596

In [14]:
# Sanity check: the difference in row counts between the two tables should equal
# the difference between the sizes of the two unmatched-name lists.
row_gap = len(exo_eu) - len(exo_nasa)
unmatched_gap = len(not_in_nasa) - len(not_in_eu)
row_gap == unmatched_gap

True

In [15]:
# Display the NASA names that have no exact match in the EU list.
not_in_eu

['HD 142 b',
 'HD 142 c',
 'HD 4113 b',
 'HD 4732 b',
 'HD 4732 c',
 'HD 7449 b',
 'HD 8673 b',
 'HD 16141 b',
 'HD 19994 b',
 'HD 20794 c',
 'HD 28254 b',
 'HD 30856 b',
 'HD 39091 b',
 'HD 43691 b',
 'HD 46375 b',
 'HD 65216 b',
 'HD 65216 c',
 'HD 70573 b',
 'HD 86081 b',
 'HD 100546 b',
 'HD 102365 b',
 'HD 103774 b',
 'HD 106906 b',
 'HD 108341 b',
 'PH1 b',
 'KOI-55 b',
 'KOI-55 c',
 'KOI-217 b',
 'HD 114762 b',
 'HD 114729 b',
 'HD 116029 b',
 'HD 207832 b',
 'HD 207832 c',
 'iot Dra b',
 'HD 126614 b',
 'HD 132563 b',
 'HD 131664 b',
 'HD 142245 b',
 'Kepler-21 b',
 'Kepler-24 d',
 'Kepler-24 e',
 'PH2 b',
 'KOI-142 b',
 'KOI-142 c',
 'KOI-94 b',
 'KOI-94 c',
 'KOI-94 d',
 'KOI-94 e',
 'KOI-351 b',
 'KOI-351 c',
 'KOI-351 d',
 'KOI-351 e',
 'KOI-351 f',
 'KOI-351 g',
 'KOI-351 h',
 'Kepler-26 d',
 'Kepler-93 c',
 'Kepler-97 b',
 'Kepler-97 c',
 'Kepler-98 b',
 'Kepler-99 b',
 'HD 156846 b',
 'HD 160691 b',
 'HD 160691 c',
 'HD 160691 d',
 'Kepler-32 d',
 'Kepler-32 e',
 'Kepler

In [16]:
# Display the EU names that have no exact match in the NASA list.
not_in_nasa

['11 Oph b',
 "1I/'Oumuamua",
 '1RXS 1609 b',
 '1RXS J235133.3+312720 b',
 '1SWASP J1407 b',
 '2I/Borisov',
 '2M 0103-55 (AB) b',
 '2M 0122-24 b',
 '2M 0219-39 b',
 '2M 0441+23 b',
 '2M 1938+46 b',
 '2M 2140+16 b',
 '2M 2206-20 b',
 '2M 2236+4751 b',
 '2M J2126-81 b',
 '2M0045+16',
 '2M0117-34',
 '2M1207 A',
 '2M1207 b',
 '2M1510A a',
 '2M1510A b',
 '2MASS J00413538-5621127 A',
 '2MASS J00413538-5621127 B',
 '2MASS J0249-0557 (AB) c',
 '2MASS J0249-0557A',
 '2MASS J0249-0557B',
 '2MASS J11011926-7732383 A',
 '2MASS J11011926-7732383 B',
 '2MASS J11193254 AB',
 '2MASS J1155-7919 b',
 '2MASS J1450-7841 A',
 '2MASS J1450-7841 B',
 '2MASS J15344984-2952274 A',
 '2MASS J15344984-2952274 B',
 '2MASS J2250+2325 b',
 '38 Vir b',
 '4 Uma b',
 '47 Uma b',
 '47 Uma c',
 '47 Uma d',
 '7 CMA c',
 '8 Umi b',
 'AB Dor C b',
 'AD 3116 b',
 'AS 205 A b',
 'BD+20 274 c',
 'BD+24 4697 b',
 'BD+26 1888 b',
 'BD-06 1339 d',
 "Barnard's  b",
 'CD-35 2722 b',
 'CFBDS 1458 A',
 'CFBDS0059',
 'CFBDSIR J2149-04

In [17]:
# This is more than expected. Maybe some of the objects are spelled differently in the two catalogues. Try to find those.

In [18]:
# Normalise the EU names so that purely cosmetic differences no longer block a match:
# replace the infixes " A ", " B ", " C " and " (AB) " (presumably host-star component
# labels) with a single space, in that order, then remove every remaining space.
eu_name_list = [
    raw_name
    .replace(" A ", " ")
    .replace(" B ", " ")
    .replace(" C ", " ")
    .replace(" (AB) ", " ")
    .replace(" ", "")
    for raw_name in exo_eu["# name"].to_list()
]


In [19]:
# Normalise the NASA names so that purely cosmetic differences no longer block a match:
# replace the infixes " A ", " B ", " C " and " (AB) " (presumably host-star component
# labels) with a single space, in that order, then remove every remaining space.
nasa_name_list = [
    raw_name
    .replace(" A ", " ")
    .replace(" B ", " ")
    .replace(" C ", " ")
    .replace(" (AB) ", " ")
    .replace(" ", "")
    for raw_name in exo_nasa["pl_name"].to_list()
]


In [20]:
# Repeat the comparison on the current contents of the two name lists.
# Membership is tested against sets, so each lookup no longer scans the whole other list;
# the result lists keep the order and any duplicates of the list they are drawn from.
nasa_name_set = set(nasa_name_list)
eu_name_set = set(eu_name_list)

# Names in the EU list that have no match in the NASA list:
not_in_nasa = [name for name in eu_name_list if name not in nasa_name_set]

# Names in the NASA list that have no match in the EU list:
not_in_eu = [name for name in nasa_name_list if name not in eu_name_set]

In [21]:
# Number of NASA names still without a match in the EU list.
len(not_in_eu)

483

In [22]:
# Number of EU names still without a match in the NASA list.
len(not_in_nasa)

580