In [22]:
# imports:

from getpass import getpass

import pandas as pd
import psycopg
from sqlalchemy import create_engine, text
from sqlalchemy.engine import URL
from sqlalchemy.orm import Session


In [23]:
# check_in:

# Ask for the password once and reuse it for both drivers (same user, same
# database), instead of prompting twice.
pw = getpass('Please enter password: ')

# Build the URL from its components so that special characters in the password
# (e.g. '@', ':', '/', '#') are escaped rather than corrupting the
# connection string, as happens with plain f-string interpolation.
connection_url = URL.create(
    drivername='postgresql',
    username='postgres',
    password=pw,
    host='localhost',
    port=5432,
    database='nasa_exoplanets',
)
engine = create_engine(connection_url)

# check connection:
# Opening a connection is the whole test; nothing is executed on it.
with engine.connect() as conn_alchemy:
    print("SQLAlchemy connected!")


# connection psycopg:
# Connectivity check only; nothing is done with the connection inside the block.
with psycopg.connect(
    host='localhost',
    port='5432',
    user='postgres',
    password=pw,
    dbname='nasa_exoplanets',
    autocommit=True
) as connection:
    print("psycopg connected!")

SQLAlchemy connected!
psycopg connected!


In [11]:
# Combine the two source tables into one frame: rows of new_data (alias n) are
# inner-joined to rows of old_data (alias o) where n.planet_name = o.name, so
# only planets present in both tables are kept. DISTINCT drops fully identical
# result rows. Columns prefixed `n.` come from new_data, `o.` from old_data.
sel_stmt = '''
    SELECT DISTINCT
	    n.planet_name,
	    o.planet_type,
	    n.host_star_name,
	    n.spectral_type,
	    o.distance,
	    n.discovery_year,
	    n.discovery_method,
	    n.orbital_period_days,
	    o.mass_multiplier,
	    o.mass_wrt,
	    n.planet_mass_earth_mass,
	    n.planet_mass_jupiter_mass,
	    o.radius_multiplier,
	    o.radius_wrt,
	    n.planet_radius_earth_radius,
	    n.planet_radius_jupiter_radius
    FROM new_data AS n
    INNER JOIN old_data AS o ON n.planet_name = o.name'''

# Run the query through the SQLAlchemy engine and load the result into pandas.
complete_df = pd.read_sql(sel_stmt, engine)

In [12]:
# Show column dtypes and non-null counts of the joined frame.
complete_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5100 entries, 0 to 5099
Data columns (total 16 columns):
 #   Column                        Non-Null Count  Dtype  
---  ------                        --------------  -----  
 0   planet_name                   5100 non-null   object 
 1   planet_type                   5100 non-null   object 
 2   host_star_name                5100 non-null   object 
 3   spectral_type                 1720 non-null   object 
 4   distance                      5084 non-null   float64
 5   discovery_year                5100 non-null   int64  
 6   discovery_method              5100 non-null   object 
 7   orbital_period_days           4918 non-null   float64
 8   mass_multiplier               5078 non-null   float64
 9   mass_wrt                      5078 non-null   object 
 10  planet_mass_earth_mass        5075 non-null   float64
 11  planet_mass_jupiter_mass      5075 non-null   float64
 12  radius_multiplier             5085 non-null   float64
 13  rad

In [13]:
# Persist the joined frame as the `complete_data` table.
# NOTE(review): a later cell selects `planet_id` from complete_data, but the
# query that builds complete_df selects no such column — presumably it is added
# to the table outside this notebook; confirm before re-creating the table.
complete_df.to_sql('complete_data', engine)

100

In [26]:
# Pull the per-planet value columns (planet/star ids, spectral type, distance,
# discovery year, orbital period, and the mass/radius multipliers with their
# `*_wrt` columns) from finalshape_fulldata into values_df.
# NOTE(review): finalshape_fulldata is only written by a cell further down this
# notebook, so this cell relies on the table already existing in the database.
sel_values = '''
    SELECT
        f.planet_id,
        f.star_id,
	    f.spectral_type,
	    f.distance,
	    f.discovery_year,
	    f.orbital_period_days,
	    f.mass_multiplier,
	    f.mass_wrt,
	    f.radius_multiplier,
	    f.radius_wrt
	FROM finalshape_fulldata AS f'''

values_df = pd.read_sql(sel_values, engine)

In [27]:
# Write values_df to a table named `values`.
# NOTE(review): VALUES is also an SQL keyword, so this table name presumably
# has to be double-quoted in hand-written SQL — consider a less ambiguous name.
values_df.to_sql('values', engine)

142

In [29]:
# Collect star records (name, id, spectral type) from finalshape_fulldata.
# NOTE(review): DISTINCT applies to the whole selected row; the parentheses
# around host_star_name do not restrict it to that column, so a star can appear
# more than once if its star_id or spectral_type differs between rows.
stars = '''
    SELECT
    	DISTINCT(host_star_name),
    	star_id,
    	spectral_type
	FROM finalshape_fulldata'''

stars_df = pd.read_sql(stars, engine)
# Bare last expression: display the frame as the cell output.
stars_df

Unnamed: 0,host_star_name,star_id,spectral_type
0,Kepler-1000,446,
1,Kepler-1699,1288,
2,EPIC 212297394,1725,
3,WASP-138,2381,
4,Kepler-1367,2541,
...,...,...,...
3858,Kepler-759,3147,
3859,BD-13 2130,800,G5 IV/V
3860,WASP-164,3224,G2 V
3861,Kepler-1238,2870,


In [11]:
# Persist stars_df as the `stars` table; the query that builds complete_df2
# joins against this table on host_star_name.
stars_df.to_sql('stars', engine)

948

In [17]:
# Attach star_id to every planet row: complete_data (alias c) is joined to
# stars (alias s) on the shared host_star_name column.
# NOTE(review): SELECT DISTINCT and the GROUP BY list name the same thirteen
# columns, so the two de-duplication steps are redundant with each other.
sel_stmt_cd2 = '''
    SELECT DISTINCT
	    c.planet_id,
	    c.planet_name,
	    c.planet_type,
	    c.host_star_name,
	    s.star_id,
	    c.spectral_type,
	    c.mass_multiplier,
	    c.mass_wrt,
	    c.orbital_period_days,
	    c.radius_wrt,
	    c.radius_multiplier,
	    c.distance,
	    c.discovery_year
    FROM complete_data AS c
    JOIN stars AS s USING (host_star_name)
    GROUP BY c.planet_id,
         c.planet_name,
         c.planet_type,
         c.host_star_name,
         s.star_id,
         c.spectral_type,
         c.mass_multiplier,
         c.mass_wrt,
         c.orbital_period_days,
         c.radius_wrt,
         c.radius_multiplier,
         c.distance,
         c.discovery_year;'''

complete_df2 = pd.read_sql(sel_stmt_cd2, engine)

In [18]:
# Display the joined planet/star frame as the cell output.
complete_df2

Unnamed: 0,planet_id,planet_name,planet_type,host_star_name,star_id,spectral_type,mass_multiplier,mass_wrt,orbital_period_days,radius_wrt,radius_multiplier,distance,discovery_year
0,3362,Kepler-1770 b,Neptune-like,Kepler-1770,164,,8.45,Earth,16.841900,Jupiter,0.253,1902.0,2021
1,3457,Kepler-1691 b,Super Earth,Kepler-1691,1359,,4.02,Earth,3.848200,Earth,1.835,4135.0,2020
2,428,Kepler-1943 b,Super Earth,Kepler-1943,1329,,3.02,Earth,4.850180,Earth,1.549,3277.0,2021
3,3855,Kepler-1000 b,Neptune-like,Kepler-1000,446,,20.30,Earth,120.018127,Jupiter,0.425,3639.0,2016
4,51,Kepler-1043 b,Neptune-like,Kepler-1043,2703,,7.04,Earth,38.505340,Jupiter,0.227,2947.0,2016
...,...,...,...,...,...,...,...,...,...,...,...,...,...
5137,3995,Kepler-680 b,Neptune-like,Kepler-680,1504,,6.44,Earth,3.689926,Jupiter,0.216,4868.0,2016
5138,3718,Kepler-299 b,Super Earth,Kepler-299,3508,,2.30,Earth,2.927128,Earth,1.320,3432.0,2014
5139,4490,K2-384 e,Super Earth,K2-384,1071,M4 V,2.37,Earth,9.715043,Earth,1.345,270.0,2022
5140,3346,Kepler-968 c,Super Earth,Kepler-968,3729,,3.50,Earth,5.709405,Earth,1.690,947.0,2016


In [19]:
# Persist complete_df2 as the `finalshape_fulldata` table.
# NOTE(review): the `stars` table joined to build complete_df2 is itself
# selected from finalshape_fulldata in an earlier cell, so the two tables
# depend on each other; on an empty database the notebook cannot run top to
# bottom — confirm how the first version of either table was created.
complete_df2.to_sql('finalshape_fulldata', engine)

142

In [42]:
# Build the `planets` table: one row per planet_id with its name, type and
# star id, ordered by planet_id.
#
# DISTINCT ON replaces the original `GROUP BY planet_id`: PostgreSQL accepts
# ungrouped columns in the select list only when the grouped column is the
# table's primary key, and a table written by DataFrame.to_sql has none, so
# the GROUP BY form fails on a freshly written finalshape_fulldata.
# When planet_id is unique the result is the same rows in the same order;
# star_id in ORDER BY only makes the kept row deterministic for duplicates.
sel_stmt_pl_types = '''
    SELECT DISTINCT ON (planet_id)
        planet_id,
        planet_name,
        planet_type,
        star_id
    FROM finalshape_fulldata
    ORDER BY planet_id, star_id;'''

pl_types = pd.read_sql(sel_stmt_pl_types, engine)
pl_types.to_sql('planets', engine)

142

In [43]:
# Build the `terrestrial planets` table: one row per planet_id for planets
# whose planet_type is 'Terrestrial', ordered by planet_id.
#
# DISTINCT ON replaces the original `GROUP BY planet_id`: PostgreSQL accepts
# ungrouped columns in the select list only when the grouped column is the
# table's primary key, and a table written by DataFrame.to_sql has none.
# When planet_id is unique the result is the same rows in the same order;
# star_id in ORDER BY only makes the kept row deterministic for duplicates.
sel_stmt_pl_type1 = '''
    SELECT DISTINCT ON (planet_id)
        planet_id,
        planet_name,
        planet_type,
        star_id
    FROM finalshape_fulldata
    WHERE planet_type = 'Terrestrial'
    ORDER BY planet_id, star_id;'''

pl_type1 = pd.read_sql(sel_stmt_pl_type1, engine)
pl_type1.to_sql('terrestrial planets', engine)

193

In [44]:
# Build the `super earth planets` table: one row per planet_id for planets
# whose planet_type is 'Super Earth', ordered by planet_id.
#
# DISTINCT ON replaces the original `GROUP BY planet_id`: PostgreSQL accepts
# ungrouped columns in the select list only when the grouped column is the
# table's primary key, and a table written by DataFrame.to_sql has none.
# When planet_id is unique the result is the same rows in the same order;
# star_id in ORDER BY only makes the kept row deterministic for duplicates.
sel_stmt_pl_type2 = '''
    SELECT DISTINCT ON (planet_id)
        planet_id,
        planet_name,
        planet_type,
        star_id
    FROM finalshape_fulldata
    WHERE planet_type = 'Super Earth'
    ORDER BY planet_id, star_id;'''

pl_type2 = pd.read_sql(sel_stmt_pl_type2, engine)
pl_type2.to_sql('super earth planets', engine)

588

In [45]:
# Build the `gas giant planets` table: one row per planet_id for planets
# whose planet_type is 'Gas Giant', ordered by planet_id.
#
# DISTINCT ON replaces the original `GROUP BY planet_id`: PostgreSQL accepts
# ungrouped columns in the select list only when the grouped column is the
# table's primary key, and a table written by DataFrame.to_sql has none.
# When planet_id is unique the result is the same rows in the same order;
# star_id in ORDER BY only makes the kept row deterministic for duplicates.
sel_stmt_pl_type3 = '''
    SELECT DISTINCT ON (planet_id)
        planet_id,
        planet_name,
        planet_type,
        star_id
    FROM finalshape_fulldata
    WHERE planet_type = 'Gas Giant'
    ORDER BY planet_id, star_id;'''

pl_type3 = pd.read_sql(sel_stmt_pl_type3, engine)
pl_type3.to_sql('gas giant planets', engine)

539

In [46]:
# Build the `neptune-like planets` table: one row per planet_id for planets
# whose planet_type is 'Neptune-like', ordered by planet_id.
#
# DISTINCT ON replaces the original `GROUP BY planet_id`: PostgreSQL accepts
# ungrouped columns in the select list only when the grouped column is the
# table's primary key, and a table written by DataFrame.to_sql has none.
# When planet_id is unique the result is the same rows in the same order;
# star_id in ORDER BY only makes the kept row deterministic for duplicates.
sel_stmt_pl_type4 = '''
    SELECT DISTINCT ON (planet_id)
        planet_id,
        planet_name,
        planet_type,
        star_id
    FROM finalshape_fulldata
    WHERE planet_type = 'Neptune-like'
    ORDER BY planet_id, star_id;'''

pl_type4 = pd.read_sql(sel_stmt_pl_type4, engine)
pl_type4.to_sql('neptune-like planets', engine)

817

In [47]:
# Build the `unknown planets` table: one row per planet_id for planets
# whose planet_type is 'Unknown', ordered by planet_id.
#
# DISTINCT ON replaces the original `GROUP BY planet_id`: PostgreSQL accepts
# ungrouped columns in the select list only when the grouped column is the
# table's primary key, and a table written by DataFrame.to_sql has none.
# When planet_id is unique the result is the same rows in the same order;
# star_id in ORDER BY only makes the kept row deterministic for duplicates.
sel_stmt_pl_type5 = '''
    SELECT DISTINCT ON (planet_id)
        planet_id,
        planet_name,
        planet_type,
        star_id
    FROM finalshape_fulldata
    WHERE planet_type = 'Unknown'
    ORDER BY planet_id, star_id;'''

pl_type5 = pd.read_sql(sel_stmt_pl_type5, engine)
pl_type5.to_sql('unknown planets', engine)

5