In [1]:
import pandas as pd
from getpass import getpass
from sqlalchemy import create_engine, text
from sqlalchemy.orm import Session
import psycopg
import psycopg2

In [3]:
# Erstellen der Verbindung und Datenbank mit SQLAlchemy

pw = getpass('Please enter password: ')
connection_uri = f'postgresql://postgres:{pw}@localhost:5432/postgres'
engine = create_engine(connection_uri, isolation_level='AUTOCOMMIT')

with engine.connect() as conn:
    # Bestehende Datenbank löschen, falls sie existiert
    conn.execute(text("DROP DATABASE IF EXISTS nasa_exoplanets;"))
    # Neue Datenbank erstellen
    conn.execute(text("CREATE DATABASE nasa_exoplanets;"))

# SQLAlchemy-Engine freigeben
engine.dispose()

OperationalError: (psycopg2.errors.ObjectInUse) FEHLER:  auf Datenbank »nasa_exoplanets« wird von anderen Benutzern zugegriffen
DETAIL:  3 andere Sitzungen verwenden die Datenbank.

[SQL: DROP DATABASE IF EXISTS nasa_exoplanets;]
(Background on this error at: https://sqlalche.me/e/14/e3q8)

In [2]:
# Verbindung zu neuer Datenbank herstellen
pw = getpass('Please enter password: ')
connection_url = f'postgresql://postgres:{pw}@localhost:5432/nasa_exoplanets'
engine = create_engine(connection_url)

# Überprüfen der Verbindung
with engine.connect() as conn_alchemy:
    print("SQLAlchemy-Verbindung erfolgreich hergestellt.")

SQLAlchemy-Verbindung erfolgreich hergestellt.


In [3]:
# Verbindung mit psycopg (für Legacy-Kompatibilität)
pw = getpass('Please enter password: ')
with psycopg.connect(
    host='localhost',
    port='5432',
    user='postgres',
    password=pw,
    dbname='nasa_exoplanets',
    autocommit=True
) as connection:
    print("psycopg-Verbindung erfolgreich hergestellt.")

psycopg-Verbindung erfolgreich hergestellt.


In [4]:
old_data = pd.read_csv('cleaned_exoplanet_data.csv')
old_data

Unnamed: 0,name,distance,stellar_magnitude,planet_type,discovery_year,mass_multiplier,mass_wrt,radius_multiplier,radius_wrt,orbital_radius,orbital_period,eccentricity,detection_method
0,11 Comae Berenices b,304.0,4.72307,Gas Giant,2007,19.40000,Jupiter,1.080,Jupiter,1.290000,0.892539,0.23,Radial Velocity
1,11 Ursae Minoris b,409.0,5.01300,Gas Giant,2009,14.74000,Jupiter,1.090,Jupiter,1.530000,1.400000,0.08,Radial Velocity
2,14 Andromedae b,246.0,5.23133,Gas Giant,2008,4.80000,Jupiter,1.150,Jupiter,0.830000,0.508693,0.00,Radial Velocity
3,14 Herculis b,58.0,6.61935,Gas Giant,2002,8.13881,Jupiter,1.120,Jupiter,2.773069,4.800000,0.37,Radial Velocity
4,16 Cygni B b,69.0,6.21500,Gas Giant,1996,1.78000,Jupiter,1.200,Jupiter,1.660000,2.200000,0.68,Radial Velocity
...,...,...,...,...,...,...,...,...,...,...,...,...,...
5245,XO-7 b,764.0,10.52100,Gas Giant,2019,0.70900,Jupiter,1.373,Jupiter,0.044210,0.007940,0.04,Transit
5246,YSES 2 b,357.0,10.88500,Gas Giant,2021,6.30000,Jupiter,1.140,Jupiter,115.000000,1176.500000,0.00,Direct Imaging
5247,YZ Ceti b,12.0,12.07400,Terrestrial,2017,0.70000,Earth,0.913,Earth,0.016340,0.005476,0.06,Radial Velocity
5248,YZ Ceti c,12.0,12.07400,Super Earth,2017,1.14000,Earth,1.050,Earth,0.021560,0.008487,0.00,Radial Velocity


In [6]:
new_data = pd.read_excel('cleaned_stars-planets2024_data.xlsx')
new_data

Unnamed: 0,planet_name,host_star_name,discovery_method,discovery_year,discovery_telescope,orbital_period_days,planet_radius_earth_radius,planet_radius_jupiter_radius,planet_mass_earth_mass,planet_mass_jupiter_mass,spectral_type,stellar_radius_solar_radius,stellar_mass_solar_mass
0,Wolf 1061 c,Wolf 1061,Radial Velocity,2015,3.6 m ESO Telescope,17.871900,166.0,148.0,341.0,1073.0,M3.5,0.31,0.29
1,Wolf 1061 d,Wolf 1061,Radial Velocity,2015,3.6 m ESO Telescope,217.210000,269.0,24.0,77.0,2423.0,M3.5,0.31,0.29
2,Wolf 503 b,Wolf 503,Transit,2018,0.95 m Kepler Telescope,6.001270,2043.0,182.0,627.0,1973.0,K3.5 V,0.69,0.69
3,XO-1 b,XO-1,Transit,2006,Canon 200mm f/1.8L,3.941530,12778.0,114.0,2637989.0,83.0,G1 V,0.88,0.88
4,XO-2 N b,XO-2 N,Transit,2007,Canon 200mm f/1.8L,2.615862,11131.0,993.0,17989178.0,566.0,K0 V,0.99,0.97
...,...,...,...,...,...,...,...,...,...,...,...,...,...
5246,K2-382 b,K2-382,Transit,2022,0.95 m Kepler Telescope,21.700156,2072.0,185.0,495.0,156.0,,0.81,0.92
5247,K2-383 b,K2-383,Transit,2022,0.95 m Kepler Telescope,1.865962,1564.0,14.0,307.0,965.0,,0.80,1.06
5248,K2-384 b,K2-384,Transit,2022,0.95 m Kepler Telescope,2.231527,1076.0,96.0,126.0,398.0,M4 V,0.35,0.33
5249,K2-384 c,K2-384,Transit,2022,0.95 m Kepler Telescope,4.194766,1191.0,106.0,182.0,572.0,M4 V,0.35,0.33


In [15]:
# Verbindung herstellen
engine = create_engine(connection_url)

# Eine Session verwenden, um Abfragen auszuführen
with Session(engine) as session:
    # Löschen der Tabelle, falls sie existiert
    session.execute(text("DROP TABLE IF EXISTS old_data"))

    # Erstellen der neuen Tabellen
    create_old_data_table = """
        CREATE TABLE old_data(
            id SERIAL PRIMARY KEY,
            name VARCHAR,
            distance FLOAT,
            stellar_magnitude FLOAT,
            planet_type VARCHAR,
            discovery_year INT,
            mass_multiplier FLOAT,
            mass_wrt VARCHAR,
            radius_multiplier FLOAT,
            radius_wrt VARCHAR,
            orbital_radius FLOAT,
            orbital_period FLOAT,
            eccentricity FLOAT,
            detection_method VARCHAR);"""

    # Query ausführen
    session.execute(text(create_old_data_table))


# Eine Session verwenden, um Abfragen auszuführen
with Session(engine) as session:
    # Löschen der Tabelle, falls sie existiert
    session.execute(text("DROP TABLE IF EXISTS new_data"))

    # Erstellen der neuen Tabellen
    create_new_data_table = """
        CREATE TABLE new_data(
            planet_name VARCHAR,
            host_star_name VARCHAR,
            discovery_method VARCHAR,
            discovery_year INT,
            discovery_telescope VARCHAR,
            orbital_period_days FLOAT,
            planet_radius_earth_radius FLOAT,
            planet_radius_jupiter_radius FLOAT,
            planet_mass_earth_mass FLOAT,
            planet_mass_jupiter_mass FLOAT,
            spectral_type VARCHAR,
            stellar_radius_solar_radius FLOAT,
            stellar_mass_solar_mass FLOAT);"""

    # Query ausführen
    session.execute(text(create_new_data_table))
	# Änderungen speichern
    session.commit()

In [10]:
new_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5251 entries, 0 to 5250
Data columns (total 13 columns):
 #   Column                        Non-Null Count  Dtype  
---  ------                        --------------  -----  
 0   planet_name                   5251 non-null   object 
 1   host_star_name                5251 non-null   object 
 2   discovery_method              5251 non-null   object 
 3   discovery_year                5251 non-null   int64  
 4   discovery_telescope           5251 non-null   object 
 5   orbital_period_days           5030 non-null   float64
 6   planet_radius_earth_radius    5232 non-null   float64
 7   planet_radius_jupiter_radius  5232 non-null   float64
 8   planet_mass_earth_mass        5225 non-null   float64
 9   planet_mass_jupiter_mass      5225 non-null   float64
 10  spectral_type                 1821 non-null   object 
 11  stellar_radius_solar_radius   5041 non-null   float64
 12  stellar_mass_solar_mass       5247 non-null   float64
dtypes: 

In [14]:
engine.dispose()