In [None]:
from open_mastr import Mastr
import pandas as pd
import geopandas as gpd
import supabase_py
import psycopg2
import sqlalchemy
import os
import numpy as np
from dotenv import load_dotenv
from pandas.api.types import is_datetime64_any_dtype
from datetime import datetime
import time

In [None]:
### Convert a CamelCase identifier to snake_case
def change_case(str):
    """Return `str` with every uppercase character replaced by "_" + its lowercase form.

    Leading underscores of the result are stripped, so "UtmEast" becomes
    "utm_east". Runs of capitals are split letter by letter ("ABC" -> "a_b_c").
    Note: the parameter name shadows the builtin `str` inside this function.
    """
    pieces = []
    for char in str:
        if char.isupper():
            # an uppercase letter starts a new word: prefix it with "_"
            pieces.append('_' + char.lower())
        else:
            pieces.append(char)
    # the first capital produced a leading "_" which is removed here
    return ''.join(pieces).lstrip('_')
    
def dtype_sqltype(str, map_dict):
    """Reverse lookup in `map_dict`.

    Returns the first key whose value compares equal to `str` (in the dict's
    iteration order), or None when no value matches.
    Note: the parameter name shadows the builtin `str` inside this function.
    """
    for sql_type, pandas_type in map_dict.items():
        if pandas_type == str:
            return sql_type
    return None

In [None]:
### Open the local open-mastr download (queried below via sqlite_master)
db = Mastr()

### Inspect the local database
# db.engine is the connection engine handed to pandas below
conn = db.engine
# names of all tables in the local database
tables = pd.read_sql_query(
    sql='SELECT name from sqlite_master where type= "table";',
    con=conn,
)
# the complete wind_extended table and its column names
df_wind = pd.read_sql_table(table_name="wind_extended", con=conn)
columns_wind = df_wind.columns.tolist()

In [None]:
### Connection to the remote supabase-postgres database (goal:100 db)
# The SSL root certificate has been downloaded beforehand; its path and all
# other connection details are set in the .env file.

# Load environment variables from .env file
load_dotenv()

# Get connection parameters from environment variables
dbname = os.getenv("DB_NAME")
user = os.getenv("DB_USER")
password = os.getenv("DB_PASSWORD")
host = os.getenv("DB_HOST")
port = os.getenv("DB_PORT")
ssl_cert_path = os.getenv("SSL_CERT_PATH")

# Fail early with a readable message: an unset variable would otherwise end up
# in the connection string as the literal text "None".
env_values = {
    "DB_NAME": dbname,
    "DB_USER": user,
    "DB_PASSWORD": password,
    "DB_HOST": host,
    "DB_PORT": port,
    "SSL_CERT_PATH": ssl_cert_path,
}
missing_env = [env_name for env_name, env_value in env_values.items() if not env_value]
if missing_env:
    raise RuntimeError(
        "Missing connection settings in the environment/.env file: " + ", ".join(missing_env)
    )

# Construct the connection string. make_dsn() quotes and escapes every value,
# so passwords or paths containing spaces, quotes or backslashes stay intact
# (plain f-string interpolation would produce a broken or altered DSN).
conn_str = psycopg2.extensions.make_dsn(
    dbname=dbname,
    user=user,
    password=password,
    host=host,
    port=port,
    sslmode="require",
    sslrootcert=ssl_cert_path,
)

# Establish the connection object.
# NOTE: this rebinds `conn`; the cell that opens the local open-mastr download
# also assigns `conn` (to the local engine), so run order matters.
conn = psycopg2.connect(conn_str)

In [None]:
### Set schema_name and table_name
schema_name = "public"
table_name = "wind_extended"

### Column names of the local mastr download
columns_wind_om = columns_wind

### Get the column names of the remote table
sql_cols = """SELECT column_name FROM information_schema.columns WHERE table_name = %s
AND table_schema = %s;"""

# The cursor context manager closes the cursor even when the query raises.
with conn.cursor() as conn_cursor:
    # Read-only lookup of the remote table's columns; table and schema name
    # are passed as query parameters rather than formatted into the SQL text.
    conn_cursor.execute(sql_cols, (table_name, schema_name))
    # each fetched row is a 1-tuple (column_name,) -> keep only the name
    columns_wind_rem = [row[0] for row in conn_cursor.fetchall()]


In [None]:
### Columns dropped before the upload, as 01_open-mastr_extract suggests
columns_remove = [
    "Lage",
    "Seelage",
    "ClusterOstsee",
    "ClusterNordsee",
    "Strasse",
    "StrasseNichtGefunden",
    "Hausnummer",
    "HausnummerNichtGefunden",
    "Adresszusatz",
    "NetzbetreiberpruefungStatus",
    "NetzbetreiberpruefungDatum",
    "Wassertiefe",
    "Kuestenentfernung",
    "UtmZonenwert",
    "UtmEast",
    "UtmNorth",
    "GaussKruegerHoch",
    "GaussKruegerRechts",
    "DatenQuelle",
    "DatumDownload",
]
# keep every local column that is not on the removal list (original order preserved)
columns_wind_om2 = [
    name
    for name in columns_wind
    if name not in columns_remove
]

In [None]:
### Which columns of the open_mastr download are not in the remote table?
columns_om_only = [col for col in columns_wind if change_case(col) not in columns_wind_rem]
# With Jupyter's default settings only the LAST expression of a cell is
# rendered, so a bare `columns_om_only` here would show nothing; print it.
print(columns_om_only)

### Is there a column which quickly distinguishes offshore from onshore units?
df_off_on = df_wind[[
    'Lage',
    'Seelage',
    'ClusterOstsee',
    'ClusterNordsee',
    'Wassertiefe',
    'Kuestenentfernung',
    'Bundesland',
    'Landkreis',
]]


In [None]:
# Per-column non-null counts. Author's observation on this output:
# only three entries are without a "Land" / "auf See" entry.
df_off_on.info()

### Seelage is the less ambiguous column; Lage would do as well.
# With Jupyter's default settings a bare expression in the middle of a cell
# is not rendered, so print the distinct values explicitly.
print(df_off_on.Seelage.unique())

# rows whose Seelage is 'Nordsee' or 'Ostsee'
df_off = df_off_on[df_off_on.Seelage.isin(['Nordsee', 'Ostsee'])]

df_off.info()

In [32]:
### SQL ALTER statement: add the missing columns to the remote table
columns_add = ["Lage", "Seelage"]

# pandas dtype of each column to add (same order as columns_add)
pd_types = [df_wind[col].dtype for col in columns_add]

# dictionary mapping the data types: Postgres data type = pandas data type
map_types = {'bool': 'bool',
             'float8': 'float64',
             'date': '<M8[ns]',
             'varchar': 'O'}

### Build one "ADD COLUMN" clause per column (name + data type, no constraints for now).
### The new columns are created empty.
sql_columns = []
for col, pd_type in zip(columns_add, pd_types):
    # CamelCase -> snake_case column name
    name = change_case(col)
    sql_type = dtype_sqltype(pd_type, map_types)
    if sql_type is None:
        # without this check the statement would read "ADD COLUMN <name> None"
        raise ValueError(f"No Postgres type mapped for column {col!r} with dtype {pd_type}")
    # IF NOT EXISTS keeps the cell re-runnable once the columns have been created
    sql_columns.append(f"ADD COLUMN IF NOT EXISTS {name} {sql_type}")

sql_columns = ", \n ".join(sql_columns)

sql_alter = f"""ALTER TABLE {schema_name}.{table_name}
{sql_columns};
"""

conn_cursor = conn.cursor()
try:
    # Add columns
    conn_cursor.execute(sql_alter)
    conn.commit()
except Exception:
    # leave the connection usable: a failed statement otherwise keeps the
    # transaction in an aborted state for all following cells
    conn.rollback()
    raise
finally:
    conn_cursor.close()

In [31]:

### Helper functions for one preprocessed row without geodata:
### generate the SQL column names and the SQL value literals

def _dollar_quote(text):
    """Return `text` as a Postgres dollar-quoted string literal.

    Plain `$$text$$` is used whenever that is safe. If the closing delimiter
    would be matched too early (the text contains "$$" or ends in "$"), a
    tagged delimiter `$q1$`, `$q2$`, ... is chosen so that the delimiter first
    occurs exactly at the end of the text.
    """
    delimiter = "$$"
    attempt = 0
    # the literal ends at the FIRST occurrence of the delimiter after the
    # opening one, so that occurrence must sit right behind the text
    while (text + delimiter).find(delimiter) != len(text):
        attempt += 1
        delimiter = f"$q{attempt}$"
    return f"{delimiter}{text}{delimiter}"


def row_data_to_sql(row_data, columns_data):
    """Turn one row into parallel lists of SQL column names and SQL value literals.

    Parameters
    ----------
    row_data : single-row DataFrame; only the first value of each column is read.
    columns_data : iterable of column names of `row_data` to process.

    Returns
    -------
    (columns_sql, values_sql) : two lists of equal length. Columns whose value
    is missing (`pd.notna` is False) are left out of both lists. Column names
    are converted with `change_case`. Strings are dollar-quoted, datetime64
    values become 'YYYY-MM-DD', everything else is passed through `str()`.

    NOTE(review): the values are embedded into the SQL text as literals; bound
    query parameters would be the more robust way to pass them.
    """
    columns_sql = []
    values_sql = []

    for col in columns_data:
        val = row_data[col].values[0]

        # Only columns that hold a value are emitted
        if pd.notna(val):
            columns_sql.append(change_case(col))
            if isinstance(val, str):
                # quote strings so they survive the later join into one statement
                values_sql.append(_dollar_quote(val))
            elif is_datetime64_any_dtype(val):
                # dates are written as day-resolution strings in single quotes
                val = np.datetime_as_string(val, unit="D")
                values_sql.append(f"'{val}'")
            else:
                # numbers and booleans: cast to str without adding quotes
                values_sql.append(str(val))

    return columns_sql, values_sql

### Helper function to construct the INSERT query for one row
#   from the columns_sql and values_sql lists

def join_insert_sql(columns_sql, values_sql):
    """Build one INSERT statement for the table `schema_name`.`table_name`.

    Parameters
    ----------
    columns_sql : sequence of SQL column names.
    values_sql : sequence of SQL value literals, one per column name.

    Returns
    -------
    str : the INSERT statement, terminated with ";".

    Raises
    ------
    ValueError : if the two sequences differ in length.

    `schema_name` and `table_name` are read from the notebook's global scope.
    INSERT has no WHERE clause in SQL (and the identifiers the former clause
    referred to were never defined), so none is emitted. Changing rows that
    already exist needs an UPDATE ... WHERE statement instead.
    """
    columns_sql = list(columns_sql)
    values_sql = list(values_sql)
    if len(columns_sql) != len(values_sql):
        raise ValueError(
            f"Got {len(columns_sql)} column names but {len(values_sql)} values"
        )

    # join sql-column names and values respectively into a single string
    columns_joined = ", \n ".join(columns_sql)
    values_joined = ", \n ".join(values_sql)

    # Create INSERT-Query for one row
    sql_insert = f"""INSERT INTO {schema_name}.{table_name} (
        {columns_joined} )
    VALUES (
        {values_joined}
        );
    """

    return sql_insert

### Function for all rows
def df_to_sql_insert(df_upload, conn_db):
    """Build one INSERT statement per row of `df_upload` and execute them in one transaction.

    Parameters
    ----------
    df_upload : DataFrame whose columns are all uploaded (no geodata handling).
    conn_db : open DB-API connection; used for the cursor, commit and rollback.

    All statements are sent in a single `execute` call and committed together.
    If anything fails, the error is printed and the transaction is rolled back.
    The connection itself is left open. An empty frame is a no-op.
    """
    ### column names where each name corresponds to one value
    columns_data = df_upload.columns

    ### List to hold all INSERT statements
    inserts_all = []

    for i in range(len(df_upload)):
        # iloc with a list keeps the row as a single-row DataFrame
        row_data = df_upload.iloc[[i], :][columns_data]

        columns_sql, values_sql = row_data_to_sql(row_data, columns_data)
        inserts_all.append(join_insert_sql(columns_sql, values_sql))

    if not inserts_all:
        # nothing to upload; executing an empty statement would only raise
        return

    inserts_all_sql = " \n ".join(inserts_all)

    cur = None
    try:
        # Create a cursor
        cur = conn_db.cursor()

        # Execute all INSERT statements at once
        cur.execute(inserts_all_sql)

        # Commit on the connection that was passed in, not on a global one
        conn_db.commit()

    except Exception as e:
        # Report the problem and undo the partial transaction
        print(f"Error: {e}")
        conn_db.rollback()

    finally:
        # Close the cursor (if one was created); the connection stays open
        if cur is not None:
            cur.close()


