# Extracting and Uploading MaStR - Winddata with open-mastr

In [None]:
from open_mastr import Mastr
import pandas as pd
import geopandas as gpd
import supabase_py
import psycopg2
import sqlalchemy
import os
from dotenv import load_dotenv

In [None]:
tables_list = ["wind", "location", "permit"]
db = Mastr()
#db.download(method = "bulk",
#            data=tables_list)

In [None]:
### Inspect db
conn = db.engine # Connection engine
tables = pd.read_sql_query('SELECT name from sqlite_master where type= "table";', conn)
df_wind = pd.read_sql_table("wind_extended", conn)
columns_wind = list(df_wind.columns)
del conn

In [None]:
### Subset to Land
# df_wind = df_wind[df_wind.Lage == "Windkraft an Land"]
df_wind.Kuestenentfernung.describe() 
df_wind.Kuestenentfernung.unique()  # Only for Offshore
### Remove columns
print(columns_wind)

In [None]:
### Very few plants actually have an address -> Not useful for displaying?
df_address = df_wind[["Strasse", "StrasseNichtGefunden",
       "Hausnummer", "HausnummerNichtGefunden", "Adresszusatz"]]
df_address.info()

In [None]:
### iNSPECT GEO-columns
geo_columns = [ 'Laengengrad',
 'Breitengrad',
 'UtmZonenwert',
 'UtmEast',
 'UtmNorth',
 'GaussKruegerHoch',
 'GaussKruegerRechts']
df_geo = df_wind[geo_columns]
df_geo.info()
df_geo.UtmZonenwert.unique()
### open-mastr has no coordinates beside wgs84

In [None]:
columns_remove = ["Lage",	"Seelage",	"ClusterOstsee", "ClusterNordsee", "Strasse", "StrasseNichtGefunden",
       "Hausnummer", "HausnummerNichtGefunden", "Adresszusatz", "NetzbetreiberpruefungStatus", 
       "NetzbetreiberpruefungDatum", "Wassertiefe", "Kuestenentfernung", "UtmZonenwert","UtmEast", "UtmNorth",
 "GaussKruegerHoch", "GaussKruegerRechts", "DatenQuelle", "DatumDownload"]
columns_wind = [col for col in columns_wind if col not in columns_remove]
df_wind = df_wind[columns_wind]
columns_wind = df_wind.columns

In [None]:
### function to turn CamelCase to snake_cas
def change_case(str):
    # List comprehension, starts with an _ wich is removed by lstrip("_")
    # loops through word, if upper, _ first "_"+
    # and i.lower() as a string method
    # just return i else
    return ''.join(['_'+i.lower() if i.isupper() 
               else i for i in str]).lstrip('_')
    
def dtype_sqltype(str, map_dict):
    
    # next() jumps through the iterator until a match is found
    # with a an iterator generated by the comprehension inside ()
    return next((key for key, val in map_dict.items() if val == str), None)


In [None]:
### SQL Create Statement
pd_types = [df_wind[col].dtype for col in columns_wind]

# dictionary mapping the data types: Postgres Data type = pandas data type
map_types = {'bool': 'bool', 
                 'float8': 'float64', 
                 'date' : '<M8[ns]', 
                 'varchar':'O'}

sql_columns = []
for col in columns_wind:
    if col not in ['Laengengrad','Breitengrad']:
    # SQL create column statement for this column: 'column_name pgsql-type,'. Leave out constraints for now
    # to lowercase, underscore at uppercase
        name = change_case(col)
        sql_type = dtype_sqltype(df_wind[col].dtype, map_types)
        sql_columns.append(f"{name} {sql_type}")

sql_columns = ", \n ".join(sql_columns)

# leave out geo-columns -> added later on
# add primary key
sql_create = f"""
CREATE TABLE public.wind_extended (
id bigint generated by default as identity primary key,
{sql_columns},
geom geometry(point, 4326) 
);""" 
    

In [None]:
### Connect to the database
# downloaded certiticate
# Set connection details in .env

# Load environment variables from .env file
load_dotenv()

# Get connection parameters from environment variables
dbname = os.getenv("DB_NAME")
user = os.getenv("DB_USER")
password = os.getenv("DB_PASSWORD")
host = os.getenv("DB_HOST")
port = os.getenv("DB_PORT")
ssl_cert_path = os.getenv("SSL_CERT_PATH")

# Construct the connection string
conn_str = f"dbname={dbname} user={user} password={password} host={host} port={port} sslmode=require sslrootcert={ssl_cert_path}"

# Etablish connection object

#conn.close()
conn = psycopg2.connect(conn_str)
print(conn)
print("Connection to the database successful!")
conn_cursor = conn.cursor()
# conn_cursor.execute(sql_create)
# conn.commit()


# conn.close()

In [None]:
### Selection works fine, as it does necessarily need conn.commit()
sql_test = "SELECT column_name FROM information_schema.columns WHERE table_schema = 'public' AND table_name = 'wind_extended';"

conn.close()
conn = psycopg2.connect(conn_str)
print(conn)
print("Connection to the database successful!")
conn_cursor = conn.cursor()
conn_cursor.execute(sql_test)
results = conn_cursor.fetchall()
print(results)

conn.close()

In [35]:
### Function to generate Insert-Statement based on one row
### How to Insert: Bool as unquoted True/False 
### Date - as 'yyyy-mm-dd' - already in the right format
geo_columns = ['Laengengrad', 'Breitengrad'] # corresponds to the one column 'geom' in DB

### Loop through columns
### How should the geo-insert look like
columns_wind = df_wind.columns

columns_data = [col for col in columns_wind if col not in geo_columns]
df_wind_data = df_wind[columns_data]
df_wind_coords = df_wind[geo_columns]

row_data = df_wind_data.iloc[[0],:]
row_geo = df_wind_coords.iloc[[0],:]




In [36]:
columns_sql = []
values_sql = []
for col in columns_data:
    if row_data[col].values[0] is not None:
        columns_sql.append(change_case(col))
        values_sql.append(row_data[col].values[0])

lat = geo_columns[0]
lon = geo_columns[1]
### Append geom and its values to corresponding lists
if row_geo[lon].values[0] is not None and row_geo[lat].values[0] is not None:
    columns_sql.append('geom')
    wkt_point = f"POINT({row_geo[lon].values[0]} {row_geo[lat].values[0]})"
    val_geo = f"ST_GeomFromText({wkt_point},4326)"
    values_sql.append(val_geo)

In [37]:
values_sql
### Datetime -> to date simplify

['Windpark Kreuzstein',
 'ENERCON GmbH',
 'Horizontalläufer',
 'E-115',
 149.08,
 115.71,
 True,
 True,
 True,
 False,
 False,
 True,
 True,
 False,
 'EEG951718125489',
 numpy.datetime64('2019-02-13T00:00:00.000000000'),
 'SEE940146675093',
 numpy.datetime64('2022-11-17T15:19:47.568901000'),
 'SEL911577226093',
 'ABR930129817008',
 'Deutschland',
 'Hessen',
 'Werra-Meißner-Kreis',
 'Gutsbezirk Kaufunger Wald',
 '06636200',
 '34298',
 'Großalmerode',
 'Flur 51: Flurstück 28',
 'Helsa',
 numpy.datetime64('NaT'),
 numpy.datetime64('NaT'),
 numpy.datetime64('2017-09-01T00:00:00.000000000'),
 numpy.datetime64('NaT'),
 numpy.datetime64('NaT'),
 numpy.datetime64('NaT'),
 numpy.datetime64('NaT'),
 'Aktiviert',
 'In Betrieb',
 False,
 numpy.datetime64('NaT'),
 numpy.datetime64('NaT'),
 'WEA 5',
 'Wind',
 3000.0,
 3000.0,
 True,
 True,
 True,
 'Volleinspeisung',
 'SGE961876398816',
 False,
 False,
 False,
 'bulk',
 numpy.datetime64('2023-11-09T00:00:00.000000000'),
 'ST_GeomFromText(POINT(51.270