# Extracting and Uploading MaStR Wind Data with open-mastr

In [2]:
from open_mastr import Mastr
import pandas as pd
import geopandas as gpd
import supabase_py
import psycopg2
import sqlalchemy
import os
from dotenv import load_dotenv

In [3]:
# MaStR datasets this notebook works with.
tables_list = [
    "wind",
    "location",
    "permit",
]

# Instantiating Mastr reports the target database (see the output of this cell).
db = Mastr()

# The bulk download is left disabled; uncomment to fetch the tables listed above.
# db.download(
#     method="bulk",
#     data=tables_list,
# )

Data will be written to the following database: sqlite:////home/gunnar/.open-MaStR/data/sqlite/open-mastr.db
If you run into problems, try to delete the database and update the package by running 'pip install --upgrade open-mastr'



In [4]:
### Inspect db
# SQLAlchemy engine of the local open-mastr database.
conn = db.engine

# List all tables of the SQLite database. 'table' is a string literal and must
# be single-quoted: in SQL, double quotes denote identifiers, and SQLite only
# accepts them as strings through a legacy fallback that can be disabled.
tables = pd.read_sql_query("SELECT name FROM sqlite_master WHERE type = 'table';", conn)

# Load the extended wind table and remember its column names for later filtering.
df_wind = pd.read_sql_table("wind_extended", conn)
columns_wind = list(df_wind.columns)

In [None]:
### Subset to Land
# df_wind = df_wind[df_wind.Lage == "Windkraft an Land"]

# A notebook cell only renders its last expression, so the intermediate
# summaries are passed to display() explicitly instead of being discarded.
display(df_wind.Kuestenentfernung.describe())
display(df_wind.Kuestenentfernung.unique())  # Only for Offshore

### Remove columns
# Print the full column list to decide which columns to remove.
print(columns_wind)

In [None]:
### Address columns: very few plants actually have an address
### -> not useful for displaying?
df_address = df_wind[
    [
        "Strasse",
        "StrasseNichtGefunden",
        "Hausnummer",
        "HausnummerNichtGefunden",
        "Adresszusatz",
    ]
]
# info() prints the non-null count per address column.
df_address.info()

In [None]:
### Inspect the geo columns
geo_columns = [
    "Laengengrad",
    "Breitengrad",
    "UtmZonenwert",
    "UtmEast",
    "UtmNorth",
    "GaussKruegerHoch",
    "GaussKruegerRechts",
]
df_geo = df_wind[geo_columns]
# Non-null counts per coordinate column.
df_geo.info()
# Distinct UTM zone values (rendered as the cell result).
df_geo["UtmZonenwert"].unique()
### open-mastr has no coordinates besides WGS84

In [5]:
# Columns dropped from the wind table before it is uploaded.
columns_remove = [
    "Lage",
    "Seelage",
    "ClusterOstsee",
    "ClusterNordsee",
    "Strasse",
    "StrasseNichtGefunden",
    "Hausnummer",
    "HausnummerNichtGefunden",
    "Adresszusatz",
    "NetzbetreiberpruefungStatus",
    "NetzbetreiberpruefungDatum",
    "Wassertiefe",
    "Kuestenentfernung",
    "UtmZonenwert",
    "UtmEast",
    "UtmNorth",
    "GaussKruegerHoch",
    "GaussKruegerRechts",
]

# Keep every known column that is not on the removal list, preserving order,
# then refresh the column listing from the reduced frame.
df_wind = df_wind[[col for col in columns_wind if col not in columns_remove]]
columns_wind = df_wind.columns

In [6]:
### function to turn CamelCase into snake_case
def change_case(str):
    """Convert a CamelCase identifier to snake_case.

    Every uppercase character is replaced by an underscore followed by its
    lowercase form; all other characters are copied unchanged. Leading
    underscores of the result (including the one produced by an initial
    capital) are stripped.

    Args:
        str: The identifier to convert.

    Returns:
        The converted, lower-cased identifier.
    """
    pieces = []
    for char in str:
        if char.isupper():
            pieces.append('_' + char.lower())
        else:
            pieces.append(char)
    return ''.join(pieces).lstrip('_')
    
def dtype_sqltype(str, map_dict):
    """Look up the key of ``map_dict`` whose value equals ``str``.

    Args:
        str: The value to search for (called in this notebook with a pandas
            column dtype).
        map_dict: Mapping that is searched by value, in insertion order.

    Returns:
        The first key whose value compares equal to ``str``, or ``None`` when
        no value matches.
    """
    for sql_type, pandas_type in map_dict.items():
        if pandas_type == str:
            return sql_type
    return None


In [7]:
### SQL Create Statement
pd_types = [df_wind[col].dtype for col in columns_wind]

# dictionary mapping the data types: Postgres data type -> pandas data type
map_types = {'bool': 'bool', 
                 'float8': 'float64', 
                 'date' : '<M8[ns]', 
                 'varchar':'O'}

# Build one "column_name pgsql-type" definition per column; constraints are
# left out for now. The geo columns are skipped because they are represented
# by the geom column of the CREATE statement below.
sql_column_defs = []
unmapped_columns = []
for col in columns_wind:
    if col in ('Laengengrad', 'Breitengrad'):
        continue
    # to lowercase, underscore at uppercase
    name = change_case(col)
    sql_type = dtype_sqltype(df_wind[col].dtype, map_types)
    if sql_type is None:
        # dtype_sqltype returns None for dtypes missing from map_types; that
        # would otherwise end up as the literal text "None" in the DDL.
        unmapped_columns.append(f"{col} ({df_wind[col].dtype})")
        continue
    sql_column_defs.append(f"{name} {sql_type}")

if unmapped_columns:
    raise ValueError(
        "No Postgres type mapped for column(s): " + ", ".join(unmapped_columns)
    )

sql_columns = ", \n".join(sql_column_defs)

# leave out geo-columns -> added later on
# add primary key
sql_create = f"""
CREATE TABLE public.wind_extended (
id bigint generated by default as identity primary key,
{sql_columns},
geom geometry(point, 4326) 
);""" 
    

In [11]:
### Connect to the database
# Uses a downloaded certificate; connection details are set in .env

# Load environment variables from .env file
load_dotenv()

# Get connection parameters from environment variables
dbname = os.getenv("DB_NAME")
user = os.getenv("DB_USER")
password = os.getenv("DB_PASSWORD")
host = os.getenv("DB_HOST")
port = os.getenv("DB_PORT")
ssl_cert_path = os.getenv("SSL_CERT_PATH")

# Construct the connection string
conn_str = f"dbname={dbname} user={user} password={password} host={host} port={port} sslmode=require sslrootcert={ssl_cert_path}"

# Start from None so the cleanup below never acts on a stale `conn` object
# left over from an earlier cell when connecting fails.
conn = None

try:
    # Establish the connection object
    conn = psycopg2.connect(conn_str)
    print("Connection to the database successful!")

    # The cursor context manager closes the cursor when the block exits.
    with conn.cursor() as conn_cursor:
        conn_cursor.execute(sql_create)

    # psycopg2 runs statements inside a transaction; without an explicit
    # commit the CREATE TABLE is discarded when the connection is closed.
    conn.commit()

except psycopg2.Error as e:
    if conn is None:
        print("Unable to connect to the database.")
    else:
        # Connected, but executing or committing the statement failed.
        print("Creating the table failed.")
    print(e)

finally:
    # Close the connection when done
    if conn is not None:
        conn.close()
        print("Connection closed.")

Connection to the database successful!
Connection closed.


In [13]:
# print(sql_create)
# Show the connection object; its repr includes the DSN and the closed flag
# (see the cell output), so review the output before sharing the notebook.
conn

<connection object at 0x7fd1f334b100; dsn: 'user=postgres password=xxx dbname=postgres host=db.cnpmlxzpwbfkunigsvsg.supabase.co port=5432', closed: 1>