In [6]:
import pandas as pd
import altair as alt

In [28]:
windTurbine = pd.read_csv("data/opendata_wka_ib_gv_vb_sh_k_20231011.csv", sep=";")

In [58]:
windTurbine.head()

Unnamed: 0,KREIS,GEMEINDE,TYP,HERSTELLER,NABENHOEHE,ROTORDURCHMESSER,SCHALLLEISTUNGSPEGEL,LEISTUNG,LEISTUNGSBEZUG,OSTWERT,...,GENEHMIGT_AM,INBETRIEBNAHME,STATUS,BST_NR,ANL_NR,AKTENZEICHEN,DATENDATUM,DATENQUELLE,Longitude,Latitude
0,Kiel,Kiel,"Nordex N131 - 3,0 MW",Nordex,134.0,131.0,,3000,kW Nennleist. Rotor.,571605,...,,,im Gen.Verf.,2000000317,1,G20/2016/085,10.10.2023,"LfU SH, Abtl. 7",10.099418,54.265863
1,Kiel,Kiel,"Nordex N131 - 3,0 MW",Nordex,134.0,131.0,,3000,kW Nennleist. Rotor.,572117,...,,,im Gen.Verf.,2000000317,2,G20/2016/086,10.10.2023,"LfU SH, Abtl. 7",10.107237,54.264263
2,Kiel,Kiel,"Nordex N131 - 3,0 MW",Nordex,134.0,131.0,,3000,kW Nennleist. Rotor.,572497,...,,,im Gen.Verf.,2000000317,3,G20/2016/087,10.10.2023,"LfU SH, Abtl. 7",10.113098,54.265261
3,Lübeck,Lübeck,Enercon E 82 E,Enercon,78.0,82.0,"104,5 dB(A)",2300,kW Nennleist. Rotor.,619817,...,01.04.2011,25.09.2012,in Betrieb,3000924205,1,G30/031/2010,10.10.2023,"LfU SH, Abtl. 7",10.825546,53.944364
4,Lübeck,Lübeck,Enercon E-82 E,Enercon,85.0,82.0,1045,2300,kW Nennleist. Rotor.,619815,...,01.04.2011,25.09.2012,in Betrieb,3000924205,2,G30/030/2010,10.10.2023,"LfU SH, Abtl. 7",10.82517,53.93645


# Data Cleaning and preperation

In [29]:
# data cleanup
# convertig the NABENHOEHE, ROTORDURCHMESSER from string to float and replace the ',' with '.'
windTurbine["NABENHOEHE"] = windTurbine["NABENHOEHE"].str.replace(",", ".").astype(float)
windTurbine["ROTORDURCHMESSER"] = windTurbine["ROTORDURCHMESSER"].str.replace(",", ".").astype(float)

In [56]:
# converting the used coordinates into lat/lon using pyproj
import pyproj

def convert_coordinates(df, easting_col, northing_col):
    # Define the projections
    utm_proj = pyproj.Proj(proj='utm', zone=32, ellps='WGS84')  # UTM coordinates (assuming zone 32)
    latlong_proj = pyproj.Proj(proj='latlong', datum='WGS84')  # LatLon with WGS84 datum used by GPS units and Google Earth

    # Apply the conversion to each row in the dataframe
    df['Longitude'], df['Latitude'] = pyproj.transform(utm_proj, latlong_proj, df[easting_col].values, df[northing_col].values)

    return df

# Convert the coordinates
windTurbine = convert_coordinates(windTurbine, 'OSTWERT', 'NORDWERT')



  df['Longitude'], df['Latitude'] = pyproj.transform(utm_proj, latlong_proj, df[easting_col].values, df[northing_col].values)


In [57]:
chart = alt.Chart(windTurbine).mark_circle(aspect=True).encode(
    x='NABENHOEHE',
    y='LEISTUNG',
    size='ROTORDURCHMESSER',
    tooltip=['NABENHOEHE', 'ROTORDURCHMESSER', 'LEISTUNG']
).properties(
    width=400 ,
    height=400
).interactive()

chart


In [60]:
import altair as alt

# Load a map of Germany
germany_map = alt.topo_feature('https://github.com/isellsoap/deutschlandGeoJSON/blob/main/1_deutschland/1_sehr_hoch.geo.json', 'features')

# Create a chart object
chart = alt.Chart(germany_map).mark_geoshape(
    fill='lightgray',
    stroke='white'
).project('identity').properties(
    width=500,
    height=300
)

# Create a points object
points = alt.Chart(windTurbine).mark_circle().encode(
    longitude='Longitude:Q',
    latitude='Latitude:Q',
    tooltip=['Latitude', 'Longitude']
).properties(
    title='Wind Turbines in Schleswig-Holstein, Germany'
)

# Display the map
chart + points



In [62]:
import folium

# Create a map centered around Schleswig-Holstein
m = folium.Map(location=[54.2194, 9.6961], zoom_start=7)

# Add points for each wind turbine
for index, row in windTurbine.iterrows():
    folium.Marker([row['Latitude'], row['Longitude']]).add_to(m)

# Display the map
m


In [64]:
# print all possible values for SCHALLLEISTUNG as json
import json
print(json.dumps(windTurbine["SCHALLLEISTUNGSPEGEL"].unique().tolist()))


[NaN, "104,5 dB(A)", "104,5", "98,3", "HZ /dB(A) | 63/81,2 | 125/87 | 250/92,2 | 500/96,2 | 1000/97 | 2000/94,9 | 4000/87,4 | 8000/ | SLP=101,8 dB(A); 108,5 dB(A)", "106,5 dB(A)/106,5 dB(A)", "106,5 dB(A)/104,8 dB(A)", "105,6 dB(A)/105,6 dB(A)", "NUR TAGBETRIEB MIT: HZ /dB(A) | 63/86,2 | 125/95 | 250/99,3 | 500/102,5 | 1000/101,2 | 2000/99,2 | 4000/94,9 | 8000/ | SLP=107,4 dB(A)", "105,6dB(A)/105,6 dB(A)", "103", "106,0 dB(A), 104,4dB(A)", "106,0 dB(A), 106,0 dB(A)", "106dB(A), 106dB(A)", "106,0 dB(A), 104,0 dB(A) ", "105,2 dB(A), 103,7dB(A)", "105,2 dB(A), 105,2dB(A)", "105,2 dB(A)/105,2 dB(A)", "105,2 dB(A), 104,3dB(A)", "105,2 dB(A), 104,7dB(A)", "105,2 dB(A), 103,3dB(A)", "105,2 dB(A), 104,2 dB(A)", "105,2 dB(A), 104,0dB(A)", "105,2 dB(A), 105,2 dB(A)", "105,6 dB(A), 100 dB(A)", "104 dB(A)tags; 103 dB(A) nachts", "104dB(A) tags; 103,8dB(A) nachts ", "104dB(A) tags; 103,8 dB(A) nachts", "104dB(A) tags; 102,5dB(A) nachts", "104dB(A) tags; 99,5dB(A) nachts", "104dB(A) tags; 103,4dB(A)

In [65]:
import pandas as pd
import numpy as np
import re

# Assuming windTurbine is your DataFrame and "SCHALLLEISTUNGSPEGEL" is your column
unique_values = windTurbine["SCHALLLEISTUNGSPEGEL"].unique().tolist()

def split_hz_db(value):
    if pd.isnull(value):
        return np.nan, np.nan
    else:
        hz_db_pairs = re.findall(r'(\d+)/(\d+,\d+)', value)
        hz_values = [pair[0] for pair in hz_db_pairs]
        db_values = [pair[1] for pair in hz_db_pairs]
        return hz_values, db_values

windTurbine['hz_values'], windTurbine['db_values'] = zip(*windTurbine['SCHALLLEISTUNGSPEGEL'].map(split_hz_db))


0       NaN
1       NaN
2       NaN
3        []
4        []
       ... 
3939     []
3940     []
3941     []
3942     []
3943     []
Name: hz_values, Length: 3944, dtype: object