<img src="http://openenergy-platform.org/static/OEP_logo_2_no_text.svg" alt="OpenEnergy Platform" height="100" width="100"  align="left"/>
<img src="http://reiner-lemoine-institut.de//wp-content/uploads/2015/09/rlilogo.png" alt="RLI" height="100" width="100" align="right"/>

# OpenEnergyPlatform
<br><br>

# MaStR Analyze
Repository: https://github.com/OpenEnergyPlatform/data-preprocessing/tree/master/data-import/bnetza_mastr

Please report bugs and improvements here: https://github.com/OpenEnergyPlatform/data-preprocessing/issues <br>
How to get started with Jupyter Notebooks can be found here: https://github.com/OpenEnergyPlatform/oeplatform/wiki

In [1]:
__copyright__ = "Reiner Lemoine Institut, Zentrum für nachhaltige Energiesysteme Flensburg"
__license__   = "GNU Affero General Public License Version 3 (AGPL-3.0)"
__url__       = "https://github.com/openego/data_processing/blob/master/LICENSE"
__author__    = "Ludee, oakca"

In [2]:
import pandas as pd

# plot
import matplotlib.pyplot as plt
import matplotlib.ticker as mticker
import plotly.graph_objs as go
import plotly.offline as pltly
# notebook
from IPython.display import Image
from IPython.core.display import HTML 

pltly.init_notebook_mode(connected=True)
%matplotlib inline

## Reading data from a csv files

Pandas has a read_csv function which makes importing a csv-file rather comfortable. It reads csv into a DataFrame. By default, it assumes that the fields are comma-separated. Our example file has columns with semicolons as separators, so we have to specify this when reading the file.

In [3]:
version = '1.3'

fn_wind = f'bnetza_mastr_{version}_wind'
df_wind = pd.read_csv(f'../data/bnetza_mastr_power-units_v{version}/{fn_wind}.csv', encoding='utf8', sep=';')

fn_hydro = f'bnetza_mastr_{version}_hydro'
df_hydro = pd.read_csv(f'../data/bnetza_mastr_power-units_v{version}/{fn_hydro}.csv', encoding='utf8', sep=';')

fn_biomass = f'bnetza_mastr_{version}_biomass'
df_biomass = pd.read_csv(f'../data/bnetza_mastr_power-units_v{version}/{fn_biomass}.csv', encoding='utf8', sep=';')


Columns (3,10,14,21,27,28,31,32,34,38,40,41,50,59,61,66,70,71,72,75,76,82,85,90,95,103,104,110,111) have mixed types. Specify dtype option on import or set low_memory=False.


Columns (10,27,28,31,32,50,85,95,96) have mixed types. Specify dtype option on import or set low_memory=False.



## Data

Looking at the first three lines of our dataframe:

In [4]:
df_wind[:3]

Unnamed: 0,EinheitMastrNummer,id,lid,Name,Einheitart,Einheittyp,Standort,Bruttoleistung,Erzeugungsleistung,EinheitBetriebsstatus,...,VerhaeltnisErtragsschaetzungReferenzertrag,VerhaeltnisReferenzertragErtrag5Jahre,VerhaeltnisReferenzertragErtrag10Jahre,VerhaeltnisReferenzertragErtrag15Jahre,AusschreibungZuschlag,Zuschlagsnummer,AnlageBetriebsstatus,VerknuepfteEinheit,version_e,timestamp_e
0,SME913777866086,0,0,824555,Stromerzeugungseinheit,Windeinheit,Erweiterung WEA8 605 39579 Schinne / Schinne,2300.0,,InBetrieb,...,"OrderedDict([('Wert', None), ('NichtVorhanden'...","OrderedDict([('Wert', None), ('NichtVorhanden'...","OrderedDict([('Wert', None), ('NichtVorhanden'...","OrderedDict([('Wert', None), ('NichtVorhanden'...",False,,InBetrieb,"[OrderedDict([('MaStRNummer', 'SME913777866086...",1.3,2019-04-12 06:01:26.916809
1,SME919223356767,1,1,823658,Stromerzeugungseinheit,Windeinheit,27. Jan 39638 Kassieck / Lindstedt,2300.0,,InBetrieb,...,"OrderedDict([('Wert', None), ('NichtVorhanden'...","OrderedDict([('Wert', None), ('NichtVorhanden'...","OrderedDict([('Wert', None), ('NichtVorhanden'...","OrderedDict([('Wert', None), ('NichtVorhanden'...",False,,InBetrieb,"[OrderedDict([('MaStRNummer', 'SME919223356767...",1.3,2019-04-12 06:01:27.220269
2,SME979806273782,2,2,823528,Stromerzeugungseinheit,Windeinheit,11. Jan 39638 Kassieck / Lindstedt,2300.0,,InBetrieb,...,"OrderedDict([('Wert', None), ('NichtVorhanden'...","OrderedDict([('Wert', None), ('NichtVorhanden'...","OrderedDict([('Wert', None), ('NichtVorhanden'...","OrderedDict([('Wert', None), ('NichtVorhanden'...",False,,InBetrieb,"[OrderedDict([('MaStRNummer', 'SME979806273782...",1.3,2019-04-12 06:01:27.515402


In [5]:
df_hydro[:3]

Unnamed: 0,EinheitMastrNummer,id,lid,Name,Einheitart,Einheittyp,Standort,Bruttoleistung,Erzeugungsleistung,EinheitBetriebsstatus,...,DatumLetzteAktualisierung_e,EegInbetriebnahmedatum,AnlagenschluesselEeg,AnlagenkennzifferAnlagenregister,InstallierteLeistung,AnlageBetriebsstatus,Ertuechtigung,VerknuepfteEinheit,version_e,timestamp_e
0,SME922277414628,250,250,Wasserkraftanlage Kettwig,Stromerzeugungseinheit,Wasser,Zur alten Fähre 32 45219 Essen,1033.0,,InBetrieb,...,,,,,,,,,,
1,SME968170947832,252,252,Schliffgesmühle,Stromerzeugungseinheit,Wasser,Schliffgesmühle 55566 Meddersheim,170.0,,InBetrieb,...,,,,,,,,,,
2,SME947601003972,267,267,Wasserkraftwerk Dreis,Stromerzeugungseinheit,Wasser,Mühlenstraße 22 54518 Dreis,50.0,,InBetrieb,...,,,,,,,,,,


In [6]:
df_biomass[:3]

Unnamed: 0,EinheitMastrNummer,id,lid,Name,Einheitart,Einheittyp,Standort,Bruttoleistung,Erzeugungsleistung,EinheitBetriebsstatus,...,BiogasLeistungserhoehung,BiogasDatumLeistungserhoehung,BiogasUmfangLeistungserhoehung,BiogasGaserzeugungskapazitaet,BiogasHoechstbemessungsleistung,BiomethanErstmaligerEinsatz,AnlageBetriebsstatus,VerknuepfteEinheit,version_e,timestamp_e
0,SME949665310874,20,20,,Stromerzeugungseinheit,Biomasse,Am Dalldorfer Weg 23 825 39397 Gröningen / Grö...,75.0,,InBetrieb,...,,,,"OrderedDict([('Wert', None), ('NichtVorhanden'...",,"OrderedDict([('Wert', None), ('NichtVorhanden'...",InBetrieb,"[OrderedDict([('MaStRNummer', 'SME949665310874...",1.3,2019-04-12 13:09:57.555262
1,SME970786384819,21,21,,Stromerzeugungseinheit,Biomasse,Bellinger Weg 81 39576 Stendal / Dahrenstedt,75.0,,InBetrieb,...,,,,"OrderedDict([('Wert', None), ('NichtVorhanden'...",,"OrderedDict([('Wert', None), ('NichtVorhanden'...",InBetrieb,"[OrderedDict([('MaStRNummer', 'SME970786384819...",1.3,2019-04-12 13:09:58.474807
2,SME968630787428,62,62,,Stromerzeugungseinheit,Biomasse,Lüffingen 9028 L 97 98 39638 Gardelegen / Lüf...,75.0,,InBetrieb,...,,,,"OrderedDict([('Wert', None), ('NichtVorhanden'...",,"OrderedDict([('Wert', None), ('NichtVorhanden'...",InBetrieb,"[OrderedDict([('MaStRNummer', 'SME968630787428...",1.3,2019-04-12 13:09:59.797561


## Wind

In [7]:
# Facts dict
info_dict_wind = {}
info_dict_wind['Filename'] = ['{}_mastr_plot_wind'.format(pd.to_datetime('today').strftime("%Y-%m-%d"))]
info_dict_wind['Einheitart'] = df_wind.loc[:,'Einheitart'].unique()
info_dict_wind['Einheittyp'] = df_wind.loc[:,'Einheittyp'].unique()
info_dict_wind['Title'] = ['MaStR Windeinheit']
info_dict_wind['Anzahl Kraftwerke'] = [len(df_wind)]
info_dict_wind['Min Leistung [KW]'] = [min(df_wind['Bruttoleistung'])]
info_dict_wind['Max Leistung [KW]'] = [max(df_wind['Bruttoleistung'])]
info_dict_wind['Total Leistung [KW]'] = [df_wind['Bruttoleistung'].sum()]
#info_dict_wind['Metadata'] = meta_out

# Print facts
for x in info_dict_wind:
    print('{:20} {}'.format(x, info_dict_wind[x]))

Filename             ['2019-05-15_mastr_plot_wind']
Einheitart           ['Stromerzeugungseinheit']
Einheittyp           ['Windeinheit']
Title                ['MaStR Windeinheit']
Anzahl Kraftwerke    [32924]
Min Leistung [KW]    [0.01]
Max Leistung [KW]    [24000.0]
Total Leistung [KW]  [66631601.463]


In [8]:
# Interactive plot
df_wind = df_wind.sort_values(['Bruttoleistung'], ascending=[False])
data = [go.Scatter(
    #x = df_wind.loc[:,'id'],
    y = df_wind.loc[:,'Bruttoleistung'])]

# Set layout
layout = go.Layout(
    height=400, width = 800,
    #title='CO2-Emissions in EU28',
    title=''.join(info_dict_wind['Title']),
    # yaxis=dict(title='CO2-Emissions in Mt') )
    yaxis=dict(title='Leistung [KW]') )
ifig = go.Figure(data=data, layout=layout)
pltly.iplot(ifig)

In [9]:
# Min/Durchschnitt/Max Leistung
df_wind_min = min(df_wind['Bruttoleistung'])
df_wind_avg = df_wind['Bruttoleistung'].sum() / len(df_wind)
df_wind_max = max(df_wind['Bruttoleistung'])

# Bar Graph
data = [go.Bar(
    x = ['Min Leistung', 'Durchschnitt Leistung', 'Max Leistung'],
    y = [df_wind_min, df_wind_avg, df_wind_max]
         )]

# Set layout
layout = go.Layout(
    height=400, width = 800,
    title='Min/Durchschnitt/Max Leistung',
    yaxis=dict(title='Leistung [KW]') )
ifig = go.Figure(data=data, layout=layout)
pltly.iplot(ifig)

## Hydro

In [10]:
# Facts dict
info_dict_hydro = {}
info_dict_hydro['Filename'] = ['{}_mastr_plot_hydro' .format(pd.to_datetime('today').strftime("%Y-%m-%d"))]
info_dict_hydro['Einheitart'] = df_hydro.loc[:,'Einheitart'].unique()
info_dict_hydro['Einheittyp'] = df_hydro.loc[:,'Einheittyp'].unique()
info_dict_hydro['Title'] = ['MaStR Wassereinheit']
info_dict_hydro['Anzahl Kraftwerke'] = [len(df_hydro)]
info_dict_hydro['Min Leistung [KW]'] = [min(df_hydro['Bruttoleistung'])]
info_dict_hydro['Max Leistung [KW]'] = [max(df_hydro['Bruttoleistung'])]
info_dict_hydro['Total Leistung [KW]'] = [df_hydro['Bruttoleistung'].sum()]
#info_dict_hydro['Metadata'] = meta_out

# Print facts
for x in info_dict_hydro:
    print('{:20} {}'.format(x, info_dict_hydro[x]))

Filename             ['2019-05-15_mastr_plot_hydro']
Einheitart           ['Stromerzeugungseinheit']
Einheittyp           ['Wasser']
Title                ['MaStR Wassereinheit']
Anzahl Kraftwerke    [8160]
Min Leistung [KW]    [0.75]
Max Leistung [KW]    [500000.0]
Total Leistung [KW]  [5415503.85]


In [11]:
# Interactive plot
df_hydro = df_hydro.sort_values(['Bruttoleistung'], ascending=[False])
data = [go.Scatter(
    #x = df_hydro.loc[:,'id'],
    y = df_hydro.loc[:,'Bruttoleistung'])]

# Set layout
layout = go.Layout(
    height=400, width = 800,
    #title='CO2-Emissions in EU28',
    title=''.join(info_dict_hydro['Title']),
    # yaxis=dict(title='CO2-Emissions in Mt') )
    yaxis=dict(title='Leistung [KW]') )
ifig = go.Figure(data=data, layout=layout)
pltly.iplot(ifig)

In [12]:
# Min/Durchschnitt/Max Leistung
df_hydro_min = min(df_hydro['Bruttoleistung'])
df_hydro_avg = df_hydro['Bruttoleistung'].sum() / len(df_hydro)
df_hydro_max = max(df_hydro['Bruttoleistung'])

# Bar Graph
data = [go.Bar(
    x = ['Min Leistung', 'Durchschnitt Leistung', 'Max Leistung'],
    y = [df_hydro_min, df_hydro_avg, df_hydro_max]
         )]

# Set layout
layout = go.Layout(
    height=400, width = 800,
    title='Min/Durchschnitt/Max Leistung',
    yaxis=dict(title='Leistung [KW]') )
ifig = go.Figure(data=data, layout=layout)
pltly.iplot(ifig)

## Biomass

In [13]:
# Facts dict
info_dict_biomass = {}
info_dict_biomass['Filename'] = ['{}_mastr_plot_biomass' .format(pd.to_datetime('today').strftime("%Y-%m-%d"))]
info_dict_biomass['Einheitart'] = df_biomass.loc[:,'Einheitart'].unique()
info_dict_biomass['Einheittyp'] = df_biomass.loc[:,'Einheittyp'].unique()
info_dict_biomass['Title'] = ['MaStR Biomasseeinheit']
info_dict_biomass['Anzahl Kraftwerke'] = [len(df_biomass)]
info_dict_biomass['Min Leistung [KW]'] = [min(df_biomass['Bruttoleistung'])]
info_dict_biomass['Max Leistung [KW]'] = [max(df_biomass['Bruttoleistung'])]
info_dict_biomass['Total Leistung [KW]'] = [df_biomass['Bruttoleistung'].sum()]
#info_dict_biomass['Metadata'] = meta_out

# Print facts
for x in info_dict_biomass:
    print('{:20} {}'.format(x, info_dict_biomass[x]))

Filename             ['2019-05-15_mastr_plot_biomass']
Einheitart           ['Stromerzeugungseinheit']
Einheittyp           ['Biomasse']
Title                ['MaStR Biomasseeinheit']
Anzahl Kraftwerke    [25369]
Min Leistung [KW]    [0.0]
Max Leistung [KW]    [135000.0]
Total Leistung [KW]  [11637249.704]


In [14]:
# Interactive plot
df_biomass = df_biomass.sort_values(['Bruttoleistung'], ascending=[False])
data = [go.Scatter(
    #x = df_biomass.loc[:,'id'],
    y = df_biomass.loc[:,'Bruttoleistung'])]

# Set layout
layout = go.Layout(
    height=400, width = 800,
    #title='CO2-Emissions in EU28',
    title=''.join(info_dict_biomass['Title']),
    # yaxis=dict(title='CO2-Emissions in Mt') )
    yaxis=dict(title='Leistung [KW]') )
ifig = go.Figure(data=data, layout=layout)
pltly.iplot(ifig)

In [15]:
# Min/Durchschnitt/Max Leistung
df_biomass_min = min(df_biomass['Bruttoleistung'])
df_biomass_avg = df_biomass['Bruttoleistung'].sum() / len(df_biomass)
df_biomass_max = max(df_biomass['Bruttoleistung'])

# Bar Graph
data = [go.Bar(
    x = ['Min Leistung', 'Durchschnitt Leistung', 'Max Leistung'],
    y = [df_biomass_min, df_biomass_avg, df_biomass_max]
         )]

# Set layout
layout = go.Layout(
    height=400, width = 800,
    title='Min/Durchschnitt/Max Leistung',
    yaxis=dict(title='Leistung [KW]') )
ifig = go.Figure(data=data, layout=layout)
pltly.iplot(ifig)