<img src="http://openenergy-platform.org/static/OEP_logo_2_no_text.svg" alt="OpenEnergy Platform" height="100" width="100"  align="left"/>
<img src="http://reiner-lemoine-institut.de//wp-content/uploads/2015/09/rlilogo.png" alt="RLI" height="100" width="100" align="right"/>

# OpenEnergyPlatform
<br><br>

# MaStR Analyze
Repository: https://github.com/OpenEnergyPlatform/data-preprocessing/tree/master/data-import/bnetza_mastr

Please report bugs and improvements here: https://github.com/OpenEnergyPlatform/data-preprocessing/issues <br>
How to get started with Jupyter Notebooks can be found here: https://github.com/OpenEnergyPlatform/oeplatform/wiki

In [1]:
__copyright__ = "Reiner Lemoine Institut, Zentrum für nachhaltige Energiesysteme Flensburg"
__license__   = "GNU Affero General Public License Version 3 (AGPL-3.0)"
__url__       = "https://github.com/openego/data_processing/blob/master/LICENSE"
__author__    = "Ludee"

In [2]:
import pandas as pd
import getpass
import sqlalchemy as sa
from sqlalchemy.orm import sessionmaker

# plot
import matplotlib.pyplot as plt
import matplotlib.ticker as mticker
import plotly.graph_objs as go
import plotly.offline as pltly
import colorlover as cl
import seaborn as sns
# notebook
from IPython.display import Image
from IPython.core.display import HTML 

pltly.init_notebook_mode(connected=True)
%matplotlib inline

## Reading data from a csv files

Pandas has a read_csv function which makes importing a csv-file rather comfortable. It reads csv into a DataFrame. By default, it assumes that the fields are comma-separated. Our example file has columns with semicolons as separators, so we have to specify this when reading the file.

In [3]:
version = '1.3'

fn_wind = f'bnetza_mastr_{version}_wind'
df_wind = pd.read_csv(f'../data/bnetza_mastr_power-units_v{version}/{fn_wind}.csv', encoding='utf8', sep=';')

fn_hydro = f'bnetza_mastr_{version}_hydro'
df_hydro = pd.read_csv(f'../data/bnetza_mastr_power-units_v{version}/{fn_wind}.csv', encoding='utf8', sep=';')

fn_biomass = f'bnetza_mastr_{version}_biomass'
df_biomass = pd.read_csv(f'../data/bnetza_mastr_power-units_v{version}/{fn_wind}.csv', encoding='utf8', sep=';')


Columns (3,10,14,21,27,28,31,32,34,38,40,41,50,59,61,66,70,71,72,75,76,82,85,90,95,103,104,110,111) have mixed types. Specify dtype option on import or set low_memory=False.



Looking at the first three lines of our dataframe:

In [4]:
df_wind[:3]

Unnamed: 0,EinheitMastrNummer,id,lid,Name,Einheitart,Einheittyp,Standort,Bruttoleistung,Erzeugungsleistung,EinheitBetriebsstatus,...,VerhaeltnisErtragsschaetzungReferenzertrag,VerhaeltnisReferenzertragErtrag5Jahre,VerhaeltnisReferenzertragErtrag10Jahre,VerhaeltnisReferenzertragErtrag15Jahre,AusschreibungZuschlag,Zuschlagsnummer,AnlageBetriebsstatus,VerknuepfteEinheit,version_e,timestamp_e
0,SME913777866086,0,0,824555,Stromerzeugungseinheit,Windeinheit,Erweiterung WEA8 605 39579 Schinne / Schinne,2300.0,,InBetrieb,...,"OrderedDict([('Wert', None), ('NichtVorhanden'...","OrderedDict([('Wert', None), ('NichtVorhanden'...","OrderedDict([('Wert', None), ('NichtVorhanden'...","OrderedDict([('Wert', None), ('NichtVorhanden'...",False,,InBetrieb,"[OrderedDict([('MaStRNummer', 'SME913777866086...",1.3,2019-04-12 06:01:26.916809
1,SME919223356767,1,1,823658,Stromerzeugungseinheit,Windeinheit,27. Jan 39638 Kassieck / Lindstedt,2300.0,,InBetrieb,...,"OrderedDict([('Wert', None), ('NichtVorhanden'...","OrderedDict([('Wert', None), ('NichtVorhanden'...","OrderedDict([('Wert', None), ('NichtVorhanden'...","OrderedDict([('Wert', None), ('NichtVorhanden'...",False,,InBetrieb,"[OrderedDict([('MaStRNummer', 'SME919223356767...",1.3,2019-04-12 06:01:27.220269
2,SME979806273782,2,2,823528,Stromerzeugungseinheit,Windeinheit,11. Jan 39638 Kassieck / Lindstedt,2300.0,,InBetrieb,...,"OrderedDict([('Wert', None), ('NichtVorhanden'...","OrderedDict([('Wert', None), ('NichtVorhanden'...","OrderedDict([('Wert', None), ('NichtVorhanden'...","OrderedDict([('Wert', None), ('NichtVorhanden'...",False,,InBetrieb,"[OrderedDict([('MaStRNummer', 'SME979806273782...",1.3,2019-04-12 06:01:27.515402


In [13]:
df_hydro[:3]

Unnamed: 0,EinheitMastrNummer,id,lid,Name,Einheitart,Einheittyp,Standort,Bruttoleistung,Erzeugungsleistung,EinheitBetriebsstatus,...,VerhaeltnisErtragsschaetzungReferenzertrag,VerhaeltnisReferenzertragErtrag5Jahre,VerhaeltnisReferenzertragErtrag10Jahre,VerhaeltnisReferenzertragErtrag15Jahre,AusschreibungZuschlag,Zuschlagsnummer,AnlageBetriebsstatus,VerknuepfteEinheit,version_e,timestamp_e
0,SME913777866086,0,0,824555,Stromerzeugungseinheit,Windeinheit,Erweiterung WEA8 605 39579 Schinne / Schinne,2300.0,,InBetrieb,...,"OrderedDict([('Wert', None), ('NichtVorhanden'...","OrderedDict([('Wert', None), ('NichtVorhanden'...","OrderedDict([('Wert', None), ('NichtVorhanden'...","OrderedDict([('Wert', None), ('NichtVorhanden'...",False,,InBetrieb,"[OrderedDict([('MaStRNummer', 'SME913777866086...",1.3,2019-04-12 06:01:26.916809
1,SME919223356767,1,1,823658,Stromerzeugungseinheit,Windeinheit,27. Jan 39638 Kassieck / Lindstedt,2300.0,,InBetrieb,...,"OrderedDict([('Wert', None), ('NichtVorhanden'...","OrderedDict([('Wert', None), ('NichtVorhanden'...","OrderedDict([('Wert', None), ('NichtVorhanden'...","OrderedDict([('Wert', None), ('NichtVorhanden'...",False,,InBetrieb,"[OrderedDict([('MaStRNummer', 'SME919223356767...",1.3,2019-04-12 06:01:27.220269
2,SME979806273782,2,2,823528,Stromerzeugungseinheit,Windeinheit,11. Jan 39638 Kassieck / Lindstedt,2300.0,,InBetrieb,...,"OrderedDict([('Wert', None), ('NichtVorhanden'...","OrderedDict([('Wert', None), ('NichtVorhanden'...","OrderedDict([('Wert', None), ('NichtVorhanden'...","OrderedDict([('Wert', None), ('NichtVorhanden'...",False,,InBetrieb,"[OrderedDict([('MaStRNummer', 'SME979806273782...",1.3,2019-04-12 06:01:27.515402


In [14]:
df_biomass[:3]

Unnamed: 0,EinheitMastrNummer,id,lid,Name,Einheitart,Einheittyp,Standort,Bruttoleistung,Erzeugungsleistung,EinheitBetriebsstatus,...,VerhaeltnisErtragsschaetzungReferenzertrag,VerhaeltnisReferenzertragErtrag5Jahre,VerhaeltnisReferenzertragErtrag10Jahre,VerhaeltnisReferenzertragErtrag15Jahre,AusschreibungZuschlag,Zuschlagsnummer,AnlageBetriebsstatus,VerknuepfteEinheit,version_e,timestamp_e
0,SME913777866086,0,0,824555,Stromerzeugungseinheit,Windeinheit,Erweiterung WEA8 605 39579 Schinne / Schinne,2300.0,,InBetrieb,...,"OrderedDict([('Wert', None), ('NichtVorhanden'...","OrderedDict([('Wert', None), ('NichtVorhanden'...","OrderedDict([('Wert', None), ('NichtVorhanden'...","OrderedDict([('Wert', None), ('NichtVorhanden'...",False,,InBetrieb,"[OrderedDict([('MaStRNummer', 'SME913777866086...",1.3,2019-04-12 06:01:26.916809
1,SME919223356767,1,1,823658,Stromerzeugungseinheit,Windeinheit,27. Jan 39638 Kassieck / Lindstedt,2300.0,,InBetrieb,...,"OrderedDict([('Wert', None), ('NichtVorhanden'...","OrderedDict([('Wert', None), ('NichtVorhanden'...","OrderedDict([('Wert', None), ('NichtVorhanden'...","OrderedDict([('Wert', None), ('NichtVorhanden'...",False,,InBetrieb,"[OrderedDict([('MaStRNummer', 'SME919223356767...",1.3,2019-04-12 06:01:27.220269
2,SME979806273782,2,2,823528,Stromerzeugungseinheit,Windeinheit,11. Jan 39638 Kassieck / Lindstedt,2300.0,,InBetrieb,...,"OrderedDict([('Wert', None), ('NichtVorhanden'...","OrderedDict([('Wert', None), ('NichtVorhanden'...","OrderedDict([('Wert', None), ('NichtVorhanden'...","OrderedDict([('Wert', None), ('NichtVorhanden'...",False,,InBetrieb,"[OrderedDict([('MaStRNummer', 'SME979806273782...",1.3,2019-04-12 06:01:27.515402


In [5]:
# Facts dict
info_dict_wind = {}
info_dict_wind['Filename'] = ['{}_mastr_plot_wind' .format(pd.to_datetime('today').strftime("%Y-%m-%d"))]
info_dict_wind['Einheitart'] = df_wind.loc[:,'Einheitart'].unique()
info_dict_wind['Einheittyp'] = df_wind.loc[:,'Einheittyp'].unique()
info_dict_wind['Title'] = ['MaStR Windeinheit']
#info_dict_wind['Metadata'] = meta_out

# Print facts
for x in info_dict_wind:
    print(x,':',info_dict_wind[x])

Filename : ['2019-05-03_mastr_plot_wind']
Einheitart : ['Stromerzeugungseinheit']
Einheittyp : ['Windeinheit']
Title : ['MaStR Windeinheit']


In [12]:
# Interactive plot
df_wind = df_wind.sort_values(['Bruttoleistung'], ascending=[False])
data = [go.Scatter(
    #x = df_wind.loc[:,'id'],
    y = df_wind.loc[:,'Bruttoleistung'])]

# Set layout
layout = go.Layout(
    height=400, width = 800,
    #title='CO2-Emissions in EU28',
    title=''.join(info_dict_wind['Title']),
    # yaxis=dict(title='CO2-Emissions in Mt') )
    yaxis=dict(title='Leistung') )
ifig = go.Figure(data=data, layout=layout)
pltly.iplot(ifig)

In [17]:
# Facts dict
info_dict_hydro = {}
info_dict_hydro['Filename'] = ['{}_mastr_plot_hydro' .format(pd.to_datetime('today').strftime("%Y-%m-%d"))]
info_dict_hydro['Einheitart'] = df_hydro.loc[:,'Einheitart'].unique()
info_dict_hydro['Einheittyp'] = df_hydro.loc[:,'Einheittyp'].unique()
info_dict_hydro['Title'] = ['MaStR Wassereinheit']
#info_dict_hydro['Metadata'] = meta_out

# Print facts
for x in info_dict_hydro:
    print(x,':',info_dict_hydro[x])

Filename : ['2019-05-03_mastr_plot_hydro']
Einheitart : ['Stromerzeugungseinheit']
Einheittyp : ['Windeinheit']
Title : ['MaStR Wassereinheit']


In [18]:
# Interactive plot
df_hydro = df_hydro.sort_values(['Bruttoleistung'], ascending=[False])
data = [go.Scatter(
    #x = df_hydro.loc[:,'id'],
    y = df_hydro.loc[:,'Bruttoleistung'])]

# Set layout
layout = go.Layout(
    height=400, width = 800,
    #title='CO2-Emissions in EU28',
    title=''.join(info_dict_hydro['Title']),
    # yaxis=dict(title='CO2-Emissions in Mt') )
    yaxis=dict(title='Leistung') )
ifig = go.Figure(data=data, layout=layout)
pltly.iplot(ifig)

In [19]:
# Facts dict
info_dict_biomass = {}
info_dict_biomass['Filename'] = ['{}_mastr_plot_biomass' .format(pd.to_datetime('today').strftime("%Y-%m-%d"))]
info_dict_biomass['Einheitart'] = df_biomass.loc[:,'Einheitart'].unique()
info_dict_biomass['Einheittyp'] = df_biomass.loc[:,'Einheittyp'].unique()
info_dict_biomass['Title'] = ['MaStR Biomasseeinheit']
#info_dict_biomass['Metadata'] = meta_out

# Print facts
for x in info_dict_biomass:
    print(x,':',info_dict_biomass[x])

Filename : ['2019-05-03_mastr_plot_biomass']
Einheitart : ['Stromerzeugungseinheit']
Einheittyp : ['Windeinheit']
Title : ['MaStR Biomasseeinheit']


In [21]:
# Interactive plot
df_biomass = df_biomass.sort_values(['Bruttoleistung'], ascending=[False])
data = [go.Scatter(
    #x = df_biomass.loc[:,'id'],
    y = df_biomass.loc[:,'Bruttoleistung'])]

# Set layout
layout = go.Layout(
    height=400, width = 800,
    #title='CO2-Emissions in EU28',
    title=''.join(info_dict_biomass['Title']),
    # yaxis=dict(title='CO2-Emissions in Mt') )
    yaxis=dict(title='Leistung') )
ifig = go.Figure(data=data, layout=layout)
pltly.iplot(ifig)