Skip to content

Commit

Permalink
1. NUTS classification
Browse files Browse the repository at this point in the history
2. Automatic presentation of sources in the notebooks.
  • Loading branch information
milos-simic committed Jun 30, 2019
1 parent 880b06d commit cadfe14
Show file tree
Hide file tree
Showing 10 changed files with 899 additions and 294 deletions.
345 changes: 307 additions & 38 deletions download_and_process.ipynb

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion input/countries.csv
Expand Up @@ -7,7 +7,7 @@ From the beginning of 2019, BNetzA has switched to the Marktstammdatenregister (
"
FR,France,"Summed capacity and number of installations per energy source per municipality (Commune).",""
DK,Denmark,"Wind and photovoltaic power plants with a high level of detail.",""
CH,Switzerland,"All renewable-energy power plants supported by the feed-in-tariff KEV.",""
CH,Switzerland,"All renewable-energy power plants supported by the feed-in-tariff KEV (Kostendeckende Einspeisevergütung).",""
PL,Poland,"Summed capacity and number of installations per energy source per municipality (Powiat).",""
UK,United Kingdom,"Renewable-energy power plants in the United Kingdom.",""

44 changes: 26 additions & 18 deletions input/sources.csv
@@ -1,18 +1,26 @@
source,country,url,filename,file_type,active
bnetza,DE,https://www.bundesnetzagentur.de/SharedDocs/Downloads/DE/Sachgebiete/Energie/Unternehmen_Institutionen/ErneuerbareEnergien/ZahlenDatenInformationen/VOeFF_Registerdaten/2018_12_Veroeff_RegDaten.xlsx?__blob=publicationFile&v=2,2017_12_Veroeff_RegDaten.xlsx,data,yes
bnetza_pv_historic,DE,https://www.bundesnetzagentur.de/SharedDocs/Downloads/DE/Sachgebiete/Energie/Unternehmen_Institutionen/ErneuerbareEnergien/ZahlenDatenInformationen/PV_Datenmeldungen/Archiv_PV/Meldungen_Aug-Juni2017.xlsx?__blob=publicationFile&v=2,Meldungen_Aug-Juni2017.xlsx,data,yes
bnetza_pv,DE,https://www.bundesnetzagentur.de/SharedDocs/Downloads/DE/Sachgebiete/Energie/Unternehmen_Institutionen/ErneuerbareEnergien/ZahlenDatenInformationen/PV_Datenmeldungen/Meldungen_Juli17-Dez18.xlsx?__blob=publicationFile&v=2,Meldungen_Juli17-Dez18.xlsx,data,no
Amprion,DE,https://www.netztransparenz.de/portals/1/Content/Erneuerbare-Energien-Gesetz/EEG-Anlagestammdaten/Netztransparenz%20Anlagenstammdaten%202017%20Amprion%20GmbH_V03.zip,Netztransparenz%20Anlagenstammdaten%202017%20Amprion%20GmbH_V03,data,yes
TransnetBW,DE,https://www.netztransparenz.de/portals/1/Content/Erneuerbare-Energien-Gesetz/EEG-Anlagestammdaten/Netztransparenz%20Anlagenstammdaten%202017%20TransnetBW%20GmbH.zip,Netztransparenz%20Anlagenstammdaten%202017%20TransnetBW%20GmbH,data,yes
50Hertz,DE,https://www.netztransparenz.de/portals/1/Content/Erneuerbare-Energien-Gesetz/EEG-Anlagestammdaten/Netztransparenz%20Anlagenstammdaten%202017%2050Hertz%20Transmission%20GmbH.zip,Netztransparenz%20Anlagenstammdaten%202017%2050Hertz%20Transmission%20GmbH,data,yes
TenneT,DE,https://www.netztransparenz.de/portals/1/Content/Erneuerbare-Energien-Gesetz/EEG-Anlagestammdaten/Netztransparenz%20Anlagenstammdaten%202017%20TenneT%20TSO%20GmbH.zip,Netztransparenz%20Anlagenstammdaten%202017%20TenneT%20TSO%20GmbH,data,yes
Energistyrelsen,DK,https://ens.dk/sites/ens.dk/files/Statistik/anlaegprodtilnettet.xls,anlaegprodtilnettet.xls,data,yes
Energinet,DK,https://data.open-power-system-data.org/renewable_power_plants/2018-03-08/original_data/SolcellerGraf-2016-11.xlsx,SolcellerGraf-2016-11.xlsx,data,no
gouv.fr,FR,http://www.statistiques.developpement-durable.gouv.fr/sites/default/files/2019-01/donnees-locales-2017-loi2000-secretise.xls,donnees-locales-2017-loi2000-secretise.xls,data,yes
Urzad Regulacji Energetyki,PL,https://data.open-power-system-data.org/renewable_power_plants/2018-03-08/original_data/simple.rtf,simple.rtf,data,no
BFE,CH,https://www.bfe.admin.ch/bfe/de/home/foerderung/erneuerbare-energien/einspeiseverguetung/_jcr_content/par/tabs/items/tab/tabpar/externalcontent.external.exturl.xlsx/aHR0cHM6Ly9wdWJkYi5iZmUuYWRtaW4uY2gvZGUvcHVibGljYX/Rpb24vZG93bmxvYWQvOTMxMC54bHN4.xlsx,9310.xlsx,data,yes
BEIS,UK,https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/795492/renewable-energy-public-database-q1-2019.csv,renewable-energy-public-database-q1-2019.csv,data,yes
Geonames,DK,http://download.geonames.org/export/zip/DK.zip,DK.zip,geo,yes
Opendatasoft,FR,http://public.opendatasoft.com/explore/dataset/code-postal-code-insee-2015/download/?format=csv&timezone=Europe/Berlin&use_labels_for_header=true,code-postal-code-insee-2015.csv,geo,yes
Geonames,CH,http://download.geonames.org/export/zip/CH.zip,CH.zip,geo,yes
Geonames,UK,http://download.geonames.org/export/zip/GB_full.csv.zip,GB_full.csv.zip,geo,yes
source,full_name,country,url,filename,file_type,active,short_description,long_description
bnetza,Bundesnetzagentur,DE,https://www.bundesnetzagentur.de/SharedDocs/Downloads/DE/Sachgebiete/Energie/Unternehmen_Institutionen/ErneuerbareEnergien/ZahlenDatenInformationen/VOeFF_Registerdaten/2018_12_Veroeff_RegDaten.xlsx?__blob=publicationFile&v=2,2017_12_Veroeff_RegDaten.xlsx,data,yes,Bundesnetzagentur register of renewable power plants (excl. PV),
bnetza_pv_historic,bnetza_pv_historic?,DE,https://www.bundesnetzagentur.de/SharedDocs/Downloads/DE/Sachgebiete/Energie/Unternehmen_Institutionen/ErneuerbareEnergien/ZahlenDatenInformationen/PV_Datenmeldungen/Archiv_PV/Meldungen_Aug-Juni2017.xlsx?__blob=publicationFile&v=2,Meldungen_Aug-Juni2017.xlsx,data,yes,Bundesnetzagentur register of PV power plants,
bnetza_pv,bnetza_pv?,DE,https://www.bundesnetzagentur.de/SharedDocs/Downloads/DE/Sachgebiete/Energie/Unternehmen_Institutionen/ErneuerbareEnergien/ZahlenDatenInformationen/PV_Datenmeldungen/Meldungen_Juli17-Dez18.xlsx?__blob=publicationFile&v=2,Meldungen_Juli17-Dez18.xlsx,data,no,Bundesnetzagentur register of PV power plants,
Amprion,Amprion,DE,https://www.netztransparenz.de/portals/1/Content/Erneuerbare-Energien-Gesetz/EEG-Anlagestammdaten/Netztransparenz%20Anlagenstammdaten%202017%20Amprion%20GmbH_V03.zip,Netztransparenz%20Anlagenstammdaten%202017%20Amprion%20GmbH_V03,data,yes,Netztransparenz.de - information platform of German TSOs (register of renewable power plants in their control area),
TransnetBW,TransnetBW,DE,https://www.netztransparenz.de/portals/1/Content/Erneuerbare-Energien-Gesetz/EEG-Anlagestammdaten/Netztransparenz%20Anlagenstammdaten%202017%20TransnetBW%20GmbH.zip,Netztransparenz%20Anlagenstammdaten%202017%20TransnetBW%20GmbH,data,yes,Netztransparenz.de - information platform of German TSOs (register of renewable power plants in their control area),
50Hertz,50Hertz,DE,https://www.netztransparenz.de/portals/1/Content/Erneuerbare-Energien-Gesetz/EEG-Anlagestammdaten/Netztransparenz%20Anlagenstammdaten%202017%2050Hertz%20Transmission%20GmbH.zip,Netztransparenz%20Anlagenstammdaten%202017%2050Hertz%20Transmission%20GmbH,data,yes,Netztransparenz.de - information platform of German TSOs (register of renewable power plants in their control area),
TenneT,Tennet,DE,https://www.netztransparenz.de/portals/1/Content/Erneuerbare-Energien-Gesetz/EEG-Anlagestammdaten/Netztransparenz%20Anlagenstammdaten%202017%20TenneT%20TSO%20GmbH.zip,Netztransparenz%20Anlagenstammdaten%202017%20TenneT%20TSO%20GmbH,data,yes,Netztransparenz.de - information platform of German TSOs (register of renewable power plants in their control area),
Energistyrelsen,Energistyrelsen,DK,https://ens.dk/sites/ens.dk/files/Statistik/anlaegprodtilnettet.xls,anlaegprodtilnettet.xls,data,yes,Energy Agency Denmark,"The Danish Energy Agency publishes a national master data register for wind turbines which was created in collaboration with the transmission system operators. It is published monthly as an Excel file. The data set includes all electricity-generating wind turbines with information about technical data, location data and production data."
Energinet,Energinet,DK,https://data.open-power-system-data.org/renewable_power_plants/2018-03-08/original_data/SolcellerGraf-2016-11.xlsx,SolcellerGraf-2016-11.xlsx,data,no,Transmission system operator in Denmark,"The photovoltaic statistic used to be published by Energinet and included information about location, year of implementation, installed capacity and number of systems. Since Energinet stopped publishing this, and the Danish Energy Agency, which has taken over the job of keeping that register, has not published a register yet, we revert to old data for Danish solar from our previous OPSD data package version."
gouv.fr,Ministère de la Transition écologique et solidaire,FR,http://www.statistiques.developpement-durable.gouv.fr/sites/default/files/2019-01/donnees-locales-2017-loi2000-secretise.xls,donnees-locales-2017-loi2000-secretise.xls,data,yes,Ministry for the Ecological and Inclusive Transition,The data is published annually on the French [website for statistics](https://www.statistiques.developpement-durable.gouv.fr/donnees-locales-relatives-aux-installations-de-production-delectricite-renouvelable-beneficiant-0?rubrique=23&dossier=189) as an Excel file. The Excel chart includes the number and installed capacity of the different renewable sources for every municipality in France. It is limited to the plants which are covered by article 10 of February 2000 by an agreement to a purchase commitment.
Urzad Regulacji Energetyki,Urzad Regulacji Energetyki,PL,https://data.open-power-system-data.org/renewable_power_plants/2018-03-08/original_data/simple.rtf,simple.rtf,data,no,Energy Regulatory Office of Poland,"Number of installations and installed capacity per energy source of renewable energy, summed per powiat (districts) is illustrated on the page and can be downloaded as rtf-file. However, the file is no longer available. Therefore we use an old version from OPSD."
BFE,Bundesamt für Energie,CH,https://www.bfe.admin.ch/bfe/de/home/foerderung/erneuerbare-energien/einspeiseverguetung/_jcr_content/par/tabs/items/tab/tabpar/externalcontent.external.exturl.xlsx/aHR0cHM6Ly9wdWJkYi5iZmUuYWRtaW4uY2gvZGUvcHVibGljYX/Rpb24vZG93bmxvYWQvOTMxMC54bHN4.xlsx,9310.xlsx,data,yes,Swiss Federal Office of Energy,Data of all renewable power plants receiving 'Kostendeckende Einspeisevergütung' (KEV) which is the Swiss feed-in tariff for renewable power plants. Geodata is based on municipality codes.
BEIS,"UK Government Department for Business, Energy & Industrial Strategy",UK,https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/795492/renewable-energy-public-database-q1-2019.csv,renewable-energy-public-database-q1-2019.csv,data,yes,Renewable Energy Planning Database quarterly extract,"The Renewable Energy Planning Database (REPD) is managed by Eunomia Research and Consulting Ltd (Eunomia) on behalf of the Department of Business, Energy & Industrial Strategy (BEIS). The database tracks the progress of renewable electricity projects (including those that could also be used for combined heat and power (CHP))."
Geonames,GeoNames,DK,http://download.geonames.org/export/zip/DK.zip,DK.zip,geo,yes,Geoinformation on postal codes in Denmark,
Opendatasoft,Opendatasoft,FR,http://public.opendatasoft.com/explore/dataset/code-postal-code-insee-2015/download/?format=csv&timezone=Europe/Berlin&use_labels_for_header=true,code-postal-code-insee-2015.csv,geo,yes,Code Postal - Code INSEE,
Geonames,GeoNames,CH,http://download.geonames.org/export/zip/CH.zip,CH.zip,geo,yes,Geoinformation on postal codes in Switzerland,
Geonames,GeoNames,UK,http://download.geonames.org/export/zip/GB_full.csv.zip,GB_full.csv.zip,geo,yes,Geoinformation on postal codes in the United Kingdom,
Eurostat,Eurostat,UK,http://ec.europa.eu/eurostat/tercet/download.do?file=pc2018_uk_NUTS-2016_v1.0.zip,pc2018_uk_NUTS-2016_v1.0.zip,geo,yes,Correspondence table between UK postal codes and NUTS-3 classification codes,
Eurostat,Eurostat,PL,http://ec.europa.eu/eurostat/tercet/download.do?file=pc2018_pl_NUTS-2016_v1.0.zip,pc2018_pl_NUTS-2016_v1.0.zip,geo,yes,Correspondence table between PL postal codes and NUTS-3 classification codes,
Eurostat,Eurostat,CH,http://ec.europa.eu/eurostat/tercet/download.do?file=pc2018_ch_NUTS-2016_v1.0.zip,pc2018_ch_NUTS-2016_v1.0.zip,geo,yes,Correspondence table between CH postal codes and NUTS-3 classification codes,
Eurostat,Eurostat,FR,http://ec.europa.eu/eurostat/tercet/download.do?file=pc2018_fr_NUTS-2016_v1.0.zip,pc2018_fr_NUTS-2016_v1.0.zip,geo,yes,Correspondence table between FR postal codes and NUTS-3 classification codes,
Eurostat,Eurostat,DK,http://ec.europa.eu/eurostat/tercet/download.do?file=pc2018_dk_NUTS-2016_v1.0.zip,pc2018_dk_NUTS-2016_v1.0.zip,geo,yes,Correspondence table between DK postal codes and NUTS-3 classification codes,
Eurostat,Eurostat,DE,http://ec.europa.eu/eurostat/tercet/download.do?file=pc2018_de_NUTS-2016_v1.0.zip,pc2018_de_NUTS-2016_v1.0.zip,geo,yes,Correspondence table between DE postal codes and NUTS-3 classification codes,
Eurostat,Eurostat,EU,https://ec.europa.eu/eurostat/documents/345175/501971/EU-28-LAU-2018-NUTS-2016.xlsx,EU-28-LAU-2018-NUTS-2016.xlsx,geo,yes,Mapping from LAU to NUTS,
Eurostat_shapefile,Eurostat,EU,https://ec.europa.eu/eurostat/cache/GISCO/distribution/v2/nuts/download/ref-nuts-2016-01m.shp.zip,ref-nuts-2016-01m.shp.zip,geo,yes,Shapefile,
94 changes: 48 additions & 46 deletions main.ipynb

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions util/__init__.py
@@ -0,0 +1 @@
# Submodule names exported by `from util import *`.
__all__ = ['helper', 'visualizer']
92 changes: 64 additions & 28 deletions util/downloader.py
Expand Up @@ -24,6 +24,49 @@

from .helper import get_beis_link

def download_and_cache(url, session=None, download_directory_path=None, filename=None):
    """
    Download a file into the folder given by download_directory_path,
    unless a file of that name is already present there.

    Parameters:
        url: address of the file to download.
        session: optional requests session used for the download;
            a new session is created when none is given.
        download_directory_path: folder in which the file is stored. It is
            created if it does not exist. Defaults to the current directory.
        filename: optional name of the local file. When omitted, the last
            component of the URL path is used.

    Returns the local filepath.

    Raises ValueError if no filename is given and none can be derived from
    the URL, and requests.HTTPError if the server reports an error status.
    """
    if filename is None:
        url_path = urllib.parse.urlsplit(url).path
        filename = str(posixpath.basename(url_path))

    # A URL path ending in '/' yields an empty basename; joining it would
    # make the target the directory itself, which always "exists".
    if not filename:
        raise ValueError('Cannot derive a filename from the url; '
                         'please pass the filename explicitly.')

    if download_directory_path is None:
        download_directory_path = os.curdir

    os.makedirs(download_directory_path, exist_ok=True)

    # Join directly instead of splitting on os.sep and re-joining: the
    # round trip drops the leading separator of absolute paths.
    filepath = os.path.join(download_directory_path, filename)

    # Cache hit: nothing to download.
    if os.path.exists(filepath):
        print("Using local file from", filepath)
        return filepath

    if not session:
        session = requests.session()

    print("Downloading file ", filename, " from ", url)
    # The user agent is only needed for an actual download, so it is
    # created here rather than on every call.
    headers = {'User-Agent': fake_useragent.UserAgent().random}
    response = session.get(url, headers=headers, stream=True)
    # Do not cache an HTTP error page as if it were the requested file.
    response.raise_for_status()

    # Stream into a temporary name and rename at the end, so that an
    # interrupted download is never mistaken for a complete cached file.
    partial_filepath = filepath + '.part'
    chunksize = 1024
    with open(partial_filepath, 'wb') as file_handler:
        for chunk in response.iter_content(chunksize):
            file_handler.write(chunk)
    os.replace(partial_filepath, filepath)
    print('Downloading: done.')

    return filepath

class Downloader(object):
"""docstring for Downloader"""
def __init__(self, version, input_directory_path, source_path, download_from):
Expand All @@ -34,6 +77,11 @@ def __init__(self, version, input_directory_path, source_path, download_from):
self.source_df = pd.read_csv(source_path)
self.download_from = download_from

# Setter: stores the given path on the instance as `input_directory_path`.
def set_input_directory_path(self, input_directory_path):
self.input_directory_path = input_directory_path

# Getter: returns the instance's current `input_directory_path`.
def get_input_directory_path(self):
return self.input_directory_path

def download_and_cache(self, url, session=None, filename=None, country=None, source_name=None):
"""
Expand All @@ -48,7 +96,6 @@ def download_and_cache(self, url, session=None, filename=None, country=None, sou
if filename is None:
path = urllib.parse.urlsplit(url).path
filename = str(posixpath.basename(path))
#print(url, "fff", len(filename))

split_path = self.input_directory_path.split(os.sep)
download_path_parts = split_path + [country, source_name, filename]
Expand Down Expand Up @@ -101,56 +148,45 @@ def unzip_and_mark(self, filepath):
shutil.copy(filepath, new_filepath)

def get_filenames_for_opsd(self, source_df):
filenames_by_source = source_df[self.source_df['file_type'] == 'data'][['source', 'filename']]
filenames_by_source = source_df[['source', 'filename']]
filenames_by_source = filenames_by_source.set_index('source')
filenames_by_source = filenames_by_source.to_dict()['filename']
geo_url = None
if 'geo' in source_df['file_type'].values:
geo_filename = source_df[source_df['file_type'] == 'geo'].iloc[0]['filename']
geo_link = self.get_opsd_download_url(geo_filename)
geo_url = {'url' : geo_link, 'filename' : geo_filename}

return filenames_by_source, geo_url
return filenames_by_source

def get_download_urls(self, country):
source_df = self.source_df[self.source_df['country'] == country]
geo_url = None

if self.download_from == 'original_sources':
data_urls = {}
# check if there are inactive urls
inactive_df = source_df[source_df['active'] == 'no']
if not inactive_df.empty:
filenames_by_source, geo_url = self.get_filenames_for_opsd(inactive_df)
data_urls.update({source : {'url' : self.get_opsd_download_url(filenames_by_source[source])} for source in filenames_by_source})
filenames_by_source = self.get_filenames_for_opsd(inactive_df)
for source in filenames_by_source:
filename = filenames_by_source[source]
data_urls.update({source : {'url' : self.get_opsd_download_url(filename), 'filename' : filename}})

active_df = source_df[source_df['active'] == 'yes']
urls = active_df[active_df['file_type'] == 'data'][['source', 'url', 'filename']]
urls = active_df[['source', 'url', 'filename']]
urls = urls.set_index('source')
data_urls.update(urls.to_dict(orient='index'))
if 'geo' in active_df['file_type'].values:
geo_link = source_df[source_df['file_type'] == 'geo'].iloc[0]['url']
geo_filename = source_df[source_df['file_type'] == 'geo'].iloc[0]['filename']
geo_url = {'url' : geo_link, 'filename' : geo_filename}

elif self.download_from == 'opsd_server':
filenames_by_source, geo_url = self.get_filenames_for_opsd(source_df)
filenames_by_source = self.get_filenames_for_opsd(source_df)
data_urls = {source : {'url' : self.get_opsd_download_url(filenames_by_source[source])} for source in filenames_by_source}
else:
raise ValueError('download_from must be "original_sources" or "opsd_server".')

return {'data': data_urls, 'geo': geo_url}
return data_urls

def download_data_for_country(self, country):
urls = self.get_download_urls(country)
local_paths = {}
if urls['geo'] is not None:
url = urls['geo']['url']
filename = urls['geo'].get('filename', None)
geopath = self.download_and_cache(url, country=country, filename=filename)
local_paths['geo'] = geopath
#print(url, "------------------>", geopath)

for source_name in urls['data']:
url = urls['data'][source_name]['url']
filename = urls['data'].get('filename', None)

for source_name in urls:
url = urls[source_name]['url']
filename = urls[source_name]['filename']
datapath = self.download_and_cache(url, country=country, source_name=source_name, filename=filename)
local_paths[source_name] = datapath
#print(source_name, url, "------------------>", datapath)
Expand Down

0 comments on commit cadfe14

Please sign in to comment.