# Parc region annuel de production filiere
## Electrical power capacity per region from 2008 to 2019
## Data preparation 

### Libraries

In [4]:
import os
import requests
import json
import pandas as pd
import geopandas as gpd
import pickle

### Download and save datasets

In [2]:
# Shell escape: list the contents of the ../raw-data folder.
!ls ../raw-data

eco2mix-national-cons-def.csv
eco2mix-national-tr.csv
eco2mix-regional-cons-def.csv
eco2mix-regional-tr.csv
parc-region-annuel-production-filiere.geojson
prod-region-annuelle-filiere.csv
production-quotidienne-filiere.csv
regions-version-simplifiee.geojson
temperature-quotidienne-regionale.csv


In [9]:
# Disabled download cell: every statement below is commented out, so running the cell
# only evaluates the bare string literals that serve as section labels. The last
# literal is what the cell echoes as its output.
# NOTE(review): the commented code targets the 'parc-region-annuel-production-filiere'
# dataset and the ../data folder, whereas the notebook later loads
# 'prod-region-annuelle-filiere' from ../raw-data -- confirm before re-enabling.
'''Load the json file from the API '''
# response = requests.get('https://opendata.reseaux-energies.fr/api/v2/catalog/datasets/parc-region-annuel-production-filiere/exports/json?rows=-1&pretty=false&timezone=UTC')

'''Save Json file'''
# json_file = os.path.join('..', 'data', 'parc-region-annuel-production-filiere.json')
# with open(json_file, 'w') as f:
#     json.dump(response.json(), f)

'''Convert json to DataFRame'''
# df = pd.DataFrame(response.json())

'''Save DataFrame'''
# filename = os.path.join('..', 'data', 'parc-region-annuel-production-filiere.df')
# df.to_pickle(filename)

'Save DataFrame'

### Load dataset from local directory

In [19]:
# Shell escape: list ../raw-data again to check which files are available before loading.
!ls ../raw-data

eco2mix-national-cons-def.csv
eco2mix-national-tr.csv
eco2mix-regional-cons-def.csv
eco2mix-regional-tr.csv
parc-region-annuel-production-filiere.geojson
prod-region-annuelle-filiere.csv
prod-region-annuelle-filiere.geojson
production-quotidienne-filiere.csv
regions-version-simplifiee.geojson
temperature-quotidienne-regionale.csv


In [21]:
# Assemble the relative path to the GeoJSON file from its components,
# then read it with geopandas.
path_parts = ('..', 'raw-data', 'prod-region-annuelle-filiere.geojson')
df_name = os.path.join(*path_parts)
df = gpd.read_file(df_name)

In [22]:
# Preview the first rows of the loaded frame.
df.head()

Unnamed: 0,production_eolienne,production_nucleaire,production_bioenergies,production_hydraulique,region,code_insee_region,annee,production_solaire,production_thermique,geometry
0,758.0,77701.0,148.0,189.0,Centre-Val de Loire,24,2008,0.4,743.0,"MULTIPOLYGON (((2.28109 46.42047, 2.36129 46.5..."
1,34.0,,,500.0,Corse,94,2008,0.2,861.0,"MULTIPOLYGON (((9.24861 41.33657, 9.24793 41.3..."
2,1330.0,72866.0,319.0,8578.0,Grand-Est,44,2009,9.8,9412.0,"MULTIPOLYGON (((3.48502 48.85185, 3.39598 48.7..."
3,476.0,63669.0,377.0,146.0,Normandie,28,2009,1.4,7439.0,"MULTIPOLYGON (((-1.77964 48.89202, -1.77996 48..."
4,1421.0,19796.0,520.0,11360.0,Occitanie,76,2009,54.9,744.0,"MULTIPOLYGON (((3.70088 43.39310, 3.70800 43.3..."


### Drop useless columns

In [23]:
# Remove the INSEE region code column; the rest of the notebook does not use it.
df = df.drop(columns=['code_insee_region'])

### For regions with no production in a sector, replace NaN with 0

In [24]:
# Fill missing values with 0 in the per-sector production columns only (per the
# section heading, a NaN there stands for a region that does not produce in that
# sector). A blanket fillna(0) would also turn an unexpected gap in a non-production
# column (region, annee, geometry) into the integer 0 without any warning.
production_columns = [col for col in df.columns if col.startswith('production_')]
df = df.fillna({col: 0 for col in production_columns})

### Adding region centroids to place region labels

In [25]:
# Compute each region's centroid once and store its coordinates as plain columns.
# NOTE(review): the centroid is taken on the geometry as loaded (lon/lat values in the
# preview); geopandas may warn about centroids in a geographic CRS -- confirm this is
# acceptable for label placement.
region_centroids = df['geometry'].centroid
df['centroid_lon'] = region_centroids.x
df['centroid_lat'] = region_centroids.y

# The geometry itself is no longer needed once the centroid coordinates are stored.
df = df.drop(columns=['geometry'])

### Adding total production across all energy sectors per year and per region

In [27]:
# Show the column names currently in the frame.
df.columns

Index(['production_eolienne', 'production_nucleaire', 'production_bioenergies',
       'production_hydraulique', 'region', 'annee', 'production_solaire',
       'production_thermique', 'centroid_lon', 'centroid_lat'],
      dtype='object')

In [28]:
# Columns added together to obtain the total production of a row.
sector_production_columns = [
    'production_eolienne',
    'production_solaire',
    'production_thermique',
    'production_nucleaire',
    'production_hydraulique',
    'production_bioenergies',
]

# Start from the first column and add the others left to right, so the
# element-wise additions happen in the order the columns are listed.
first_column, *other_columns = sector_production_columns
df['production_total'] = sum((df[name] for name in other_columns), df[first_column])

In [32]:
# Display the frame, which now includes the production_total column.
df

Unnamed: 0,production_eolienne,production_nucleaire,production_bioenergies,production_hydraulique,region,annee,production_solaire,production_thermique,centroid_lon,centroid_lat,production_total
0,758.000000,77701.000000,148.000000,189.000000,Centre-Val de Loire,2008,0.400000,743.000000,1.681356,47.487546,79539.400000
1,34.000000,0.000000,0.000000,500.000000,Corse,2008,0.200000,861.000000,9.097200,42.155748,1395.200000
2,1330.000000,72866.000000,319.000000,8578.000000,Grand-Est,2009,9.800000,9412.000000,5.610096,48.688793,92514.800000
3,476.000000,63669.000000,377.000000,146.000000,Normandie,2009,1.400000,7439.000000,0.119606,49.121053,72108.400000
4,1421.000000,19796.000000,520.000000,11360.000000,Occitanie,2009,54.900000,744.000000,2.141486,43.700110,33895.900000
...,...,...,...,...,...,...,...,...,...,...,...
151,2661.643298,71625.389861,510.086497,83.779254,Centre-Val de Loire,2019,347.441810,427.242969,1.681356,47.487546,75655.583689
152,9.198621,0.000000,7.191249,403.395604,Corse,2019,220.198799,967.770000,9.097200,42.155748,1607.754273
153,7674.669825,76391.187299,1041.324731,8615.252070,Grand-Est,2019,574.973316,9809.587679,5.610096,48.688793,104106.994920
154,1967.801553,0.000000,421.886529,17.644206,Pays de la Loire,2019,599.290423,3444.073698,-0.817673,47.472842,6450.696409


In [35]:
# One row per energy sector: (secteur key, English sector label, title, production column).
# The 'parc_column_name' of every entry follows the pattern 'parc_<secteur>_mw', so it is
# derived from the key instead of being repeated in each row.
# NOTE(review): the titles and 'parc_*' names refer to installed capacity, while the frame
# prepared in this notebook only holds 'production_*' columns -- confirm these labels are
# the intended ones.
_sector_rows = [
    ('nucleaire', 'Nuclear',
     'Nuclear plant capacity installed in France', 'production_nucleaire'),
    ('hydraulique', 'Hydro',
     'Hydro capacity installed in France', 'production_hydraulique'),
    ('thermique_fossile', 'Fossil Fueled Thermal',
     'Thermal plant capacity installed in France', 'production_thermique'),
    ('bioenergies', 'Bioenergy',
     'Bioenergy capacity installed in France', 'production_bioenergies'),
    ('eolien', 'Wind',
     'Wind farm capacity installed in France', 'production_eolienne'),
    ('solaire', 'Solar',
     'Solar photovoltaic capacity installed in France', 'production_solaire'),
    ('total', 'Total',
     'Total capacity installed in France', 'production_total'),
]

# Mapping from sector key to its display metadata and column names.
energy_sectors = {
    secteur: {
        'title': title,
        'secteur': secteur,
        'sector': sector,
        'parc_column_name': 'parc_{}_mw'.format(secteur),
        'production_column_name': production_column,
    }
    for secteur, sector, title, production_column in _sector_rows
}

In [46]:
# Display the frame again; defining energy_sectors did not modify it.
df

Unnamed: 0,production_eolienne,production_nucleaire,production_bioenergies,production_hydraulique,region,annee,production_solaire,production_thermique,centroid_lon,centroid_lat,production_total
0,758.000000,77701.000000,148.000000,189.000000,Centre-Val de Loire,2008,0.400000,743.000000,1.681356,47.487546,79539.400000
1,34.000000,0.000000,0.000000,500.000000,Corse,2008,0.200000,861.000000,9.097200,42.155748,1395.200000
2,1330.000000,72866.000000,319.000000,8578.000000,Grand-Est,2009,9.800000,9412.000000,5.610096,48.688793,92514.800000
3,476.000000,63669.000000,377.000000,146.000000,Normandie,2009,1.400000,7439.000000,0.119606,49.121053,72108.400000
4,1421.000000,19796.000000,520.000000,11360.000000,Occitanie,2009,54.900000,744.000000,2.141486,43.700110,33895.900000
...,...,...,...,...,...,...,...,...,...,...,...
151,2661.643298,71625.389861,510.086497,83.779254,Centre-Val de Loire,2019,347.441810,427.242969,1.681356,47.487546,75655.583689
152,9.198621,0.000000,7.191249,403.395604,Corse,2019,220.198799,967.770000,9.097200,42.155748,1607.754273
153,7674.669825,76391.187299,1041.324731,8615.252070,Grand-Est,2019,574.973316,9809.587679,5.610096,48.688793,104106.994920
154,1967.801553,0.000000,421.886529,17.644206,Pays de la Loire,2019,599.290423,3444.073698,-0.817673,47.472842,6450.696409


### Saving dataframe to pickle for app preparation

In [31]:
# Save the prepared frame as a pickle in two places: the processed-data folder and the
# app's data folder. Both paths are built the same way from a single file name.
pickle_name = 'prod-region-annuelle-filiere.df'
df_filename = os.path.join('..', 'processed-data', pickle_name)
app_filename = os.path.join('..', 'app', 'data', pickle_name)

for target in (df_filename, app_filename):
    # to_pickle does not create missing folders, so make sure the target folder exists
    # (e.g. on a fresh checkout) before writing.
    os.makedirs(os.path.dirname(target), exist_ok=True)
    df.to_pickle(target)