# Parc region annuel de production filiere
## Electrical power capacity per region from 2008 to 2019
## Data preparation 

### Libraries

In [2]:
import os
import requests
import json
import pandas as pd
import geopandas as gpd
import pickle

### Download and save datasets

In [3]:
# Disabled download step: every statement below is commented out, so running
# this cell fetches and writes nothing. When re-enabled it would request the
# dataset export from the open-data API, dump the JSON under ../data, build a
# DataFrame from it and pickle that DataFrame.
# The bare strings are only step labels; being the last expression, the final
# one is what the notebook echoes as the cell output.
'''Load the json file from the API '''
# response = requests.get('https://opendata.reseaux-energies.fr/api/v2/catalog/datasets/parc-region-annuel-production-filiere/exports/json?rows=-1&pretty=false&timezone=UTC')

'''Save Json file'''
# json_file = os.path.join('..', 'data', 'parc-region-annuel-production-filiere.json')
# with open(json_file, 'w') as f:
#     json.dump(response.json(), f)

'''Convert json to DataFRame'''
# df = pd.DataFrame(response.json())

'''Save DataFrame'''
# filename = os.path.join('..', 'data', 'parc-region-annuel-production-filiere.df')
# df.to_pickle(filename)

'Save DataFrame'

### Load dataset from local directory

In [4]:
# Production dataset, read from the local raw-data folder as a GeoDataFrame.
# Per the preview below it carries one capacity column per energy sector (MW),
# the region, the year and the region geometry.
df_name = os.path.join('..', 'raw-data', 'parc-region-annuel-production-filiere.geojson')
df = gpd.read_file(df_name)

In [5]:
# Region outlines, read from the local raw-data folder as a GeoDataFrame
# (columns per the preview below: code, nom, geometry).
geojsonfile = os.path.join('..', 'raw-data', 'regions-version-simplifiee.geojson')
gdf = gpd.read_file(geojsonfile)

In [6]:
df.head()

Unnamed: 0,parc_eolien_mw,code_insee_region,parc_solaire_mw,parc_thermique_fossile_mw,region,parc_nucleaire_mw,parc_hydraulique_mw,annee,parc_bioenergies_mw,geometry
0,611.0,44,3.0,3585.0,Grand-Est,12580.0,2350.0,2008,71.0,"POLYGON ((7.57822 48.12144, 7.57795 48.12067, ..."
1,572.0,76,19.0,1651.0,Occitanie,2750.0,5378.0,2008,99.0,"MULTIPOLYGON (((4.10162 43.55436, 4.10101 43.5..."
2,462.0,53,11.0,644.0,Bretagne,,276.0,2009,31.0,"MULTIPOLYGON (((-1.94715 48.53889, -1.94730 48..."
3,92.0,27,30.0,664.0,Bourgogne-Franche-Comté,,513.0,2010,29.0,"POLYGON ((6.94079 47.43332, 6.94066 47.43102, ..."
4,19.0,11,49.0,5310.0,Ile-de-France,,17.0,2011,284.0,"POLYGON ((1.60962 49.07770, 1.61075 49.07799, ..."


In [7]:
gdf.head()

Unnamed: 0,code,nom,geometry
0,11,Île-de-France,"POLYGON ((2.59052 49.07965, 2.63327 49.10838, ..."
1,24,Centre-Val de Loire,"POLYGON ((2.87463 47.52042, 2.88845 47.50943, ..."
2,27,Bourgogne-Franche-Comté,"POLYGON ((3.62942 46.74946, 3.57569 46.74952, ..."
3,28,Normandie,"POLYGON ((-1.11962 49.35557, -1.07822 49.38849..."
4,32,Hauts-de-France,"POLYGON ((4.04797 49.40564, 4.03991 49.39740, ..."


### For region not producing, NaN replaced with 0

In [41]:
# Replace every NaN in the frame with 0 (applies to all columns, not only the
# capacity ones). Without this, the "+" based total computed later would be
# NaN for any row with a missing sector value.
df = df.fillna(0)

### Harmonising region names and adding region centers to place region labels

In [42]:
# Overwrite two region names in the outline table with the spellings used in
# the production table ('Grand-Est', 'Ile-de-France'), presumably so both
# tables can be matched on the region name.
# NOTE(review): rows are addressed by index label. Label 0 is 'Île-de-France'
# in the preview above; label 5 is assumed to be the Grand Est row - confirm,
# since this silently renames the wrong region if the file order changes.
gdf.loc[5, 'nom'] = 'Grand-Est'
gdf.loc[0, 'nom'] = 'Ile-de-France'

In [43]:
# Store the centre of each region geometry as plain lon/lat columns.
# The centroids are computed once and reused for both coordinates instead of
# re-deriving them from the geometry column for each axis.
# NOTE(review): if the geometry is in a geographic CRS (typical for GeoJSON),
# geopandas warns that centroids are approximate - confirm this is acceptable
# for label placement.
centroids = df['geometry'].centroid
df['centroid_lon'] = centroids.x
df['centroid_lat'] = centroids.y

# Drop the polygons; from here on df only keeps the centroid coordinates.
df = df.drop(['geometry'], axis=1)

### Adding total energy sector per year and per region

In [44]:
# Total installed capacity per row: the six sector columns added up in the
# same left-to-right order as before. A NaN in any sector yields a NaN total.
df['parc_total_mw'] = (
    df['parc_eolien_mw']
    + df['parc_solaire_mw']
    + df['parc_thermique_fossile_mw']
    + df['parc_nucleaire_mw']
    + df['parc_hydraulique_mw']
    + df['parc_bioenergies_mw']
)

### Adding relative values for a given year and a given energy sector

In [21]:
def calcul_relative_values(column, data=None):
    """Return each row's share of its year's total for ``column``.

    Every value of ``column`` is divided by the sum of ``column`` over all
    rows that have the same ``annee``.

    Parameters
    ----------
    column : str
        Name of the numeric column to normalise.
    data : pandas.DataFrame, optional
        Frame holding ``column`` and an ``annee`` column. When omitted, the
        notebook-level ``gdf`` is used, as in the original one-argument call.

    Returns
    -------
    pandas.Series
        Float shares carrying the index of ``data``, sorted by index. A year
        whose total is 0 yields NaN/inf for its rows.
    """
    if data is None:
        # Existing callers pass only the column name and rely on the global.
        data = gdf

    # transform('sum') broadcasts each year's total back onto its rows, so the
    # division is a single aligned operation instead of one concat per year
    # (which also recomputed the groupby sum on every iteration).
    yearly_totals = data.groupby('annee')[column].transform('sum')
    return (data[column] / yearly_totals).sort_index()

In [22]:
# Add one "<column>_rel" column per capacity column, holding each row's share
# of that column's yearly total.
# NOTE(review): this cell works on `gdf`; the column listing printed below it
# shows the capacity columns there, while the load cells above bind `gdf` to
# the region outlines - confirm which frame is intended before re-running.
for c in gdf.columns:
    # Match only the raw capacity columns (suffix "_mw"). A plain substring
    # test would also match the "_mw_rel" columns created here and derive
    # "_rel_rel" columns when the cell is run a second time.
    if c.endswith('_mw'):
        gdf[f'{c}_rel'] = pd.to_numeric(calcul_relative_values(c))





In [6]:
gdf.columns

Index(['parc_eolien_mw', 'code_insee_region', 'parc_solaire_mw',
       'parc_thermique_fossile_mw', 'region', 'parc_nucleaire_mw',
       'parc_hydraulique_mw', 'annee', 'parc_bioenergies_mw', 'geometry',
       'centroid_lon', 'centroid_lat', 'parc_total_mw'],
      dtype='object')

### Saving dataframes to pickle for app preparation

In [14]:
!ls ..

app  input  ipynb  processed-data  raw-data


In [45]:
'''Save DataFrame'''

# Build both output paths under ../processed-data first, then write the files.
df_filename = os.path.join('..', 'processed-data', 'parc-region-annuel-production-filiere-for-app.df')
gdf_filename = os.path.join('..', 'processed-data', 'region.gdf')

# Pickle the production table and the region table.
df.to_pickle(df_filename)
gdf.to_pickle(gdf_filename)