# Dataset de la consommation d'énergie

## Import des librairies

In [53]:
import pandas as pd
import numpy as np

## Transformation des données

In [54]:
energy_consumption = pd.read_csv('../data/World Energy Consumption.csv')

energy_consumption.head(5)

Unnamed: 0,country,year,iso_code,population,gdp,biofuel_cons_change_pct,biofuel_cons_change_twh,biofuel_cons_per_capita,biofuel_consumption,biofuel_elec_per_capita,...,solar_share_elec,solar_share_energy,wind_cons_change_pct,wind_cons_change_twh,wind_consumption,wind_elec_per_capita,wind_electricity,wind_energy_per_capita,wind_share_elec,wind_share_energy
0,ASEAN (Ember),2000,,,,,,,,,...,0.0,,,,,,0.0,,0.0,
1,ASEAN (Ember),2001,,,,,,,,,...,0.0,,,,,,0.0,,0.0,
2,ASEAN (Ember),2002,,,,,,,,,...,0.0,,,,,,0.0,,0.0,
3,ASEAN (Ember),2003,,,,,,,,,...,0.0,,,,,,0.0,,0.0,
4,ASEAN (Ember),2004,,,,,,,,,...,0.0,,,,,,0.0,,0.0,


In [59]:
print(energy_consumption.columns.tolist())

['country', 'year', 'iso_code', 'population', 'gdp', 'biofuel_cons_change_pct', 'biofuel_cons_change_twh', 'biofuel_cons_per_capita', 'biofuel_consumption', 'biofuel_elec_per_capita', 'biofuel_electricity', 'biofuel_share_elec', 'biofuel_share_energy', 'carbon_intensity_elec', 'coal_cons_change_pct', 'coal_cons_change_twh', 'coal_cons_per_capita', 'coal_consumption', 'coal_elec_per_capita', 'coal_electricity', 'coal_prod_change_pct', 'coal_prod_change_twh', 'coal_prod_per_capita', 'coal_production', 'coal_share_elec', 'coal_share_energy', 'electricity_demand', 'electricity_generation', 'electricity_share_energy', 'energy_cons_change_pct', 'energy_cons_change_twh', 'energy_per_capita', 'energy_per_gdp', 'fossil_cons_change_pct', 'fossil_cons_change_twh', 'fossil_elec_per_capita', 'fossil_electricity', 'fossil_energy_per_capita', 'fossil_fuel_consumption', 'fossil_share_elec', 'fossil_share_energy', 'gas_cons_change_pct', 'gas_cons_change_twh', 'gas_consumption', 'gas_elec_per_capita', '

In [55]:
# Affichage des informations sur le DataFrame
energy_consumption.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 22012 entries, 0 to 22011
Columns: 129 entries, country to wind_share_energy
dtypes: float64(126), int64(1), object(2)
memory usage: 21.7+ MB


In [56]:
# Suppression des lignes où la colonne 'population' est NaN
energy_consumption = energy_consumption.dropna(subset=['population'])


In [57]:
# On garde les lignes dont la colonne 'year' est comprise entre 2010 et 2019 inclus
energy_consumption = energy_consumption[(energy_consumption['year'] >= 2005) & (energy_consumption['year'] <= 2019)]


In [58]:
# On ne garde les données que concernant les 20 pays émettant le plus de CO2 (après recherche Internet) 
countries = ['China', 'United States', 'India', 'Russia', 'Japan', 'Germany', 'Iran', 'South Korea', 'Canada', 'Indonesia',
             'Mexico', 'Brazil', 'Saudi Arabia', 'South Africa', 'Turkey', 'Australia', 'United Kingdom', 'France', 'Italy', 'Spain']

energy_consumption = energy_consumption[energy_consumption['country'].isin(countries)]

energy_consumption.info()

<class 'pandas.core.frame.DataFrame'>
Index: 300 entries, 1617 to 20707
Columns: 129 entries, country to wind_share_energy
dtypes: float64(126), int64(1), object(2)
memory usage: 304.7+ KB


In [32]:
# Conservation des colonnes utiles
keep = [
    'country',
    'year',
    'coal_consumption',
    'coal_production',
    'coal_cons_change_pct',
    'coal_prod_change_pct',
    'electricity_demand',
    'electricity_generation',
    'biofuel_electricity',
    'coal_electricity',
    'energy_cons_change_pct',
    'fossil_cons_change_pct',
    'fossil_fuel_consumption',
    'fossil_electricity',
    'oil_consumption',
    'oil_cons_change_pct',
    'oil_electricity',
    'oil_prod_change_pct',
    'oil_production',
    'gas_cons_change_pct',
    'gas_prod_change_pct',
    'gas_consumption',
    'gas_production',
    'gas_electricity',
    'gas_prod_change_pct',
    'nuclear_consumption',
    'nuclear_electricity',
    'hydro_consumption',
    'renewables_consumption',
    'greenhouse_gas_emissions',
    'population'
]

renom = {
    'country':                           'Pays',
    'year':                              'Année',
    'coal_consumption':                  'Consommation_charbon',
    'coal_production':                   'Production_charbon',
    'coal_cons_change_pct':              'Variation_pct_consommation_charbon',
    'coal_prod_change_pct':              'Variation_pct_production_charbon',
    'electricity_demand':                'Demande_électricité',
    'electricity_generation':            'Production_électricité',
    'biofuel_electricity':               'Électricité_biocarburants',
    'coal_electricity':                  'Électricité_charbon',
    'energy_cons_change_pct':            'Variation_pct_consommation_énergie',
    'fossil_cons_change_pct':            'Variation_pct_consommation_fossile',
    'fossil_fuel_consumption':           'Consommation_combustibles_fossiles',
    'fossil_electricity':                'Électricité_fossile',
    'oil_consumption':                   'Consommation_pétrole',
    'oil_cons_change_pct':               'Variation_pct_consommation_pétrole',
    'oil_electricity':                   'Électricité_pétrole',
    'oil_prod_change_pct':               'Variation_pct_production_pétrole',
    'oil_production':                    'Production_pétrole',
    'gas_cons_change_pct':               'Variation_pct_consommation_gaz',
    'gas_prod_change_pct':               'Variation_pct_production_gaz',
    'gas_consumption':                   'Consommation_gaz',
    'gas_production':                    'Production_gaz',
    'gas_electricity':                   'Électricité_gaz',
    'nuclear_consumption':               'Consommation_nucléaire',
    'nuclear_electricity':               'Électricité_nucléaire',
    'hydro_consumption':                 'Consommation_hydroélectrique',
    'renewables_consumption':            'Consommation_renouvelables',
    'greenhouse_gas_emissions':          'Émissions_gaz_effet_de_serre',
    'population':                        'Population'
}

# Application dans votre notebook :
energy_consumption = energy_consumption[keep].rename(columns=renom)

energy_consumption.head(15)



Unnamed: 0,Pays,Année,Consommation_charbon,Production_charbon,Variation_pct_consommation_charbon,Variation_pct_production_charbon,Demande_électricité,Production_électricité,Électricité_biocarburants,Électricité_charbon,...,Consommation_gaz,Production_gaz,Électricité_gaz,Variation_pct_production_gaz,Consommation_nucléaire,Électricité_nucléaire,Consommation_hydroélectrique,Consommation_renouvelables,Émissions_gaz_effet_de_serre,Population
1617,Australia,2005,638.222,2501.289,-0.576,4.708,215.7,215.7,3.63,171.4,...,232.59,382.353,21.79,6.415,,0.0,44.285,60.836,154.36,20171732.0
1618,Australia,2006,655.499,2556.841,2.707,2.221,219.91,219.91,3.62,170.88,...,259.431,406.822,25.1,6.4,0.0,0.0,42.525,62.267,155.57,20467032.0
1619,Australia,2007,649.802,2640.855,-0.869,3.286,229.52,229.52,4.01,174.0,...,290.198,427.512,31.33,5.086,,0.0,36.897,59.896,161.61,20830832.0
1620,Australia,2008,677.291,2720.621,4.23,3.02,229.55,229.55,3.48,173.39,...,285.35,416.829,34.17,-2.499,,0.0,33.201,56.389,162.49,21247876.0
1621,Australia,2009,654.861,2822.834,-3.312,3.757,234.74,234.74,2.66,174.01,...,291.364,466.976,39.31,12.031,,0.0,35.306,59.178,165.06,21660898.0
1622,Australia,2010,607.394,2914.79,-7.248,3.258,233.63,233.63,2.3,165.57,...,317.064,526.255,44.01,12.694,,0.0,38.153,65.073,160.29,22019166.0
1623,Australia,2011,592.062,2850.813,-2.524,-2.195,240.89,240.89,2.46,164.21,...,330.82,544.515,46.58,3.47,,0.0,53.992,88.66,160.52,22357032.0
1624,Australia,2012,554.468,3091.909,-6.35,8.457,233.38,233.38,2.93,156.29,...,332.848,582.858,47.07,7.042,,0.0,46.712,87.377,154.79,22729272.0
1625,Australia,2013,526.242,3323.743,-5.091,7.498,229.98,229.98,3.06,142.94,...,350.005,605.766,48.48,3.93,,0.0,52.025,101.302,145.42,23111788.0
1626,Australia,2014,521.888,3559.449,-0.827,7.092,241.61,241.61,3.39,148.48,...,375.677,652.744,51.32,7.755,,0.0,39.239,93.464,152.38,23469578.0


In [41]:
# Vérification des types de données
energy_consumption.info()

<class 'pandas.core.frame.DataFrame'>
Index: 300 entries, 2005 to 2019
Data columns (total 30 columns):
 #   Column                              Non-Null Count  Dtype  
---  ------                              --------------  -----  
 0   Pays                                300 non-null    object 
 1   Consommation_charbon                300 non-null    float64
 2   Production_charbon                  288 non-null    float64
 3   Variation_pct_consommation_charbon  300 non-null    float64
 4   Variation_pct_production_charbon    268 non-null    float64
 5   Demande_électricité                 300 non-null    float64
 6   Production_électricité              300 non-null    float64
 7   Électricité_biocarburants           300 non-null    float64
 8   Électricité_charbon                 300 non-null    float64
 9   Variation_pct_consommation_énergie  300 non-null    float64
 10  Variation_pct_consommation_fossile  300 non-null    float64
 11  Consommation_combustibles_fossiles  300 non-nu

In [42]:
# Verication des valeurs manquantes
energy_consumption.isnull().sum()

Pays                                   0
Consommation_charbon                   0
Production_charbon                    12
Variation_pct_consommation_charbon     0
Variation_pct_production_charbon      32
Demande_électricité                    0
Production_électricité                 0
Électricité_biocarburants              0
Électricité_charbon                    0
Variation_pct_consommation_énergie     0
Variation_pct_consommation_fossile     0
Consommation_combustibles_fossiles     0
Électricité_fossile                    0
Consommation_pétrole                   0
Variation_pct_consommation_pétrole     0
Électricité_pétrole                    0
Variation_pct_production_pétrole      18
Production_pétrole                    21
Variation_pct_consommation_gaz         0
Variation_pct_production_gaz           4
Consommation_gaz                       0
Production_gaz                        18
Électricité_gaz                        0
Variation_pct_production_gaz           4
Consommation_nuc

In [44]:
# Verification des valeurs NaN
energy_consumption.isna().sum() 

Pays                                   0
Consommation_charbon                   0
Production_charbon                    12
Variation_pct_consommation_charbon     0
Variation_pct_production_charbon      32
Demande_électricité                    0
Production_électricité                 0
Électricité_biocarburants              0
Électricité_charbon                    0
Variation_pct_consommation_énergie     0
Variation_pct_consommation_fossile     0
Consommation_combustibles_fossiles     0
Électricité_fossile                    0
Consommation_pétrole                   0
Variation_pct_consommation_pétrole     0
Électricité_pétrole                    0
Variation_pct_production_pétrole      18
Production_pétrole                    21
Variation_pct_consommation_gaz         0
Variation_pct_production_gaz           4
Consommation_gaz                       0
Production_gaz                        18
Électricité_gaz                        0
Variation_pct_production_gaz           4
Consommation_nuc

In [45]:
# Export CSV
energy_consumption.to_csv('../transformed_data/energy_consumption.csv', sep=';', index=True, encoding='utf-8-sig')