In [116]:
import pandas as pd
from matplotlib import pyplot as plt

data = pd.DataFrame(columns = ['country', 'year', 'pollution_co2_t_per_capita'])
df = pd.read_csv('./assets/CO2_Emissions_1960-2018.csv')

# Pays gardés
kept_countries = ['France', 'Italy', 'Spain', 'Germany', 'Portugal', 'United Kingdom']
limit_to_kept_countries = False

df.head(5)

Unnamed: 0,Country Name,1960,1961,1962,1963,1964,1965,1966,1967,1968,...,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018
0,Aruba,204.631696,208.837879,226.08189,214.785217,207.626699,185.213644,172.158729,210.819017,194.917536,...,,,,,,,,,,
1,Africa Eastern and Southern,0.90606,0.922474,0.930816,0.94057,0.996033,1.04728,1.033908,1.052204,1.079727,...,1.021954,1.048876,1.005338,1.021646,1.031833,1.041145,0.987393,0.971016,0.959978,0.933541
2,Afghanistan,0.046057,0.053589,0.073721,0.074161,0.086174,0.101285,0.107399,0.123409,0.115142,...,0.211306,0.297065,0.407074,0.335351,0.263716,0.234037,0.232176,0.208857,0.203328,0.200151
3,Africa Western and Central,0.09088,0.095283,0.096612,0.112376,0.133258,0.184803,0.193676,0.189305,0.143989,...,0.42677,0.472819,0.497023,0.490867,0.504655,0.507671,0.480743,0.472959,0.476438,0.515544
4,Angola,0.100835,0.082204,0.210533,0.202739,0.213562,0.205891,0.268937,0.172096,0.289702,...,1.205902,1.221515,1.216317,1.204799,1.261542,1.285365,1.260921,1.227703,1.034317,0.88738


In [117]:
# Récupération de la liste des pays
countries = df['Country Name'].unique().tolist()
print(countries)
df.set_index('Country Name', inplace=True)

['Aruba', 'Africa Eastern and Southern', 'Afghanistan', 'Africa Western and Central', 'Angola', 'Albania', 'Andorra', 'Arab World', 'United Arab Emirates', 'Argentina', 'Armenia', 'American Samoa', 'Antigua and Barbuda', 'Australia', 'Austria', 'Azerbaijan', 'Burundi', 'Belgium', 'Benin', 'Burkina Faso', 'Bangladesh', 'Bulgaria', 'Bahrain', 'Bahamas, The', 'Bosnia and Herzegovina', 'Belarus', 'Belize', 'Bermuda', 'Bolivia', 'Brazil', 'Barbados', 'Brunei Darussalam', 'Bhutan', 'Botswana', 'Central African Republic', 'Canada', 'Central Europe and the Baltics', 'Switzerland', 'Channel Islands', 'Chile', 'China', "Cote d'Ivoire", 'Cameroon', 'Congo, Dem. Rep.', 'Congo, Rep.', 'Colombia', 'Comoros', 'Cabo Verde', 'Costa Rica', 'Caribbean small states', 'Cuba', 'Curacao', 'Cayman Islands', 'Cyprus', 'Czech Republic', 'Germany', 'Djibouti', 'Dominica', 'Denmark', 'Dominican Republic', 'Algeria', 'East Asia & Pacific (excluding high income)', 'Early-demographic dividend', 'East Asia & Pacific'

In [118]:
# Récupération de la liste des années
columns = df.columns
years = columns[1:].tolist()
print(years)

['1961', '1962', '1963', '1964', '1965', '1966', '1967', '1968', '1969', '1970', '1971', '1972', '1973', '1974', '1975', '1976', '1977', '1978', '1979', '1980', '1981', '1982', '1983', '1984', '1985', '1986', '1987', '1988', '1989', '1990', '1991', '1992', '1993', '1994', '1995', '1996', '1997', '1998', '1999', '2000', '2001', '2002', '2003', '2004', '2005', '2006', '2007', '2008', '2009', '2010', '2011', '2012', '2013', '2014', '2015', '2016', '2017', '2018']


In [119]:
# Création nouveau dataframe organisé par pays puis années
data_list = []

for country in countries:
    #values = df[df['Country Name'] == country]
    for year in years:
        value = df.loc[country, year]
        #print(value)
        data_list.append({'country': country, 'year': year, 'pollution_co2_t_per_capita': value})
data = pd.DataFrame(data_list, columns=['country', 'year', 'pollution_co2_t_per_capita'])
data.head(5)

Unnamed: 0,country,year,pollution_co2_t_per_capita
0,Aruba,1961,208.837879
1,Aruba,1962,226.08189
2,Aruba,1963,214.785217
3,Aruba,1964,207.626699
4,Aruba,1965,185.213644


In [120]:
data.dtypes

country                        object
year                           object
pollution_co2_t_per_capita    float64
dtype: object

In [121]:
data.shape

(15428, 3)

In [122]:
# Suppression des valeurs inutiles si nécessaires
nb_na = data['pollution_co2_t_per_capita'].isna().sum()
print("Nombre de valeurs invalides ou nulls: " + str(nb_na))
if nb_na > 0:
    data.dropna(subset=['pollution_co2_t_per_capita'], inplace=True)
data.shape

Nombre de valeurs invalides ou nulls: 2289


(13139, 3)

In [123]:
data.to_csv('./output/CO2_Emissions_sortedByYearAndCountry.csv', index=False)

In [124]:
# Retirer les valeurs des pays différents de cette liste et exporter le fichier
if limit_to_kept_countries:
    data = data[data.country.isin(kept_countries)]
data.to_csv('./output/CO2_Emissions_sortedByYearAndCountry.csv', index=False)


In [125]:
# Charger le fichier World Energy Consumption
wec = pd.read_csv('./assets/World Energy Consumption.csv')
if limit_to_kept_countries:
    wec = wec[wec.country.isin(kept_countries)]
wec.to_csv('./output/World Energy Consumption (limited).csv', index=False)

wec.head(5)

Unnamed: 0,country,year,iso_code,population,gdp,biofuel_cons_change_pct,biofuel_cons_change_twh,biofuel_cons_per_capita,biofuel_consumption,biofuel_elec_per_capita,...,solar_share_elec,solar_share_energy,wind_cons_change_pct,wind_cons_change_twh,wind_consumption,wind_elec_per_capita,wind_electricity,wind_energy_per_capita,wind_share_elec,wind_share_energy
0,ASEAN (Ember),2000,,,,,,,,,...,0.0,,,,,,0.0,,0.0,
1,ASEAN (Ember),2001,,,,,,,,,...,0.0,,,,,,0.0,,0.0,
2,ASEAN (Ember),2002,,,,,,,,,...,0.0,,,,,,0.0,,0.0,
3,ASEAN (Ember),2003,,,,,,,,,...,0.0,,,,,,0.0,,0.0,
4,ASEAN (Ember),2004,,,,,,,,,...,0.0,,,,,,0.0,,0.0,


In [126]:
# Renommer toutes les colonnes avec les unités
wec.columns = [
    'country', 'year', 'iso_code', 'population', 'gdp_usd', 'biofuel_cons_change_pct', 'biofuel_cons_change_twh',
    'biofuel_cons_per_capita_kwh', 'biofuel_consumption_twh', 'biofuel_elec_per_capita_kwh', 'biofuel_electricity_twh',
    'biofuel_share_elec_pct_total_elec', 'biofuel_share_energy_pct_total_primary', 'carbon_intensity_elec_gCO2_eq.kwh',
    'coal_cons_change_pct', 'coal_cons_change_twh', 'coal_cons_per_capita_kwh', 'coal_consumption_twh',
    'coal_elec_per_capita_kwh', 'coal_electricity_twh', 'coal_prod_change_pct', 'coal_prod_change_twh',
    'coal_prod_per_capita_kwh', 'coal_production_twh', 'coal_share_elec_pct_total_elec',
    'coal_share_energy_pct_total_primary', 'electricity_demand_twh', 'electricity_generation_twh',
    'electricity_share_energy_pct_total_primary', 'energy_cons_change_pct', 'energy_cons_change_twh',
    'energy_per_capita_kwh', 'energy_per_gdp_kwh_per_usd', 'fossil_cons_change_pct', 'fossil_cons_change_twh',
    'fossil_elec_per_capita_kwh', 'fossil_electricity_twh', 'fossil_energy_per_capita_kwh', 'fossil_fuel_consumption_twh',
    'fossil_share_elec_pct_total_elec', 'fossil_share_energy_pct_total_primary', 'gas_cons_change_pct',
    'gas_cons_change_twh', 'gas_consumption_twh', 'gas_elec_per_capita_kwh', 'gas_electricity_twh',
    'gas_energy_per_capita_kwh', 'gas_prod_change_pct', 'gas_prod_change_twh', 'gas_prod_per_capita_kwh',
    'gas_production_twh', 'gas_share_elec_pct_total_elec', 'gas_share_energy_pct_total_primary',
    'greenhouse_gas_emissions_Mt_co2_eq', 'hydro_cons_change_pct', 'hydro_cons_change_twh', 'hydro_consumption_twh',
    'hydro_elec_per_capita_kwh', 'hydro_electricity_twh', 'hydro_energy_per_capita_kwh',
    'hydro_share_elec_pct_total_elec', 'hydro_share_energy_pct_total_primary', 'low_carbon_cons_change_pct',
    'low_carbon_cons_change_twh', 'low_carbon_consumption_twh', 'low_carbon_elec_per_capita_kwh',
    'low_carbon_electricity_twh', 'low_carbon_energy_per_capita_kwh', 'low_carbon_share_elec_pct_total_elec',
    'low_carbon_share_energy_pct_total_primary', 'net_elec_imports_twh', 'net_elec_imports_share_demand_pct_total_elec',
    'nuclear_cons_change_pct', 'nuclear_cons_change_twh', 'nuclear_consumption_twh', 'nuclear_elec_per_capita_kwh',
    'nuclear_electricity_twh', 'nuclear_energy_per_capita_kwh', 'nuclear_share_elec_pct_total_elec',
    'nuclear_share_energy_pct_total_primary', 'oil_cons_change_pct', 'oil_cons_change_twh', 'oil_consumption_twh',
    'oil_elec_per_capita_kwh', 'oil_electricity_twh', 'oil_energy_per_capita_kwh', 'oil_prod_change_pct',
    'oil_prod_change_twh', 'oil_prod_per_capita_kwh', 'oil_production_twh', 'oil_share_elec_pct_total_elec',
    'oil_share_energy_pct_total_primary', 'other_renewable_consumption_twh', 'other_renewable_electricity_twh',
    'other_renewable_exc_biofuel_electricity_twh', 'other_renewables_cons_change_pct',
    'other_renewables_cons_change_twh', 'other_renewables_elec_per_capita_kwh',
    'other_renewables_elec_per_capita_exc_biofuel_kwh', 'other_renewables_energy_per_capita_kwh',
    'other_renewables_share_elec_pct_total_elec', 'other_renewables_share_elec_exc_biofuel_pct_total_elec',
    'other_renewables_share_energy_pct_total_primary', 'per_capita_electricity_kwh', 'primary_energy_consumption_twh',
    'renewables_cons_change_pct', 'renewables_cons_change_twh', 'renewables_consumption_twh',
    'renewables_elec_per_capita_kwh', 'renewables_electricity_twh', 'renewables_energy_per_capita_kwh',
    'renewables_share_elec_pct_total_elec', 'renewables_share_energy_pct_total_primary', 'solar_cons_change_pct',
    'solar_cons_change_twh', 'solar_consumption_twh', 'solar_elec_per_capita_kwh', 'solar_electricity_twh',
    'solar_energy_per_capita_kwh', 'solar_share_elec_pct_total_elec', 'solar_share_energy_pct_total_primary',
    'wind_cons_change_pct', 'wind_cons_change_twh', 'wind_consumption_twh', 'wind_elec_per_capita_kwh',
    'wind_electricity_twh', 'wind_energy_per_capita_kwh', 'wind_share_elec_pct_total_elec',
    'wind_share_energy_pct_total_primary'
]

# 5 premières lignes après avoir renommé les colonnes avec les unités
wec.head(5)

Unnamed: 0,country,year,iso_code,population,gdp_usd,biofuel_cons_change_pct,biofuel_cons_change_twh,biofuel_cons_per_capita_kwh,biofuel_consumption_twh,biofuel_elec_per_capita_kwh,...,solar_share_elec_pct_total_elec,solar_share_energy_pct_total_primary,wind_cons_change_pct,wind_cons_change_twh,wind_consumption_twh,wind_elec_per_capita_kwh,wind_electricity_twh,wind_energy_per_capita_kwh,wind_share_elec_pct_total_elec,wind_share_energy_pct_total_primary
0,ASEAN (Ember),2000,,,,,,,,,...,0.0,,,,,,0.0,,0.0,
1,ASEAN (Ember),2001,,,,,,,,,...,0.0,,,,,,0.0,,0.0,
2,ASEAN (Ember),2002,,,,,,,,,...,0.0,,,,,,0.0,,0.0,
3,ASEAN (Ember),2003,,,,,,,,,...,0.0,,,,,,0.0,,0.0,
4,ASEAN (Ember),2004,,,,,,,,,...,0.0,,,,,,0.0,,0.0,


In [127]:
# Changement de types
data['year'] = data['year'].astype(int)
wec['year'] = wec['year'].astype(int)

wec['country'] = wec['country'].astype(str)
data['country'] = data['country'].astype(str)

# Fusion des valeurs de Co2
wec = pd.merge(wec, data, on=["country", "year"], how='left')
            
# Supprimer la colonne 'population'
# wec = wec.drop('population', axis=1, errors='ignore')
# wec = wec.drop('iso_code', axis=1, errors='ignore')
# wec = wec.drop('gdp', axis=1, errors='ignore')

wec = wec[wec['year'] >= 1960]

# Valeurs enregistrés
wec.to_csv('./output/World Energy Consumption (limited).csv', index=False)

In [128]:
#Dataset Taxes sur les énergies

In [129]:
# Charger le dataset des taxes sur les énergies
tax = pd.read_csv('./assets/Tax_Energy.csv')
tax.head(5)

Unnamed: 0,ObjectId,Country,ISO2,ISO3,Indicator,Source,CTS Code,CTS Name,CTS Full Descriptor,Unit,...,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021
0,1,Albania,AL,ALB,Environmental Taxes,Organisation for Economic Co-operation and Dev...,ECGTE,Environmental Taxes,"Environment, Climate Change, Government Policy...",Domestic Currency,...,,,,43993140000.0,47813790000.0,47548580000.0,51145590000.0,53415650000.0,,
1,2,Albania,AL,ALB,Environmental Taxes,Organisation for Economic Co-operation and Dev...,ECGTE,Environmental Taxes,"Environment, Climate Change, Government Policy...",Percent of GDP,...,,,,3.067206,3.247163,3.066373,3.124865,3.157133,,
2,3,Albania,AL,ALB,Taxes on Energy (including fuel for transport),Organisation for Economic Co-operation and Dev...,ECGTEN,Taxes on Energy (Including Fuel for Transport),"Environment, Climate Change, Government Policy...",Domestic Currency,...,,,,37741110000.0,40945620000.0,40400040000.0,43521820000.0,45165300000.0,,
3,4,Albania,AL,ALB,Taxes on Energy (including fuel for transport),Organisation for Economic Co-operation and Dev...,ECGTEN,Taxes on Energy (Including Fuel for Transport),"Environment, Climate Change, Government Policy...",Percent of GDP,...,,,,2.631314,2.780726,2.605369,2.659072,2.669496,,
4,5,Albania,AL,ALB,Taxes on Pollution,Organisation for Economic Co-operation and Dev...,ECGTEP,Taxes on Pollution,"Environment, Climate Change, Government Policy...",Domestic Currency,...,,,,1782069000.0,1879970000.0,1941324000.0,2226251000.0,2625011000.0,,


In [130]:
tax_country = tax[tax["Country"] == "Andorra, Principality of"]
print(tax_country)

    ObjectId                   Country ISO2 ISO3  \
10        11  Andorra, Principality of   AD  AND   
11        12  Andorra, Principality of   AD  AND   
12        13  Andorra, Principality of   AD  AND   
13        14  Andorra, Principality of   AD  AND   
14        15  Andorra, Principality of   AD  AND   

                                            Indicator  \
10                                Environmental Taxes   
11     Taxes on Energy (including fuel for transport)   
12                                 Taxes on Pollution   
13                                 Taxes on Resources   
14  Taxes on Transport (excluding fuel for transport)   

                                               Source CTS Code  \
10  Organisation for Economic Co-operation and Dev...    ECGTE   
11  Organisation for Economic Co-operation and Dev...   ECGTEN   
12  Organisation for Economic Co-operation and Dev...   ECGTEP   
13  Organisation for Economic Co-operation and Dev...   ECGTER   
14  Organisati

In [131]:
# Compter le nombre de valeurs "NaN" dans les colonnes
nb_nan_colTax = tax.isna().sum()

print("Nombre de NaN dans les colonnes :", nb_nan_colTax)

# Compter le nombre de valeurs "NaN" dans le dataset
nb_nan_dataset = tax.isna().sum().sum()

print("Nombre de NaN dans le dataset :", nb_nan_dataset)

Nombre de NaN dans les colonnes : ObjectId                 0
Country                  0
ISO2                    10
ISO3                     8
Indicator                0
Source                   0
CTS Code                 0
CTS Name                 0
CTS Full Descriptor      0
Unit                     0
1995                   495
1996                   491
1997                   491
1998                   483
1999                   481
2000                   431
2001                   415
2002                   411
2003                   403
2004                   403
2005                   369
2006                   359
2007                   351
2008                   321
2009                   321
2010                   295
2011                   303
2012                   307
2013                   309
2014                   305
2015                    44
2016                    46
2017                    50
2018                    68
2019                   100
2020                 

In [132]:
# Regrouper les années dans une seule colonne
tax = pd.melt(tax, id_vars=["ObjectId", "Country", "ISO2", "ISO3", "Indicator", "Source", "CTS Code", "CTS Name", "CTS Full Descriptor", "Unit"], 
                    var_name="Année", value_name="Taxe")
tax.head(5)

Unnamed: 0,ObjectId,Country,ISO2,ISO3,Indicator,Source,CTS Code,CTS Name,CTS Full Descriptor,Unit,Année,Taxe
0,1,Albania,AL,ALB,Environmental Taxes,Organisation for Economic Co-operation and Dev...,ECGTE,Environmental Taxes,"Environment, Climate Change, Government Policy...",Domestic Currency,1995,
1,2,Albania,AL,ALB,Environmental Taxes,Organisation for Economic Co-operation and Dev...,ECGTE,Environmental Taxes,"Environment, Climate Change, Government Policy...",Percent of GDP,1995,
2,3,Albania,AL,ALB,Taxes on Energy (including fuel for transport),Organisation for Economic Co-operation and Dev...,ECGTEN,Taxes on Energy (Including Fuel for Transport),"Environment, Climate Change, Government Policy...",Domestic Currency,1995,
3,4,Albania,AL,ALB,Taxes on Energy (including fuel for transport),Organisation for Economic Co-operation and Dev...,ECGTEN,Taxes on Energy (Including Fuel for Transport),"Environment, Climate Change, Government Policy...",Percent of GDP,1995,
4,5,Albania,AL,ALB,Taxes on Pollution,Organisation for Economic Co-operation and Dev...,ECGTEP,Taxes on Pollution,"Environment, Climate Change, Government Policy...",Domestic Currency,1995,


In [133]:
# Concaténer les valeurs de "CTS Code" et "CTS Name" avec un séparateur "-"
tax["CTS Name"] = tax["CTS Code"] + " - " + tax["CTS Name"]

# Supprimer la colonne "CTS Code" puisque ses valeurs ont été concaténées avec "CTS Name"
# Supprimer les colonnes inutiles "ObjectId", "ISO2", "Indicator", "Source", "CTS Full Descriptor"
tax.drop(columns=["CTS Code", "ObjectId", "ISO2", "ISO3", "Indicator", "Source", "CTS Full Descriptor"], inplace=True)

tax.head(5)

Unnamed: 0,Country,CTS Name,Unit,Année,Taxe
0,Albania,ECGTE - Environmental Taxes,Domestic Currency,1995,
1,Albania,ECGTE - Environmental Taxes,Percent of GDP,1995,
2,Albania,ECGTEN - Taxes on Energy (Including Fuel for T...,Domestic Currency,1995,
3,Albania,ECGTEN - Taxes on Energy (Including Fuel for T...,Percent of GDP,1995,
4,Albania,ECGTEP - Taxes on Pollution,Domestic Currency,1995,


In [134]:
tax.columns

Index(['Country', 'CTS Name', 'Unit', 'Année', 'Taxe'], dtype='object')

In [135]:
tax.columns.dtype

dtype('O')

In [136]:
tax.dtypes

Country      object
CTS Name     object
Unit         object
Année        object
Taxe        float64
dtype: object

In [137]:
tax.shape

(33453, 5)

In [138]:
# Vérifier les valeurs nulles dans le DataFrame
null_values = tax.isnull()

# Vérifier si le DataFrame est vide
is_empty = tax.empty

# Afficher les valeurs nulles
print("Valeurs nulles dans le DataFrame :")
print(null_values)

# Afficher si le DataFrame est vide
print("\nLe DataFrame est vide :", is_empty)

Valeurs nulles dans le DataFrame :
       Country  CTS Name   Unit  Année  Taxe
0        False     False  False  False  True
1        False     False  False  False  True
2        False     False  False  False  True
3        False     False  False  False  True
4        False     False  False  False  True
...        ...       ...    ...    ...   ...
33448    False     False  False  False  True
33449    False     False  False  False  True
33450    False     False  False  False  True
33451    False     False  False  False  True
33452    False     False  False  False  True

[33453 rows x 5 columns]

Le DataFrame est vide : False


In [139]:
# Vérifier s'il y a des valeurs "nan" ou "null" dans la colonne 'Taxe'
nan_colTaxe = tax['Taxe'].isna().any()

print("Valeurs nan ou null dans la colonne 'Taxe':", nan_colTaxe)

Valeurs nan ou null dans la colonne 'Taxe': True


In [140]:
# Compter le nombre de valeurs "NaN" dans la colonne 'Taxe'
nb_nan_colTax = tax['Taxe'].isna().sum()

print("Nombre de NaN dans la colonne 'Taxe':", nb_nan_colTax)

Nombre de NaN dans la colonne 'Taxe': 8823


In [141]:
# Filtrer les lignes avec des valeurs  puis les afficher
nan_rows = tax[tax.isna().any(axis=1)]

print("Lignes contenant des valeurs NaN :")
print(nan_rows)

Lignes contenant des valeurs NaN :
                  Country                                           CTS Name  \
0                 Albania                        ECGTE - Environmental Taxes   
1                 Albania                        ECGTE - Environmental Taxes   
2                 Albania  ECGTEN - Taxes on Energy (Including Fuel for T...   
3                 Albania  ECGTEN - Taxes on Energy (Including Fuel for T...   
4                 Albania                        ECGTEP - Taxes on Pollution   
...                   ...                                                ...   
33448  West Bank and Gaza                        ECGTEP - Taxes on Pollution   
33449  West Bank and Gaza                        ECGTER - Taxes on Resources   
33450  West Bank and Gaza                        ECGTER - Taxes on Resources   
33451  West Bank and Gaza  ECGTET - Taxes on Transport (Excluding Fuel fo...   
33452  West Bank and Gaza  ECGTET - Taxes on Transport (Excluding Fuel fo...   

    

In [142]:
# Compter le nombre de valeurs "NaN" dans les colonnes
nb_nan_colTax = tax.isna().sum()

print("Nombre de NaN dans les colonnes :", nb_nan_colTax)

# Compter le nombre de valeurs "NaN" dans le dataset
nb_nan_dataset = tax.isna().sum().sum()

print("Nombre de NaN dans le dataset :", nb_nan_dataset)

Nombre de NaN dans les colonnes : Country        0
CTS Name       0
Unit           0
Année          0
Taxe        8823
dtype: int64
Nombre de NaN dans le dataset : 8823


In [143]:
print(tax.columns)

Index(['Country', 'CTS Name', 'Unit', 'Année', 'Taxe'], dtype='object')


In [144]:
# Pivoter les données pour mettre les catégories des taxes de la colonne "CTS Name" en colonnes et pour mettre les unités des taxes de la colonne "Unit" en colonnes
taxE = tax.pivot_table(index=['Country', 'Année'],
                               columns=['CTS Name', 'Unit'],
                               values='Taxe').reset_index()

taxE.head(5)

CTS Name,Country,Année,ECGTE - Environmental Taxes,ECGTE - Environmental Taxes,ECGTEN - Taxes on Energy (Including Fuel for Transport),ECGTEN - Taxes on Energy (Including Fuel for Transport),ECGTEP - Taxes on Pollution,ECGTEP - Taxes on Pollution,ECGTER - Taxes on Resources,ECGTER - Taxes on Resources,ECGTET - Taxes on Transport (Excluding Fuel for Transport),ECGTET - Taxes on Transport (Excluding Fuel for Transport)
Unit,Unnamed: 1_level_1,Unnamed: 2_level_1,Domestic Currency,Percent of GDP,Domestic Currency,Percent of GDP,Domestic Currency,Percent of GDP,Domestic Currency,Percent of GDP,Domestic Currency,Percent of GDP
0,Albania,2015,43993140000.0,3.067206,37741110000.0,2.631314,1782069000.0,0.124246,32546493.0,0.002269,4437413000.0,0.309377
1,Albania,2016,47813790000.0,3.247163,40945620000.0,2.780726,1879970000.0,0.127674,52524339.0,0.003567,4935684000.0,0.335195
2,Albania,2017,47548580000.0,3.066373,40400040000.0,2.605369,1941324000.0,0.125195,61861356.38,0.003989,5145347000.0,0.33182
3,Albania,2018,51145590000.0,3.124865,43521820000.0,2.659072,2226251000.0,0.136018,56011991.0,0.003422,5341506000.0,0.326352
4,Albania,2019,53415650000.0,3.157133,45165300000.0,2.669496,2625011000.0,0.155151,58777099.5,0.003474,5566564000.0,0.329012


In [145]:
# Renommer les colonnes
taxE.columns = [
    'country',    
    'year',
    'ECGTE_EnvTaxes_DomCCY',
    'ECGTE_EnvTaxes_PctGDP',
    'ECGTEN_TaxesEnergy_InclFuelTrans_DomCCY',
    'ECGTEN_TaxesEnergy_InclFuelTrans_PctGDP',
    'ECGTEP_TaxesPollution_DomCCY',
    'ECGTEP_TaxesPollution_PctGDP',
    'ECGTER_TaxesResources_DomCCY',
    'ECGTER_TaxesResources_PctGDP',
    'ECGTET_TaxesTrans_ExclFuelTrans_DomCCY',
    'ECGTET_TaxesTrans_ExclFuelTrans_PctGDP'
]
# Afficher les nouvelles colonnes
print(taxE.columns)

Index(['country', 'year', 'ECGTE_EnvTaxes_DomCCY', 'ECGTE_EnvTaxes_PctGDP',
       'ECGTEN_TaxesEnergy_InclFuelTrans_DomCCY',
       'ECGTEN_TaxesEnergy_InclFuelTrans_PctGDP',
       'ECGTEP_TaxesPollution_DomCCY', 'ECGTEP_TaxesPollution_PctGDP',
       'ECGTER_TaxesResources_DomCCY', 'ECGTER_TaxesResources_PctGDP',
       'ECGTET_TaxesTrans_ExclFuelTrans_DomCCY',
       'ECGTET_TaxesTrans_ExclFuelTrans_PctGDP'],
      dtype='object')


In [146]:
# Compter le nombre de valeurs "NaN" dans les colonnes
nb_nan_colTax_taxE = taxE.isna().sum()

print("Nombre de NaN dans les colonnes :", nb_nan_colTax_taxE)

# Compter le nombre de valeurs "NaN" dans le dataset
nb_nan_dataset_taxE = taxE.isna().sum().sum()

print("Nombre de NaN dans le dataset :", nb_nan_dataset_taxE)

Nombre de NaN dans les colonnes : country                                      0
year                                         0
ECGTE_EnvTaxes_DomCCY                      460
ECGTE_EnvTaxes_PctGDP                      477
ECGTEN_TaxesEnergy_InclFuelTrans_DomCCY    344
ECGTEN_TaxesEnergy_InclFuelTrans_PctGDP    350
ECGTEP_TaxesPollution_DomCCY               134
ECGTEP_TaxesPollution_PctGDP               140
ECGTER_TaxesResources_DomCCY               146
ECGTER_TaxesResources_PctGDP               152
ECGTET_TaxesTrans_ExclFuelTrans_DomCCY     350
ECGTET_TaxesTrans_ExclFuelTrans_PctGDP     367
dtype: int64
Nombre de NaN dans le dataset : 2920


In [147]:
# Filtrer les lignes avec des valeurs nan puis les afficher
nan_rows_taxE = taxE[taxE.isna().any(axis=1)]

print("Lignes contenant des valeurs NaN :")
print(nan_rows_taxE)

Lignes contenant des valeurs NaN :
                       country  year  ECGTE_EnvTaxes_DomCCY  \
5     Andorra, Principality of  2015           4.600367e+12   
6     Andorra, Principality of  2016           4.616889e+12   
7     Andorra, Principality of  2017           5.043869e+12   
8     Andorra, Principality of  2018           5.283535e+12   
9     Andorra, Principality of  2019           5.675649e+12   
...                        ...   ...                    ...   
2726                   Vietnam  1999                    NaN   
2727                   Vietnam  2000                    NaN   
2728                   Vietnam  2001                    NaN   
2729                   Vietnam  2002                    NaN   
2748                   Vietnam  2021                    NaN   

      ECGTE_EnvTaxes_PctGDP  ECGTEN_TaxesEnergy_InclFuelTrans_DomCCY  \
5                       NaN                                      0.0   
6                       NaN                                     

In [148]:
taxE.dtypes

country                                     object
year                                        object
ECGTE_EnvTaxes_DomCCY                      float64
ECGTE_EnvTaxes_PctGDP                      float64
ECGTEN_TaxesEnergy_InclFuelTrans_DomCCY    float64
ECGTEN_TaxesEnergy_InclFuelTrans_PctGDP    float64
ECGTEP_TaxesPollution_DomCCY               float64
ECGTEP_TaxesPollution_PctGDP               float64
ECGTER_TaxesResources_DomCCY               float64
ECGTER_TaxesResources_PctGDP               float64
ECGTET_TaxesTrans_ExclFuelTrans_DomCCY     float64
ECGTET_TaxesTrans_ExclFuelTrans_PctGDP     float64
dtype: object

In [149]:
taxE.shape

(2755, 12)

In [150]:
# Vérifier les valeurs nulles dans le DataFrame
null_values = taxE.isnull()

# Vérifier si le DataFrame est vide
is_empty = taxE.empty

# Afficher les valeurs nulles
print("Valeurs nulles dans le DataFrame :")
print(null_values)

# Afficher si le DataFrame est vide
print("\nLe DataFrame est vide :", is_empty)


Valeurs nulles dans le DataFrame :
      country   year  ECGTE_EnvTaxes_DomCCY  ECGTE_EnvTaxes_PctGDP  \
0       False  False                  False                  False   
1       False  False                  False                  False   
2       False  False                  False                  False   
3       False  False                  False                  False   
4       False  False                  False                  False   
...       ...    ...                    ...                    ...   
2750    False  False                  False                  False   
2751    False  False                  False                  False   
2752    False  False                  False                  False   
2753    False  False                  False                  False   
2754    False  False                  False                  False   

      ECGTEN_TaxesEnergy_InclFuelTrans_DomCCY  \
0                                       False   
1                         

In [151]:
# Sélectionner les lignes avec NaN ou 0 dans les 10 colonnes spécifiées
lignes_nan_zero = taxE.loc[(taxE['ECGTE_EnvTaxes_DomCCY'].isna() | (taxE['ECGTE_EnvTaxes_DomCCY'] == 0)) & (taxE['ECGTE_EnvTaxes_PctGDP'].isna() | (taxE['ECGTE_EnvTaxes_PctGDP'] == 0)) &                                (taxE['ECGTEN_TaxesEnergy_InclFuelTrans_DomCCY'].isna() | (taxE['ECGTEN_TaxesEnergy_InclFuelTrans_DomCCY'] == 0)) & (taxE['ECGTEN_TaxesEnergy_InclFuelTrans_PctGDP'].isna() | (taxE['ECGTEN_TaxesEnergy_InclFuelTrans_PctGDP'] == 0)) & (taxE['ECGTEP_TaxesPollution_DomCCY'].isna() | (taxE['ECGTEP_TaxesPollution_DomCCY'] == 0)) & (taxE['ECGTEP_TaxesPollution_PctGDP'].isna() | (taxE['ECGTEP_TaxesPollution_PctGDP'] == 0)) & (taxE['ECGTER_TaxesResources_DomCCY'].isna() | (taxE['ECGTER_TaxesResources_DomCCY'] == 0)) & (taxE['ECGTER_TaxesResources_PctGDP'].isna() | (taxE['ECGTER_TaxesResources_PctGDP'] == 0)) & (taxE['ECGTET_TaxesTrans_ExclFuelTrans_DomCCY'].isna() | (taxE['ECGTET_TaxesTrans_ExclFuelTrans_DomCCY'] == 0)) & (taxE['ECGTET_TaxesTrans_ExclFuelTrans_PctGDP'].isna() | (taxE['ECGTET_TaxesTrans_ExclFuelTrans_PctGDP'] == 0))]

# Afficher les lignes sélectionnées
print(lignes_nan_zero)

         country  year  ECGTE_EnvTaxes_DomCCY  ECGTE_EnvTaxes_PctGDP  \
152   Bangladesh  1995                    NaN                    NaN   
153   Bangladesh  1996                    NaN                    NaN   
154   Bangladesh  1997                    NaN                    NaN   
155   Bangladesh  1998                    NaN                    NaN   
156   Bangladesh  1999                    NaN                    NaN   
...          ...   ...                    ...                    ...   
2726     Vietnam  1999                    NaN                    NaN   
2727     Vietnam  2000                    NaN                    NaN   
2728     Vietnam  2001                    NaN                    NaN   
2729     Vietnam  2002                    NaN                    NaN   
2748     Vietnam  2021                    NaN                    NaN   

      ECGTEN_TaxesEnergy_InclFuelTrans_DomCCY  \
152                                       0.0   
153                                  

In [152]:
# Supprimer les lignes avec NaN ou 0 dans les 10 colonnes spécifiées
taxE = taxE.drop(taxE[(taxE['ECGTE_EnvTaxes_DomCCY'].isna() | (taxE['ECGTE_EnvTaxes_DomCCY'] == 0)) & (taxE['ECGTE_EnvTaxes_PctGDP'].isna() | (taxE['ECGTE_EnvTaxes_PctGDP'] == 0)) & (taxE['ECGTEN_TaxesEnergy_InclFuelTrans_DomCCY'].isna() | (taxE['ECGTEN_TaxesEnergy_InclFuelTrans_DomCCY'] == 0)) & (taxE['ECGTEN_TaxesEnergy_InclFuelTrans_PctGDP'].isna() | (taxE['ECGTEN_TaxesEnergy_InclFuelTrans_PctGDP'] == 0)) & (taxE['ECGTEP_TaxesPollution_DomCCY'].isna() | (taxE['ECGTEP_TaxesPollution_DomCCY'] == 0)) & (taxE['ECGTEP_TaxesPollution_PctGDP'].isna() | (taxE['ECGTEP_TaxesPollution_PctGDP'] == 0)) & (taxE['ECGTER_TaxesResources_DomCCY'].isna() | (taxE['ECGTER_TaxesResources_DomCCY'] == 0)) & (taxE['ECGTER_TaxesResources_PctGDP'].isna() | (taxE['ECGTER_TaxesResources_PctGDP'] == 0)) & (taxE['ECGTET_TaxesTrans_ExclFuelTrans_DomCCY'].isna() | (taxE['ECGTET_TaxesTrans_ExclFuelTrans_DomCCY'] == 0)) & (taxE['ECGTET_TaxesTrans_ExclFuelTrans_PctGDP'].isna() | (taxE['ECGTET_TaxesTrans_ExclFuelTrans_PctGDP'] == 0))].index)

# Réinitialiser les index après la suppression des lignes
taxE = taxE.reset_index(drop=True)

# Afficher le dataframe après suppression
taxE.head(5)

Unnamed: 0,country,year,ECGTE_EnvTaxes_DomCCY,ECGTE_EnvTaxes_PctGDP,ECGTEN_TaxesEnergy_InclFuelTrans_DomCCY,ECGTEN_TaxesEnergy_InclFuelTrans_PctGDP,ECGTEP_TaxesPollution_DomCCY,ECGTEP_TaxesPollution_PctGDP,ECGTER_TaxesResources_DomCCY,ECGTER_TaxesResources_PctGDP,ECGTET_TaxesTrans_ExclFuelTrans_DomCCY,ECGTET_TaxesTrans_ExclFuelTrans_PctGDP
0,Albania,2015,43993140000.0,3.067206,37741110000.0,2.631314,1782069000.0,0.124246,32546493.0,0.002269,4437413000.0,0.309377
1,Albania,2016,47813790000.0,3.247163,40945620000.0,2.780726,1879970000.0,0.127674,52524339.0,0.003567,4935684000.0,0.335195
2,Albania,2017,47548580000.0,3.066373,40400040000.0,2.605369,1941324000.0,0.125195,61861356.38,0.003989,5145347000.0,0.33182
3,Albania,2018,51145590000.0,3.124865,43521820000.0,2.659072,2226251000.0,0.136018,56011991.0,0.003422,5341506000.0,0.326352
4,Albania,2019,53415650000.0,3.157133,45165300000.0,2.669496,2625011000.0,0.155151,58777099.5,0.003474,5566564000.0,0.329012


In [153]:
taxE = taxE.fillna(0)

In [154]:
taxE.to_csv('./output/Tax_Energy.csv', index=False)

In [155]:
# Changement de types
wec['year'] = wec['year'].astype(int)
data['year'] = data['year'].astype(int)
taxE['year'] = taxE['year'].astype(int)

wec['country'] = wec['country'].astype(str)
data['country'] = data['country'].astype(str)
taxE['country'] = taxE['country'].astype(str)

# Fusion des valeurs de Co2
wec = pd.merge(wec, taxE, on=["country", "year"], how='left')

In [156]:
# Créer un dictionnaire pour faire le mapping entre les pays et les continents
continent_mapping = {
    'ASEAN (Ember)': 'ASEAN (Ember)', 'Afghanistan': 'Asia', 'Africa': 'Africa', 'Africa (EI)': 'Africa (EI)', 'Africa (Ember)': 'Africa (Ember)', 'Africa (Shift)': 'Africa (Shift)', 'Albania': 'Europe', 'Algeria': 'Africa', 'American Samoa': 'Oceania', 'Angola': 'Africa', 'Antarctica': 'Antarctica', 'Antigua and Barbuda': 'North America', 'Argentina': 'South America', 'Armenia': 'Asia', 'Aruba': 'North America', 'Asia': 'Asia', 'Asia & Oceania (EIA)': 'Asia & Oceania (EIA)', 'Asia (Ember)': 'Asia (Ember)', 'Asia Pacific (EI)': 'Asia Pacific (EI)', 'Asia and Oceania (Shift)': 'Asia and Oceania (Shift)', 'Australia': 'Oceania', 'Australia and New Zealand (EIA)': 'Australia and New Zealand (EIA)', 'Austria': 'Europe', 'Azerbaijan': 'Asia', 'Bahamas': 'North America', 'Bahrain': 'Asia', 'Bangladesh': 'Asia', 'Barbados': 'North America', 'Belarus': 'Europe', 'Belgium': 'Europe', 'Belize': 'North America', 'Benin': 'Africa', 'Bermuda': 'North America', 'Bhutan': 'Asia', 'Bolivia': 'South America', 'Bosnia and Herzegovina': 'Europe', 'Botswana': 'Africa', 'Brazil': 'South America', 'British Virgin Islands': 'North America', 'Brunei': 'Asia', 'Bulgaria': 'Europe', 'Burkina Faso': 'Africa', 'Burundi': 'Africa', 'CIS (EI)': 'CIS (EI)', 'Cambodia': 'Asia', 'Cameroon': 'Africa', 'Canada': 'North America', 'Cape Verde': 'Africa', 'Cayman Islands': 'North America', 'Central & South America (EIA)': 'Central & South America (EIA)', 'Central African Republic': 'Africa', 'Central America (EI)': 'Central America (EI)', 'Central and South America (Shift)': 'Central and South America (Shift)', 'Chad': 'Africa', 'Chile': 'South America', 'China': 'Asia', 'Colombia': 'South America', 'Comoros': 'Africa', 'Congo': 'Africa', 'Cook Islands': 'Oceania', 'Costa Rica': 'North America', 'Cote d''Ivoire': 'Africa', 'Croatia': 'Europe', 'Cuba': 'North America', 'Cyprus': 'Europe', 'Czechia': 'Europe', 'Czechoslovakia': 'Europe', 'Democratic Republic of Congo': 'Africa', 'Denmark': 'Europe', 'Djibouti': 'Africa', 'Dominica': 'North America', 'Dominican Republic': 'North America', 'EU28 (Shift)': 'EU28 (Shift)', 'East Germany (EIA)': 'East Germany (EIA)', 'East Timor': 'Asia', 'Eastern Africa (EI)': 'Eastern Africa (EI)', 'Ecuador': 'South America', 'Egypt': 'Africa', 'El Salvador': 'North America', 'Equatorial Guinea': 'Africa', 'Eritrea': 'Africa', 'Estonia': 'Europe', 'Eswatini': 'Africa', 'Ethiopia': 'Africa', 'Eurasia (EIA)': 'Eurasia (EIA)', 'Eurasia (Shift)': 'Eurasia (Shift)', 'Europe': 'Europe', 'Europe (EI)': 'Europe (EI)', 'Europe (Ember)': 'Europe (Ember)', 'Europe (Shift)': 'Europe (Shift)', 'European Union (27)': 'European Union (27)', 'European Union (EIA)': 'European Union (EIA)', 'Falkland Islands': 'South America', 'Faroe Islands': 'Europe', 'Fiji': 'Oceania', 'Finland': 'Europe', 'France': 'Europe', 'French Guiana': 'South America', 'French Polynesia': 'Oceania', 'G20 (Ember)': 'G20 (Ember)', 'G7 (Ember)': 'G7 (Ember)', 'Gabon': 'Africa', 'Gambia': 'Africa', 'Georgia': 'Europe', 'Germany': 'Europe', 'Ghana': 'Africa', 'Gibraltar': 'Europe', 'Greece': 'Europe', 'Greenland': 'North America', 'Grenada': 'North America', 'Guadeloupe': 'North America', 'Guam': 'Oceania', 'Guatemala': 'North America', 'Guinea': 'Africa', 'Guinea-Bissau': 'Africa', 'Guyana': 'South America', 'Haiti': 'North America', 'Hawaiian Trade Zone (EIA)': 'Hawaiian Trade Zone (EIA)', 'High-income countries': 'High-income countries', 'Honduras': 'North America', 'Hong Kong': 'Asia', 'Hungary': 'Europe', 'IEO - Africa (EIA)': 'IEO - Africa (EIA)', 'IEO - Middle East (EIA)': 'IEO - Middle East (EIA)', 'IEO OECD - Europe (EIA)': 'IEO OECD - Europe (EIA)', 'Iceland': 'Europe', 'India': 'Asia', 'Indonesia': 'Asia', 'Iran': 'Asia', 'Iraq': 'Asia', 'Ireland': 'Europe', 'Israel': 'Asia', 'Italy': 'Europe', 'Jamaica': 'North America', 'Japan': 'Asia', 'Jordan': 'Asia', 'Kazakhstan': 'Europe', 'Kenya': 'Africa', 'Kiribati': 'Oceania', 'Kosovo': 'Europe', 'Kuwait': 'Asia', 'Kyrgyzstan': 'Asia', 'Laos': 'Asia', 'Latin America and Caribbean (Ember)': 'Latin America and Caribbean (Ember)', 'Latvia': 'Europe', 'Lebanon': 'Asia', 'Lesotho': 'Africa', 'Liberia': 'Africa', 'Libya': 'Africa', 'Lithuania': 'Europe', 'Low-income countries': 'Low-income countries', 'Lower-middle-income countries': 'Lower-middle-income countries', 'Luxembourg': 'Europe', 'Macao': 'Asia', 'Madagascar': 'Africa', 'Malawi': 'Africa', 'Malaysia': 'Asia', 'Maldives': 'Asia', 'Mali': 'Africa', 'Malta': 'Europe', 'Martinique': 'North America', 'Mauritania': 'Africa', 'Mauritius': 'Africa', 'Mexico': 'North America',' Mexico,'  'Chile, and other OECD Americas (EIA)': 'Mexico, Chile, and other OECD Americas (EIA)', 'Micronesia (country)': 'Oceania', 'Middle Africa (EI)': 'Middle Africa (EI)', 'Middle East (EI)': 'Middle East (EI)', 'Middle East (EIA)': 'Middle East (EIA)', 'Middle East (Ember)': 'Middle East (Ember)', 'Middle East (Shift)': 'Middle East (Shift)', 'Moldova': 'Europe', 'Mongolia': 'Asia', 'Montenegro': 'Europe', 'Montserrat': 'North America', 'Morocco': 'Africa', 'Mozambique': 'Africa', 'Myanmar': 'Asia', 'Namibia': 'Africa', 'Nauru': 'Oceania', 'Nepal': 'Asia', 'Netherlands': 'Europe', 'Netherlands Antilles': 'North America', 'New Caledonia': 'Oceania', 'New Zealand': 'Oceania', 'Nicaragua': 'North America', 'Niger': 'Africa', 'Nigeria': 'Africa', 'Niue': 'Oceania', 'Non-OECD (EI)': 'Non-OECD (EI)', 'Non-OECD (EIA)': 'Non-OECD (EIA)', 'Non-OPEC (EI)': 'Non-OPEC (EI)', 'Non-OPEC (EIA)': 'Non-OPEC (EIA)', 'North America': 'North America', 'North America (EI)': 'North America (EI)', 'North America (Ember)': 'North America (Ember)', 'North America (Shift)': 'North America (Shift)', 'North Korea': 'Asia', 'North Macedonia': 'Europe', 'Northern Mariana Islands': 'Oceania', 'Norway': 'Europe', 'OECD (EI)': 'OECD (EI)', 'OECD (EIA)': 'OECD (EIA)', 'OECD (Ember)': 'OECD (Ember)', 'OECD (Shift)': 'OECD (Shift)', 'OECD - Asia And Oceania (EIA)': 'OECD - Asia And Oceania (EIA)', 'OECD - Europe (EIA)': 'OECD - Europe (EIA)', 'OECD - North America (EIA)': 'OECD - North America (EIA)', 'OPEC (EI)': 'OPEC (EI)', 'OPEC (EIA)': 'OPEC (EIA)', 'OPEC (Shift)': 'OPEC (Shift)', 'OPEC - Africa (EIA)': 'OPEC - Africa (EIA)', 'OPEC - South America (EIA)': 'OPEC - South America (EIA)', 'Oceania': 'Oceania', 'Oceania (Ember)': 'Oceania (Ember)', 'Oman': 'Asia', 'Other Non-OECD - America (EIA)': 'Other Non-OECD - America (EIA)', 'Other Non-OECD - Asia (EIA)': 'Other Non-OECD - Asia (EIA)', 'Other Non-OECD - Europe and Eurasia (EIA)': 'Other Non-OECD - Europe and Eurasia (EIA)', 'Pakistan': 'Asia', 'Palestine': 'Asia', 'Panama': 'North America', 'Papua New Guinea': 'Oceania', 'Paraguay': 'South America', 'Persian Gulf (EIA)': 'Persian Gulf (EIA)', 'Persian Gulf (Shift)': 'Persian Gulf (Shift)', 'Peru': 'South America', 'Philippines': 'Asia', 'Poland': 'Europe', 'Portugal': 'Europe', 'Puerto Rico': 'North America', 'Qatar': 'Asia', 'Reunion': 'Africa', 'Romania': 'Europe', 'Russia': 'Europe', 'Rwanda': 'Africa', 'Saint Helena': 'Africa', 'Saint Kitts and Nevis': 'North America', 'Saint Lucia': 'North America', 'Saint Pierre and Miquelon': 'North America', 'Saint Vincent and the Grenadines': 'North America', 'Samoa': 'Oceania', 'Sao Tome and Principe': 'Africa', 'Saudi Arabia': 'Asia', 'Senegal': 'Africa', 'Serbia': 'Europe', 'Serbia and Montenegro': 'Europe', 'Seychelles': 'Africa', 'Sierra Leone': 'Africa', 'Singapore': 'Asia', 'Slovakia': 'Europe', 'Slovenia': 'Europe', 'Solomon Islands': 'Oceania', 'Somalia': 'Africa', 'South Africa': 'Africa', 'South America': 'South America', 'South Korea': 'Asia', 'South Korea and other OECD Asia (EIA)': 'South Korea and other OECD Asia (EIA)', 'South Sudan': 'Africa', 'South and Central America (EI)': 'South and Central America (EI)', 'Spain': 'Europe', 'Sri Lanka': 'Asia', 'Sudan': 'Africa', 'Suriname': 'South America', 'Sweden': 'Europe', 'Switzerland': 'Europe', 'Syria': 'Asia', 'Taiwan': 'Asia', 'Tajikistan': 'Asia', 'Tanzania': 'Africa', 'Thailand': 'Asia', 'Togo': 'Africa', 'Tonga': 'Oceania', 'Trinidad and Tobago': 'North America', 'Tunisia': 'Africa', 'Turkey': 'Europe', 'Turkmenistan': 'Asia', 'Turks and Caicos Islands': 'North America', 'Tuvalu': 'Oceania', 'U.S. Pacific Islands (EIA)': 'U.S. Pacific Islands (EIA)', 'U.S. Territories (EIA)': 'U.S. Territories (EIA)', 'USSR': 'USSR', 'Uganda': 'Africa', 'Ukraine': 'Europe', 'United Arab Emirates': 'Asia', 'United Kingdom': 'Europe', 'United States': 'North America', 'United States Pacific Islands (Shift)': 'United States Pacific Islands (Shift)', 'United States Territories (Shift)': 'United States Territories (Shift)', 'United States Virgin Islands': 'North America', 'Upper-middle-income countries': 'Upper-middle-income countries', 'Uruguay': 'South America', 'Uzbekistan': 'Asia', 'Vanuatu': 'Oceania', 'Venezuela': 'South America', 'Vietnam': 'Asia', 'Wake Island (EIA)': 'Wake Island (EIA)', 'Wake Island (Shift)': 'Wake Island (Shift)', 'West Germany (EIA)': 'West Germany (EIA)', 'Western Africa (EI)': 'Western Africa (EI)', 'Western Sahara': 'Western Sahara', 'World': 'World', 'Yemen': 'Asia', 'Yugoslavia': 'Europe', 'Zambia': 'Africa', 'Zimbabwe': 'Africa', 
}

# Appliquer le mapping pour créer une nouvelle colonne "continent"
wec['continent'] = wec['country'].map(continent_mapping)

# Insérer la colonne "continent" juste après la colonne "country"
wec.insert(wec.columns.get_loc('country') + 1, 'continent', wec.pop('continent'))

wec.head(5)

Unnamed: 0,country,continent,year,iso_code,population,gdp_usd,biofuel_cons_change_pct,biofuel_cons_change_twh,biofuel_cons_per_capita_kwh,biofuel_consumption_twh,...,ECGTE_EnvTaxes_DomCCY,ECGTE_EnvTaxes_PctGDP,ECGTEN_TaxesEnergy_InclFuelTrans_DomCCY,ECGTEN_TaxesEnergy_InclFuelTrans_PctGDP,ECGTEP_TaxesPollution_DomCCY,ECGTEP_TaxesPollution_PctGDP,ECGTER_TaxesResources_DomCCY,ECGTER_TaxesResources_PctGDP,ECGTET_TaxesTrans_ExclFuelTrans_DomCCY,ECGTET_TaxesTrans_ExclFuelTrans_PctGDP
0,ASEAN (Ember),ASEAN (Ember),2000,,,,,,,,...,,,,,,,,,,
1,ASEAN (Ember),ASEAN (Ember),2001,,,,,,,,...,,,,,,,,,,
2,ASEAN (Ember),ASEAN (Ember),2002,,,,,,,,...,,,,,,,,,,
3,ASEAN (Ember),ASEAN (Ember),2003,,,,,,,,...,,,,,,,,,,
4,ASEAN (Ember),ASEAN (Ember),2004,,,,,,,,...,,,,,,,,,,


In [157]:
# Créer un dictionnaire pour faire le mapping entre les pays et les régions géographiques
region_mapping = {
'Afghanistan': 'South Asia', 'Albania': 'Eastern Europe and Central Asia', 'Algeria': 'Middle East and North Africa', 'American Samoa': 'East Asia & Pacific', 'Angola': 'Sub-Saharan Africa', 'Antigua and Barbuda': 'Americas & Caribbean', 'Argentina': 'Americas & Caribbean', 'Armenia': 'Eastern Europe and Central Asia', 'Aruba': 'Americas & Caribbean', 'Australia': 'East Asia & Pacific', 'Austria': 'Eastern Europe and Central Asia', 'Azerbaijan': 'Eastern Europe and Central Asia', 'Bahamas': 'Americas & Caribbean', 'Bahrain': 'Middle East and North Africa', 'Bangladesh': 'South Asia', 'Barbados': 'Americas & Caribbean', 'Belarus': 'Eastern Europe and Central Asia', 'Belgium': 'Western Europe', 'Belize': 'Americas & Caribbean', 'Benin': 'Sub-Saharan Africa', 'Bermuda': 'Americas & Caribbean', 'Bhutan': 'South Asia', 'Bolivia': 'Americas & Caribbean', 'Bosnia and Herzegovina': 'Eastern Europe and Central Asia', 'Botswana': 'Sub-Saharan Africa', 'Brazil': 'Americas & Caribbean', 'British Virgin Islands': 'Americas & Caribbean', 'Brunei': 'East Asia & Pacific', 'Bulgaria': 'Eastern Europe and Central Asia', 'Burkina Faso': 'Sub-Saharan Africa', 'Burundi': 'Sub-Saharan Africa', 'Cambodia': 'East Asia & Pacific', 'Cameroon': 'Sub-Saharan Africa', 'Canada': 'Americas & Caribbean', 'Cape Verde': 'Sub-Saharan Africa', 'Cayman Islands': 'Americas & Caribbean', 'Central African Republic': 'Sub-Saharan Africa', 'Chad': 'Sub-Saharan Africa', 'Chile': 'Americas & Caribbean', 'China': 'East Asia & Pacific', 'Colombia': 'Americas & Caribbean', 'Comoros': 'Sub-Saharan Africa', 'Congo': 'Sub-Saharan Africa', 'Cook Islands': 'East Asia & Pacific', 'Costa Rica': 'Americas & Caribbean', 'Cote d''Ivoire': 'Sub-Saharan Africa', 'Croatia': 'Eastern Europe and Central Asia', 'Cuba': 'Americas & Caribbean', 'Cyprus': 'Eastern Europe and Central Asia', 'Czechia': 'Eastern Europe and Central Asia', 'Czechoslovakia': 'Eastern Europe and Central Asia', 'Democratic Republic of Congo': 'Sub-Saharan Africa', 'Denmark': 'Western Europe', 'Djibouti': 'Middle East and North Africa', 'Dominica': 'Americas & Caribbean', 'Dominican Republic': 'Americas & Caribbean', 'East Timor': 'East Asia & Pacific', 'Ecuador': 'Americas & Caribbean', 'Egypt': 'Middle East and North Africa', 'El Salvador': 'Americas & Caribbean', 'Equatorial Guinea': 'Sub-Saharan Africa', 'Eritrea': 'Sub-Saharan Africa', 'Estonia': 'Eastern Europe and Central Asia', 'Eswatini': 'Sub-Saharan Africa', 'Ethiopia': 'Sub-Saharan Africa', 'Falkland Islands': 'Americas & Caribbean', 'Faroe Islands': 'Western Europe', 'Fiji': 'East Asia & Pacific', 'Finland': 'Western Europe', 'France': 'Western Europe', 'French Guiana': 'Americas & Caribbean', 'French Polynesia': 'East Asia & Pacific', 'Gabon': 'Sub-Saharan Africa', 'Gambia': 'Sub-Saharan Africa', 'Georgia': 'Eastern Europe and Central Asia', 'Germany': 'Western Europe', 'Ghana': 'Sub-Saharan Africa', 'Gibraltar': 'Western Europe', 'Greece': 'Western Europe', 'Greenland': 'Americas & Caribbean', 'Grenada': 'Americas & Caribbean', 'Guadeloupe': 'Americas & Caribbean', 'Guam': 'East Asia & Pacific', 'Guatemala': 'Americas & Caribbean', 'Guinea': 'Sub-Saharan Africa', 'Guinea-Bissau': 'Sub-Saharan Africa', 'Guyana': 'Americas & Caribbean', 'Haiti': 'Americas & Caribbean', 'Honduras': 'Americas & Caribbean', 'Hong Kong': 'East Asia & Pacific', 'Hungary': 'Eastern Europe and Central Asia', 'Iceland': 'Western Europe', 'India': 'South Asia', 'Indonesia': 'East Asia & Pacific', 'Iran': 'Middle East and North Africa', 'Iraq': 'Middle East and North Africa', 'Ireland': 'Western Europe', 'Israel': 'Middle East and North Africa', 'Italy': 'Western Europe', 'Jamaica': 'Americas & Caribbean', 'Japan': 'East Asia & Pacific', 'Jordan': 'Middle East and North Africa', 'Kazakhstan': 'Eastern Europe and Central Asia', 'Kenya': 'Sub-Saharan Africa', 'Kiribati': 'East Asia & Pacific', 'Kosovo': 'Eastern Europe and Central Asia', 'Kuwait': 'Middle East and North Africa', 'Kyrgyzstan': 'Eastern Europe and Central Asia', 'Laos': 'East Asia & Pacific', 'Latvia': 'Eastern Europe and Central Asia', 'Lebanon': 'Middle East and North Africa', 'Lesotho': 'Sub-Saharan Africa', 'Liberia': 'Sub-Saharan Africa', 'Libya': 'Middle East and North Africa', 'Lithuania': 'Eastern Europe and Central Asia', 'Luxembourg': 'Western Europe', 'Macao': 'East Asia & Pacific', 'Madagascar': 'Sub-Saharan Africa', 'Malawi': 'Sub-Saharan Africa', 'Malaysia': 'East Asia & Pacific', 'Maldives': 'South Asia', 'Mali': 'Sub-Saharan Africa', 'Malta': 'Middle East and North Africa', 'Martinique': 'Americas & Caribbean', 'Mauritania': 'Sub-Saharan Africa', 'Mauritius': 'Sub-Saharan Africa', 'Mexico': 'Americas & Caribbean', 'Micronesia (country)': 'East Asia & Pacific', 'Moldova': 'Eastern Europe and Central Asia', 'Mongolia': 'East Asia & Pacific', 'Montenegro': 'Eastern Europe and Central Asia', 'Montserrat': 'Americas & Caribbean', 'Morocco': 'Middle East and North Africa', 'Mozambique': 'Sub-Saharan Africa', 'Myanmar': 'East Asia & Pacific', 'Namibia': 'Sub-Saharan Africa', 'Nauru': 'East Asia & Pacific', 'Nepal': 'South Asia', 'Netherlands': 'Western Europe', 'Netherlands Antilles': 'Americas & Caribbean', 'New Caledonia': 'East Asia & Pacific', 'New Zealand': 'East Asia & Pacific', 'Nicaragua': 'Americas & Caribbean', 'Niger': 'Sub-Saharan Africa', 'Nigeria': 'Sub-Saharan Africa', 'Niue': 'East Asia & Pacific', 'North Korea': 'East Asia & Pacific', 'North Macedonia': 'Eastern Europe and Central Asia', 'Northern Mariana Islands': 'East Asia & Pacific', 'Norway': 'Western Europe', 'Oman': 'Middle East and North Africa', 'Pakistan': 'South Asia', 'Palestine': 'Eastern Europe and Central Asia', 'Panama': 'Americas & Caribbean', 'Papua New Guinea': 'East Asia & Pacific', 'Paraguay': 'Americas & Caribbean', 'Peru': 'Americas & Caribbean', 'Philippines': 'East Asia & Pacific', 'Poland': 'Eastern Europe and Central Asia', 'Portugal': 'Western Europe', 'Puerto Rico': 'Americas & Caribbean', 'Qatar': 'Middle East and North Africa', 'Reunion': 'Middle East and North Africa', 'Romania': 'Eastern Europe and Central Asia', 'Russia': 'Eastern Europe and Central Asia', 'Rwanda': 'Sub-Saharan Africa', 'Saint Helena': 'Middle East and North Africa', 'Saint Kitts and Nevis': 'Americas & Caribbean', 'Saint Lucia': 'Americas & Caribbean', 'Saint Pierre and Miquelon': 'Americas & Caribbean', 'Saint Vincent and the Grenadines': 'Americas & Caribbean', 'Samoa': 'East Asia & Pacific', 'Sao Tome and Principe': 'Sub-Saharan Africa', 'Saudi Arabia': 'Middle East and North Africa', 'Senegal': 'Sub-Saharan Africa', 'Serbia': 'Eastern Europe and Central Asia', 'Serbia and Montenegro': 'Eastern Europe and Central Asia', 'Seychelles': 'Sub-Saharan Africa', 'Sierra Leone': 'Sub-Saharan Africa', 'Singapore': 'East Asia & Pacific', 'Slovakia': 'Eastern Europe and Central Asia', 'Slovenia': 'Eastern Europe and Central Asia', 'Solomon Islands': 'East Asia & Pacific', 'Somalia': 'Sub-Saharan Africa', 'South Africa': 'Sub-Saharan Africa', 'South Korea': 'East Asia & Pacific', 'South Sudan': 'Sub-Saharan Africa', 'Spain': 'Western Europe', 'Sri Lanka': 'South Asia', 'Sudan': 'Sub-Saharan Africa', 'Suriname': 'Americas & Caribbean', 'Sweden': 'Western Europe', 'Switzerland': 'Western Europe', 'Syria': 'Middle East and North Africa', 'Taiwan': 'East Asia & Pacific', 'Tajikistan': 'Eastern Europe and Central Asia', 'Tanzania': 'Sub-Saharan Africa', 'Thailand': 'East Asia & Pacific', 'Togo': 'Sub-Saharan Africa', 'Tonga': 'East Asia & Pacific', 'Trinidad and Tobago': 'Americas & Caribbean', 'Tunisia': 'Middle East and North Africa', 'Turkey': 'Eastern Europe and Central Asia', 'Turkmenistan': 'Eastern Europe and Central Asia', 'Turks and Caicos Islands': 'Americas & Caribbean', 'Tuvalu': 'East Asia & Pacific', 'Uganda': 'Sub-Saharan Africa', 'Ukraine': 'Eastern Europe and Central Asia', 'United Arab Emirates': 'Middle East and North Africa', 'United Kingdom': 'Western Europe', 'United States': 'Americas & Caribbean', 'United States Virgin Islands': 'Americas & Caribbean', 'Uruguay': 'Americas & Caribbean', 'Uzbekistan': 'Eastern Europe and Central Asia', 'Vanuatu': 'East Asia & Pacific', 'Venezuela': 'Americas & Caribbean', 'Vietnam': 'East Asia & Pacific', 'World': 'World', 'Yemen': 'Middle East and North Africa', 'Yugoslavia': 'Eastern Europe and Central Asia', 'Zambia': 'Sub-Saharan Africa', 'Zimbabwe': 'Sub-Saharan Africa', 'ASEAN (Ember)': 'ASEAN (Ember)', 'Africa': 'Africa', 'Africa (EI)': 'Africa (EI)', 'Africa (Ember)': 'Africa (Ember)', 'Africa (Shift)': 'Africa (Shift)', 'Antarctica': 'Antarctica', 'Asia': 'Asia', 'Asia & Oceania (EIA)': 'Asia & Oceania (EIA)', 'Asia (Ember)': 'Asia (Ember)', 'Asia Pacific (EI)': 'Asia Pacific (EI)', 'Asia and Oceania (Shift)': 'Asia and Oceania (Shift)', 'Australia and New Zealand (EIA)': 'Australia and New Zealand (EIA)', 'CIS (EI)': 'CIS (EI)', 'Central & South America (EIA)': 'Central & South America (EIA)', 'Central America (EI)': 'Central America (EI)', 'Central and South America (Shift)': 'Central and South America (Shift)', 'EU28 (Shift)': 'EU28 (Shift)', 'East Germany (EIA)': 'East Germany (EIA)', 'Eastern Africa (EI)': 'Eastern Africa (EI)', 'Eurasia (EIA)': 'Eurasia (EIA)', 'Eurasia (Shift)': 'Eurasia (Shift)', 'Europe': 'Europe', 'Europe (EI)': 'Europe (EI)', 'Europe (Ember)': 'Europe (Ember)', 'Europe (Shift)': 'Europe (Shift)', 'European Union (27)': 'European Union (27)', 'European Union (EIA)': 'European Union (EIA)', 'G20 (Ember)': 'G20 (Ember)', 'G7 (Ember)': 'G7 (Ember)', 'Hawaiian Trade Zone (EIA)': 'Hawaiian Trade Zone (EIA)', 'High-income countries': 'High-income countries', 'IEO - Africa (EIA)': 'IEO - Africa (EIA)', 'IEO - Middle East (EIA)': 'IEO - Middle East (EIA)', 'IEO OECD - Europe (EIA)': 'IEO OECD - Europe (EIA)', 'Latin America and Caribbean (Ember)': 'Latin America and Caribbean (Ember)', 'Low-income countries': 'Low-income countries', 'Lower-middle-income countries': 'Lower-middle-income countries', 'Mexico, Chile, and other OECD Americas (EIA)': 'Mexico, Chile, and other OECD Americas (EIA)', 'Middle Africa (EI)': 'Middle Africa (EI)', 'Middle East (EI)': 'Middle East (EI)', 'Middle East (EIA)': 'Middle East (EIA)', 'Middle East (Ember)': 'Middle East (Ember)', 'Middle East (Shift)': 'Middle East (Shift)', 'Non-OECD (EI)': 'Non-OECD (EI)', 'Non-OECD (EIA)': 'Non-OECD (EIA)', 'Non-OPEC (EI)': 'Non-OPEC (EI)', 'Non-OPEC (EIA)': 'Non-OPEC (EIA)', 'North America': 'North America', 'North America (EI)': 'North America (EI)', 'North America (Ember)': 'North America (Ember)', 'North America (Shift)': 'North America (Shift)', 'OECD (EI)': 'OECD (EI)', 'OECD (EIA)': 'OECD (EIA)', 'OECD (Ember)': 'OECD (Ember)', 'OECD (Shift)': 'OECD (Shift)', 'OECD - Asia And Oceania (EIA)': 'OECD - Asia And Oceania (EIA)', 'OECD - Europe (EIA)': 'OECD - Europe (EIA)', 'OECD - North America (EIA)': 'OECD - North America (EIA)', 'OPEC (EI)': 'OPEC (EI)', 'OPEC (EIA)': 'OPEC (EIA)', 'OPEC (Shift)': 'OPEC (Shift)', 'OPEC - Africa (EIA)': 'OPEC - Africa (EIA)', 'OPEC - South America (EIA)': 'OPEC - South America (EIA)', 'Oceania': 'Oceania', 'Oceania (Ember)': 'Oceania (Ember)', 'Other Non-OECD - America (EIA)': 'Other Non-OECD - America (EIA)', 'Other Non-OECD - Asia (EIA)': 'Other Non-OECD - Asia (EIA)', 'Other Non-OECD - Europe and Eurasia (EIA)': 'Other Non-OECD - Europe and Eurasia (EIA)', 'Persian Gulf (EIA)': 'Persian Gulf (EIA)', 'Persian Gulf (Shift)': 'Persian Gulf (Shift)', 'South America': 'South America', 'South Korea and other OECD Asia (EIA)': 'South Korea and other OECD Asia (EIA)', 'South and Central America (EI)': 'South and Central America (EI)', 'U.S. Pacific Islands (EIA)': 'U.S. Pacific Islands (EIA)', 'U.S. Territories (EIA)': 'U.S. Territories (EIA)', 'USSR': 'USSR', 'United States Pacific Islands (Shift)': 'United States Pacific Islands (Shift)', 'United States Territories (Shift)': 'United States Territories (Shift)', 'Upper-middle-income countries': 'Upper-middle-income countries', 'Wake Island (EIA)': 'Wake Island (EIA)', 'Wake Island (Shift)': 'Wake Island (Shift)', 'West Germany (EIA)': 'West Germany (EIA)', 'Western Africa (EI)': 'Western Africa (EI)', 'Western Sahara': 'Western Sahara', 
}

# Appliquer le mapping pour créer une nouvelle colonne "region"
wec['region'] = wec['country'].map(region_mapping)

# Insérer la colonne "region" juste après la colonne "continent"
wec.insert(wec.columns.get_loc('continent') + 1, 'region', wec.pop('region'))

wec.head(5)

Unnamed: 0,country,continent,region,year,iso_code,population,gdp_usd,biofuel_cons_change_pct,biofuel_cons_change_twh,biofuel_cons_per_capita_kwh,...,ECGTE_EnvTaxes_DomCCY,ECGTE_EnvTaxes_PctGDP,ECGTEN_TaxesEnergy_InclFuelTrans_DomCCY,ECGTEN_TaxesEnergy_InclFuelTrans_PctGDP,ECGTEP_TaxesPollution_DomCCY,ECGTEP_TaxesPollution_PctGDP,ECGTER_TaxesResources_DomCCY,ECGTER_TaxesResources_PctGDP,ECGTET_TaxesTrans_ExclFuelTrans_DomCCY,ECGTET_TaxesTrans_ExclFuelTrans_PctGDP
0,ASEAN (Ember),ASEAN (Ember),ASEAN (Ember),2000,,,,,,,...,,,,,,,,,,
1,ASEAN (Ember),ASEAN (Ember),ASEAN (Ember),2001,,,,,,,...,,,,,,,,,,
2,ASEAN (Ember),ASEAN (Ember),ASEAN (Ember),2002,,,,,,,...,,,,,,,,,,
3,ASEAN (Ember),ASEAN (Ember),ASEAN (Ember),2003,,,,,,,...,,,,,,,,,,
4,ASEAN (Ember),ASEAN (Ember),ASEAN (Ember),2004,,,,,,,...,,,,,,,,,,


In [158]:
# Sélectionner les pays à inclure dans aa_world
selected_countries = ['Afghanistan', 'Albania', 'Algeria', 'American Samoa', 'Angola', 'Antigua and Barbuda', 'Argentina', 'Armenia', 'Aruba', 'Australia', 'Austria', 'Azerbaijan', 'Bahamas', 'Bahrain', 'Bangladesh', 'Barbados', 'Belarus', 'Belgium', 'Belize', 'Benin', 'Bermuda', 'Bhutan', 'Bolivia', 'Bosnia and Herzegovina', 'Botswana', 'Brazil', 'British Virgin Islands', 'Brunei', 'Bulgaria', 'Burkina Faso', 'Burundi', 'Cambodia', 'Cameroon', 'Canada', 'Cape Verde', 'Cayman Islands', 'Central African Republic', 'Chad', 'Chile', 'China', 'Colombia', 'Comoros', 'Congo', 'Cook Islands', 'Costa Rica', 'Cote d''Ivoire', 'Croatia', 'Cuba', 'Cyprus', 'Czechia', 'Czechoslovakia', 'Democratic Republic of Congo', 'Denmark', 'Djibouti', 'Dominica', 'Dominican Republic', 'East Timor', 'Ecuador', 'Egypt', 'El Salvador', 'Equatorial Guinea', 'Eritrea', 'Estonia', 'Eswatini', 'Ethiopia', 'Falkland Islands', 'Faroe Islands', 'Fiji', 'Finland', 'France', 'French Guiana', 'French Polynesia', 'Gabon', 'Gambia', 'Georgia', 'Germany', 'Ghana', 'Gibraltar', 'Greece', 'Greenland', 'Grenada', 'Guadeloupe', 'Guam', 'Guatemala', 'Guinea', 'Guinea-Bissau', 'Guyana', 'Haiti', 'Honduras', 'Hong Kong', 'Hungary', 'Iceland', 'India', 'Indonesia', 'Iran', 'Iraq', 'Ireland', 'Israel', 'Italy', 'Jamaica', 'Japan', 'Jordan', 'Kazakhstan', 'Kenya', 'Kiribati', 'Kosovo', 'Kuwait', 'Kyrgyzstan', 'Laos', 'Latvia', 'Lebanon', 'Lesotho', 'Liberia', 'Libya', 'Lithuania', 'Luxembourg', 'Macao', 'Madagascar', 'Malawi', 'Malaysia', 'Maldives', 'Mali', 'Malta', 'Martinique', 'Mauritania', 'Mauritius', 'Mexico', 'Micronesia (country)', 'Moldova', 'Mongolia', 'Montenegro', 'Montserrat', 'Morocco', 'Mozambique', 'Myanmar', 'Namibia', 'Nauru', 'Nepal', 'Netherlands', 'Netherlands Antilles', 'New Caledonia', 'New Zealand', 'Nicaragua', 'Niger', 'Nigeria', 'Niue', 'North Korea', 'North Macedonia', 'Northern Mariana Islands', 'Norway', 'Oman', 'Pakistan', 'Palestine', 'Panama', 'Papua New Guinea', 'Paraguay', 'Peru', 'Philippines', 'Poland', 'Portugal', 'Puerto Rico', 'Qatar', 'Reunion', 'Romania', 'Russia', 'Rwanda', 'Saint Helena', 'Saint Kitts and Nevis', 'Saint Lucia', 'Saint Pierre and Miquelon', 'Saint Vincent and the Grenadines', 'Samoa', 'Sao Tome and Principe', 'Saudi Arabia', 'Senegal', 'Serbia', 'Serbia and Montenegro', 'Seychelles', 'Sierra Leone', 'Singapore', 'Slovakia', 'Slovenia', 'Solomon Islands', 'Somalia', 'South Africa', 'South Korea', 'South Sudan', 'Spain', 'Sri Lanka', 'Sudan', 'Suriname', 'Sweden', 'Switzerland', 'Syria', 'Taiwan', 'Tajikistan', 'Tanzania', 'Thailand', 'Togo', 'Tonga', 'Trinidad and Tobago', 'Tunisia', 'Turkey', 'Turkmenistan', 'Turks and Caicos Islands', 'Tuvalu', 'Uganda', 'Ukraine', 'United Arab Emirates', 'United Kingdom', 'United States', 'United States Virgin Islands', 'Uruguay', 'Uzbekistan', 'Vanuatu', 'Venezuela', 'Vietnam', 'Yemen', 'Yugoslavia', 'Zambia', 'Zimbabwe']

# Filtrer le dataset pour inclure seulement les pays sélectionnés
selected_wec = wec[wec['country'].isin(selected_countries)]

# Grouper par année et calculer la somme des valeurs pour chaque année
world_values = selected_wec.groupby('year').sum().reset_index()

# Remplacer les valeurs de "country", "continent", "region" par "all_world"
world_values['country'] = 'all_world'
world_values['continent'] = 'all_world'
world_values['region'] = 'all_world'
world_values['iso_code'] = 'all_world'

# Ajouter les valeurs mondiales au dataset d'origine
wec = pd.concat([wec, world_values], ignore_index=True)

wec.head(5)

Unnamed: 0,country,continent,region,year,iso_code,population,gdp_usd,biofuel_cons_change_pct,biofuel_cons_change_twh,biofuel_cons_per_capita_kwh,...,ECGTE_EnvTaxes_DomCCY,ECGTE_EnvTaxes_PctGDP,ECGTEN_TaxesEnergy_InclFuelTrans_DomCCY,ECGTEN_TaxesEnergy_InclFuelTrans_PctGDP,ECGTEP_TaxesPollution_DomCCY,ECGTEP_TaxesPollution_PctGDP,ECGTER_TaxesResources_DomCCY,ECGTER_TaxesResources_PctGDP,ECGTET_TaxesTrans_ExclFuelTrans_DomCCY,ECGTET_TaxesTrans_ExclFuelTrans_PctGDP
0,ASEAN (Ember),ASEAN (Ember),ASEAN (Ember),2000,,,,,,,...,,,,,,,,,,
1,ASEAN (Ember),ASEAN (Ember),ASEAN (Ember),2001,,,,,,,...,,,,,,,,,,
2,ASEAN (Ember),ASEAN (Ember),ASEAN (Ember),2002,,,,,,,...,,,,,,,,,,
3,ASEAN (Ember),ASEAN (Ember),ASEAN (Ember),2003,,,,,,,...,,,,,,,,,,
4,ASEAN (Ember),ASEAN (Ember),ASEAN (Ember),2004,,,,,,,...,,,,,,,,,,


In [159]:
# Calculer les valeurs en USD pour chaque colonne et les insérer à côté des colonnes correspondantes
wec.insert(wec.columns.get_loc('ECGTE_EnvTaxes_DomCCY') + 1, 'ECGTE_EnvTaxes_usd-eq', wec['ECGTE_EnvTaxes_PctGDP'] * wec['gdp_usd'])
wec.insert(wec.columns.get_loc('ECGTEN_TaxesEnergy_InclFuelTrans_DomCCY') + 1, 'ECGTEN_TaxesEnergy_InclFuelTrans_usd-eq', wec['ECGTEN_TaxesEnergy_InclFuelTrans_PctGDP'] * wec['gdp_usd'])
wec.insert(wec.columns.get_loc('ECGTEP_TaxesPollution_DomCCY') + 1, 'ECGTEP_TaxesPollution_usd-eq', wec['ECGTEP_TaxesPollution_PctGDP'] * wec['gdp_usd'])
wec.insert(wec.columns.get_loc('ECGTER_TaxesResources_DomCCY') + 1, 'ECGTER_TaxesResources_usd-eq', wec['ECGTER_TaxesResources_PctGDP'] * wec['gdp_usd'])
wec.insert(wec.columns.get_loc('ECGTET_TaxesTrans_ExclFuelTrans_DomCCY') + 1, 'ECGTET_TaxesTrans_ExclFuelTrans_usd-eq', wec['ECGTET_TaxesTrans_ExclFuelTrans_PctGDP'] * wec['gdp_usd'])

# Valeurs enregistrés
wec.to_csv('./output/World_Energy_Consumption_Full.csv', index=False)
# Afficher les premières lignes pour vérifier
wec.head(5)

Unnamed: 0,country,continent,region,year,iso_code,population,gdp_usd,biofuel_cons_change_pct,biofuel_cons_change_twh,biofuel_cons_per_capita_kwh,...,ECGTEN_TaxesEnergy_InclFuelTrans_PctGDP,ECGTEP_TaxesPollution_DomCCY,ECGTEP_TaxesPollution_usd-eq,ECGTEP_TaxesPollution_PctGDP,ECGTER_TaxesResources_DomCCY,ECGTER_TaxesResources_usd-eq,ECGTER_TaxesResources_PctGDP,ECGTET_TaxesTrans_ExclFuelTrans_DomCCY,ECGTET_TaxesTrans_ExclFuelTrans_usd-eq,ECGTET_TaxesTrans_ExclFuelTrans_PctGDP
0,ASEAN (Ember),ASEAN (Ember),ASEAN (Ember),2000,,,,,,,...,,,,,,,,,,
1,ASEAN (Ember),ASEAN (Ember),ASEAN (Ember),2001,,,,,,,...,,,,,,,,,,
2,ASEAN (Ember),ASEAN (Ember),ASEAN (Ember),2002,,,,,,,...,,,,,,,,,,
3,ASEAN (Ember),ASEAN (Ember),ASEAN (Ember),2003,,,,,,,...,,,,,,,,,,
4,ASEAN (Ember),ASEAN (Ember),ASEAN (Ember),2004,,,,,,,...,,,,,,,,,,


In [160]:
# Charger le fichier CSV dans un DataFrame
df_price = pd.read_csv('./assets/energy_prices.csv')

# Définir les nouveaux noms de colonnes
nouveaux_noms = {
    'Crude Oil': 'price_crude_oil_usd_per_bbl',
    'Coal': 'price_coal_usd_per_mt',
    'Natural Gas': 'price_natural_gas_usd_per_mmbtu',
    'Year': 'year'
}

# Renommer les colonnes
df_price.rename(columns=nouveaux_noms, inplace=True)

df_price = df_price.fillna(0)

df_price['price_crude_oil_usd_per_bbl'] = df_price['price_crude_oil_usd_per_bbl'].astype(float)
df_price['price_coal_usd_per_mt'] = df_price['price_coal_usd_per_mt'].astype(float)
df_price['price_natural_gas_usd_per_mmbtu'] = df_price['price_natural_gas_usd_per_mmbtu'].astype(float)
df_price['year'] = df_price['year'].astype(int)

df_price.head(10)



Unnamed: 0,year,price_crude_oil_usd_per_bbl,price_coal_usd_per_mt,price_natural_gas_usd_per_mmbtu
0,1960,1.63,0.0,0.14
1,1961,1.57,0.0,0.15
2,1962,1.52,0.0,0.16
3,1963,1.5,0.0,0.16
4,1964,1.45,0.0,0.15
5,1965,1.42,0.0,0.16
6,1966,1.36,0.0,0.16
7,1967,1.33,0.0,0.16
8,1968,1.32,0.0,0.16
9,1969,1.27,0.0,0.17


In [161]:
# Fusion des valeurs pour les prix des energies fossiles
wec = pd.merge(wec, df_price, on=["year"], how='left')
# Valeurs enregistrés
wec.to_csv('./output/World_Energy_Consumption_Full.csv', index=False)


In [162]:

columns_to_keep = ['year', 'continent', 'country', 'region']
wec = wec.drop(columns=['iso_code'])
melted_df = pd.melt(wec, id_vars=columns_to_keep, var_name='type_data', value_name='value')
melted_df = melted_df.dropna()

print(melted_df.count)

<bound method DataFrame.count of          year  continent      country      region  \
23       1960       Asia  Afghanistan  South Asia   
24       1961       Asia  Afghanistan  South Asia   
25       1962       Asia  Afghanistan  South Asia   
26       1963       Asia  Afghanistan  South Asia   
27       1964       Asia  Afghanistan  South Asia   
...       ...        ...          ...         ...   
2174269  2017  all_world    all_world   all_world   
2174270  2018  all_world    all_world   all_world   
2174271  2019  all_world    all_world   all_world   
2174272  2020  all_world    all_world   all_world   
2174273  2021  all_world    all_world   all_world   

                               type_data       value  
23                            population  8622473.00  
24                            population  8790140.00  
25                            population  8969055.00  
26                            population  9157463.00  
27                            population  9355510.00  


In [163]:
melted_df.to_csv('./output/World_Energy_Consumption_Reordered.csv', index=False)