In [184]:
import pandas as pd

data = pd.DataFrame(columns = ['country', 'year', 'pollution_co2'])
df = pd.read_csv('./assets/CO2_Emissions_1960-2018.csv')

# Pays gardés
kept_countries = ['France', 'Italy', 'Spain', 'Germany', 'Portugal', 'United Kingdom']
limit_to_kept_countries = False

df.head(5)

Unnamed: 0,Country Name,1960,1961,1962,1963,1964,1965,1966,1967,1968,...,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018
0,Aruba,204.631696,208.837879,226.08189,214.785217,207.626699,185.213644,172.158729,210.819017,194.917536,...,,,,,,,,,,
1,Africa Eastern and Southern,0.90606,0.922474,0.930816,0.94057,0.996033,1.04728,1.033908,1.052204,1.079727,...,1.021954,1.048876,1.005338,1.021646,1.031833,1.041145,0.987393,0.971016,0.959978,0.933541
2,Afghanistan,0.046057,0.053589,0.073721,0.074161,0.086174,0.101285,0.107399,0.123409,0.115142,...,0.211306,0.297065,0.407074,0.335351,0.263716,0.234037,0.232176,0.208857,0.203328,0.200151
3,Africa Western and Central,0.09088,0.095283,0.096612,0.112376,0.133258,0.184803,0.193676,0.189305,0.143989,...,0.42677,0.472819,0.497023,0.490867,0.504655,0.507671,0.480743,0.472959,0.476438,0.515544
4,Angola,0.100835,0.082204,0.210533,0.202739,0.213562,0.205891,0.268937,0.172096,0.289702,...,1.205902,1.221515,1.216317,1.204799,1.261542,1.285365,1.260921,1.227703,1.034317,0.88738


In [185]:
# Récupération de la liste des pays
countries = df['Country Name'].unique().tolist()
print(countries)
df.set_index('Country Name', inplace=True)

['Aruba', 'Africa Eastern and Southern', 'Afghanistan', 'Africa Western and Central', 'Angola', 'Albania', 'Andorra', 'Arab World', 'United Arab Emirates', 'Argentina', 'Armenia', 'American Samoa', 'Antigua and Barbuda', 'Australia', 'Austria', 'Azerbaijan', 'Burundi', 'Belgium', 'Benin', 'Burkina Faso', 'Bangladesh', 'Bulgaria', 'Bahrain', 'Bahamas, The', 'Bosnia and Herzegovina', 'Belarus', 'Belize', 'Bermuda', 'Bolivia', 'Brazil', 'Barbados', 'Brunei Darussalam', 'Bhutan', 'Botswana', 'Central African Republic', 'Canada', 'Central Europe and the Baltics', 'Switzerland', 'Channel Islands', 'Chile', 'China', "Cote d'Ivoire", 'Cameroon', 'Congo, Dem. Rep.', 'Congo, Rep.', 'Colombia', 'Comoros', 'Cabo Verde', 'Costa Rica', 'Caribbean small states', 'Cuba', 'Curacao', 'Cayman Islands', 'Cyprus', 'Czech Republic', 'Germany', 'Djibouti', 'Dominica', 'Denmark', 'Dominican Republic', 'Algeria', 'East Asia & Pacific (excluding high income)', 'Early-demographic dividend', 'East Asia & Pacific'

In [186]:
# Récupération de la liste des années
columns = df.columns
years = columns[1:].tolist()
print(years)

['1961', '1962', '1963', '1964', '1965', '1966', '1967', '1968', '1969', '1970', '1971', '1972', '1973', '1974', '1975', '1976', '1977', '1978', '1979', '1980', '1981', '1982', '1983', '1984', '1985', '1986', '1987', '1988', '1989', '1990', '1991', '1992', '1993', '1994', '1995', '1996', '1997', '1998', '1999', '2000', '2001', '2002', '2003', '2004', '2005', '2006', '2007', '2008', '2009', '2010', '2011', '2012', '2013', '2014', '2015', '2016', '2017', '2018']


In [187]:
# Création nouveau dataframe organisé par pays puis années
data_list = []

for country in countries:
    #values = df[df['Country Name'] == country]
    for year in years:
        value = df.loc[country, year]
        #print(value)
        data_list.append({'country': country, 'year': year, 'pollution_co2': value})
data = pd.DataFrame(data_list, columns=['country', 'year', 'pollution_co2'])
data.head(5)

Unnamed: 0,country,year,pollution_co2
0,Aruba,1961,208.837879
1,Aruba,1962,226.08189
2,Aruba,1963,214.785217
3,Aruba,1964,207.626699
4,Aruba,1965,185.213644


In [194]:
data.dtypes

country           object
year               int32
pollution_co2    float64
dtype: object

In [188]:
data.shape

(15428, 3)

In [189]:
# Suppression des valeurs inutiles si nécessaires
nb_na = data['pollution_co2'].isna().sum()
print("Nombre de valeurs invalides ou nulls: " + str(nb_na))
if nb_na > 0:
    data.dropna(subset=['pollution_co2'], inplace=True)
data.shape

Nombre de valeurs invalides ou nulls: 2289


(13139, 3)

In [190]:
data.to_csv('./output/CO2_Emissions_sortedByYearAndCountry.csv', index=False)

In [191]:
# Retirer les valeurs des pays différents de cette liste et exporter le fichier
if limit_to_kept_countries:
    data = data[data.country.isin(kept_countries)]
data.to_csv('./output/CO2_Emissions_sortedByYearAndCountry.csv', index=False)


In [192]:
# Charger le fichier World Energy Consumption
wec = pd.read_csv('./assets/World Energy Consumption.csv')
if limit_to_kept_countries:
    wec = wec[wec.country.isin(kept_countries)]
wec.to_csv('./output/World Energy Consumption (limited).csv', index=False)

In [193]:
# Changement de types
data['year'] = data['year'].astype(int)
wec['year'] = wec['year'].astype(int)

wec['country'] = wec['country'].astype(str)
data['country'] = data['country'].astype(str)

# Fusion des valeurs de Co2
wec = pd.merge(wec, data, on=["country", "year"])
            
# Supprimer la colonne 'population'
# wec = wec.drop('population', axis=1, errors='ignore')
# wec = wec.drop('iso_code', axis=1, errors='ignore')
# wec = wec.drop('gdp', axis=1, errors='ignore')

wec = wec[wec['year'] >= 1960]

# Valeurs enregistrés
wec.to_csv('./output/World Energy Consumption (limited).csv', index=False)

In [151]:
#Dataset Taxes sur les énergies

In [152]:
import pandas as pd
from matplotlib import pyplot as plt
# Charger le dataset des taxes sur les énergies
tax = pd.read_csv('./assets/Tax_Energy.csv')
tax.head(5)

Unnamed: 0,ObjectId,Country,ISO2,ISO3,Indicator,Source,CTS Code,CTS Name,CTS Full Descriptor,Unit,...,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021
0,1,Albania,AL,ALB,Environmental Taxes,Organisation for Economic Co-operation and Dev...,ECGTE,Environmental Taxes,"Environment, Climate Change, Government Policy...",Domestic Currency,...,,,,43993140000.0,47813790000.0,47548580000.0,51145590000.0,53415650000.0,,
1,2,Albania,AL,ALB,Environmental Taxes,Organisation for Economic Co-operation and Dev...,ECGTE,Environmental Taxes,"Environment, Climate Change, Government Policy...",Percent of GDP,...,,,,3.067206,3.247163,3.066373,3.124865,3.157133,,
2,3,Albania,AL,ALB,Taxes on Energy (including fuel for transport),Organisation for Economic Co-operation and Dev...,ECGTEN,Taxes on Energy (Including Fuel for Transport),"Environment, Climate Change, Government Policy...",Domestic Currency,...,,,,37741110000.0,40945620000.0,40400040000.0,43521820000.0,45165300000.0,,
3,4,Albania,AL,ALB,Taxes on Energy (including fuel for transport),Organisation for Economic Co-operation and Dev...,ECGTEN,Taxes on Energy (Including Fuel for Transport),"Environment, Climate Change, Government Policy...",Percent of GDP,...,,,,2.631314,2.780726,2.605369,2.659072,2.669496,,
4,5,Albania,AL,ALB,Taxes on Pollution,Organisation for Economic Co-operation and Dev...,ECGTEP,Taxes on Pollution,"Environment, Climate Change, Government Policy...",Domestic Currency,...,,,,1782069000.0,1879970000.0,1941324000.0,2226251000.0,2625011000.0,,


In [153]:
tax_country = tax[tax["Country"] == "Andorra, Principality of"]
print(tax_country)

    ObjectId                   Country ISO2 ISO3  \
10        11  Andorra, Principality of   AD  AND   
11        12  Andorra, Principality of   AD  AND   
12        13  Andorra, Principality of   AD  AND   
13        14  Andorra, Principality of   AD  AND   
14        15  Andorra, Principality of   AD  AND   

                                            Indicator  \
10                                Environmental Taxes   
11     Taxes on Energy (including fuel for transport)   
12                                 Taxes on Pollution   
13                                 Taxes on Resources   
14  Taxes on Transport (excluding fuel for transport)   

                                               Source CTS Code  \
10  Organisation for Economic Co-operation and Dev...    ECGTE   
11  Organisation for Economic Co-operation and Dev...   ECGTEN   
12  Organisation for Economic Co-operation and Dev...   ECGTEP   
13  Organisation for Economic Co-operation and Dev...   ECGTER   
14  Organisati

In [154]:
# Compter le nombre de valeurs "NaN" dans les colonnes
nb_nan_colTax = tax.isna().sum()

print("Nombre de NaN dans les colonnes :", nb_nan_colTax)

# Compter le nombre de valeurs "NaN" dans le dataset
nb_nan_dataset = tax.isna().sum().sum()

print("Nombre de NaN dans le dataset :", nb_nan_dataset)

Nombre de NaN dans les colonnes : ObjectId                 0
Country                  0
ISO2                    10
ISO3                     8
Indicator                0
Source                   0
CTS Code                 0
CTS Name                 0
CTS Full Descriptor      0
Unit                     0
1995                   495
1996                   491
1997                   491
1998                   483
1999                   481
2000                   431
2001                   415
2002                   411
2003                   403
2004                   403
2005                   369
2006                   359
2007                   351
2008                   321
2009                   321
2010                   295
2011                   303
2012                   307
2013                   309
2014                   305
2015                    44
2016                    46
2017                    50
2018                    68
2019                   100
2020                 

In [155]:
# Regrouper les années dans une seule colonne
tax = pd.melt(tax, id_vars=["ObjectId", "Country", "ISO2", "ISO3", "Indicator", "Source", "CTS Code", "CTS Name", "CTS Full Descriptor", "Unit"], 
                    var_name="Année", value_name="Taxe")
tax.head(5)

Unnamed: 0,ObjectId,Country,ISO2,ISO3,Indicator,Source,CTS Code,CTS Name,CTS Full Descriptor,Unit,Année,Taxe
0,1,Albania,AL,ALB,Environmental Taxes,Organisation for Economic Co-operation and Dev...,ECGTE,Environmental Taxes,"Environment, Climate Change, Government Policy...",Domestic Currency,1995,
1,2,Albania,AL,ALB,Environmental Taxes,Organisation for Economic Co-operation and Dev...,ECGTE,Environmental Taxes,"Environment, Climate Change, Government Policy...",Percent of GDP,1995,
2,3,Albania,AL,ALB,Taxes on Energy (including fuel for transport),Organisation for Economic Co-operation and Dev...,ECGTEN,Taxes on Energy (Including Fuel for Transport),"Environment, Climate Change, Government Policy...",Domestic Currency,1995,
3,4,Albania,AL,ALB,Taxes on Energy (including fuel for transport),Organisation for Economic Co-operation and Dev...,ECGTEN,Taxes on Energy (Including Fuel for Transport),"Environment, Climate Change, Government Policy...",Percent of GDP,1995,
4,5,Albania,AL,ALB,Taxes on Pollution,Organisation for Economic Co-operation and Dev...,ECGTEP,Taxes on Pollution,"Environment, Climate Change, Government Policy...",Domestic Currency,1995,


In [156]:
# Concaténer les valeurs de "CTS Code" et "CTS Name" avec un séparateur "-"
tax["CTS Name"] = tax["CTS Code"] + " - " + tax["CTS Name"]

# Supprimer la colonne "CTS Code" puisque ses valeurs ont été concaténées avec "CTS Name"
# Supprimer les colonnes inutiles "ObjectId", "ISO2", "Indicator", "Source", "CTS Full Descriptor"
tax.drop(columns=["CTS Code", "ObjectId", "ISO2", "Indicator", "Source", "CTS Full Descriptor"], inplace=True)

tax.head(5)

Unnamed: 0,Country,ISO3,CTS Name,Unit,Année,Taxe
0,Albania,ALB,ECGTE - Environmental Taxes,Domestic Currency,1995,
1,Albania,ALB,ECGTE - Environmental Taxes,Percent of GDP,1995,
2,Albania,ALB,ECGTEN - Taxes on Energy (Including Fuel for T...,Domestic Currency,1995,
3,Albania,ALB,ECGTEN - Taxes on Energy (Including Fuel for T...,Percent of GDP,1995,
4,Albania,ALB,ECGTEP - Taxes on Pollution,Domestic Currency,1995,


In [157]:
tax.columns

Index(['Country', 'ISO3', 'CTS Name', 'Unit', 'Année', 'Taxe'], dtype='object')

In [158]:
tax.columns.dtype

dtype('O')

In [159]:
tax.dtypes

Country      object
ISO3         object
CTS Name     object
Unit         object
Année        object
Taxe        float64
dtype: object

In [160]:
tax.shape

(33453, 6)

In [161]:
# Vérifier les valeurs nulles dans le DataFrame
null_values = tax.isnull()

# Vérifier si le DataFrame est vide
is_empty = tax.empty

# Afficher les valeurs nulles
print("Valeurs nulles dans le DataFrame :")
print(null_values)

# Afficher si le DataFrame est vide
print("\nLe DataFrame est vide :", is_empty)

Valeurs nulles dans le DataFrame :
       Country   ISO3  CTS Name   Unit  Année  Taxe
0        False  False     False  False  False  True
1        False  False     False  False  False  True
2        False  False     False  False  False  True
3        False  False     False  False  False  True
4        False  False     False  False  False  True
...        ...    ...       ...    ...    ...   ...
33448    False  False     False  False  False  True
33449    False  False     False  False  False  True
33450    False  False     False  False  False  True
33451    False  False     False  False  False  True
33452    False  False     False  False  False  True

[33453 rows x 6 columns]

Le DataFrame est vide : False


In [162]:
# Vérifier s'il y a des valeurs "nan" ou "null" dans la colonne 'Taxe'
nan_colTaxe = tax['Taxe'].isna().any()

print("Valeurs nan ou null dans la colonne 'Taxe':", nan_colTaxe)

Valeurs nan ou null dans la colonne 'Taxe': True


In [163]:
# Compter le nombre de valeurs "NaN" dans la colonne 'Taxe'
nb_nan_colTax = tax['Taxe'].isna().sum()

print("Nombre de NaN dans la colonne 'Taxe':", nb_nan_colTax)

Nombre de NaN dans la colonne 'Taxe': 8823


In [164]:
# Filtrer les lignes avec des valeurs  puis les afficher
nan_rows = tax[tax.isna().any(axis=1)]

print("Lignes contenant des valeurs NaN :")
print(nan_rows)

Lignes contenant des valeurs NaN :
                  Country ISO3  \
0                 Albania  ALB   
1                 Albania  ALB   
2                 Albania  ALB   
3                 Albania  ALB   
4                 Albania  ALB   
...                   ...  ...   
33448  West Bank and Gaza  PSE   
33449  West Bank and Gaza  PSE   
33450  West Bank and Gaza  PSE   
33451  West Bank and Gaza  PSE   
33452  West Bank and Gaza  PSE   

                                                CTS Name               Unit  \
0                            ECGTE - Environmental Taxes  Domestic Currency   
1                            ECGTE - Environmental Taxes     Percent of GDP   
2      ECGTEN - Taxes on Energy (Including Fuel for T...  Domestic Currency   
3      ECGTEN - Taxes on Energy (Including Fuel for T...     Percent of GDP   
4                            ECGTEP - Taxes on Pollution  Domestic Currency   
...                                                  ...                ...   
334

In [165]:
# Compter le nombre de valeurs "NaN" dans les colonnes
nb_nan_colTax = tax.isna().sum()

print("Nombre de NaN dans les colonnes :", nb_nan_colTax)

# Compter le nombre de valeurs "NaN" dans le dataset
nb_nan_dataset = tax.isna().sum().sum()

print("Nombre de NaN dans le dataset :", nb_nan_dataset)

Nombre de NaN dans les colonnes : Country        0
ISO3         216
CTS Name       0
Unit           0
Année          0
Taxe        8823
dtype: int64
Nombre de NaN dans le dataset : 9039


In [166]:
print(tax.columns)

Index(['Country', 'ISO3', 'CTS Name', 'Unit', 'Année', 'Taxe'], dtype='object')


In [167]:
# Pivoter les données pour mettre les catégories des taxes de la colonne "CTS Name" en colonnes et pour mettre les unités des taxes de la colonne "Unit" en colonnes
taxE = tax.pivot_table(index=['Country', 'ISO3', 'Année'],
                               columns=['CTS Name', 'Unit'],
                               values='Taxe').reset_index()

taxE.head(5)

CTS Name,Country,ISO3,Année,ECGTE - Environmental Taxes,ECGTE - Environmental Taxes,ECGTEN - Taxes on Energy (Including Fuel for Transport),ECGTEN - Taxes on Energy (Including Fuel for Transport),ECGTEP - Taxes on Pollution,ECGTEP - Taxes on Pollution,ECGTER - Taxes on Resources,ECGTER - Taxes on Resources,ECGTET - Taxes on Transport (Excluding Fuel for Transport),ECGTET - Taxes on Transport (Excluding Fuel for Transport)
Unit,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Domestic Currency,Percent of GDP,Domestic Currency,Percent of GDP,Domestic Currency,Percent of GDP,Domestic Currency,Percent of GDP,Domestic Currency,Percent of GDP
0,Albania,ALB,2015,43993140000.0,3.067206,37741110000.0,2.631314,1782069000.0,0.124246,32546493.0,0.002269,4437413000.0,0.309377
1,Albania,ALB,2016,47813790000.0,3.247163,40945620000.0,2.780726,1879970000.0,0.127674,52524339.0,0.003567,4935684000.0,0.335195
2,Albania,ALB,2017,47548580000.0,3.066373,40400040000.0,2.605369,1941324000.0,0.125195,61861356.38,0.003989,5145347000.0,0.33182
3,Albania,ALB,2018,51145590000.0,3.124865,43521820000.0,2.659072,2226251000.0,0.136018,56011991.0,0.003422,5341506000.0,0.326352
4,Albania,ALB,2019,53415650000.0,3.157133,45165300000.0,2.669496,2625011000.0,0.155151,58777099.5,0.003474,5566564000.0,0.329012


In [168]:
# Renommer les colonnes
taxE.columns = [
    'Pays',
    'ISO3',
    'Année',
    'ECGTE_TaxeEnv_DeviseNat',
    'ECGTE_TaxeEnv_PctPIB',
    'ECGTEN_TaxeEnergie_CarbIncl_DeviseNat',
    'ECGTEN_TaxeEnergie_CarbIncl_PctPIB',
    'ECGTEP_TaxePollution_DeviseNat',
    'ECGTEP_TaxePollution_PctPIB',
    'ECGTER_TaxeRess_DeviseNat',
    'ECGTER_TaxeRess_PctPIB',
    'ECGTET_TaxeTransp_CarbExcl_DeviseNat',
    'ECGTET_TaxeTransp_CarbExcl_PctPIB'
]
# Afficher les nouvelles colonnes
print(taxE.columns)


Index(['Pays', 'ISO3', 'Année', 'ECGTE_TaxeEnv_DeviseNat',
       'ECGTE_TaxeEnv_PctPIB', 'ECGTEN_TaxeEnergie_CarbIncl_DeviseNat',
       'ECGTEN_TaxeEnergie_CarbIncl_PctPIB', 'ECGTEP_TaxePollution_DeviseNat',
       'ECGTEP_TaxePollution_PctPIB', 'ECGTER_TaxeRess_DeviseNat',
       'ECGTER_TaxeRess_PctPIB', 'ECGTET_TaxeTransp_CarbExcl_DeviseNat',
       'ECGTET_TaxeTransp_CarbExcl_PctPIB'],
      dtype='object')


In [169]:
# Compter le nombre de valeurs "NaN" dans les colonnes
nb_nan_colTax_taxE = taxE.isna().sum()

print("Nombre de NaN dans les colonnes :", nb_nan_colTax_taxE)

# Compter le nombre de valeurs "NaN" dans le dataset
nb_nan_dataset_taxE = taxE.isna().sum().sum()

print("Nombre de NaN dans le dataset :", nb_nan_dataset_taxE)

Nombre de NaN dans les colonnes : Pays                                       0
ISO3                                       0
Année                                      0
ECGTE_TaxeEnv_DeviseNat                  460
ECGTE_TaxeEnv_PctPIB                     477
ECGTEN_TaxeEnergie_CarbIncl_DeviseNat    339
ECGTEN_TaxeEnergie_CarbIncl_PctPIB       345
ECGTEP_TaxePollution_DeviseNat           134
ECGTEP_TaxePollution_PctPIB              140
ECGTER_TaxeRess_DeviseNat                146
ECGTER_TaxeRess_PctPIB                   152
ECGTET_TaxeTransp_CarbExcl_DeviseNat     350
ECGTET_TaxeTransp_CarbExcl_PctPIB        367
dtype: int64
Nombre de NaN dans le dataset : 2910


In [170]:
# Filtrer les lignes avec des valeurs nan puis les afficher
nan_rows_taxE = taxE[taxE.isna().any(axis=1)]

print("Lignes contenant des valeurs NaN :")
print(nan_rows_taxE)

Lignes contenant des valeurs NaN :
                          Pays ISO3 Année  ECGTE_TaxeEnv_DeviseNat  \
5     Andorra, Principality of  AND  2015             4.600367e+12   
6     Andorra, Principality of  AND  2016             4.616889e+12   
7     Andorra, Principality of  AND  2017             5.043869e+12   
8     Andorra, Principality of  AND  2018             5.283535e+12   
9     Andorra, Principality of  AND  2019             5.675649e+12   
...                        ...  ...   ...                      ...   
2721                   Vietnam  VNM  1999                      NaN   
2722                   Vietnam  VNM  2000                      NaN   
2723                   Vietnam  VNM  2001                      NaN   
2724                   Vietnam  VNM  2002                      NaN   
2743                   Vietnam  VNM  2021                      NaN   

      ECGTE_TaxeEnv_PctPIB  ECGTEN_TaxeEnergie_CarbIncl_DeviseNat  \
5                      NaN                             

In [171]:
taxE.dtypes

Pays                                      object
ISO3                                      object
Année                                     object
ECGTE_TaxeEnv_DeviseNat                  float64
ECGTE_TaxeEnv_PctPIB                     float64
ECGTEN_TaxeEnergie_CarbIncl_DeviseNat    float64
ECGTEN_TaxeEnergie_CarbIncl_PctPIB       float64
ECGTEP_TaxePollution_DeviseNat           float64
ECGTEP_TaxePollution_PctPIB              float64
ECGTER_TaxeRess_DeviseNat                float64
ECGTER_TaxeRess_PctPIB                   float64
ECGTET_TaxeTransp_CarbExcl_DeviseNat     float64
ECGTET_TaxeTransp_CarbExcl_PctPIB        float64
dtype: object

In [172]:
taxE.shape

(2750, 13)

In [173]:
# Vérifier les valeurs nulles dans le DataFrame
null_values = taxE.isnull()

# Vérifier si le DataFrame est vide
is_empty = taxE.empty

# Afficher les valeurs nulles
print("Valeurs nulles dans le DataFrame :")
print(null_values)

# Afficher si le DataFrame est vide
print("\nLe DataFrame est vide :", is_empty)


Valeurs nulles dans le DataFrame :
       Pays   ISO3  Année  ECGTE_TaxeEnv_DeviseNat  ECGTE_TaxeEnv_PctPIB  \
0     False  False  False                    False                 False   
1     False  False  False                    False                 False   
2     False  False  False                    False                 False   
3     False  False  False                    False                 False   
4     False  False  False                    False                 False   
...     ...    ...    ...                      ...                   ...   
2745  False  False  False                    False                 False   
2746  False  False  False                    False                 False   
2747  False  False  False                    False                 False   
2748  False  False  False                    False                 False   
2749  False  False  False                    False                 False   

      ECGTEN_TaxeEnergie_CarbIncl_DeviseNat  \
0    

In [174]:
# Sélectionner les lignes avec NaN ou 0 dans les 5 colonnes spécifiées
lignes_nan_zero = taxE.loc[(taxE['ECGTE_TaxeEnv_DeviseNat'].isna() | (taxE['ECGTE_TaxeEnv_DeviseNat'] == 0)) &
(taxE['ECGTE_TaxeEnv_PctPIB'].isna() | (taxE['ECGTE_TaxeEnv_PctPIB'] == 0)) &
                               (taxE['ECGTEN_TaxeEnergie_CarbIncl_DeviseNat'].isna() | (taxE['ECGTEN_TaxeEnergie_CarbIncl_DeviseNat'] == 0)) &
                               (taxE['ECGTEN_TaxeEnergie_CarbIncl_PctPIB'].isna() | (taxE['ECGTEN_TaxeEnergie_CarbIncl_PctPIB'] == 0)) &

                               (taxE['ECGTEP_TaxePollution_DeviseNat'].isna() | (taxE['ECGTEP_TaxePollution_DeviseNat'] == 0)) &
                               (taxE['ECGTEP_TaxePollution_PctPIB'].isna() | (taxE['ECGTEP_TaxePollution_PctPIB'] == 0)) &

                               (taxE['ECGTER_TaxeRess_DeviseNat'].isna() | (taxE['ECGTER_TaxeRess_DeviseNat'] == 0)) &
                               (taxE['ECGTER_TaxeRess_PctPIB'].isna() | (taxE['ECGTER_TaxeRess_PctPIB'] == 0)) &

                               (taxE['ECGTET_TaxeTransp_CarbExcl_DeviseNat'].isna() | (taxE['ECGTET_TaxeTransp_CarbExcl_DeviseNat'] == 0)) & 
(taxE['ECGTET_TaxeTransp_CarbExcl_PctPIB'].isna() | (taxE['ECGTET_TaxeTransp_CarbExcl_PctPIB'] == 0))]

# Afficher les lignes sélectionnées
taxE.head(10)

Unnamed: 0,Pays,ISO3,Année,ECGTE_TaxeEnv_DeviseNat,ECGTE_TaxeEnv_PctPIB,ECGTEN_TaxeEnergie_CarbIncl_DeviseNat,ECGTEN_TaxeEnergie_CarbIncl_PctPIB,ECGTEP_TaxePollution_DeviseNat,ECGTEP_TaxePollution_PctPIB,ECGTER_TaxeRess_DeviseNat,ECGTER_TaxeRess_PctPIB,ECGTET_TaxeTransp_CarbExcl_DeviseNat,ECGTET_TaxeTransp_CarbExcl_PctPIB
0,Albania,ALB,2015,43993140000.0,3.067206,37741110000.0,2.631314,1782069000.0,0.124246,32546493.0,0.002269,4437413000.0,0.309377
1,Albania,ALB,2016,47813790000.0,3.247163,40945620000.0,2.780726,1879970000.0,0.127674,52524339.0,0.003567,4935684000.0,0.335195
2,Albania,ALB,2017,47548580000.0,3.066373,40400040000.0,2.605369,1941324000.0,0.125195,61861356.38,0.003989,5145347000.0,0.33182
3,Albania,ALB,2018,51145590000.0,3.124865,43521820000.0,2.659072,2226251000.0,0.136018,56011991.0,0.003422,5341506000.0,0.326352
4,Albania,ALB,2019,53415650000.0,3.157133,45165300000.0,2.669496,2625011000.0,0.155151,58777099.5,0.003474,5566564000.0,0.329012
5,"Andorra, Principality of",AND,2015,4600367000000.0,,0.0,,0.0,,0.0,,4600367000000.0,
6,"Andorra, Principality of",AND,2016,4616889000000.0,,0.0,,0.0,,0.0,,4616889000000.0,
7,"Andorra, Principality of",AND,2017,5043869000000.0,,0.0,,0.0,,0.0,,5043869000000.0,
8,"Andorra, Principality of",AND,2018,5283535000000.0,,0.0,,0.0,,0.0,,5283535000000.0,
9,"Andorra, Principality of",AND,2019,5675649000000.0,,0.0,,0.0,,0.0,,5675649000000.0,


In [175]:
# Supprimer les lignes avec NaN ou 0 dans les 5 colonnes spécifiées
taxE = taxE.drop(taxE[(taxE['ECGTE_TaxeEnv_DeviseNat'].isna() | (taxE['ECGTE_TaxeEnv_DeviseNat'] == 0)) &
                (taxE['ECGTE_TaxeEnv_PctPIB'].isna() | (taxE['ECGTE_TaxeEnv_PctPIB'] == 0)) &
                               (taxE['ECGTEN_TaxeEnergie_CarbIncl_DeviseNat'].isna() | (taxE['ECGTEN_TaxeEnergie_CarbIncl_DeviseNat'] == 0)) &
                               (taxE['ECGTEN_TaxeEnergie_CarbIncl_PctPIB'].isna() | (taxE['ECGTEN_TaxeEnergie_CarbIncl_PctPIB'] == 0)) &

                               (taxE['ECGTEP_TaxePollution_DeviseNat'].isna() | (taxE['ECGTEP_TaxePollution_DeviseNat'] == 0)) &
                               (taxE['ECGTEP_TaxePollution_PctPIB'].isna() | (taxE['ECGTEP_TaxePollution_PctPIB'] == 0)) &

                               (taxE['ECGTER_TaxeRess_DeviseNat'].isna() | (taxE['ECGTER_TaxeRess_DeviseNat'] == 0)) &
                               (taxE['ECGTER_TaxeRess_PctPIB'].isna() | (taxE['ECGTER_TaxeRess_PctPIB'] == 0)) &

                               (taxE['ECGTET_TaxeTransp_CarbExcl_DeviseNat'].isna() | (taxE['ECGTET_TaxeTransp_CarbExcl_DeviseNat'] == 0)) & 
(taxE['ECGTET_TaxeTransp_CarbExcl_PctPIB'].isna() | (taxE['ECGTET_TaxeTransp_CarbExcl_PctPIB'] == 0))].index)

# Réinitialiser les index après la suppression des lignes
taxE = taxE.reset_index(drop=True)

# Afficher le dataframe après suppression
taxE.head(5)

Unnamed: 0,Pays,ISO3,Année,ECGTE_TaxeEnv_DeviseNat,ECGTE_TaxeEnv_PctPIB,ECGTEN_TaxeEnergie_CarbIncl_DeviseNat,ECGTEN_TaxeEnergie_CarbIncl_PctPIB,ECGTEP_TaxePollution_DeviseNat,ECGTEP_TaxePollution_PctPIB,ECGTER_TaxeRess_DeviseNat,ECGTER_TaxeRess_PctPIB,ECGTET_TaxeTransp_CarbExcl_DeviseNat,ECGTET_TaxeTransp_CarbExcl_PctPIB
0,Albania,ALB,2015,43993140000.0,3.067206,37741110000.0,2.631314,1782069000.0,0.124246,32546493.0,0.002269,4437413000.0,0.309377
1,Albania,ALB,2016,47813790000.0,3.247163,40945620000.0,2.780726,1879970000.0,0.127674,52524339.0,0.003567,4935684000.0,0.335195
2,Albania,ALB,2017,47548580000.0,3.066373,40400040000.0,2.605369,1941324000.0,0.125195,61861356.38,0.003989,5145347000.0,0.33182
3,Albania,ALB,2018,51145590000.0,3.124865,43521820000.0,2.659072,2226251000.0,0.136018,56011991.0,0.003422,5341506000.0,0.326352
4,Albania,ALB,2019,53415650000.0,3.157133,45165300000.0,2.669496,2625011000.0,0.155151,58777099.5,0.003474,5566564000.0,0.329012


In [176]:
taxE = taxE.fillna(0)

In [177]:
taxE.to_csv('./output/Tax_Energy_clear.csv', index=False)

In [178]:
# Nouveau dataframe taxE1 à partir de taxE
# taxE1 = taxE.copy()

# Définir une fonction de formatage en fonction du type d'unité
# def format_value(value, unit):
#     if "Domestic Currency" in unit:
#         return '{:,.2f}'.format(value).replace(',', ' ')
#     elif "Percent of GDP" in unit:
#         return '{:.4f}'.format(value)
#     else:
#         return value  # Garantir le retour de la valeur d'origine si l'unité n'est pas reconnue


# Appliquer le formatage en fonction du type d'unité pour chaque colonne concernée
# for column in taxE.columns:
#     if "Domestic Currency" in column:
#         taxE1[column] = taxE1[column].apply(lambda x: format_value(x, column))
#     elif "Percent of GDP" in column:
#         taxE1[column] = taxE1[column].apply(lambda x: format_value(x, column))

# taxE1.head(5)

In [179]:
# Nouveau dataframe taxE2 à partir de taxE
# taxE2 = taxE.copy()

# Définir une fonction de formatage en fonction du type d'unité
# def format_value(value, unit):
#     if "Domestic Currency" in unit:
#         return '{:,.2f}'.format(value).replace(',', ' ')
#     elif "Percent of GDP" in unit:
#         return '{:.4f}%'.format(value)
#     else:
#         return value  # Garantir le retour de la valeur d'origine si l'unité n'est pas reconnue

# Appliquer le formatage en fonction du type d'unité pour chaque colonne concernée
# for column in taxE.columns:
#     if "Domestic Currency" in column:
#         taxE2[column] = taxE2[column].apply(lambda x: format_value(x, column))
#     elif "Percent of GDP" in column:
#         taxE2[column] = taxE2[column].apply(lambda x: format_value(x, column))
# 
# taxE2.head(5)

In [180]:
# Mettre les valeurs de la colonne "Unit" en colonnes distinctes
#tax = tax.pivot_table(index=["Country", "ISO3", "CTS Name", "Année"], columns="Unit", values="Taxe").reset_index()
#tax.head(5)

In [181]:
# Mettre les valeurs de la colonne "CTS Name" en colonnes distinctes
# tax = tax.pivot_table(index=["Country", "ISO3", "Année", "Unit"], columns="CTS Name", values="Taxe").reset_index()

In [182]:
# tax.to_csv('./output/tax.csv', index=False)

In [183]:
# tax.to_csv('./output/Tax_Energy_clear.csv', index=False)