In [326]:
import pandas as pd

data = pd.DataFrame(columns = ['Country', 'Year', 'Value'])
df = pd.read_csv('./assets/CO2_Emissions_1960-2018.csv')

# Pays gardés
kept_countries = ['France', 'Italy', 'Spain', 'Germany', 'Portugal']
limit_to_kept_countries = True

df.head(5)

Unnamed: 0,Country Name,1960,1961,1962,1963,1964,1965,1966,1967,1968,...,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018
0,Aruba,204.631696,208.837879,226.08189,214.785217,207.626699,185.213644,172.158729,210.819017,194.917536,...,,,,,,,,,,
1,Africa Eastern and Southern,0.90606,0.922474,0.930816,0.94057,0.996033,1.04728,1.033908,1.052204,1.079727,...,1.021954,1.048876,1.005338,1.021646,1.031833,1.041145,0.987393,0.971016,0.959978,0.933541
2,Afghanistan,0.046057,0.053589,0.073721,0.074161,0.086174,0.101285,0.107399,0.123409,0.115142,...,0.211306,0.297065,0.407074,0.335351,0.263716,0.234037,0.232176,0.208857,0.203328,0.200151
3,Africa Western and Central,0.09088,0.095283,0.096612,0.112376,0.133258,0.184803,0.193676,0.189305,0.143989,...,0.42677,0.472819,0.497023,0.490867,0.504655,0.507671,0.480743,0.472959,0.476438,0.515544
4,Angola,0.100835,0.082204,0.210533,0.202739,0.213562,0.205891,0.268937,0.172096,0.289702,...,1.205902,1.221515,1.216317,1.204799,1.261542,1.285365,1.260921,1.227703,1.034317,0.88738


In [327]:
# Récupération de la liste des pays
countries = df['Country Name'].unique().tolist()
print(countries)
df.set_index('Country Name', inplace=True)

['Aruba', 'Africa Eastern and Southern', 'Afghanistan', 'Africa Western and Central', 'Angola', 'Albania', 'Andorra', 'Arab World', 'United Arab Emirates', 'Argentina', 'Armenia', 'American Samoa', 'Antigua and Barbuda', 'Australia', 'Austria', 'Azerbaijan', 'Burundi', 'Belgium', 'Benin', 'Burkina Faso', 'Bangladesh', 'Bulgaria', 'Bahrain', 'Bahamas, The', 'Bosnia and Herzegovina', 'Belarus', 'Belize', 'Bermuda', 'Bolivia', 'Brazil', 'Barbados', 'Brunei Darussalam', 'Bhutan', 'Botswana', 'Central African Republic', 'Canada', 'Central Europe and the Baltics', 'Switzerland', 'Channel Islands', 'Chile', 'China', "Cote d'Ivoire", 'Cameroon', 'Congo, Dem. Rep.', 'Congo, Rep.', 'Colombia', 'Comoros', 'Cabo Verde', 'Costa Rica', 'Caribbean small states', 'Cuba', 'Curacao', 'Cayman Islands', 'Cyprus', 'Czech Republic', 'Germany', 'Djibouti', 'Dominica', 'Denmark', 'Dominican Republic', 'Algeria', 'East Asia & Pacific (excluding high income)', 'Early-demographic dividend', 'East Asia & Pacific'

In [328]:
# Récupération de la liste des années
columns = df.columns
years = columns[1:].tolist()
print(years)

['1961', '1962', '1963', '1964', '1965', '1966', '1967', '1968', '1969', '1970', '1971', '1972', '1973', '1974', '1975', '1976', '1977', '1978', '1979', '1980', '1981', '1982', '1983', '1984', '1985', '1986', '1987', '1988', '1989', '1990', '1991', '1992', '1993', '1994', '1995', '1996', '1997', '1998', '1999', '2000', '2001', '2002', '2003', '2004', '2005', '2006', '2007', '2008', '2009', '2010', '2011', '2012', '2013', '2014', '2015', '2016', '2017', '2018']


In [329]:
# Création nouveau dataframe organisé par pays puis années
data_list = []

for country in countries:
    #values = df[df['Country Name'] == country]
    for year in years:
        value = df.loc[country, year]
        #print(value)
        data_list.append({'Country': country, 'Year': year, 'Value': value})
data = pd.DataFrame(data_list, columns=["Country", "Year", "Value"])
data.head(5)

Unnamed: 0,Country,Year,Value
0,Aruba,1961,208.837879
1,Aruba,1962,226.08189
2,Aruba,1963,214.785217
3,Aruba,1964,207.626699
4,Aruba,1965,185.213644


In [330]:
data.shape

(15428, 3)

In [331]:
# Suppression des valeurs inutiles si nécessaires
nb_na = data['Value'].isna().sum()
print("Nombre de valeurs invalides ou nulls: " + str(nb_na))
if nb_na > 0:
    data.dropna(subset=['Value'], inplace=True)
data.shape

Nombre de valeurs invalides ou nulls: 2289


(13139, 3)

In [332]:
data.to_csv('./output/CO2_Emissions_sortedByYearAndCountry.csv', index=False)

In [333]:
# Retirer les valeurs des pays différents de cette liste et exporter le fichier
if limit_to_kept_countries:
    data = data[data.Country.isin(kept_countries)]
data.to_csv('./output/CO2_Emissions_sortedByYearAndCountry.csv', index=False)


In [334]:
# Charger le fichier World Energy Consumption
wec = pd.read_csv('./assets/World Energy Consumption.csv')
if limit_to_kept_countries:
    wec = wec[wec.country.isin(kept_countries)]
wec.to_csv('./output/World Energy Consumption (limited).csv', index=False)

In [350]:
# Ajouter une colonne pour insérer les valeurs des émissions de Co2 en métrique tonne
wec['pollution_co2'] = [float("NaN")] * wec.shape[0]
for (i, r) in data.iterrows():
    for (idx, row) in wec.iterrows():
        if row.country == r.Country and int(row.year) == int(r.Year):
            wec.at[idx, 'pollution_co2'] = r.Value
# Valeurs enregistrés
wec.to_csv('./output/World Energy Consumption (limited).csv', index=False)