In [ ]:
import pandas as pd

# Raw CO2 emissions table as read from the CSV file.
df = pd.read_csv("./assets/CO2_Emissions_1960-2018.csv")

# Empty frame with the target column layout; it is rebuilt from scratch
# once the reshaping step has run.
data = pd.DataFrame(columns=["country", "year", "pollution_co2"])

# Countries to keep when the filter flag below is switched on.
kept_countries = [
    "France",
    "Italy",
    "Spain",
    "Germany",
    "Portugal",
    "United Kingdom",
]
limit_to_kept_countries = False

# Preview the first five rows.
df.head()

In [ ]:
# Index the table by country name and collect the list of distinct countries
# in order of first appearance.
#
# The guard keeps this cell re-runnable: once 'Country Name' has been moved
# into the index it is no longer a column, so an unconditional lookup or
# set_index would raise KeyError on a second execution.
if "Country Name" in df.columns:
    df.set_index("Country Name", inplace=True)
elif df.index.name != "Country Name":
    # Neither a column nor the current index: the input has no country names.
    raise KeyError("Country Name")

countries = df.index.unique().tolist()
print(countries)

In [ ]:
# Collect the year columns.
#
# Columns are selected by content (labels made up only of digits) rather than
# by position. A positional `columns[1:]` slice always discards the first
# remaining column; when 'Country Name' is already the index, that first
# column may itself be a year (NOTE(review): depends on the CSV layout —
# confirm), and any non-year column left in the list would make the later
# integer conversion of `year` fail.
columns = df.columns
years = [label for label in columns.tolist() if str(label).isdigit()]
print(years)

In [ ]:
# Reshape the wide table (countries on the index, one column per year) into a
# long table with one row per (country, year) pair and the columns
# 'country', 'year', 'pollution_co2'.
#
# Transposing first puts the countries on the column axis, so melt() emits
# every year of the first country, then every year of the second, and so on:
# the same country-major row order as a nested "for country / for year" loop,
# without one scalar .loc lookup per cell. ignore_index=False carries each
# row's year label along in the index so it can be turned into a column.
data = (
    df.loc[countries, years]
    .T
    .melt(ignore_index=False, var_name="country", value_name="pollution_co2")
    .rename_axis("year")
    .reset_index()
    .reindex(columns=["country", "year", "pollution_co2"])
)
data.head(5)

In [ ]:
# Display the dimensions of `data` as (rows, columns).
data.shape

In [ ]:
# Count the rows whose CO2 value is missing and drop them if there are any.
nb_na = data["pollution_co2"].isnull().sum()
print(f"Nombre de valeurs invalides ou nulls: {nb_na}")

if nb_na > 0:
    # Remove, in place, every row without a CO2 value.
    data.dropna(subset=["pollution_co2"], inplace=True)
    print("Nettoyage des valeurs vides...")

# Show the dimensions after the cleanup.
data.shape

In [ ]:
# Write `data` to CSV, leaving out the row index.
data.to_csv('./output/CO2_Emissions_sortedByYearAndCountry.csv', index=False)

In [ ]:
# When the filter flag is on, keep only the rows whose country is in
# `kept_countries`; then export the table to CSV without the row index.
if limit_to_kept_countries:
    keep_mask = data["country"].isin(kept_countries)
    data = data[keep_mask]

data.to_csv(
    "./output/CO2_Emissions_sortedByYearAndCountry.csv",
    index=False,
)


In [ ]:
# Load the World Energy Consumption file, optionally restrict it to
# `kept_countries`, and export the result to CSV without the row index.
wec = pd.read_csv("./assets/World Energy Consumption.csv")

if limit_to_kept_countries:
    wec_mask = wec["country"].isin(kept_countries)
    wec = wec[wec_mask]

wec.to_csv(
    "./output/World Energy Consumption (limited).csv",
    index=False,
)

In [ ]:
# Give the join keys the same dtypes in both tables: integer years and
# string country names.
for frame in (wec, data):
    frame["year"] = frame["year"].astype(int)
    frame["country"] = frame["country"].astype(str)

# Attach the CO2 values to the energy table, matching on country and year.
wec = wec.merge(data, on=["country", "year"])

# Drop the 'population', 'iso_code' and 'gdp' columns; any that are absent
# are skipped silently.
wec = wec.drop(columns=["population", "iso_code", "gdp"], errors="ignore")

# Keep only the rows from 1960 onwards.
from_1960 = wec["year"] >= 1960
wec = wec[from_1960]

# Save the merged table to CSV without the row index.
wec.to_csv(
    "./output/World Energy Consumption (limited).csv",
    index=False,
)