In [80]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

Datasets

In [23]:
# Raw inputs, read from CSV files located next to the notebook.
energy_csv = 'global-data-on-sustainable-energy.csv'
countries_csv = 'countries_regions.csv'
emissions_csv = 'FAOSTAT_data_global_emissions.csv'

energy = pd.read_csv(energy_csv)
countries = pd.read_csv(countries_csv)
emissions = pd.read_csv(emissions_csv)

Data Cleaning & Formatting

In [6]:
#Formatting & Cleanup - Renewable Energy
# Normalise the key columns: 'Entity' is renamed to 'Country', 'Year' is parsed
# as a datetime using the '%Y' format, and country names are stripped of
# surrounding whitespace.
energy = energy.rename(columns={'Entity': 'Country'})
energy = energy.assign(
    Year=pd.to_datetime(energy['Year'], format='%Y'),
    Country=energy['Country'].str.strip(),
)

# Columns carried forward into the analysis.
energy_columns = [
    'Country',
    'Year',
    'Electricity from renewables (TWh)',
    'Electricity from nuclear (TWh)',
    'Electricity from fossil fuels (TWh)',
    'Renewable energy share in the total final energy consumption (%)',
    'Value_co2_emissions_kt_by_country',
]

# NOTE(review): fillna(0) makes a missing measurement indistinguishable from a
# true zero in every selected column -- confirm that is intended.
energy_select = energy[energy_columns].fillna(0)

In [None]:
#Formatting - Regions
# Strip surrounding whitespace so the names compare equal to the other datasets.
countries['Country'] = countries['Country'].str.strip()
countries['Region'] = countries['Region'].str.strip()

#Cleaning up regions
# Collapse the fine-grained region labels into broader groups.
region_aliases = {
    'SUB-SAHARAN AFRICA': 'AFRICA',
    'NORTHERN AFRICA': 'AFRICA',
    'NORTHERN AMERICA': 'NORTH AMERICA',
    'NEAR EAST': 'MIDDLE EAST',
    'C.W. OF IND. STATES': 'EASTERN EUROPE',
    'ASIA (EX. NEAR EAST)': 'ASIA',
}
# Countries that are moved from ASIA to MIDDLE EAST after the relabelling above.
middle_east_overrides = ['Afghanistan', 'Iran', 'Pakistan']

# Take an explicit copy: modifying a column slice of `countries` in place
# (replace(inplace=True) / column assignment) raises SettingWithCopyWarning
# and leaves it unclear which frame is being changed.
regions = countries[['Country', 'Region']].copy()
regions = regions.replace(region_aliases)

# Only rows currently labelled ASIA are reassigned; everything else is untouched.
move_to_middle_east = regions['Country'].isin(middle_east_overrides) & (regions['Region'] == 'ASIA')
regions.loc[move_to_middle_east, 'Region'] = 'MIDDLE EAST'

regions['Region'] = regions['Region'].astype('category')

In [24]:
#Formatting & Cleaning - Emissions
# Keep only the rows from the year 2000 onward.
from_2000 = emissions['Year'] >= 2000
emissions_2000 = emissions[from_2000]

# Narrow to the columns used later and give the measurement a descriptive name.
# NOTE(review): 'Value' is renamed to 'CO2_emissions' for every Element/Item
# row -- confirm the export only contains CO2 figures.
emissions_select = (
    emissions_2000[['Area', 'Element', 'Item', 'Year', 'Value']]
    .rename(columns={'Value': 'CO2_emissions'})
)

Exploratory Analysis

In [25]:
#Mergers
# Both merges below are inner joins on the country name, so every row whose
# name has no exact match in `regions` is dropped from the result. Report the
# unmatched names first so that loss is visible rather than silent.
energy_unmatched = (
    energy_select.loc[~energy_select['Country'].isin(regions['Country']), 'Country']
    .drop_duplicates()
    .tolist()
)
emissions_unmatched = (
    emissions_select.loc[~emissions_select['Area'].isin(regions['Country']), 'Area']
    .drop_duplicates()
    .tolist()
)
if energy_unmatched:
    print(f'{len(energy_unmatched)} energy countries have no region match and are dropped: {energy_unmatched}')
if emissions_unmatched:
    print(f'{len(emissions_unmatched)} emission areas have no region match and are dropped: {emissions_unmatched}')

# Attach the region to each energy row (matched on 'Country').
energy_data = energy_select.merge(regions, on='Country')
# Emissions name the country 'Area'; after matching, the duplicate key column is removed.
emissions_data = (
    emissions_select
    .merge(regions, left_on='Area', right_on='Country')
    .drop(columns='Area')
)

Subsets

In [26]:
# Per-region slices of the merged frames.
energy_region = energy_data['Region']
emissions_region = emissions_data['Region']

energy_data_na = energy_data.loc[energy_region == 'NORTH AMERICA']
energy_data_weu = energy_data.loc[energy_region == 'WESTERN EUROPE']
emissions_data_asia = emissions_data.loc[emissions_region == 'ASIA']

In [48]:
# Emission rows for the five BRICS members, selected by country name.
brics_list = [
    'Brazil',
    'Russia',
    'India',
    'China',
    'South Africa',
]
is_brics = emissions_data['Country'].isin(brics_list)
emissions_data_brics = emissions_data.loc[is_brics]

Visualizations

In [None]:
#BRICS countries total emissions over 20 years
# Sum the emissions per country, then draw the totals as a bar chart.
brics_totals_by_country = emissions_data_brics.groupby('Country')['CO2_emissions'].sum()
# Note: this name is bound to the Axes returned by the plot call, not to the totals.
emissions_data_brics_total = brics_totals_by_country.plot.bar()

In [None]:
#Top 10 emission producing countries
# Total the emissions per country and keep the ten largest totals.
world_emissions_t10 = (
    emissions_data.groupby('Country', sort=True)['CO2_emissions']
    .sum()
    .reset_index()
    .sort_values(by='CO2_emissions', ascending=False)
    .head(10)
)

# Create a dedicated figure instead of resizing whatever plt.gcf() returns:
# gcf() reuses a figure left open by an earlier cell if one exists, which
# would draw these bars on top of the previous chart.
fig, ax = plt.subplots(figsize=(12, 8))
sns.barplot(x='Country', y='CO2_emissions',
            data=world_emissions_t10, ax=ax)
# Units are not stated in this notebook, so the axis label leaves them out.
ax.set(title='Top 10 emission producing countries, 2000 onward',
       xlabel='Country', ylabel='Total CO2 emissions');

In [89]:
#Top 10 emission producing countries over 20 years
# Store the names first: print() returns None, so assigning its result would
# leave emissions_t10_list empty for any later use.
emissions_t10_list = world_emissions_t10['Country'].to_list()
print(emissions_t10_list)


['China', 'India', 'Japan', 'Brazil', 'Indonesia', 'Germany', 'Canada', 'Saudi Arabia', 'South Africa', 'Mexico']
