In [2]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

pd.set_option('display.max_columns', 500)
pd.set_option('display.max_rows', 1000)

Datasets

In [9]:
energy = pd.read_csv('owid-energy-data.csv')
countries = pd.read_csv('countries_regions.csv')
emissions = pd.read_csv('owid-co2-data.csv')

Data Cleaning & Formatting

In [None]:
#Formatting & Cleaning - Regions
countries['Country'] = countries['Country'].str.strip()
countries['Region'] = countries['Region'].str.strip()
regions = countries[['Country', 'Region']]

regions.replace({'SUB-SAHARAN AFRICA': 'AFRICA', 'NORTHERN AFRICA':'AFRICA', 'NORTHERN AMERICA':'NORTH AMERICA',
                 'NEAR EAST':'MIDDLE EAST', 'C.W. OF IND. STATES':'EASTERN EUROPE', 'ASIA (EX. NEAR EAST)':'ASIA'}, inplace=True)
regions['Region'] = np.where((regions['Country'] == 'Afghanistan') & (regions['Region'] == 'ASIA') |
                             (regions['Country'] == 'Iran') & (regions['Region'] == 'ASIA') |
                             (regions['Country'] == 'Pakistan') & (regions['Region'] == 'ASIA'),
                             'MIDDLE EAST', regions['Region'])
regions['Region'] = regions['Region'].astype('category')

In [11]:
#Formatting & Cleaning - Energy Production
energy['country'] = energy['country'].str.strip()
energy['year'] = pd.to_datetime(energy['year'], format = '%Y')
energy = energy.query('year >= 2000')
energy_select = energy[['country', 'year', 'population', 'greenhouse_gas_emissions', 'coal_production', 'gas_production','oil_production']]
energy_data = energy_select.merge(regions, left_on='country', right_on='Country').drop(columns='Country').fillna(0)

renew_energy_select = energy[['country', 'year', 'solar_electricity', 'wind_electricity','hydro_electricity','other_renewable_electricity','renewables_electricity']]
renew_energy_select = renew_energy_select.query('year >= 2000')
renew_energy_data = renew_energy_select.merge(regions, left_on='country', right_on='Country').drop(columns='Country').fillna(0)

In [7]:
#Formatting & Cleaning - Emissions
emissions['country'] = emissions['country'].str.strip()
emissions['year'] = pd.to_datetime(emissions['year'], format = '%Y')
emissions = emissions.query('year >= 2000')
emissions_select = emissions[['country', 'year', 'co2', 'coal_co2', 'gas_co2', 'oil_co2']]
emissions_data = emissions_select.merge(regions, left_on='country', right_on='Country').drop(columns=['Country', 'Region']).fillna(0)

In [None]:
#Merger
world_data_merge = energy_data.merge(emissions_data, on=['country', 'year']).fillna(0)
world_data = world_data_merge.merge(renew_energy_data, on=['country', 'year']).drop(columns=['Region_x', 'Region_y']).fillna(0)
world_data

Exploratory Analysis

In [None]:
world_data.corr()['co2']

In [None]:
fig = plt.gcf()
fig.set_size_inches(12, 8)

sns.relplot(x='population', y='co2',
           data=world_data,
           kind='scatter',
            hue='Region')
plt.show()

Visualizations

In [None]:
#BRICS countries total emissions over 20 years
emissions_data_brics_total = emissions_data_brics.groupby(by='Country')['CO2_emissions'].agg('sum').plot.bar()

In [None]:
#Top 10 emission producing countries
world_emissions_t10 = emissions_data.groupby('Country', sort=True)['CO2_emissions'].sum().reset_index().sort_values(by='CO2_emissions', ascending=False).head(10)

fig = plt.gcf()
fig.set_size_inches(12, 8)

sns.barplot(x='Country', y='CO2_emissions',
            data=world_emissions_t10)

In [None]:
energy_data_t10 = energy_data.groupby('Country')['Value_co2_emissions_kt_by_country'].sum().reset_index().sort_values(by='Value_co2_emissions_kt_by_country', ascending=False).head(10)

fig = plt.gcf()
fig.set_size_inches(12, 8)

sns.barplot(x='Country', y='Value_co2_emissions_kt_by_country',
            data=energy_data_t10)
plt.show()

In [None]:
#Top 10 emission producing countries pver 20 years
emissions_t10_list = print(world_emissions_t10['Country'].to_list())


['China', 'India', 'Japan', 'Brazil', 'Indonesia', 'Germany', 'Canada', 'Saudi Arabia', 'South Africa', 'Mexico']
