# Import Libraries

In [None]:
import pandas as pd
import numpy as np
import warnings
import seaborn as sns
      
import matplotlib
import matplotlib.pyplot as plt

import squarify
import matplotlib.gridspec as gs
import plotly.express as px
import pycountry_convert as pc

warnings.filterwarnings('ignore')

# CARBON EMISSION

This notebook is purely an exploratory data analysis to find out which countries have the highest and the lowest carbon emissions. To do this, I will analyse and explore the provided data set, which holds historic carbon emission figures from 1995 to 2018.

I hope to learn a few things along the way, and I hope anyone reading this will, too.

I'll also explore if countries can improve their positions over time, or if the placings are more or less static.



# Load Data


In [None]:
#get data
# Read the emission table; the path is relative to the notebook's working directory.
# NOTE(review): read_csv's default NA handling parses the literal string 'NA' as
# missing, which would blank an ISO2 code of 'NA' if the file contains one — confirm.
df=pd.read_csv("../Emission/carbon dioxide.csv")
# Independent copy of the frame as loaded, unaffected by later edits to `df`.
df_backup=df.copy()
# Show the first two rows as a quick sanity check of the load.
df.head(2)

# DATA WRANGLING AND CLEANING

In [None]:
# Column names, dtypes and non-null counts.
df.info()

In [None]:
# (number of rows, number of columns)
df.shape

In [None]:
# True if at least one row is an exact duplicate of an earlier row.
df.duplicated().any()

In [None]:
# Count of missing values per column.
df.isnull().sum()

In [None]:
# Remove the identifier columns that the analysis does not use.
# Rebinding (instead of inplace=True) together with errors='ignore' keeps this
# cell re-runnable: a second run no longer raises KeyError for already-dropped columns.
df = df.drop(columns=['ObjectId', 'ISO3'], errors='ignore')


In [None]:
# Column labels of the working frame.
df.columns

In [None]:
# Distinct values of the Country column.
df['Country'].unique()

In [None]:
# Summary statistics of the numeric columns.
df.describe()

In [None]:
#Group the column to return only one Country value for each year
# Keep only the year columns (names containing 'F' followed by a digit) and sum
# them per country. The pattern is a raw string so '\d' is not treated as an
# (invalid) string escape.
df1 = df.filter(regex=r'F\d').groupby(df['Country']).sum().reset_index(drop=False)

# Row-wise total over all year columns.
df1['Total'] = df1.sum(axis=1, numeric_only=True)

# 'F1995' -> '1995'; 'Country' and 'Total' neither start nor end with 'F', so they are unchanged.
df1.columns = df1.columns.str.strip('F')

# Cast every value column (everything after 'Country') to int64. An explicit
# astype mapping is used because assigning through .iloc writes into the existing
# float columns and leaves their dtype as float.
# NOTE(review): the cast truncates fractional values — confirm that is acceptable for this data.
value_cols = df1.columns[1:]
df1 = df1.astype({col: 'int64' for col in value_cols})
df1.tail()

# DATA VISUALIZATION

In [None]:
# colours
# Accent colours used by the later charts and the title card.
low_c, high_c = '#dd4124', '#009473'

# Shared palette; its last two entries are the two accent colours above.
color = [
    "#f94144", "#f3722c", "#f8961e",
    "#f9c74f", "#90be6d", "#43aa8b",
    "#577590",
    low_c, high_c,
]

# Default font family for every matplotlib figure created after this cell.
plt.rcParams.update({"font.family": "monospace"})
 

In [None]:
# inspiration ; https://www.kaggle.com/gaetanlopez/how-to-make-clean-visualizations
# changed code signif.
import matplotlib.lines as lines

# Text-only "title card": a headline, four stand-out facts and two horizontal rules.
fig = plt.figure(figsize=(6,3),dpi=150)
# Deliberately not called `gs`: that name is the alias of matplotlib.gridspec
# created in the import cell, and rebinding it would break later `gs.GridSpec` calls.
card_grid = fig.add_gridspec(1, 1)
card_grid.update(wspace=0.2, hspace=0.4)
ax0 = fig.add_subplot(card_grid[0, 0])

background_color = "#fafafa"
fig.patch.set_facecolor(background_color) # figure background color
ax0.set_facecolor(background_color)

# Headline and sub-headline ('sans-serif' is the valid generic family name).
ax0.text(1.167,0.85,"Carbon Dioxide Emission from 1995-2018",color='#323232',fontsize=28, fontweight='bold', fontfamily='sans-serif',ha='center')
ax0.text(1.13,-0.35,"Stand-out facts",color='gray',fontsize=28, fontweight='bold', fontfamily='monospace',ha='center')

# One entry per fact: (headline x, headline text, headline colour, caption x, caption text).
stand_out_facts = [
    (0,    "Kazakhstan",  high_c, 0,    "Highest Carbon Emission"),
    (0.77, "Asia",        high_c, 0.75, "Highest in Continent"),
    (1.5,  "Australia",   low_c,  1.5,  "Lowest in Continent"),
    (2.25, "Switzerland", low_c,  2.25, "Lowest Carbon Emission"),
]
for headline_x, headline, headline_color, caption_x, caption in stand_out_facts:
    ax0.text(headline_x,0.4,headline,color=headline_color,fontsize=25, fontweight='bold', fontfamily='monospace',ha='center')
    ax0.text(caption_x,0.1,caption,color='gray',fontsize=15, fontfamily='monospace',ha='center')

# Hide tick labels, tick marks and the axes frame so only the text remains.
ax0.set_yticklabels('')
ax0.set_xticklabels('')
ax0.tick_params(axis='both',length=0)

for s in ['top','right','left','bottom']:
    ax0.spines[s].set_visible(False)

# Two horizontal rules positioned in figure coordinates (above and below the facts).
for rule_y in (0.67, 0.07):
    rule = lines.Line2D([0.15, 1.95], [rule_y, rule_y], transform=fig.transFigure, figure=fig,color = 'gray', linestyle='-',linewidth = 1.1, alpha = .5)
    fig.lines.append(rule)

plt.show()

In [None]:
# Countries ordered from largest to smallest 'Total'; df1 itself is left unsorted.
df2=df1.sort_values('Total',ascending=False) #sort the value

In [None]:
# Horizontal bar chart with one bar per row of df2 (its 'Total' against its 'Country').
fig, ax = plt.subplots(figsize=(15, 15), dpi=150)
sns.barplot(data=df2, x="Total", y="Country", palette="coolwarm", ax=ax)

ax.set_xlabel("Total Emission from 1995-2018 ")
ax.set_ylabel("Country")
# Plain numbers on the x axis instead of scientific notation.
ax.ticklabel_format(style='plain', axis='x')

In [None]:
# First and last five rows of the sorted frame: highest and lowest totals.
top5 = df2.head(5)
bot5 = df2.tail(5)

fig = plt.figure(figsize=(15,8))
# Build the 2x1 grid from the figure itself rather than through the `gs` module
# alias, so this cell works regardless of what `gs` is currently bound to.
g = fig.add_gridspec(ncols=1, nrows=2)

fig.suptitle("Countries With Highest and Lowest CO2 Emission", family='Serif', weight='bold', size=30)

# Upper panel: five largest totals.
ax1 = fig.add_subplot(g[0,0])
ax1 = sns.barplot(data=top5, x='Total', y='Country', color=color[5], ax=ax1)
ax1.set_xlabel('Total Emission from 1995-2018 (per Metric tons) ')
ax1.xaxis.set_visible(True)
ax1.annotate("Top 5 countries With Highest CO2 Emission",xy=(18,2), family='Serif', weight='bold', size=12)

# Lower panel: five smallest totals, sharing the x scale of the upper panel.
ax2 = fig.add_subplot(g[1,0], sharex=ax1)
ax2 = sns.barplot(data=bot5, x='Total', y='Country', color=color[0], ax=ax2)
ax2.annotate("Bottom 5 countries with Lowest CO2 Emission",xy=(8,2), family='Serif', weight='bold', size=12)
ax2.set_xlabel('Total Emission from 1995-2018 (per Metric tons) ')
ax2.xaxis.set_visible(True)

# Remove the frame around both panels.
for panel in (ax1, ax2):
    for s in ['left','right','top','bottom']:
        panel.spines[s].set_visible(False)


In [None]:
# Year columns of the top-5 countries, without the 'Total' column.
# drop() returns a new frame, so top5 is left intact and the cell can be re-run
# (deleting the column through an alias altered top5 and failed on a second run).
df6 = top5.drop(columns='Total')

# Long format: one row per (Country, Year) with the value in 'Emission'.
dfm2 = df6.melt('Country', var_name='Year', value_name='Emission')

In [None]:
# Emission per year for the top-5 countries, one line per country.
plt.figure(figsize=(22,8), dpi= 280)
# `kind` is an argument of sns.catplot, not of sns.pointplot, so it is not passed here.
g = sns.pointplot(x="Year", y="Emission", hue='Country', data=dfm2)


In [None]:
# Interactive box plot of the 'Emission' values in dfm2, one box per country.
px.box(
    data_frame=dfm2,
    x='Emission',
    y='Country',
    color='Country',
    title='Variation of Carbon Emission for Top 5 Countries',
)


In [None]:
# Label every row with the band its 'Year' falls into.
# The column starts as object dtype holding the placeholder 1, so the string
# labels can be written without changing the column's dtype.
dfm2['5years'] = pd.Series(1, index=dfm2.index, dtype=object)

# (first year, last year) of each band, both inclusive. 'Year' holds strings, and
# four-digit year strings compare in the same order as the numbers they represent.
year_bands = [
    ('1995', '2000'),
    ('2001', '2005'),
    ('2006', '2010'),
    ('2011', '2015'),
    ('2016', '2018'),
]
for first_year, last_year in year_bands:
    in_band = dfm2['Year'].between(first_year, last_year)
    # A single .loc assignment writes into dfm2 itself; chained indexing
    # (dfm2['5years'][mask] = ...) may write to a temporary copy instead.
    dfm2.loc[in_band, '5years'] = f'{first_year}-{last_year}'

In [None]:
# Same box plot as for the whole period, split into one facet column per '5years' label.
px.box(
    data_frame=dfm2,
    x='Emission',
    y='Country',
    color='Country',
    facet_col='5years',
    title='Variation of Carbon Emission for 5years',
)

In [None]:
# One row per (Country, ISO2) pair. numeric_only=True sums just the numeric
# columns instead of also concatenating the text columns of every grouped row.
df3 = df.groupby(['Country','ISO2']).sum(numeric_only=True).reset_index(drop=False)

# Attach the per-country year columns and 'Total' computed in df1.
df3 = pd.merge(df3, df1, on='Country')

# Two-letter country code -> two-letter continent code (pc is pycountry_convert
# from the import cell).
# NOTE(review): the lookup presumably raises for codes it does not know — confirm
# every ISO2 value in the data is covered.
df3['Continent'] = df3['ISO2'].apply(pc.country_alpha2_to_continent_code)

# Continent code -> display name; codes missing from this mapping become NaN.
name = {
    'NA': 'North America',
    'SA': 'South America', 
    'AS': 'Asia',
    'OC': 'Australia',
    'AF': 'Africa',
    'EU': 'Europe'}
df3['Continent'] = df3['Continent'].map(name)

In [None]:
# Display the first rows of the top-5 frame.
top5.head()

In [None]:
# Totals per continent. numeric_only=True keeps the text columns (such as
# Country and ISO2) out of the sum, where they would otherwise be concatenated
# into one long string per continent.
df4 = df3.groupby(['Continent']).sum(numeric_only=True).reset_index(drop=False)
df4.head()

In [None]:
# Vertical bar chart: continents on the x axis, summed 'Total' on the y axis.
sns.barplot(x = "Continent", y = "Total", data=df4 ,palette="bright")

# Axis labels match the plotted data (they were previously attached to the wrong axes).
plt.xlabel("Continent")
plt.ylabel("Total Emission from 1995-2018 ")
plt.xticks(rotation=45)
# Plain numbers on the value axis instead of scientific notation.
plt.ticklabel_format(style='plain', axis='y')



•	We can see that the total carbon emission of the top 5 countries decreased steadily from 1995 to 2018

•	The highest Carbon Emission comes from Asia and Europe
