I will be using Plotly Express (a high-level wrapper around Plotly) together with Cufflinks to create interactive graphs.

Installing and Importing the Necessary Libraries

In [1]:
#pip install cufflinks

In [2]:
#pip install pandas-profiling

In [3]:
import pandas as pd
import pandas_profiling
import numpy as np
import cufflinks as cf
import plotly
import plotly.express as px
import plotly.graph_objects as go
import math

%matplotlib inline 
#magic function for showing the plots in the same code cell and for additional facilities

For working in offline mode and saving the plotly plots in local machine

In [4]:
# Offline helpers from plotly; only init_notebook_mode is called in this cell.
from plotly.offline import plot,iplot,download_plotlyjs,init_notebook_mode
# Initialise plotly's notebook mode.
init_notebook_mode(connected=True)
# Switch cufflinks (imported as cf) to offline mode.
cf.go_offline()

# Q.1.1 - For visualizing the Co2 emission by the countries overtime

Reading the DataFrame

In [5]:
# Load the CO2 emissions CSV; the 'year' column is parsed as dates.
# NOTE(review): absolute, machine-specific path - the notebook only runs where this file exists.
co2_df = pd.read_csv("D:\\Downloads\\global_co2_emissions.csv", parse_dates = ['year'])

In [6]:
#co2_df.profile_report()

I will be making a copy of the original dataframe for easy handling and checking for missing values

In [7]:
# Work on a copy so the frame loaded from disk (co2_df) stays untouched.
co2_dfc = co2_df.copy()

# Number of missing values per column.
co2_dfc.isna().sum()

country                              0
iso_code                          3371
year                                 0
Annual CO2 emissions (tonnes )       0
dtype: int64

In [8]:
#co2_dfc1 = co2_df.copy()

Creating a separate dataframe for the missing values and checking for the unique values

In [9]:
# Boolean mask: True for every row that has a missing value in any column.
has_missing = co2_dfc.isna().any(axis=1)

# Rows with missing data, and the distinct country labels among them.
missing_vals = co2_dfc.loc[has_missing]
missing_vals['country'].unique()

array(['Africa', 'Asia', 'Asia (excl. China & India)', 'Europe',
       'Europe (excl. EU-27)', 'Europe (excl. EU-28)',
       'European Union (27)', 'European Union (28)',
       'French Equatorial Africa', 'French West Africa',
       'High-income countries', 'International transport',
       'Kuwaiti Oil Fires', 'Leeward Islands', 'Low-income countries',
       'Lower-middle-income countries', 'North America',
       'North America (excl. USA)', 'Oceania', 'Panama Canal Zone',
       'Ryukyu Islands', 'South America', 'St. Kitts-Nevis-Anguilla',
       'Upper-middle-income countries'], dtype=object)

In [10]:
# Summary statistics of the numeric column(s) of the working copy.
co2_dfc.describe()

Unnamed: 0,Annual CO2 emissions (tonnes )
count,24670.0
mean,326658300.0
std,1677027000.0
min,34.0
25%,556928.0
50%,5332958.0
75%,48153090.0
max,36702500000.0


Filling in the missing ISO codes for Kyrgyzstan and the Wallis and Futuna Islands, so that the ISO code can then be used to remove the rows that are not countries

In [11]:
# Rescue rows of two territories whose ISO code is missing, before rows with
# missing values are dropped from co2_dfc.
#
# Only rows that actually lack an iso_code are selected, so concatenating df1/df2
# back onto the null-free frame cannot duplicate rows. The fill is restricted to
# the iso_code column and returns a new frame (no in-place edit of a slice).
#
# NOTE(review): the recorded outputs suggest neither name matched a row with a
# missing ISO code (the final country list ends with 'Zimbabwe' and contains
# 'Wallis and Futuna'); confirm both spellings against the CSV.
iso_missing = co2_dfc["iso_code"].isna()

df1 = co2_dfc.loc[iso_missing & co2_dfc["country"].isin(["Kyrgysztan"])].fillna({"iso_code": "KGZ"})

df2 = co2_dfc.loc[iso_missing & co2_dfc["country"].isin(["Wallis and Futuna Islands"])].fillna({"iso_code": "WLF"})

Removing the rows with a null ISO code, i.e. the rows which are not countries

In [12]:
# Keep only the rows that have no missing value in any column.
co2_dfc = co2_dfc.dropna()

In [13]:
# Append the hand-filled rows (df1, df2) after the null-free rows; original index labels are kept.
co2_dfc = pd.concat([co2_dfc,df1,df2])
co2_dfc

Unnamed: 0,country,iso_code,year,Annual CO2 emissions (tonnes )
0,Afghanistan,AFG,1949-01-01,14656
1,Afghanistan,AFG,1950-01-01,84272
2,Afghanistan,AFG,1951-01-01,91600
3,Afghanistan,AFG,1952-01-01,91600
4,Afghanistan,AFG,1953-01-01,106256
...,...,...,...,...
24665,Zimbabwe,ZWE,2016-01-01,10737567
24666,Zimbabwe,ZWE,2017-01-01,9581633
24667,Zimbabwe,ZWE,2018-01-01,11854367
24668,Zimbabwe,ZWE,2019-01-01,10949084


In [14]:
#co2_dfc1 = co2_dfc1[co2_dfc1['country'] == 'World']
#co2_dfc1.reset_index(level=0,inplace=True)
#del co2_dfc1["index"]
#co2_dfc1

Removing the rows which contain the entire worlds data which is not needed

In [15]:
# Discard the aggregate 'World' rows; only individual countries are analysed.
not_world = co2_dfc['country'].ne('World')
co2_dfc = co2_dfc.loc[not_world]
co2_dfc

Unnamed: 0,country,iso_code,year,Annual CO2 emissions (tonnes )
0,Afghanistan,AFG,1949-01-01,14656
1,Afghanistan,AFG,1950-01-01,84272
2,Afghanistan,AFG,1951-01-01,91600
3,Afghanistan,AFG,1952-01-01,91600
4,Afghanistan,AFG,1953-01-01,106256
...,...,...,...,...
24665,Zimbabwe,ZWE,2016-01-01,10737567
24666,Zimbabwe,ZWE,2017-01-01,9581633
24667,Zimbabwe,ZWE,2018-01-01,11854367
24668,Zimbabwe,ZWE,2019-01-01,10949084


In [16]:
# Ten largest single-year emission values, shown with their row index labels.
co2_dfc['Annual CO2 emissions (tonnes )'].nlargest(n=10)

4536    10667887453
4535    10489988555
4534    10289989525
4530     9985583382
4529     9952743755
4533     9920459189
4531     9848419740
4528     9775621803
4532     9720444086
4527     9528555734
Name: Annual CO2 emissions (tonnes ), dtype: int64

Storing the list of countries

In [103]:
# Distinct country names, in order of first appearance in co2_dfc.
list_countries = co2_dfc['country'].unique()
#for i in list_countries:
    #print(i)

Grouping the countries together for further processing

In [18]:
# Group the rows by country; the grouping is reused by the loop in the next cell.
country_group = co2_dfc.groupby('country')

# Spot check on one group: the largest annual emission recorded for Afghanistan.
country_afghanistan = country_group.get_group('Afghanistan')
country_afghanistan['Annual CO2 emissions (tonnes )'].max()

12160286

Using a for loop to get the maximum co2 emitted by a country over the course of years

In [19]:
# Maximum annual CO2 emission of every country, computed in one grouped pass
# instead of fetching each group by hand.
# sort=False keeps the countries in order of first appearance, which is the
# order co2_dfc['country'].unique() yields.
max_by_country = (
    co2_dfc
    .groupby('country', sort=False)['Annual CO2 emissions (tonnes )']
    .max()
)

# Parallel lists consumed by the next cell: country names and their maxima.
list_country_name = max_by_country.index.tolist()
list_max = max_by_country.tolist()

print(len(list_max))
print(list_country_name)

222
['Afghanistan', 'Albania', 'Algeria', 'Andorra', 'Angola', 'Anguilla', 'Antarctica', 'Antigua and Barbuda', 'Argentina', 'Armenia', 'Aruba', 'Australia', 'Austria', 'Azerbaijan', 'Bahamas', 'Bahrain', 'Bangladesh', 'Barbados', 'Belarus', 'Belgium', 'Belize', 'Benin', 'Bermuda', 'Bhutan', 'Bolivia', 'Bonaire Sint Eustatius and Saba', 'Bosnia and Herzegovina', 'Botswana', 'Brazil', 'British Virgin Islands', 'Brunei', 'Bulgaria', 'Burkina Faso', 'Burundi', 'Cambodia', 'Cameroon', 'Canada', 'Cape Verde', 'Central African Republic', 'Chad', 'Chile', 'China', 'Christmas Island', 'Colombia', 'Comoros', 'Congo', 'Cook Islands', 'Costa Rica', "Cote d'Ivoire", 'Croatia', 'Cuba', 'Curacao', 'Cyprus', 'Czechia', 'Democratic Republic of Congo', 'Denmark', 'Djibouti', 'Dominica', 'Dominican Republic', 'Ecuador', 'Egypt', 'El Salvador', 'Equatorial Guinea', 'Eritrea', 'Estonia', 'Eswatini', 'Ethiopia', 'Faeroe Islands', 'Fiji', 'Finland', 'France', 'French Guiana', 'French Polynesia', 'Gabon', 'G

Zipping the lists in  a dataframe

In [104]:
# One row per country: its name and its maximum annual emission.
max_emm_df = pd.DataFrame({'country': list_country_name, 'max_emission': list_max})

# To print every row instead of the truncated view, wrap the print in:
#with pd.option_context('display.max_rows', None, 'display.max_columns', None):

print(max_emm_df)

               country  max_emission
0          Afghanistan      12160286
1              Albania       8976800
2              Algeria     166641950
3              Andorra        575248
4               Angola      33800624
..                 ...           ...
217            Vietnam     260312093
218  Wallis and Futuna         29312
219              Yemen      24976297
220             Zambia       7313113
221           Zimbabwe      17393590

[222 rows x 2 columns]


Bar plot of the maximum annual CO2 emission of every country

In [94]:
# Bar chart with one bar per country in max_emm_df (all countries, not only the top 10).
fig = px.bar(max_emm_df,x="country",y="max_emission")
fig.show()

#fig.write_html("D:\Downloads\GHG emissions HTML Plots\CO2 emissions of Bar plot of all the countries.html")

The above graph shows, for each country, the maximum annual CO2 emission in its entire recorded history. Zooming in to the graph, we can see that China has the highest maximum annual CO2 emission.

Taking the top 10 countries with the highest emission and storing it in top_10_list

In [22]:
# The ten rows with the highest maximum emission, largest first.
max_10 = max_emm_df.nlargest(n=10, columns='max_emission')

# Their country names as a plain Python list.
top_10_list = max_10['country'].unique().tolist()
top_10_list

['China',
 'United States',
 'India',
 'Russia',
 'Japan',
 'Germany',
 'Iran',
 'Ukraine',
 'Saudi Arabia',
 'South Korea']

Here we can see the top 10 countries who emit the maximum amount of co2

In [109]:
# Bar chart of the ten countries with the highest maximum annual emission.
fig2 = px.bar(max_10,x="country",y="max_emission")
fig2.show()

# Raw string: the backslashes of the Windows path are taken literally, so the
# literal no longer contains invalid escape sequences (\D, \G, \C). The
# resulting path is unchanged.
# NOTE(review): absolute, machine-specific output path.
fig2.write_html(r"D:\Downloads\GHG emissions HTML Plots\CO2 emissions of TOP 10 countries.html")

Here a similar graph is generated, which also shows China as the biggest emitter of CO2 in history, followed by the US and India in 2nd and 3rd position. One of the reasons behind China's extremely high CO2 emissions is its booming manufacturing industry, which produces goods for a wide range of sectors.

Now we can select these 10 countries and plot the co2 emitted by them over time

In [24]:
# Collect the full emission history of each top-10 country, in the order of
# top_10_list, and stack the pieces with a single concat.
# (DataFrame.append was removed in pandas 2.0, and growing a frame inside a
# loop copies it on every iteration.)
top_10_frames = [co2_dfc[co2_dfc['country'] == name] for name in top_10_list]

# pd.concat raises on an empty list, so fall back to an empty frame as before.
top_10_country = pd.concat(top_10_frames) if top_10_frames else pd.DataFrame()

In [25]:
# Replace the inherited row labels with a fresh 0..n-1 index, discarding the old one.
top_10_country = top_10_country.reset_index(drop=True)
top_10_country

Unnamed: 0,country,iso_code,year,Annual CO2 emissions (tonnes )
0,China,CHN,1899-01-01,95264
1,China,CHN,1902-01-01,95264
2,China,CHN,1903-01-01,1963904
3,China,CHN,1904-01-01,2088480
4,China,CHN,1905-01-01,2297328
...,...,...,...,...
1514,South Korea,KOR,2016-01-01,639258641
1515,South Korea,KOR,2017-01-01,655747114
1516,South Korea,KOR,2018-01-01,671630709
1517,South Korea,KOR,2019-01-01,648024558


Making the trend line plot for the top 10 emitter of co2 over the years

In [26]:
# Trend lines of annual CO2 emissions, one coloured line per top-10 country.
emission_col = "Annual CO2 emissions (tonnes )"

fig3 = px.line(
    top_10_country,
    x="year",
    y=emission_col,
    color='country',
    hover_name='country',
    hover_data=['country', emission_col],
    labels={'country': 'Country', emission_col: 'Co2 Emissions'},
    height=600,
)

# Centred title, legend hidden.
fig3.update_layout(
    title="Change in CO₂ Emission Between Years 1750 and 2020 - Countries",
    title_x=0.50,
    showlegend=False,
)
fig3.update(layout_coloraxis_showscale=True)
fig3.show()

The trend lines show the behaviour of the top 10 countries in terms of CO2 emissions. We can see a sharp spike in China's emissions, whereas the US shows a steady rise since the industrial age followed by a dip in recent years, due to the increased use of sustainable and renewable sources of energy. India, however, shows a spike in recent years due to its booming population and heavy use of fossil fuels.

Saving the plot to an html file

In [27]:
#fig3.write_html("D:/Downloads/file_name.html")

# Q.1.2 - For finding out the Co2 emissions per capita

In [28]:
# Load the wider CO2 dataset (the per-capita column is selected from it in the next cell).
# NOTE(review): absolute, machine-specific path.
global_co2 = pd.read_csv("D:\\Downloads\\global-co2-data.csv")

In [29]:
# Keep only the identifying columns and the per-capita emission column.
per_capita_columns = ['country', 'iso_code', 'year', 'co2_per_capita']
per_capita = global_co2.loc[:, per_capita_columns]
per_capita

Unnamed: 0,country,iso_code,year,co2_per_capita
0,Afghanistan,AFG,1949,0.002
1,Afghanistan,AFG,1950,0.011
2,Afghanistan,AFG,1951,0.012
3,Afghanistan,AFG,1952,0.012
4,Afghanistan,AFG,1953,0.013
...,...,...,...,...
23703,Zimbabwe,ZWE,2015,0.881
23704,Zimbabwe,ZWE,2016,0.771
23705,Zimbabwe,ZWE,2017,0.720
23706,Zimbabwe,ZWE,2018,0.785


In [30]:
# Number of missing values in each column of per_capita.
per_capita.isna().sum()

country              0
iso_code          2778
year                 0
co2_per_capita    1328
dtype: int64

In [31]:
# Mean of co2_per_capita over all rows (every country and year together).
per_capita['co2_per_capita'].mean()

4.059418990169768

We see that the mean is around 4. However, this is the global average, so it would not be good practice to impute all the missing values with it. Instead, I take the mean CO2 per capita of each country over the years and impute the missing values of that country with its own mean.

In [32]:
# Demo: count the missing co2_per_capita values by collecting every True flag.
missing_flags = per_capita['co2_per_capita'].isna()
emp = [flag for flag in missing_flags if flag == True]
len(emp)

1328

In [33]:
# Rows of per_capita that have a missing value in at least one column.
row_has_nan = per_capita.isna().any(axis=1)
null_vals = per_capita.loc[row_has_nan]
null_vals

Unnamed: 0,country,iso_code,year,co2_per_capita
71,Africa,,1884,0.000
72,Africa,,1885,0.000
73,Africa,,1886,0.000
74,Africa,,1887,0.000
75,Africa,,1888,0.001
...,...,...,...,...
23326,World,OWID_WRL,1895,
23327,World,OWID_WRL,1896,
23328,World,OWID_WRL,1897,
23329,World,OWID_WRL,1898,


In [105]:
# Unique country values of the per_capita dataframe (name kept from the original cell).
list_country = per_capita['country'].unique()

# Grouping the rows by country.
capita_country_grp = per_capita.groupby('country')

# For every row, the mean co2_per_capita of that row's country. The mean skips
# missing values, and is NaN for a country that has no value at all.
country_mean = capita_country_grp['co2_per_capita'].transform('mean')

# Fill each missing value with its own country's mean, building a new frame
# rather than editing a group slice in place (which is not guaranteed to
# write through). Rows keep the order they have in per_capita.
per_capita_not_null = per_capita.assign(
    co2_per_capita=per_capita['co2_per_capita'].fillna(country_mean)
)

# Displaying the entire dataframe:
#with pd.option_context('display.max_rows', None, 'display.max_columns', None):
print(per_capita_not_null)

# Remaining missing values per column (countries without any per-capita data stay NaN).
per_capita_not_null.isna().sum()
#per_capita_not_null.to_csv("D:\\Downloads\\per.csv")

           country iso_code  year  co2_per_capita
0      Afghanistan      AFG  1949           0.002
1      Afghanistan      AFG  1950           0.011
2      Afghanistan      AFG  1951           0.012
3      Afghanistan      AFG  1952           0.012
4      Afghanistan      AFG  1953           0.013
...            ...      ...   ...             ...
23703     Zimbabwe      ZWE  2015           0.881
23704     Zimbabwe      ZWE  2016           0.771
23705     Zimbabwe      ZWE  2017           0.720
23706     Zimbabwe      ZWE  2018           0.785
23707     Zimbabwe      ZWE  2019           0.708

[23708 rows x 4 columns]


country              0
iso_code          2778
year                 0
co2_per_capita     369
dtype: int64

There are still missing values in the CO2 per capita column, because for some countries no per-capita data is present over the entire time range. For these countries no mean can be calculated, so nothing is filled in. We can therefore simply drop these rows.

In [35]:
# Keep only the rows without a missing value in any column (iso_code included).
per_capita_not_null = per_capita_not_null.dropna()

In [36]:
# per_capita_not_null no longer contains any NaN at this point (rows with
# missing values were dropped in the preceding cell), so filling NaN in the
# "Kyrgysztan" selection is a no-op and concatenating that selection back
# would only duplicate rows already present. The selection is kept under its
# original name but is not appended again.
# NOTE(review): to really rescue rows lacking an ISO code, they have to be
# selected and filled BEFORE the dropna cell.
kryg = per_capita_not_null[per_capita_not_null["country"].isin(["Kyrgysztan"])]

# Dropping the aggregate 'World' row and the countries which have outliers in
# their co2 per capita column.
excluded_countries = ['World', 'Sint Maarten (Dutch part)', 'Brunei']
per_capita_clean = per_capita_not_null[~per_capita_not_null['country'].isin(excluded_countries)]
per_capita_clean

Unnamed: 0,country,iso_code,year,co2_per_capita
0,Afghanistan,AFG,1949,0.002
1,Afghanistan,AFG,1950,0.011
2,Afghanistan,AFG,1951,0.012
3,Afghanistan,AFG,1952,0.012
4,Afghanistan,AFG,1953,0.013
...,...,...,...,...
23703,Zimbabwe,ZWE,2015,0.881
23704,Zimbabwe,ZWE,2016,0.771
23705,Zimbabwe,ZWE,2017,0.720
23706,Zimbabwe,ZWE,2018,0.785


In [37]:
# CO2 per capita over time, one coloured line per country (all countries at once).
fig4 = px.line(
    per_capita_clean,
    x="year",
    y="co2_per_capita",
    color='country',
    hover_name='country',
    hover_data=['country', 'co2_per_capita'],
    labels={'country': 'Country', 'co2_per_capita': 'Co2 Per Capita'},
    height=600,
)

# Centred title, legend hidden.
fig4.update_layout(title="Co2 per Capita of Countries", title_x=0.50, showlegend=False)
fig4.update(layout_coloraxis_showscale=True)

fig4.show()

This graph shows the CO2 per capita of all the countries in a single plot, which makes it hard to interpret. We therefore focus on only 10 countries again, using the same logic as above.

In [38]:
# Make sure co2_per_capita is stored as float. astype returns a new object, so
# the result has to be assigned back; the original call discarded it.
per_capita_clean = per_capita_clean.astype({'co2_per_capita': float})
per_capita_clean

Unnamed: 0,country,iso_code,year,co2_per_capita
0,Afghanistan,AFG,1949,0.002
1,Afghanistan,AFG,1950,0.011
2,Afghanistan,AFG,1951,0.012
3,Afghanistan,AFG,1952,0.012
4,Afghanistan,AFG,1953,0.013
...,...,...,...,...
23703,Zimbabwe,ZWE,2015,0.881
23704,Zimbabwe,ZWE,2016,0.771
23705,Zimbabwe,ZWE,2017,0.720
23706,Zimbabwe,ZWE,2018,0.785


We now use the same approach to choose the 10 countries with the highest CO2 per capita. We cannot simply reuse the top 10 list obtained before: the countries with the highest total emissions differ from the countries with the highest CO2 per capita, because the per-capita figure depends on the population of the country.

In [39]:
# Names defined by the original cell, kept so they stay available.
capita_countries = per_capita_clean['country'].unique()

group_countries = per_capita_clean.groupby('country')

# Highest co2_per_capita of every country in one grouped pass.
# sort=False keeps the countries in order of first appearance, i.e. the order
# of capita_countries.
max_capita_by_country = (
    per_capita_clean
    .groupby('country', sort=False)['co2_per_capita']
    .max()
)

# Parallel lists consumed by the next cell.
list_capita_country = max_capita_by_country.index.tolist()
list_capitas = max_capita_by_country.tolist()

print(list_capitas)

[0.402, 2.885, 3.988, 8.061, 1.642, 11.556, 19.637, 4.693, 5.824, 27.933, 19.276, 9.594, 8.327, 49.283, 40.348, 0.627, 5.758, 12.311, 14.262, 2.129, 0.678, 12.794, 2.237, 1.968, 88.84, 8.065, 3.279, 2.584, 7.806, 10.186, 0.212, 0.051, 0.972, 0.693, 18.705, 1.234, 0.115, 0.09, 4.649, 7.096, 2.092, 0.297, 1.088, 4.303, 1.807, 5.696, 3.463, 8.047, 18.269, 0.251, 14.24, 1.132, 2.58, 2.549, 2.709, 2.595, 1.121, 10.486, 0.392, 23.583, 0.148, 16.215, 2.536, 13.9, 9.957, 3.342, 10.917, 0.253, 5.668, 14.252, 0.581, 10.305, 14.436, 2.816, 1.167, 0.249, 0.242, 3.053, 0.303, 1.121, 6.524, 8.542, 12.299, 1.916, 2.282, 9.402, 5.632, 12.391, 10.398, 8.622, 4.327, 10.246, 3.544, 17.448, 0.379, 0.702, 74.98, 5.649, 4.577, 7.321, 4.409, 1.208, 1.107, 17.379, 7.168, 10.238, 41.105, 0.224, 0.135, 8.073, 3.14, 0.178, 7.793, 2.764, 1.725, 3.869, 5.961, 6.891, 20.35, 4.195, 13.19, 1.972, 0.391, 0.486, 1.749, 17.913, 0.486, 13.292, 29.864, 9.086, 0.949, 0.147, 1.009, 5.496, 10.657, 7.206, 9.797, 16.778, 1.166

In [106]:
# One row per country: its name and its highest co2_per_capita.
max_cap_df = pd.DataFrame({'country': list_capita_country, 'co2_per_capita': list_capitas})

# To print every row instead of the truncated view, wrap the print in:
#with pd.option_context('display.max_rows', None, 'display.max_columns', None):
print(max_cap_df)

         country  co2_per_capita
0    Afghanistan           0.402
1        Albania           2.885
2        Algeria           3.988
3        Andorra           8.061
4         Angola           1.642
..           ...             ...
199    Venezuela          17.031
200      Vietnam           2.568
201        Yemen           1.939
202       Zambia           1.635
203     Zimbabwe           1.895

[204 rows x 2 columns]


In [41]:
# The ten rows with the highest per-capita maximum, largest first.
max_capita_10 = max_cap_df.nlargest(n=10, columns='co2_per_capita')

# Their country names as a plain Python list.
top_10_capitas = max_capita_10['country'].unique().tolist()
top_10_capitas

['United Arab Emirates',
 'Qatar',
 'Bonaire Sint Eustatius and Saba',
 'Kuwait',
 'Bahamas',
 'Luxembourg',
 'Bahrain',
 'Trinidad and Tobago',
 'New Caledonia',
 'Aruba']

In [42]:
# Collect the per-capita history of each top-10 country, in the order of
# top_10_capitas, and stack the pieces with a single concat.
# (DataFrame.append was removed in pandas 2.0, and growing a frame inside a
# loop copies it on every iteration.)
capita_frames = [per_capita_clean[per_capita_clean['country'] == name] for name in top_10_capitas]

# pd.concat raises on an empty list, so fall back to an empty frame as before.
capitas_10_countries = pd.concat(capita_frames) if capita_frames else pd.DataFrame()

In [43]:
# Replace the inherited row labels with a fresh 0..n-1 index, discarding the old one.
capitas_10_countries = capitas_10_countries.reset_index(drop=True)
capitas_10_countries

Unnamed: 0,country,iso_code,year,co2_per_capita
0,United Arab Emirates,ARE,1959,0.126
1,United Arab Emirates,ARE,1960,0.119
2,United Arab Emirates,ARE,1961,0.109
3,United Arab Emirates,ARE,1962,0.164
4,United Arab Emirates,ARE,1963,0.176
...,...,...,...,...
802,Aruba,ABW,2015,8.632
803,Aruba,ABW,2016,8.410
804,Aruba,ABW,2017,8.724
805,Aruba,ABW,2018,8.898


In [96]:
# CO2 per capita over time for the ten countries with the highest per-capita maximum.
fig5 = px.line(
    capitas_10_countries,
    x="year",
    y="co2_per_capita",
    color='country',
    hover_name='country',
    hover_data=['country', 'co2_per_capita'],
    labels={'country': 'Country', 'co2_per_capita': 'Co2 per capita'},
    height=600,
)

# Centred title, legend hidden.
fig5.update_layout(title="Co2 per Capita for Top 10 Countries", title_x=0.50, showlegend=False)
fig5.update(layout_coloraxis_showscale=True)

fig5.show()

#fig5.write_html("D:\Downloads\GHG emissions HTML Plots\CO2 per Capita of TOP 10 countries.html")

In the above graph we see that the countries with the highest CO2 per capita are mainly those that produce large amounts of fossil fuels such as crude oil. These include countries in the Middle East, which have very small populations compared with the amount of fossil fuels they extract. In 1969 the UAE had the highest CO2 per capita, with 101 tonnes per person. The highest recorded CO2 per capita for Qatar was in 1963.

In [45]:
# Store the year as text. The comparison below is a plain string comparison,
# which is chronological as long as the text is in ISO 'YYYY-MM-DD' form
# (presumably what the parsed dates render as - confirm).
top_10_country['year'] = top_10_country['year'].astype(str)

# The former pd.to_datetime(..., errors='ignore') call was dropped: its result
# was never assigned, so it had no effect, and errors='ignore' is deprecated.

# Keep only the rows strictly after 1 January 1900.
top_10_country = top_10_country.loc[top_10_country['year'] > '1900-01-01' ]

# Q.1.3 For visualizing how much they have emitted overtime

Filtering the timeline to be later than the year 1900

In [46]:
# Store the year as text. The comparison below is a plain string comparison,
# which is chronological as long as the text is in ISO 'YYYY-MM-DD' form
# (presumably what the parsed dates render as - confirm).
co2_dfc['year'] = co2_dfc['year'].astype(str)

# The former pd.to_datetime(..., errors='ignore') call was dropped: its result
# was never assigned, so it had no effect, and errors='ignore' is deprecated.

# Rows strictly after 1 January 1900, used for the animated map.
co2_dfc3 = co2_dfc.loc[co2_dfc['year'] > '1900-01-01' ]

In [97]:
# Total emission per (country, year), ordered by year so the animation frames
# play in ascending order.
emissions_by_country_year = (
    co2_dfc3
    .groupby(['country' , 'year'])['Annual CO2 emissions (tonnes )']
    .sum()
    .reset_index()
    .sort_values(by=['year'], ascending=True)
)

# Animated world map: one frame per year, countries coloured by emission.
fig6 = px.choropleth(
    emissions_by_country_year,
    locations='country',
    locationmode='country names',
    color='Annual CO2 emissions (tonnes )',
    color_continuous_scale='Spectral_r',
    height=800,
    animation_frame='year',
    animation_group='country',
)

fig6.update_layout(title=' Co2 emissions by all the countries overtime')
fig6.show()

#fig6.write_html("D:\Downloads\GHG emissions HTML Plots\CO2 emmisions animation map.html")

The animated map depicts the change in co2 in tonnes over time from the year 1900  to 2020. The countries which are in blue show a relatively low emission of co2 in comparison to the countries in red such as USA.

# Q.4.1 Which sector contributes most to the GHG emissions

In [81]:
# Load the sector-wise GHG emissions CSV; the 'year' column is parsed as dates.
# NOTE(review): absolute, machine-specific path.
ghg_emissions = pd.read_csv("D:\\Downloads\\global-ghg-data.csv",parse_dates=['year'])

In [82]:
# Bare expression: not the last line of the cell, so nothing is displayed for it.
ghg_emissions
# dtype of the land-use column (this is the value the cell displays).
ghg_emissions['land-use-change-forestry'].dtype

dtype('float64')

Replacing the values which are less than zero with zero, as we are only focusing on the contribution of these sectors to emissions.

In [107]:
# Replace every negative numeric value with zero, writing the result back to
# ghg_emissions explicitly. This uses only public API and does not depend on
# a returned sub-frame being a writable view of the parent. Missing values
# are left as they are.
numeric_cols = ghg_emissions.select_dtypes(include='number').columns
ghg_emissions[numeric_cols] = ghg_emissions[numeric_cols].clip(lower=0)

# Numeric part of the frame, kept under the name the original cell defined.
num = ghg_emissions[numeric_cols]

#with pd.option_context('display.max_rows', None, 'display.max_columns', None):
print(ghg_emissions)

          country iso_code       year  agriculture  land-use-change-forestry  \
0     Afghanistan      AFG 1990-01-01    8070000.0                       0.0   
1     Afghanistan      AFG 1991-01-01    8400000.0                       0.0   
2     Afghanistan      AFG 1992-01-01    8410000.0                       0.0   
3     Afghanistan      AFG 1993-01-01    8490000.0                       0.0   
4     Afghanistan      AFG 1994-01-01    8520000.0                       0.0   
...           ...      ...        ...          ...                       ...   
5650     Zimbabwe      ZWE 2014-01-01   10190000.0                11490000.0   
5651     Zimbabwe      ZWE 2015-01-01   11470000.0                11610000.0   
5652     Zimbabwe      ZWE 2016-01-01   10540000.0                87400000.0   
5653     Zimbabwe      ZWE 2017-01-01   10780000.0                87290000.0   
5654     Zimbabwe      ZWE 2018-01-01   11150000.0                87380000.0   

          waste  industry  manufacturin

Choosing the data of the entire world as our focus, as we want to see the cumulative contribution of each sector to emissions

In [85]:
# World-level rows only, re-indexed from 0 with the old row labels discarded.
is_world = ghg_emissions['country'] == 'World'
ghg_world = ghg_emissions[is_world].reset_index(drop=True)
ghg_world

Unnamed: 0,country,iso_code,year,agriculture,land-use-change-forestry,waste,industry,manufacturing-and-construction,transport,electricity,buildings,fugitive-emissions,other-fuel,aviation-and-shipping
0,World,OWID_WRL,1990-01-01,4997830000.0,1909290000.0,1364400000.0,1010440000,3955390000.0,4609040000.0,8596170000.0,2596850000.0,2253380000.0,1353120000.0,630860000.0
1,World,OWID_WRL,1991-01-01,4988460000.0,1909290000.0,1395180000.0,1014310000,3875910000.0,4648550000.0,8735040000.0,2622040000.0,2261860000.0,1362830000.0,638250000.0
2,World,OWID_WRL,1992-01-01,4966820000.0,1909290000.0,1418280000.0,1030400000,3743480000.0,4753490000.0,8894600000.0,2514520000.0,2165360000.0,1274100000.0,675220000.0
3,World,OWID_WRL,1993-01-01,4936150000.0,1909290000.0,1444390000.0,1044440000,3694800000.0,4794310000.0,8945800000.0,2574500000.0,2129690000.0,1293130000.0,667470000.0
4,World,OWID_WRL,1994-01-01,4981350000.0,1909320000.0,1470960000.0,1151620000,3711410000.0,4891400000.0,9042500000.0,2492940000.0,2063050000.0,1262920000.0,693780000.0
5,World,OWID_WRL,1995-01-01,5038180000.0,1915150000.0,1476510000.0,1225100000,3937600000.0,5024900000.0,9231220000.0,2566840000.0,2073700000.0,1214260000.0,717670000.0
6,World,OWID_WRL,1996-01-01,5057350000.0,1711370000.0,1478310000.0,1277110000,3827310000.0,5232370000.0,9586390000.0,2637750000.0,2106340000.0,1154630000.0,739580000.0
7,World,OWID_WRL,1997-01-01,4986720000.0,2681330000.0,1474360000.0,1319750000,3852360000.0,5309680000.0,9853190000.0,2615100000.0,2091650000.0,1203740000.0,765100000.0
8,World,OWID_WRL,1998-01-01,5042290000.0,2011150000.0,1466280000.0,1323480000,3854060000.0,5431290000.0,10080660000.0,2463090000.0,2076100000.0,1180800000.0,789130000.0
9,World,OWID_WRL,1999-01-01,5098820000.0,1819850000.0,1464470000.0,1329140000,3694740000.0,5592430000.0,10134660000.0,2533910000.0,2089090000.0,1191820000.0,830320000.0


In [54]:
# Number of missing values in each column of the world-level frame.
ghg_world.isna().sum()

country                           0
iso_code                          0
year                              0
agriculture                       0
land-use-change-forestry          0
waste                             0
industry                          0
manufacturing-and-construction    0
transport                         0
electricity                       0
buildings                         0
fugitive-emissions                0
other-fuel                        0
aviation-and-shipping             0
dtype: int64

In [98]:
# Sector columns drawn as separate lines.
world_sectors = [
    'land-use-change-forestry', 'waste', 'industry', 'transport',
    'manufacturing-and-construction', 'electricity', 'buildings',
    'fugitive-emissions', 'other-fuel', 'aviation-and-shipping',
]

# Global GHG emissions over time, one line per sector.
fig_world = px.line(
    ghg_world,
    x='year',
    y=world_sectors,
    hover_data=['value'],
    labels={'value': 'GHG emissions'},
    height=600,
)

# Centred title, y-axis label, legend hidden.
fig_world.update_layout(
    title="Sector wise contribution to the Global GHG Emissions",
    yaxis_title="GHG emissions in Tonnes",
    title_x=0.50,
    showlegend=False,
)
fig_world.update(layout_coloraxis_showscale=True)

fig_world.show()

#fig_world.write_html("D:\Downloads\GHG emissions HTML Plots\Sector Wise Distribution of GHG Emissions.html")

This graph is essential, as it indicates where most of the GHG emissions come from. We could generate the same graph for a particular country, if that country wants to reduce the emissions coming from a particular sector, but it is more sensible to look at the sector-wise behaviour from a global perspective. We see that heat and electricity are the largest contributors to the emissions, while the smallest contribution comes from aviation and shipping. For an individual country the graph may look different, as some countries specialise in a particular sector more than others, as shown below:

In [89]:
# Rows for China only, re-indexed from 0 with the old row labels discarded.
is_china = ghg_emissions['country'] == 'China'
ghg_china = ghg_emissions[is_china].reset_index(drop=True)
ghg_china

Unnamed: 0,country,iso_code,year,agriculture,land-use-change-forestry,waste,industry,manufacturing-and-construction,transport,electricity,buildings,fugitive-emissions,other-fuel,aviation-and-shipping
0,China,CHN,1990-01-01,590560000.0,0.0,194710000.0,94350000,745200000.0,94150000.0,725360000.0,384680000.0,156910000.0,236350000.0,5640000.0
1,China,CHN,1991-01-01,600810000.0,0.0,199460000.0,112600000,778830000.0,100620000.0,794330000.0,383270000.0,162830000.0,239200000.0,7260000.0
2,China,CHN,1992-01-01,605370000.0,0.0,204210000.0,135160000,807800000.0,111100000.0,877750000.0,360030000.0,168760000.0,232920000.0,11270000.0
3,China,CHN,1993-01-01,592670000.0,0.0,208960000.0,157020000,861540000.0,125700000.0,997650000.0,370430000.0,174680000.0,237710000.0,9810000.0
4,China,CHN,1994-01-01,612720000.0,0.0,213710000.0,180080000,903380000.0,116350000.0,1082830000.0,341370000.0,180600000.0,246340000.0,10830000.0
5,China,CHN,1995-01-01,671870000.0,0.0,205550000.0,204110000,1070170000.0,127370000.0,1185700000.0,365700000.0,194050000.0,242320000.0,11170000.0
6,China,CHN,1996-01-01,714640000.0,0.0,197380000.0,219900000,968660000.0,169850000.0,1330330000.0,313800000.0,207490000.0,180820000.0,12420000.0
7,China,CHN,1997-01-01,646420000.0,0.0,189200000.0,237400000,961130000.0,146130000.0,1328110000.0,352620000.0,220940000.0,229700000.0,10130000.0
8,China,CHN,1998-01-01,669110000.0,0.0,181030000.0,253390000,1023820000.0,140290000.0,1409580000.0,298690000.0,234380000.0,240760000.0,10640000.0
9,China,CHN,1999-01-01,689040000.0,0.0,172860000.0,277780000,871490000.0,157360000.0,1432760000.0,305360000.0,247830000.0,246800000.0,10860000.0


In [99]:
# Sector columns drawn as separate lines.
china_sectors = [
    'land-use-change-forestry', 'waste', 'industry', 'transport',
    'manufacturing-and-construction', 'electricity', 'buildings',
    'fugitive-emissions', 'other-fuel', 'aviation-and-shipping',
]

# China's GHG emissions over time, one line per sector.
fig_china = px.line(
    ghg_china,
    x='year',
    y=china_sectors,
    hover_data=['value'],
    labels={'value': 'GHG emissions'},
    height=600,
)

# Centred title, y-axis label, legend hidden.
fig_china.update_layout(
    title="Sector wise contribution to the Chinese GHG Emissions",
    yaxis_title="GHG emissions in Tonnes",
    title_x=0.50,
    showlegend=False,
)
fig_china.update(layout_coloraxis_showscale=True)

fig_china.show()

#fig_china.write_html("D:\Downloads\GHG emissions HTML Plots\Sectorwise distribution of GHG in China.html")

This shows the sector-wise contribution to GHG emissions in China. Here, contrary to the world graph, the electricity and manufacturing sectors dominate the GHG emissions.

# Q.4.2 Does transport contribute more or less than electricity

To show the contribution of transport and electricity we can plot a comparative histogram of them

In [100]:
# The two sectors compared side by side.
compared_sectors = ['transport', 'electricity']

# Grouped bars per year for transport and electricity (world-level data).
fig_comp = px.histogram(
    ghg_world,
    x="year",
    y=compared_sectors,
    hover_data=['value'],
    labels={'value': 'GHG emissions'},
    barmode='group',
    height=400,
)

# Centred title and y-axis label.
comp_layout = dict(
    title="Comparison of contribution to the Global GHG emissions by Electricity and Transport Sector",
    yaxis_title="GHG emissions in Tonnes",
    title_x=0.50,
)
fig_comp.update_layout(**comp_layout)
fig_comp.update(layout_coloraxis_showscale=True)

fig_comp.show()

#fig_comp.write_html("D:\Downloads\GHG emissions HTML Plots\Electricity vs Transport.html")

We see that electricity always accounts for more co2 than the transport sector over the years.

# Q.4.3 How large are agriculture and land use emissions

For answering this question, we plot a stacked bar plot of land use and agriculture

In [101]:
# The two sectors drawn in each yearly bar.
stacked_sectors = ['land-use-change-forestry', 'agriculture']

# Yearly bars for land use and agriculture (world-level data).
fig_stack = px.bar(
    ghg_world,
    x="year",
    y=stacked_sectors,
    hover_data=['value'],
    labels={'value': 'GHG emissions'},
    height=400,
    width=900,
)

# Centred title and y-axis label.
stack_layout = dict(
    title="Size of GHG emission contributions from Agro and Land use sector",
    yaxis_title="GHG emissions in Tonnes",
    title_x=0.50,
)
fig_stack.update_layout(**stack_layout)
fig_stack.update(layout_coloraxis_showscale=True)

fig_stack.show()

#fig_stack.write_html("D:\Downloads\GHG emissions HTML Plots\Agro and land use.html")

Emissions from the agriculture and land-use sectors were highest in 1997, at a staggering 4.98 billion tonnes and 2.6 billion tonnes respectively.

# Q.5.1 and 5.2 
# How much energy do we use per unit of GDP? 
# How much carbon do we emit per unit of energy?

In [58]:
# Load the global CO2 dataset; the 'year' column is parsed into datetimes
# so it can be compared against dates and used as a time axis later on.
gdp_df = pd.read_csv(
    "D:\\Downloads\\global-co2-data.csv",
    parse_dates=['year'],
)
gdp_df

Unnamed: 0,iso_code,country,year,co2,co2_growth_prct,co2_growth_abs,consumption_co2,trade_co2,trade_co2_share,co2_per_capita,...,ghg_per_capita,methane,methane_per_capita,nitrous_oxide,nitrous_oxide_per_capita,primary_energy_consumption,energy_per_capita,energy_per_gdp,population,gdp
0,AFG,Afghanistan,1949-01-01,0.015,,,,,,0.002,...,,,,,,,,,7663783.0,
1,AFG,Afghanistan,1950-01-01,0.084,475.000,0.070,,,,0.011,...,,,,,,,,,7752000.0,1.949480e+10
2,AFG,Afghanistan,1951-01-01,0.092,8.696,0.007,,,,0.012,...,,,,,,,,,7840000.0,2.006385e+10
3,AFG,Afghanistan,1952-01-01,0.092,,,,,,0.012,...,,,,,,,,,7936000.0,2.074235e+10
4,AFG,Afghanistan,1953-01-01,0.106,16.000,0.015,,,,0.013,...,,,,,,,,,8040000.0,2.201546e+10
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
23703,ZWE,Zimbabwe,2015-01-01,12.170,1.653,0.198,13.308,1.138,9.350,0.881,...,4.885,11.87,0.859,6.68,0.484,,,,13815000.0,2.503057e+10
23704,ZWE,Zimbabwe,2016-01-01,10.815,-11.139,-1.356,12.171,1.356,12.542,0.771,...,4.703,11.92,0.850,6.55,0.467,,,,14030000.0,2.515176e+10
23705,ZWE,Zimbabwe,2017-01-01,10.247,-5.251,-0.568,11.774,1.527,14.902,0.720,...,,,,,,,,,14237000.0,
23706,ZWE,Zimbabwe,2018-01-01,11.341,10.674,1.094,12.815,1.475,13.006,0.785,...,,,,,,,,,14439000.0,


Focusing on India for this particular question

In [59]:
# Keep only the rows whose 'country' is India (boolean-mask selection).
gdp_df = gdp_df.loc[gdp_df['country'].eq('India')]
gdp_df

Unnamed: 0,iso_code,country,year,co2,co2_growth_prct,co2_growth_abs,consumption_co2,trade_co2,trade_co2_share,co2_per_capita,...,ghg_per_capita,methane,methane_per_capita,nitrous_oxide,nitrous_oxide_per_capita,primary_energy_consumption,energy_per_capita,energy_per_gdp,population,gdp
10133,IND,India,1858-01-01,0.395,,,,,,0.002,...,,,,,,,,,2.032771e+08,
10134,IND,India,1859-01-01,0.638,61.344,0.242,,,,0.003,...,,,,,,,,,2.041200e+08,
10135,IND,India,1860-01-01,0.644,1.042,0.007,,,,0.003,...,,,,,,,,,2.049663e+08,
10136,IND,India,1861-01-01,0.498,-22.680,-0.146,,,,0.002,...,,,,,,,,,2.058179e+08,2.270000e+11
10137,IND,India,1862-01-01,0.551,10.667,0.053,,,,0.003,...,,,,,,,,,2.066748e+08,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10279,IND,India,2015-01-01,2253.429,3.159,68.997,2067.349,-186.079,-8.258,1.720,...,2.425,656.28,0.501,247.61,0.189,8022.117,6123.043,1.135,1.310152e+09,7.070000e+12
10280,IND,India,2016-01-01,2392.360,6.165,138.931,2180.245,-212.115,-8.866,1.806,...,2.443,663.63,0.501,247.43,0.187,8364.905,6315.438,1.106,1.324517e+09,7.560000e+12
10281,IND,India,2017-01-01,2456.848,2.696,64.488,2252.484,-204.364,-8.318,1.835,...,,,,,,,,,1.338677e+09,
10282,IND,India,2018-01-01,2591.324,5.474,134.476,2354.795,-236.529,-9.128,1.916,...,,,,,,,,,1.352642e+09,


In [108]:
# Keep only observations dated strictly after 1 January 1964.
gdp_df = gdp_df[gdp_df['year'].gt('1964-01-01')]

Selecting the useful columns

In [61]:
# Keep the four columns needed for the plot and renumber the rows from 0.
# drop=True discards the old index directly, so there is no temporary
# 'index' column to delete afterwards and nothing is mutated in place.
gdp_df = (
    gdp_df[['country', 'year', 'co2_per_unit_energy', 'energy_per_gdp']]
    .reset_index(drop=True)
)
gdp_df

Unnamed: 0,country,year,co2_per_unit_energy,energy_per_gdp
0,India,1965-01-01,0.251,0.991
1,India,1966-01-01,0.253,1.036
2,India,1967-01-01,0.245,1.011
3,India,1968-01-01,0.251,1.089
4,India,1969-01-01,0.23,1.177
5,India,1970-01-01,0.241,1.128
6,India,1971-01-01,0.245,1.144
7,India,1972-01-01,0.246,1.175
8,India,1973-01-01,0.249,1.115
9,India,1974-01-01,0.242,1.19


Plotting CO2 per unit energy and energy per GDP on a single chart with two y-axes

In [102]:
from plotly.subplots import make_subplots

# Dual-axis line chart for India: CO2 per unit energy is drawn against the
# left (primary) y-axis and energy per unit GDP against the right (secondary)
# y-axis, so each series keeps its own scale.
fig_co2 = make_subplots(specs=[[{"secondary_y": True}]])

# Primary (left) axis trace
fig_co2.add_trace(
    go.Scatter(x=gdp_df['year'], y=gdp_df['co2_per_unit_energy'], name="Co2 per unit energy"),
    secondary_y=False,
)

# Secondary (right) axis trace
fig_co2.add_trace(
    go.Scatter(x=gdp_df['year'], y=gdp_df['energy_per_gdp'], name="Energy per unit gdp"),
    secondary_y=True,
)

fig_co2.update_layout(title="Energy per GDP and Co2 per unit energy for India", title_x=0.50)

# Label every axis so the reader can tell which series belongs to which scale.
fig_co2.update_xaxes(title_text="Year")
fig_co2.update_yaxes(title_text="Co2 per unit energy", secondary_y=False)
fig_co2.update_yaxes(title_text="Energy per unit gdp", secondary_y=True)

fig_co2.show()

# Optional: export the interactive chart as a standalone HTML file
# (raw string so the Windows backslashes are taken literally).
#fig_co2.write_html(r"D:\Downloads\GHG emissions HTML Plots\Energy per gdp and co2 per unit energy.html")

This plot shows energy per GDP against the right y-axis and CO2 per unit energy against the left y-axis, each with its own scale.