## Import Libraries

In [17]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.graph_objects as plo

## Load & Explore Data

In [2]:
# Load the energy-use CSV export into a DataFrame and preview its first ten rows.
energy_csv = "Datasets/energy_use_data_11-29-2021.csv"
df_energy = pd.read_csv(energy_csv)
df_energy.head(n=10)

Unnamed: 0,Domain Code,Domain,Area Code (ISO3),Area,Element Code,Element,Item Code,Item,Year Code,Year,Unit,Value,Flag,Flag Description
0,GN,Energy Use,AFG,Afghanistan,7273,Emissions (CO2),6801,Gas-Diesel oil,1990,1990,kilotonnes,231.4918,F,FAO estimate
1,GN,Energy Use,AFG,Afghanistan,7273,Emissions (CO2),6801,Gas-Diesel oil,1991,1991,kilotonnes,188.5317,F,FAO estimate
2,GN,Energy Use,AFG,Afghanistan,7273,Emissions (CO2),6801,Gas-Diesel oil,1992,1992,kilotonnes,47.9904,F,FAO estimate
3,GN,Energy Use,AFG,Afghanistan,7273,Emissions (CO2),6801,Gas-Diesel oil,1993,1993,kilotonnes,38.6116,F,FAO estimate
4,GN,Energy Use,AFG,Afghanistan,7273,Emissions (CO2),6801,Gas-Diesel oil,1994,1994,kilotonnes,31.4465,F,FAO estimate
5,GN,Energy Use,AFG,Afghanistan,7273,Emissions (CO2),6801,Gas-Diesel oil,1995,1995,kilotonnes,28.7303,F,FAO estimate
6,GN,Energy Use,AFG,Afghanistan,7273,Emissions (CO2),6801,Gas-Diesel oil,1996,1996,kilotonnes,26.1157,F,FAO estimate
7,GN,Energy Use,AFG,Afghanistan,7273,Emissions (CO2),6801,Gas-Diesel oil,1997,1997,kilotonnes,22.6852,F,FAO estimate
8,GN,Energy Use,AFG,Afghanistan,7273,Emissions (CO2),6801,Gas-Diesel oil,1998,1998,kilotonnes,21.3114,F,FAO estimate
9,GN,Energy Use,AFG,Afghanistan,7273,Emissions (CO2),6801,Gas-Diesel oil,1999,1999,kilotonnes,19.8071,F,FAO estimate


In [3]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
df_energy.describe()

Unnamed: 0,Element Code,Item Code,Year Code,Year,Value
count,46131.0,46131.0,46131.0,46131.0,46131.0
mean,7273.0,6803.57172,1998.988814,1998.988814,863.132722
std,0.0,2.916637,13.111035,13.111035,5274.730687
min,7273.0,6800.0,1970.0,1970.0,0.0
25%,7273.0,6801.0,1990.0,1990.0,3.37075
50%,7273.0,6804.0,2000.0,2000.0,21.4899
75%,7273.0,6805.0,2010.0,2010.0,165.7289
max,7273.0,6809.0,2019.0,2019.0,197674.5593


In [5]:
# Per-column dtype and non-null count, plus the frame's memory footprint.
df_energy.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 46131 entries, 0 to 46130
Data columns (total 14 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   Domain Code       46131 non-null  object 
 1   Domain            46131 non-null  object 
 2   Area Code (ISO3)  46131 non-null  object 
 3   Area              46131 non-null  object 
 4   Element Code      46131 non-null  int64  
 5   Element           46131 non-null  object 
 6   Item Code         46131 non-null  int64  
 7   Item              46131 non-null  object 
 8   Year Code         46131 non-null  int64  
 9   Year              46131 non-null  int64  
 10  Unit              46131 non-null  object 
 11  Value             46131 non-null  float64
 12  Flag              46131 non-null  object 
 13  Flag Description  46131 non-null  object 
dtypes: float64(1), int64(4), object(9)
memory usage: 4.9+ MB


In [6]:
# (number of rows, number of columns) of the loaded frame.
df_energy.shape

(46131, 14)

## Analyze Data

In [4]:
# How many rows exist for each distinct value of the Item column.
df_energy['Item'].value_counts()

Item
Motor Gasoline                       8756
Gas-Diesel oil                       8160
Liquefied petroleum gas (LPG)        7431
Fuel oil                             6418
Electricity                          6061
Coal                                 4304
Natural gas (including LNG)          3787
Gas-diesel oils used in fisheries     747
Fuel oil used in fisheries            467
Name: count, dtype: int64

In [9]:
# How many rows exist for each distinct value of the Area column.
df_energy['Area'].value_counts()

Area
Japan                       407
Chile                       407
China                       396
Denmark                     395
New Zealand                 392
                           ... 
Sudan                        32
Turks and Caicos Islands     30
Liechtenstein                30
South Sudan                  24
Isle of Man                  18
Name: count, Length: 229, dtype: int64

In [10]:
# Row counts per Item within each Area, i.e. which items each area reports and how often.
df_energy.groupby('Area')['Item'].value_counts()

Area         Item                         
Afghanistan  Electricity                      50
             Motor Gasoline                   50
             Coal                             30
             Gas-Diesel oil                   30
             Natural gas (including LNG)      30
                                              ..
Zimbabwe     Gas-Diesel oil                   44
             Liquefied petroleum gas (LPG)    44
             Motor Gasoline                   44
             Coal                             43
             Fuel oil                         41
Name: count, Length: 1285, dtype: int64

In [8]:
# Total number of missing cells in the whole frame: per-column null counts, summed again.
df_energy.isnull().sum().sum()

0

## Prepare Data

In [16]:
# Keep only the 2019 rows for the four countries being compared.
country = ["China", "Germany", "Japan", "United States of America"]
in_scope = df_energy["Area"].isin(country) & df_energy["Year"].eq(2019)
energy = df_energy.loc[in_scope].reset_index(drop=True)

# Relabel the long country name with the shorter "United States".
energy["Area"] = energy["Area"].replace({"United States of America": "United States"})

# One row per (Area, Item) holding the mean Value, then each Area's total
# joined back on. Both frames have a "Value" column, so the merge yields
# "Value_x" (per item) and "Value_y" (per-area total).
energy = energy.groupby(["Area", "Item"], as_index=False)["Value"].mean()
totals = energy.groupby("Area", as_index=False)["Value"].sum()
energy = energy.merge(totals, on="Area")

# Each item's share of its area's total, as a percentage rounded to 2 decimals.
energy["%"] = (energy["Value_x"] / energy["Value_y"] * 100).round(2)
energy.head(7)

Unnamed: 0,Area,Item,Value_x,Value_y,%
0,China,Coal,56612.4024,217671.2249,26.01
1,China,Electricity,105472.7129,217671.2249,48.46
2,China,Fuel oil,80.9078,217671.2249,0.04
3,China,Gas-Diesel oil,47328.8167,217671.2249,21.74
4,China,Liquefied petroleum gas (LPG),231.0721,217671.2249,0.11
5,China,Motor Gasoline,7648.5285,217671.2249,3.51
6,China,Natural gas (including LNG),296.7845,217671.2249,0.14


## Data Visualization

In [48]:
# Bubble chart: one marker per (Area, Item) row of `energy`, positioned by
# country (x) and energy item (y), sized and coloured by the "%" column.
title = "<b>CO2 Emissions in 2019</b><br><sup>Per Top 4 Countries and Energy Industries</sup>"

# Plotly documents CSS colour names without spaces; the earlier "dark grey"
# spelling is not one of them, so use the valid name "darkgrey" to make sure
# the background the white/yellow text was chosen for is actually applied.
bg_color = "darkgrey"

layout = plo.Layout(
    width=880,
    height=600,
    plot_bgcolor=bg_color,
    paper_bgcolor=bg_color,
    showlegend=False,
    title={'text': title, 'x': 0.5, 'xanchor': 'center'},
    font={"color": 'white'},
)
fig = plo.Figure(layout=layout)

fig.add_trace(
    plo.Scatter(
        x=energy["Area"],
        y=energy["Item"],
        mode='markers',
        # <extra></extra> suppresses the secondary hover box with the trace name.
        hovertemplate=(
            "Country: %{x}<br>"
            "Industry: %{y}<br>"
            "CO2 Emissions: %{marker.size:,}%"
            "<extra></extra>"
        ),
        marker=dict(
            color=energy["%"],
            size=energy["%"],
            # The size is the raw percentage in pixels, so shares well below
            # 1% would be drawn as sub-pixel markers; enforce a visible floor.
            # The hover text still reports the true percentage.
            sizemin=4,
            showscale=True,
            colorbar=dict(title='%CO2<br>Emissions'),
            opacity=0.7,
            colorscale='Jet',
        ),
    )
)

# Both axes share the same grid colour and yellow tick labels; only the
# x axis draws its axis line.
fig.update_xaxes(
    showline=True,
    linewidth=0.1,
    linecolor='#c9c4c3',
    gridcolor='#c9c4c3',
    tickfont=dict(size=14, color='yellow'),
    title="Countries",
    showgrid=True,
    tickangle=0,
)
fig.update_yaxes(
    showline=False,
    linewidth=0.1,
    gridcolor='#c9c4c3',
    tickfont=dict(size=14, color='yellow'),
    title="Energies",
    showgrid=True,
)
fig.show()