### Import Libraries

In [81]:
# Standard library
import warnings

# Manipulation Libraries
import numpy as np
import pandas as pd

# Visualization Libraries (introducing plotly)
import plotly.express as px
import plotly.graph_objects as go

warnings.filterwarnings("ignore", "is_categorical_dtype")
warnings.filterwarnings("ignore", "use_inf_as_na")
pd.options.display.float_format = '{:.0f}'.format

> **Note**: The energy sector contributes about 40 percent of global emissions of CO2 ([source here](https://openknowledge.worldbank.org/handle/10986/17143?show=full#:~:text=The%20energy%20sector%20contributes%20about,energy%2Dsector%20emissions%20in%202010.)).

## I. Load & Explore Data

In [82]:
# Read the raw energy-use CSV into a DataFrame.
# NOTE(review): this is an absolute, machine-specific path, so the notebook
# only runs where this exact file exists.
raw_csv_path = r"C:\Users\franc\OneDrive\Escritorio\HPChallenge\Challenge 1\energy_use_data_11-29-2021.csv"
energy = pd.read_csv(filepath_or_buffer=raw_csv_path)

# Show the frame as the cell output
energy

Unnamed: 0,Domain Code,Domain,Area Code (ISO3),Area,Element Code,Element,Item Code,Item,Year Code,Year,Unit,Value,Flag,Flag Description
0,GN,Energy Use,AFG,Afghanistan,7273,Emissions (CO2),6801,Gas-Diesel oil,1990,1990,kilotonnes,231,F,FAO estimate
1,GN,Energy Use,AFG,Afghanistan,7273,Emissions (CO2),6801,Gas-Diesel oil,1991,1991,kilotonnes,189,F,FAO estimate
2,GN,Energy Use,AFG,Afghanistan,7273,Emissions (CO2),6801,Gas-Diesel oil,1992,1992,kilotonnes,48,F,FAO estimate
3,GN,Energy Use,AFG,Afghanistan,7273,Emissions (CO2),6801,Gas-Diesel oil,1993,1993,kilotonnes,39,F,FAO estimate
4,GN,Energy Use,AFG,Afghanistan,7273,Emissions (CO2),6801,Gas-Diesel oil,1994,1994,kilotonnes,31,F,FAO estimate
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
46126,GN,Energy Use,ZWE,Zimbabwe,7273,Emissions (CO2),6807,Electricity,2015,2015,kilotonnes,424,X,International reliable sources
46127,GN,Energy Use,ZWE,Zimbabwe,7273,Emissions (CO2),6807,Electricity,2016,2016,kilotonnes,214,X,International reliable sources
46128,GN,Energy Use,ZWE,Zimbabwe,7273,Emissions (CO2),6807,Electricity,2017,2017,kilotonnes,188,X,International reliable sources
46129,GN,Energy Use,ZWE,Zimbabwe,7273,Emissions (CO2),6807,Electricity,2018,2018,kilotonnes,218,X,International reliable sources


In [83]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns
energy.describe()

Unnamed: 0,Element Code,Item Code,Year Code,Year,Value
count,46131,46131,46131,46131,46131
mean,7273,6804,1999,1999,863
std,0,3,13,13,5275
min,7273,6800,1970,1970,0
25%,7273,6801,1990,1990,3
50%,7273,6804,2000,2000,21
75%,7273,6805,2010,2010,166
max,7273,6809,2019,2019,197675


In [84]:
# Number of rows recorded for each value of the 'Item' column
energy['Item'].value_counts()

Item
Motor Gasoline                       8756
Gas-Diesel oil                       8160
Liquefied petroleum gas (LPG)        7431
Fuel oil                             6418
Electricity                          6061
Coal                                 4304
Natural gas (including LNG)          3787
Gas-diesel oils used in fisheries     747
Fuel oil used in fisheries            467
Name: count, dtype: int64

## II. Prepare the Data

In [85]:
# Start again from the raw CSV so this cell does not depend on earlier edits to `energy`
source_csv = r"C:\Users\franc\OneDrive\Escritorio\HPChallenge\Challenge 1\energy_use_data_11-29-2021.csv"

# Keep the same countries as in Graph 1
areas_to_keep = ["China","Germany", "Japan", "United States of America"]

energy = (
    pd.read_csv(source_csv)
    # Selected countries only, and only the 2019 observations
    .loc[lambda frame: frame["Area"].isin(areas_to_keep) & (frame["Year"] == 2019)]
    .reset_index(drop=True)
    # Use the shorter label for the United States
    .assign(Area=lambda frame: frame["Area"].replace(to_replace=["United States of America"],
                                                     value=["United States"]))
    # Mean emission value per (country, energy item)
    .groupby(["Area", "Item"])["Value"]
    .mean()
    .reset_index()
)

# Attach each country's total to its rows; the default merge suffixes yield
# Value_x (per item) and Value_y (country total)
totals = energy.groupby("Area", as_index=False)["Value"].sum()
energy = energy.merge(totals, on="Area")

# Share of the country total in percent, rounded to two decimals
energy["Perc"] = (energy["Value_x"] / energy["Value_y"]).apply(
    lambda share: round(share * 100, 2)
)

# Look into the dataframe
energy.head()

Unnamed: 0,Area,Item,Value_x,Value_y,Perc
0,China,Coal,56612,217671,26
1,China,Electricity,105473,217671,48
2,China,Fuel oil,81,217671,0
3,China,Gas-Diesel oil,47329,217671,22
4,China,Liquefied petroleum gas (LPG),231,217671,0


## III. Create the Bubble Chart

In [86]:
# Chart title: bold headline with a smaller subtitle on the next line
title = "<b>CO2 Emissions in 2019</b><br><sup>Per Top 4 Countries and Energy Industries</sup>"

# Figure-level layout: fixed size, white backgrounds, centred title, no legend
layout = go.Layout(
    width=980,
    height=600,
    plot_bgcolor="white",
    paper_bgcolor="white",
    showlegend=False,
    title=dict(text=title, x=0.5, xanchor="center"),
    font=dict(color="black"),
)

# Text shown on hover; <extra></extra> hides the secondary trace-name box
hover_text = (
    "Country: %{x}<br>"
    "Industry: %{y}<br>"
    "CO2 Emissions: %{marker.size:,}%"
    "<extra></extra>"
)

# Bubble size and colour both encode the percentage column
bubble_markers = dict(
    color=energy["Perc"],
    size=energy["Perc"],
    showscale=True,
    colorbar=dict(title='%CO2<br>Emissions'),
    opacity=0.7,
    colorscale='Jet',
)

# One bubble per (country, energy item) pair
bubbles = go.Scatter(
    x=energy["Area"],
    y=energy["Item"],
    mode="markers",
    hovertemplate=hover_text,
    marker=bubble_markers,
)

# Create the figure with the scatter trace already attached
fig = go.Figure(data=[bubbles], layout=layout)

# Axis options common to both axes: light grey grid, black tick labels, no axis title
shared_axis_style = dict(
    linewidth=0.1,
    gridcolor='#c9c4c3',
    tickfont=dict(size=14, color='black'),
    title="",
    showgrid=True,
)

# The x axis additionally draws its axis line; the y axis does not
fig.update_xaxes(showline=True, linecolor='#c9c4c3', tickangle=0, **shared_axis_style)
fig.update_yaxes(showline=False, **shared_axis_style)

# Show the graph
fig.show()

In [87]:
# Reload the raw CSV: `energy` was overwritten by the 2019 per-country aggregation earlier in the notebook
energy = pd.read_csv(r"C:\Users\franc\OneDrive\Escritorio\HPChallenge\Challenge 1\energy_use_data_11-29-2021.csv")

In [88]:
# Total emission value per year and country (the ISO3 code is kept next to the country name)
group_keys = ['Year', 'Area', 'Area Code (ISO3)']
energy = energy.groupby(group_keys)['Value'].sum().reset_index()

# Convert the year to a four-digit string through a datetime round-trip
year_as_datetime = pd.to_datetime(energy["Year"], format='%Y')
energy["Year"] = year_as_datetime.dt.strftime('%Y')

energy

Unnamed: 0,Year,Area,Area Code (ISO3),Value
0,1970,Afghanistan,AFG,5
1,1970,Algeria,DZA,24
2,1970,Antigua and Barbuda,ATG,0
3,1970,Argentina,ARG,6520
4,1970,Australia,AUS,486
...,...,...,...,...
9493,2019,Viet Nam,VNM,3930
9494,2019,Wallis and Futuna Islands,WLF,1
9495,2019,Yemen,YEM,54
9496,2019,Zambia,ZMB,105


In [89]:
# Animated world map of CO2 emissions: one frame per year, one colour per country.
# Expects a DataFrame called `energy` with the columns 'Area Code (ISO3)', 'Value', 'Area' and 'Year'.
# `px` (plotly.express) is imported in the notebook's import cell.

# Chart title; the bold tag is closed with </b> so the markup is well-formed
title = '<b>CO2 emissions per Country along time</b>'

# Layout overrides applied after the figure is built: centred title, fixed size,
# white backgrounds and a frameless equirectangular map without coastlines
layout2 = dict(
    title=dict(text=title, x=0.5, xanchor='center'),
    font=dict(color='black'),
    width=980, height=600, plot_bgcolor="white", paper_bgcolor="white",
    geo=dict(showframe=False, showcoastlines=False, projection_type='equirectangular'),
)

# Create the animated choropleth map.
# The projection is set only in `layout2`: a `projection=` argument here would be
# overwritten by `geo.projection_type` when the layout is applied below.
fig2 = px.choropleth(
    energy,
    locations='Area Code (ISO3)',   # ISO3 codes identify the countries on the map
    color='Value',
    hover_name='Area',
    animation_frame='Year',
    color_continuous_scale='RdYlGn_r',
    title=title,
)

# Update the layout
fig2.update_layout(layout2)

# Show the figure
fig2.show()
