### Section 1: Setup & Data Cleaning

In [38]:
# Setup & Data Cleaning
from dash import Dash, dcc, html, Input, Output
import plotly.express as px
import pandas as pd
import importlib
import constants
import plotly.graph_objects as go

# Reload the local constants module so the values read below come from a
# fresh import of constants.py.
constants = importlib.reload(constants)

# Shared configuration used by the cells that follow.
default_countries, electricity_cols, edu_cols = (
    constants.default_countries,
    constants.electricity_cols,
    constants.edu_cols,
)

# Load the pre-filtered dataset produced earlier in the pipeline.
df = pd.read_csv("../data/generated/filtered_dataset.csv")

# Alphabetical list of every entity in the dataset (feeds the country pickers).
all_countries = sorted(pd.unique(df["Entity"]))

First, we import all the required libraries along with our own constants.py module. Then we read filtered_dataset.csv into a DataFrame.

### 📘 Section 2: GDP vs Internet Usage

In [23]:
# Keep only rows where both plotted metrics are present, restricted to years
# strictly between 1990 and 2023.
gdp_inet_df_filtered = (
    df.loc[:, ['Entity', 'Year', 'GDP_per_capita', 'Internet_Usage_Pct']]
    .dropna()
    .loc[lambda frame: (frame['Year'] > 1990) & (frame['Year'] < 2023)]
)

app1 = Dash(__name__)
app1.title = "GDP vs Internet Usage"

# Two-column page: country checklist on the left, chart on the right.
app1.layout = html.Div(
    children=[
        # Left column: one checkbox per country, pre-ticked with the defaults.
        html.Div(
            children=[
                html.H4("Select countries:", style=dict(marginBottom="10px")),
                dcc.Checklist(
                    id='country-selector',
                    options=[dict(label=name, value=name) for name in all_countries],
                    value=default_countries,
                    labelStyle=dict(display='block', color='#000'),
                    style=dict(
                        overflowY="auto",
                        height="calc(100vh - 100px)",
                        border="1px solid #ccc",
                        padding="10px",
                        backgroundColor="#f9f9f9",
                    ),
                ),
            ],
            style=dict(width="25%", marginRight="20px"),
        ),
        # Right column: the figure filled in by the callback.
        html.Div(
            children=[
                dcc.Graph(
                    id='scatter-graph',
                    style=dict(height="calc(100% - 100px)", width="100%"),
                ),
            ],
            style=dict(width="75%", height="calc(100vh - 100px)"),
        ),
    ],
    style=dict(
        display="flex",
        flexDirection="row",
        backgroundColor="#ffffff",
        color="#000000",
        fontFamily="Arial, sans-serif",
        padding="20px",
        height="auto",
        width="calc(100% - 40px)",
    ),
)

@app1.callback(
    Output('scatter-graph', 'figure'),
    Input('country-selector', 'value')
)
def update_graph(selected_countries):
    """Build the animated GDP-per-capita vs internet-usage scatter plot.

    Args:
        selected_countries: Country names ticked in the checklist; may be
            empty or None when nothing is ticked.

    Returns:
        A Plotly figure with one animation frame per year for the selected
        countries, or a placeholder figure with fixed axes when no country
        is selected.
    """
    # `not` covers both None and an empty list.
    if not selected_countries:
        fig = px.scatter(
            x=[0], y=[0],
            title='GDP per Capita vs Internet Usage Over Time (No countries selected)'
        )
        fig.update_layout(
            xaxis=dict(range=[0, 100], title='Internet Usage (%)'),
            yaxis=dict(range=[0, 100000], title='GDP per Capita'),
            height=600
        )
        return fig

    filtered = gdp_inet_df_filtered[gdp_inet_df_filtered["Entity"].isin(selected_countries)]
    all_years = sorted(filtered['Year'].unique().tolist())
    # Headroom of 10% above the largest value; fixed fallback when no rows match.
    overall_max_gdp = filtered['GDP_per_capita'].max() * 1.1 if not filtered.empty else 100000

    fig = px.scatter(
        filtered,
        x='Internet_Usage_Pct',
        y='GDP_per_capita',
        animation_frame='Year',
        # Without an explicit order, frames follow the order in which years
        # first appear in the data, which can disagree with the sorted slider
        # steps below (wrong initial frame, out-of-order playback).
        category_orders={'Year': all_years},
        color='Entity',
        title=f'GDP per Capita vs Internet Usage Over Time ({len(selected_countries)} countries)',
        hover_data=['Entity', 'Year', 'GDP_per_capita', 'Internet_Usage_Pct']
    )

    # Fixed-size, outlined markers; keep every country in the legend.
    fig.update_traces(
        marker=dict(
            size=15,
            line=dict(width=1, color='DarkSlateGrey')
        ),
        selector=dict(mode='markers'),
        showlegend=True
    )

    fig.update_layout(
        margin=dict(l=40, r=40, t=50, b=40),
        xaxis_title='Internet Usage (%)',
        yaxis_title='GDP per Capita',
        legend_title='Country',
        height=600,
        xaxis=dict(
            range=[0, 100],  # Internet usage is a percentage
            tickmode='linear',
            dtick=10
        ),
        yaxis=dict(
            range=[0, overall_max_gdp]
        ),
        hoverlabel=dict(
            bgcolor="white",
            font_size=12,
            font_family="Arial"
        ),
        sliders=[{
            "active": 0,
            "yanchor": "top",
            "xanchor": "left",
            "currentvalue": {
                "font": {"size": 16},
                "prefix": "Year: ",
                "visible": True,
                "xanchor": "right"
            },
            "transition": {"duration": 300, "easing": "cubic-in-out"},
            "pad": {"b": 10, "t": 50},
            "len": 0.9,
            "x": 0.1,
            "y": 0,
            "steps": [
                {
                    # Plotly Express names each frame str(<frame value>).
                    "args": [[str(year)], {
                        "frame": {"duration": 300, "redraw": True},
                        "mode": "immediate",
                    }],
                    "label": str(year),
                    "method": "animate"
                } for year in all_years
            ]
        }]
    )

    # Pin the y-axis in every frame so it does not rescale during playback.
    for frame in fig.frames:
        frame.layout.yaxis.range = [0, overall_max_gdp]

    return fig


# Start the Dash server for app1 on port 8050 with debug mode disabled.
app1.run(debug=False, port=8050)

Next, we plot internet usage (% of population) against GDP per capita and add a time dimension, so that we can track how the relationship between the two changes every 2 years from 1991 to 2021.

### Section 3: Technology vs Education level (GDP)

In [24]:
# Keep only rows where every metric used by this section is present.
gdp_edu_df_filtered = df.loc[
    :,
    ['Entity', 'Year', 'GDP_per_capita', 'Internet_Usage_Pct', 'Electricity_Access_Pct', 'Pop_PostSec_Pct'],
].dropna()

app2 = Dash(__name__)
app2.title = "Technology vs Education level"

# Two-column page: country checklist on the left, chart on the right.
app2.layout = html.Div(
    children=[
        # Left column: one checkbox per country, pre-ticked with the defaults.
        html.Div(
            children=[
                html.H4("Select countries:", style=dict(marginBottom="10px")),
                dcc.Checklist(
                    id='country-selector',
                    options=[dict(label=name, value=name) for name in all_countries],
                    value=default_countries,
                    labelStyle=dict(display='block', color='#000'),
                    style=dict(
                        overflowY="auto",
                        height="calc(100vh - 100px)",
                        border="1px solid #ccc",
                        padding="10px",
                        backgroundColor="#f9f9f9",
                    ),
                ),
            ],
            style=dict(width="25%", marginRight="20px"),
        ),
        # Right column: the figure filled in by the callback.
        html.Div(
            children=[
                dcc.Graph(
                    id='scatter-graph',
                    style=dict(height="calc(100% - 100px)", width="100%"),
                ),
            ],
            style=dict(width="75%", height="calc(100vh - 100px)"),
        ),
    ],
    style=dict(
        display="flex",
        flexDirection="row",
        backgroundColor="#ffffff",
        color="#000000",
        fontFamily="Arial, sans-serif",
        padding="20px",
        height="auto",
        width="calc(100% - 40px)",
    ),
)

@app2.callback(
    Output('scatter-graph', 'figure'),
    Input('country-selector', 'value')
)
def update_graph(selected_countries):
    """Build the animated higher-education vs internet-usage bubble chart.

    Bubble size encodes GDP per capita; one animation frame is produced per
    year.

    Args:
        selected_countries: Country names ticked in the checklist; may be
            empty or None when nothing is ticked.

    Returns:
        A Plotly figure for the selected countries, or a placeholder figure
        with fixed 0-100 axes when no country is selected.
    """
    # `not` covers both None and an empty list.
    if not selected_countries:
        fig = px.scatter(
            x=[0], y=[0],
            title='GDP per Capita vs Education level'
        )
        fig.update_layout(
            xaxis=dict(range=[0, 100], title='Higher Education (%)'),
            yaxis=dict(range=[0, 100], title='Internet Access (%)'),
            height=600
        )
        return fig

    filtered = gdp_edu_df_filtered[gdp_edu_df_filtered["Entity"].isin(selected_countries)]
    all_years = sorted(filtered['Year'].unique().tolist())

    fig = px.scatter(
        filtered,
        x='Pop_PostSec_Pct',
        y='Internet_Usage_Pct',
        size="GDP_per_capita",
        animation_frame='Year',
        # Without an explicit order, frames follow the order in which years
        # first appear in the data, which can disagree with the sorted slider
        # steps below (wrong initial frame, out-of-order playback).
        category_orders={'Year': all_years},
        color='Entity',
        title=f'GDP per Capita vs Higher Education Level ({len(selected_countries)} countries)',
        hover_data=['Entity', 'Year', 'Internet_Usage_Pct', 'Pop_PostSec_Pct', 'GDP_per_capita']
    )

    # Outline the bubbles. marker.size is deliberately NOT set here: a fixed
    # size would overwrite the GDP-based sizes on the initial traces while the
    # animation frames kept them, so the bubbles would change size on play.
    fig.update_traces(
        marker=dict(
            line=dict(width=1, color='DarkSlateGrey')
        ),
        selector=dict(mode='markers'),
        showlegend=True
    )

    fig.update_layout(
        margin=dict(l=40, r=40, t=50, b=40),
        xaxis_title='Higher Education (% of population)',
        yaxis_title='Internet usage (% of population)',
        legend_title='Country',
        height=600,
        # Both axes are percentages, so pin them to 0-100 with ticks every 10.
        xaxis=dict(
            range=[0, 100],
            tickmode='linear',
            dtick=10
        ),
        yaxis=dict(
            range=[0, 100],
            tickmode='linear',
            dtick=10
        ),
        hoverlabel=dict(
            bgcolor="white",
            font_size=12,
            font_family="Arial"
        ),
        sliders=[{
            "active": 0,
            "yanchor": "top",
            "xanchor": "left",
            "currentvalue": {
                "font": {"size": 16},
                "prefix": "Year: ",
                "visible": True,
                "xanchor": "right"
            },
            "transition": {"duration": 300, "easing": "cubic-in-out"},
            "pad": {"b": 10, "t": 50},
            "len": 0.9,
            "x": 0.1,
            "y": 0,
            "steps": [
                {
                    # Plotly Express names each frame str(<frame value>).
                    "args": [[str(year)], {
                        "frame": {"duration": 300, "redraw": True},
                        "mode": "immediate",
                    }],
                    "label": str(year),
                    "method": "animate"
                } for year in all_years
            ]
        }]
    )

    return fig


# Start the Dash server for app2 on port 8051 with debug mode disabled.
app2.run(debug=False, port = 8051) 

Next, we build the same kind of animated plot to see how higher education and internet usage correlate over time.

### Section 4: World Map

In [25]:
# Treat missing generation values as 0 TWh so sums and maxima are defined.
df[electricity_cols] = df[electricity_cols].fillna(0)
# .copy() gives an independent frame, so the column assignments below do not
# write to a slice of the original (avoids pandas' SettingWithCopyWarning).
df = df[(df['Year'] > 1990) & (df['Year'] < 2023)].copy()

# Calculate total electricity and each source's percentage share of it.
# Rows whose total is 0 yield NaN shares (0 / 0).
df['Total_Electricity'] = df[electricity_cols].sum(axis=1)
for col in electricity_cols:
    df[f'{col}_pct'] = (df[col] / df['Total_Electricity'] * 100).round(1)

# Source with the largest generation per row, with the column suffix removed.
df['Dominant_Source'] = df[electricity_cols].idxmax(axis=1, skipna=False)
df['Dominant_Source'] = df['Dominant_Source'].str.replace('_Electricity_TWh', '', regex=False)
# When every source is 0, idxmax just returns the first column, which would
# mislabel rows that have no generation data at all; mark them explicitly.
df.loc[df['Total_Electricity'] <= 0, 'Dominant_Source'] = 'No data'
df['Dominant_Source_Pct'] = (df[electricity_cols].max(axis=1, skipna=False) / df['Total_Electricity'] * 100).round(1)

# Alphabetical list of the entities remaining after the year filter.
all_countries = sorted(df['Entity'].unique())

app = Dash(__name__)
app.title = "Global Electricity Generation Analysis"

# Two-column page: country checklist on the left, three tabbed charts on the right.
app.layout = html.Div(
    children=[
        # Left column: one checkbox per country, pre-ticked with the defaults.
        html.Div(
            children=[
                html.H4("Select countries:", style=dict(marginBottom="10px")),
                dcc.Checklist(
                    id='country-selector',
                    options=[dict(label=name, value=name) for name in all_countries],
                    value=default_countries,
                    labelStyle=dict(display='block', color='#000'),
                    style=dict(
                        overflowY="auto",
                        height="calc(100vh - 100px)",
                        border="1px solid #ccc",
                        padding="10px",
                        backgroundColor="#f9f9f9",
                    ),
                ),
            ],
            style=dict(width="25%", marginRight="20px"),
        ),
        # Right column: one tab per chart, each filled in by its own callback.
        html.Div(
            children=[
                dcc.Tabs(children=[
                    dcc.Tab(
                        label='Dominant Source',
                        children=[dcc.Graph(id='dominant-source-map')],
                    ),
                    dcc.Tab(
                        label='Source Comparison',
                        children=[dcc.Graph(id='source-comparison')],
                    ),
                    dcc.Tab(
                        label='Time Series',
                        children=[dcc.Graph(id='time-series')],
                    ),
                ]),
            ],
            style=dict(width="75%", height="calc(100vh - 100px)"),
        ),
    ],
    style=dict(
        display="flex",
        flexDirection="row",
        backgroundColor="#ffffff",
        color="#000000",
        fontFamily="Arial, sans-serif",
        padding="20px",
        height="auto",
        width="calc(100% - 40px)",
    ),
)

@app.callback(
    Output('dominant-source-map', 'figure'),
    Input('country-selector', 'value')
)
def update_dominant_map(selected_countries):
    """Draw a world map coloring each selected country by its dominant source.

    Args:
        selected_countries: Country names ticked in the checklist; may be
            empty or None when nothing is ticked.

    Returns:
        A choropleth for the most recent year available among the selected
        countries, or an empty titled figure when nothing is selected or no
        rows match.
    """
    # Same guard as the other callbacks of this app; also avoids calling
    # isin(None), which pandas rejects.
    if not selected_countries:
        return px.scatter(title="Please select at least one country")

    # Use the latest year present for the selection (as the source-comparison
    # chart does) rather than the latest year in the whole dataset, which the
    # selected countries may not have.
    selected_rows = df[df['Entity'].isin(selected_countries)]
    latest_year = selected_rows['Year'].max()
    filtered = selected_rows[selected_rows['Year'] == latest_year].copy()

    if filtered.empty:
        return px.scatter(title="No data available for selected countries")

    fig = px.choropleth(
        filtered,
        locations='Entity',
        locationmode='country names',
        color='Dominant_Source',
        hover_name='Entity',
        # Shares are already on a 0-100 scale, so format as plain one-decimal
        # numbers. ':.1f%' is not a valid d3-format specifier.
        hover_data={f'{col}_pct': ':.1f' for col in electricity_cols},
        color_discrete_map={
            'Coal': 'black',
            'Gas': 'orange',
            'Oil': 'brown',
            'Nuclear': 'red',
            'Hydro': 'blue',
            'Wind': 'green',
            'Solar': 'yellow',
            'Bioenergy': 'darkgreen'
        },
        title=f'Dominant Electricity Source by Country ({latest_year})',
        projection='natural earth'
    )

    fig.update_layout(
        margin={"r":0,"t":40,"l":0,"b":0},
        height=600
    )
    return fig

@app.callback(
    Output('source-comparison', 'figure'),
    Input('country-selector', 'value')
)
def update_source_comparison(selected_countries):
    """Draw a stacked bar chart of each selected country's generation mix.

    Args:
        selected_countries: Country names ticked in the checklist; may be
            empty or None when nothing is ticked.

    Returns:
        A stacked bar chart of percentage shares per source for the most
        recent year available among the selected countries, or an empty
        titled figure when nothing is selected or no rows match.
    """
    if not selected_countries:
        return px.scatter(title="Please select at least one country")

    filtered = df[df['Entity'].isin(selected_countries)].copy()
    latest_year = filtered['Year'].max()
    filtered = filtered[filtered['Year'] == latest_year]

    if filtered.empty:
        return px.scatter(title="No data available for selected countries")

    # Long format: one row per (country, source) pair for the stacked bars.
    melted = filtered.melt(
        id_vars=['Entity'],
        value_vars=[f'{col}_pct' for col in electricity_cols],
        var_name='Source',
        value_name='Percentage'
    )
    # Reduce e.g. 'Coal_Electricity_TWh_pct' to 'Coal'. Stripping only '_pct'
    # left the '_Electricity_TWh' suffix in place, so none of the keys in
    # color_discrete_map below ever matched.
    melted['Source'] = (
        melted['Source']
        .str.replace('_pct', '', regex=False)
        .str.replace('_Electricity_TWh', '', regex=False)
    )

    fig = px.bar(
        melted,
        x='Entity',
        y='Percentage',
        color='Source',
        title=f'Electricity Generation by Country ({latest_year})',
        labels={'Percentage': 'Percentage of Total Generation'},
        color_discrete_map={
            'Coal': 'black',
            'Gas': 'orange',
            'Oil': 'brown',
            'Nuclear': 'red',
            'Hydro': 'blue',
            'Wind': 'green',
            'Solar': 'yellow',
            'Bioenergy': 'darkgreen'
        }
    )

    fig.update_layout(
        barmode='stack',
        height=600,
        xaxis_title='Country',
        yaxis_title='Percentage (%)',
        yaxis=dict(range=[0, 100])
    )
    return fig
@app.callback(
    Output('time-series', 'figure'),
    Input('country-selector', 'value')
)
def update_time_series(selected_countries):
    """Plot total electricity generation per year, one line per selected country.

    Returns an empty titled figure when nothing is selected or when no rows
    match the selection.
    """
    # Nothing ticked: return a placeholder instead of a chart.
    if not selected_countries:
        return px.scatter(title="Please select at least one country")

    country_rows = df[df['Entity'].isin(selected_countries)].copy()
    if country_rows.empty:
        return px.scatter(title="No data available for selected countries")

    # Hover shows the identifying fields, the total, and every source share.
    share_columns = [f'{col}_pct' for col in electricity_cols]
    hover_columns = ['Entity', 'Year', 'Total_Electricity'] + share_columns

    fig = px.line(
        country_rows,
        x='Year',
        y='Total_Electricity',
        color='Entity',
        title='Total Electricity Generation Over Time',
        labels=dict(Total_Electricity='Electricity Generation (TWh)'),
        hover_data=hover_columns,
    )

    fig.update_layout(
        hovermode='x unified',
        yaxis_title='Total Generation (TWh)',
        xaxis_title='Year',
        height=600,
    )

    return fig


# Start the Dash server for this app on port 8053 with debug mode disabled.
app.run(debug=False, port=8053)

Next, we build an interactive dashboard in which we can select countries and explore three views. The first is a world map that color-codes each selected country by its dominant electricity source. The second is a source-comparison chart showing what percentage of each country's generation comes from each source. The third shows the total generation of electricity over time for each country, which increases for most countries.

In [28]:
# Re-read the dataset from disk into a fresh frame for this section.
el_co2_df = pd.read_csv("../data/generated/filtered_dataset.csv")

el_co2_df['Total_pop'] = el_co2_df['Urban_pop'] + el_co2_df['Rural_pop']
# .copy() gives an independent frame, so the column assignments below do not
# write to a slice of the original (avoids pandas' SettingWithCopyWarning).
el_co2_df = el_co2_df[(el_co2_df['Year'] > 1984) & (el_co2_df['Year'] < 2023)].copy()

# CO₂ per person (tonnes per person). A non-positive population is masked to
# NaN so the ratio comes out missing instead of infinite, which would break
# the axis ranges computed from it later.
el_co2_df['CO2_per_capita'] = (
    el_co2_df['CO2_Emissions_Tonnes']
    / el_co2_df['Total_pop'].where(el_co2_df['Total_pop'] > 0)
)

# Sum up renewables (hydro, wind, solar, bio, non-bio)
renew_cols = [
    'Hydro_Electricity_TWh',
    'Wind_Electricity_TWh',
    'Solar_Electricity_TWh',
    'Bioenergy_Electricity_TWh',
    'Non_Bio_Renewables_TWh'
]
fossil_nuclear = [
    'Coal_Electricity_TWh',
    'Gas_Electricity_TWh',
    'Oil_Electricity_TWh',
    'Nuclear_Electricity_TWh'
]

# Row-wise sums skip missing values; a row with no generation at all gets a
# total of 0 and therefore a NaN renewable share (0 / 0).
el_co2_df['Renewables_TWh'] = el_co2_df[renew_cols].sum(axis=1)
el_co2_df['Total_Elec_TWh'] = el_co2_df[renew_cols + fossil_nuclear].sum(axis=1)
el_co2_df['Renewables_Pct_Elec'] = el_co2_df['Renewables_TWh'] / el_co2_df['Total_Elec_TWh'] * 100

# Columns needed for plotting. Rows with a missing metric are kept on
# purpose: the charts simply skip the missing points.
plot_df = el_co2_df[['Entity', 'Year', 'CO2_per_capita', 'Renewables_Pct_Elec']].copy()

all_countries = sorted(plot_df['Entity'].unique())

# 2. Initialize Dash
app5 = Dash(__name__)
# Page title (the subscript two was stored as mis-decoded bytes before).
app5.title = "CO₂ per Capita vs Renewables %"

# 3. Layout: country checklist on the left, chart on the right.
app5.layout = html.Div(
    style={
        "display": "flex",
        "flexDirection": "row",
        "backgroundColor": "#ffffff",
        "color": "#000000",
        "fontFamily": "Arial, sans-serif",
        "padding": "20px",
        "height": "auto",
        "width": "calc(100% - 40px)"
    },
    children=[
        # Sidebar: one checkbox per country, pre-ticked with the defaults.
        html.Div([
            html.H4("Select countries:", style={"marginBottom": "10px"}),
            dcc.Checklist(
                id='country-selector',
                options=[{"label": c, "value": c} for c in all_countries],
                value=default_countries,
                labelStyle={'display': 'block', 'fontSize': '12px'},
                style={
                    "overflowY": "auto",
                    "height": "calc(100vh - 60px)",
                    "border": "1px solid #ccc",
                    "padding": "10px",
                    "backgroundColor": "#f9f9f9"
                }
            )
        ], style={"width": "20%", "marginRight": "20px"}),

        # Graph: filled in by the callback.
        html.Div([
            dcc.Graph(id='co2-renew-chart', style={"height": "100%", "width": "100%"})
        ], style={"width": "80%", "height": "100%"})
    ]
)

# 4. Callback
@app5.callback(
    Output('co2-renew-chart', 'figure'),
    Input('country-selector', 'value')
)
def update_scatter(selected_countries):
    """Build the animated CO₂-per-capita vs renewable-share scatter plot.

    Args:
        selected_countries: Country names ticked in the checklist; may be
            empty or None when nothing is ticked.

    Returns:
        A Plotly figure with one animation frame per year for the selected
        countries, or an empty titled figure when nothing is selected.
    """
    if not selected_countries:
        return px.scatter(title="No countries selected")

    def _axis_range(values):
        """Return [0, 110% of the largest finite value], or None to autorange."""
        finite = values[values.abs() != float('inf')].dropna()
        top = finite.max() if not finite.empty else None
        # No usable maximum (no rows, all missing, or non-positive): let
        # Plotly pick the range instead of passing NaN/inf/zero-width bounds.
        if top is None or not top > 0:
            return None
        return [0, top * 1.1]

    df_sub = plot_df[plot_df.Entity.isin(selected_countries)]

    # make sure years are ints and sorted
    years = sorted(df_sub['Year'].astype(int).unique())

    fig = px.scatter(
        df_sub,
        x='Renewables_Pct_Elec',
        y='CO2_per_capita',
        animation_frame='Year',
        category_orders={'Year': years},    # enforce chronological frame/slider order
        color='Entity',
        hover_data=['Entity', 'Year', 'CO2_per_capita', 'Renewables_Pct_Elec'],
        title=f'CO₂ per Capita vs Renewables % of Electricity ({len(selected_countries)} countries)'
    )

    fig.update_layout(
        xaxis=dict(
            range=_axis_range(df_sub['Renewables_Pct_Elec']),
            title='Renewables (% of total electricity)'
        ),
        yaxis=dict(
            range=_axis_range(df_sub['CO2_per_capita']),
            title='CO₂ Emissions per Capita (tonnes/person)'
        ),
        margin=dict(l=50, r=20, t=50, b=50),
        legend_title_text='Country',
        font=dict(size=11),
        height=600,
    )
    fig.update_xaxes(tickfont=dict(size=9))
    fig.update_yaxes(tickfont=dict(size=9))

    return fig

# Start the Dash server for app5 on port 8055 with debug mode disabled.
app5.run(debug=False, port=8055)


In this plot, we compare the share of electricity generated from renewables (%) with CO₂ emissions per capita.

In [36]:
app6 = Dash(__name__)
# Page title (the subscript two was stored as mis-decoded bytes before).
app6.title = "CO₂ & Renewables Over Time"

# 3. Layout: single-country dropdown on the left, chart on the right.
app6.layout = html.Div(
    style={
        "display": "flex",
        "flexDirection": "row",
        "backgroundColor": "#ffffff",
        "color": "#000000",
        "fontFamily": "Arial, sans-serif",
        "padding": "20px",
        "height": "auto",
        "width": "calc(100% - 40px)"
    }, children=[

    # Country picker; clearable=False so the selection cannot be emptied.
    html.Div(style={"width":"20%","marginRight":"20px"}, children=[
        html.Label("Select country:", style={"fontWeight":"bold"}),
        dcc.Dropdown(
            id='country-dd',
            options=[{"label":c,"value":c} for c in all_countries],
            value='United States',
            clearable=False,
            style={"width":"100%"}
        )
    ]),

    # Chart area: filled in by the callback.
    html.Div(style={"width":"80%","height":"100%"}, children=[
        dcc.Graph(id='dual-axis-chart', style={"height":"100%","width":"100%"})
    ])
])

# 4. Callback
@app6.callback(
    Output('dual-axis-chart','figure'),
    Input('country-dd','value')
)
def update_chart(country):
    """Plot CO₂ per capita and renewable share over time for one country.

    Args:
        country: Entity name chosen in the dropdown.

    Returns:
        A figure with two lines sharing the year axis: CO₂ per capita on the
        left y-axis and renewable share of electricity on the right y-axis.
    """
    # Rows for the chosen country in chronological order.
    d = plot_df[plot_df.Entity==country].sort_values('Year')
    fig = go.Figure()

    # CO2 per capita trace (left y-axis)
    fig.add_trace(go.Scatter(
        x=d.Year,
        y=d.CO2_per_capita,
        mode='lines+markers',
        name='CO₂ per Capita',
        line=dict(color='firebrick', width=2),
        marker=dict(size=6),
        yaxis='y1'
    ))

    # Renewables % trace (right y-axis)
    fig.add_trace(go.Scatter(
        x=d.Year,
        y=d.Renewables_Pct_Elec,
        mode='lines+markers',
        name='Renewables % Elec',
        line=dict(color='forestgreen', width=2, dash='dash'),
        marker=dict(size=6),
        yaxis='y2'
    ))

    fig.update_layout(
        title=f"CO₂ per Capita & Renewables % of Electricity: {country}",
        xaxis=dict(title='Year', tickmode='linear'),
        yaxis=dict(
            title='CO₂ per Capita (tonnes/person)',
            side='left',
            showgrid=False
        ),
        # Second y-axis drawn on top of the first, labelled on the right.
        yaxis2=dict(
            title='Renewables (% of total electricity)',
            overlaying='y',
            side='right',
            showgrid=False
        ),
        legend=dict(x=0.01, y=0.99, bordercolor='gray', borderwidth=1),
        margin=dict(l=50, r=60, t=50, b=40),
        height=None  # no fixed pixel height
    )

    return fig

# 5. Run: start the Dash server for app6 on port 8060 with debug mode disabled.
app6.run(debug=False, port=8060)

In the final plot, we see CO₂ emissions per capita and the renewable share of electricity (%) over time for a single country. For most countries, renewable electricity data only becomes available later, starting in the 2000s. The trend is mostly positive for both components.