### Section 1: Setup & Data Cleaning

In [2]:
# Setup & Data Cleaning
from dash import Dash, dcc, html, Input, Output
import plotly.express as px
import pandas as pd
import importlib
import constants

# Reload the constants module so the names read below reflect its current contents.
importlib.reload(constants)

# Shared configuration values taken from the reloaded module.
default_countries, electricity_cols, edu_cols = (
    constants.default_countries,
    constants.electricity_cols,
    constants.edu_cols,
)

# Load the dataset used by every section of the notebook.
df = pd.read_csv("../data/generated/filtered_dataset.csv")

# Sorted distinct values of the "Entity" column; used as checklist options.
all_countries = sorted(df["Entity"].unique())

### 📘 Section 2: GDP vs Internet Usage

In [3]:
# Keep only the columns this chart needs, drop rows with any missing value,
# and restrict to years strictly between 1990 and 2023.
gdp_inet_df_filtered = (
    df[['Entity', 'Year', 'GDP_per_capita', 'Internet_Usage_Pct']]
    .dropna()
    .loc[lambda frame: (frame['Year'] > 1990) & (frame['Year'] < 2023)]
)

app1 = Dash(__name__)
app1.title = "GDP vs Internet Usage"

# Two-column flex layout: country checklist on the left, scatter plot on the right.
app1.layout = html.Div(
    style=dict(
        display="flex",
        flexDirection="row",
        backgroundColor="#ffffff",
        color="#000000",
        fontFamily="Arial, sans-serif",
        padding="20px",
        height="auto",
        width="calc(100% - 40px)",
    ),
    children=[
        # Left column: scrollable checklist of all entities.
        html.Div(
            children=[
                html.H4("Select countries:", style=dict(marginBottom="10px")),
                dcc.Checklist(
                    id='country-selector',
                    options=[dict(label=country, value=country) for country in all_countries],
                    value=default_countries,
                    labelStyle=dict(display='block', color='#000'),
                    style=dict(
                        overflowY="auto",
                        height="calc(100vh - 100px)",
                        border="1px solid #ccc",
                        padding="10px",
                        backgroundColor="#f9f9f9",
                    ),
                ),
            ],
            style=dict(width="25%", marginRight="20px"),
        ),

        # Right column: the graph filled in by the callback.
        html.Div(
            children=[
                dcc.Graph(
                    id='scatter-graph',
                    style=dict(height="calc(100% - 100px)", width="100%"),
                ),
            ],
            style=dict(width="75%", height="calc(100vh - 100px)"),
        ),
    ],
)

@app1.callback(
    Output('scatter-graph', 'figure'),
    Input('country-selector', 'value')
)
def update_graph(selected_countries):
    """Build the animated GDP-per-capita vs internet-usage scatter plot.

    Args:
        selected_countries: Current value of the 'country-selector' checklist;
            a list of entity names, possibly empty or None.

    Returns:
        A Plotly figure: an empty placeholder with fixed axis ranges when
        nothing is selected, otherwise a scatter coloured by entity and
        animated by year.
    """
    if not selected_countries:
        # Placeholder built without data so no stray marker is drawn at the origin.
        fig = px.scatter(
            title='GDP per Capita vs Internet Usage Over Time (No countries selected)'
        )
        fig.update_layout(
            xaxis=dict(range=[0, 100], title='Internet Usage (%)'),
            yaxis=dict(range=[0, 100000], title='GDP per Capita'),
            height=600
        )
        return fig

    # Plotly Express emits animation frames in order of first appearance in the
    # data. A stable sort by year makes the frames chronological, so the frame
    # shown initially and the Play order agree with the sorted slider below.
    filtered = (
        gdp_inet_df_filtered[gdp_inet_df_filtered["Entity"].isin(selected_countries)]
        .sort_values('Year', kind='stable')
    )
    all_years = sorted(filtered['Year'].unique())
    # 10% headroom above the largest value; fixed fallback when nothing matched.
    overall_max_gdp = filtered['GDP_per_capita'].max() * 1.1 if not filtered.empty else 100000

    fig = px.scatter(
        filtered,
        x='Internet_Usage_Pct',
        y='GDP_per_capita',
        animation_frame='Year',
        color='Entity',
        title=f'GDP per Capita vs Internet Usage Over Time ({len(selected_countries)} countries)',
        hover_data=['Entity', 'Year', 'GDP_per_capita', 'Internet_Usage_Pct']  # Show these values on hover
    )

    # Uniform marker size with an outline; force a legend entry for every trace.
    fig.update_traces(
        marker=dict(
            size=15,
            line=dict(width=1, color='DarkSlateGrey')
        ),
        selector=dict(mode='markers'),
        showlegend=True
    )

    fig.update_layout(
        margin=dict(l=40, r=40, t=50, b=40),
        xaxis_title='Internet Usage (%)',
        yaxis_title='GDP per Capita',
        legend_title='Country',
        height=600,  # Fixed height in pixels
        xaxis=dict(
            range=[0, 100],  # Percent scale
            tickmode='linear',
            dtick=10  # Tick every 10%
        ),
        yaxis=dict(
            range=[0, overall_max_gdp]
        ),
        hoverlabel=dict(
            bgcolor="white",
            font_size=12,
            font_family="Arial"
        ),
        # Replaces the slider generated by Plotly Express with a custom one.
        sliders=[{
            "active": 0,
            "yanchor": "top",
            "xanchor": "left",
            "currentvalue": {
                "font": {"size": 16},
                "prefix": "Year: ",
                "visible": True,
                "xanchor": "right"
            },
            "transition": {"duration": 300, "easing": "cubic-in-out"},
            "pad": {"b": 10, "t": 50},
            "len": 0.9,
            "x": 0.1,
            "y": 0,
            "steps": [
                {
                    # Frames are addressed by name, and frame names are strings,
                    # so pass the same string form that is used for the label.
                    "args": [[str(year)], {
                        "frame": {"duration": 300, "redraw": True},
                        "mode": "immediate",
                    }],
                    "label": str(year),
                    "method": "animate"
                } for year in all_years
            ]
        }]
    )

    # Pin the y-axis range in every frame so it stays fixed during animation.
    for frame in fig.frames:
        frame.layout.yaxis.range = [0, overall_max_gdp]

    return fig


# Start the app1 server on port 8050 with Dash debug mode disabled.
app1.run(debug=False, port=8050)

### Section 3: Technology vs Education level (GDP)

In [4]:
# Columns used by this section; rows with a missing value in any of them are dropped.
gdp_edu_df_filtered = df.loc[
    :,
    [
        'Entity',
        'Year',
        'GDP_per_capita',
        'Internet_Usage_Pct',
        'Electricity_Access_Pct',
        'Pop_PostSec_Pct',
    ],
].dropna()

app2 = Dash(__name__)
app2.title = "Technology vs Education level"

# Two-column flex layout: country checklist on the left, scatter plot on the right.
app2.layout = html.Div(
    style=dict(
        display="flex",
        flexDirection="row",
        backgroundColor="#ffffff",
        color="#000000",
        fontFamily="Arial, sans-serif",
        padding="20px",
        height="auto",
        width="calc(100% - 40px)",
    ),
    children=[
        # Left column: scrollable checklist of all entities.
        html.Div(
            children=[
                html.H4("Select countries:", style=dict(marginBottom="10px")),
                dcc.Checklist(
                    id='country-selector',
                    options=[dict(label=country, value=country) for country in all_countries],
                    value=default_countries,
                    labelStyle=dict(display='block', color='#000'),
                    style=dict(
                        overflowY="auto",
                        height="calc(100vh - 100px)",
                        border="1px solid #ccc",
                        padding="10px",
                        backgroundColor="#f9f9f9",
                    ),
                ),
            ],
            style=dict(width="25%", marginRight="20px"),
        ),

        # Right column: the graph filled in by the callback.
        html.Div(
            children=[
                dcc.Graph(
                    id='scatter-graph',
                    style=dict(height="calc(100% - 100px)", width="100%"),
                ),
            ],
            style=dict(width="75%", height="calc(100vh - 100px)"),
        ),
    ],
)

@app2.callback(
    Output('scatter-graph', 'figure'),
    Input('country-selector', 'value')
)
def update_graph(selected_countries):
    """Build the animated higher-education vs internet-usage bubble chart.

    Marker area encodes GDP per capita, colour encodes the entity, and the
    animation steps through years.

    Args:
        selected_countries: Current value of the 'country-selector' checklist;
            a list of entity names, possibly empty or None.

    Returns:
        A Plotly figure: an empty placeholder with fixed axis ranges when
        nothing is selected, otherwise the animated bubble chart.
    """
    if not selected_countries:
        # Placeholder built without data so no stray marker is drawn at the origin.
        fig = px.scatter(
            title='GDP per Capita vs Education level'
        )
        fig.update_layout(
            xaxis=dict(range=[0, 100], title='Higher Education (%)'),
            yaxis=dict(range=[0, 100], title='Internet Access (%)'),
            height=600
        )
        return fig

    # Plotly Express emits animation frames in order of first appearance in the
    # data. A stable sort by year makes the frames chronological, so the frame
    # shown initially and the Play order agree with the sorted slider below.
    filtered = (
        gdp_edu_df_filtered[gdp_edu_df_filtered["Entity"].isin(selected_countries)]
        .sort_values('Year', kind='stable')
    )
    all_years = sorted(filtered['Year'].unique())

    # NOTE(review): the title mentions GDP per capita, but the axes show higher
    # education vs internet usage (GDP is the bubble size) - confirm the wording.
    fig = px.scatter(
        filtered,
        x='Pop_PostSec_Pct',
        y='Internet_Usage_Pct',
        size="GDP_per_capita",
        animation_frame='Year',
        color='Entity',
        title=f'GDP per Capita vs Higher Education Level ({len(selected_countries)} countries)',
        hover_data=['Entity', 'Year', 'Internet_Usage_Pct', 'Pop_PostSec_Pct', 'GDP_per_capita']
    )

    # Only add an outline here. Setting a fixed marker size would overwrite the
    # GDP-based sizes on the initial traces while the animation frames kept
    # theirs, making the bubbles change meaning once the animation starts.
    fig.update_traces(
        marker=dict(
            line=dict(width=1, color='DarkSlateGrey')
        ),
        selector=dict(mode='markers'),
        showlegend=True
    )

    fig.update_layout(
        margin=dict(l=40, r=40, t=50, b=40),
        xaxis_title='Higher Education (% of population)',
        yaxis_title='Internet usage (% of population)',
        legend_title='Country',
        height=600,
        xaxis=dict(
            range=[0, 100],
            tickmode='linear',
            dtick=10
        ),
        yaxis=dict(
            range=[0, 100],
            tickmode='linear',
            dtick=10
        ),
        hoverlabel=dict(
            bgcolor="white",
            font_size=12,
            font_family="Arial"
        ),
        # Replaces the slider generated by Plotly Express with a custom one.
        sliders=[{
            "active": 0,
            "yanchor": "top",
            "xanchor": "left",
            "currentvalue": {
                "font": {"size": 16},
                "prefix": "Year: ",
                "visible": True,
                "xanchor": "right"
            },
            "transition": {"duration": 300, "easing": "cubic-in-out"},
            "pad": {"b": 10, "t": 50},
            "len": 0.9,
            "x": 0.1,
            "y": 0,
            "steps": [
                {
                    # Frames are addressed by name, and frame names are strings,
                    # so pass the same string form that is used for the label.
                    "args": [[str(year)], {
                        "frame": {"duration": 300, "redraw": True},
                        "mode": "immediate",
                    }],
                    "label": str(year),
                    "method": "animate"
                } for year in all_years
            ]
        }]
    )

    return fig


# Start the app2 server on port 8051 with Dash debug mode disabled.
app2.run(debug=False, port = 8051)

### Section 4: World Map

In [5]:
# Restrict to years strictly between 1990 and 2023. The explicit copy makes the
# column assignments below operate on an independent frame instead of a slice
# of the previously loaded one (which triggers SettingWithCopyWarning).
df = df[(df['Year'] > 1990) & (df['Year'] < 2023)].copy()

# Missing generation values are treated as zero generation.
df[electricity_cols] = df[electricity_cols].fillna(0)

# Total electricity per row and each source's percentage share of that total.
# Rows with a zero total produce NaN shares (0 / 0).
df['Total_Electricity'] = df[electricity_cols].sum(axis=1)
for col in electricity_cols:
    df[f'{col}_pct'] = (df[col] / df['Total_Electricity'] * 100).round(1)

# Dominant source = the column holding the row maximum, with the
# '_Electricity_TWh' part of the column name removed. When every source is
# zero, idxmax would simply return the first column, so those rows are
# labelled 'No data' rather than being attributed to an arbitrary source.
df['Dominant_Source'] = (
    df[electricity_cols]
    .idxmax(axis=1, skipna=False)
    .str.replace('_Electricity_TWh', '', regex=False)
    .where(df['Total_Electricity'] > 0, 'No data')
)
df['Dominant_Source_Pct'] = (df[electricity_cols].max(axis=1, skipna=False) / df['Total_Electricity'] * 100).round(1)

# Sorted list of the entities remaining after the year filter.
all_countries = sorted(df['Entity'].unique())

app = Dash(__name__)
app.title = "Global Electricity Generation Analysis"

# Two-column flex layout: country checklist on the left, tabbed graphs on the right.
app.layout = html.Div(
    style=dict(
        display="flex",
        flexDirection="row",
        backgroundColor="#ffffff",
        color="#000000",
        fontFamily="Arial, sans-serif",
        padding="20px",
        height="auto",
        width="calc(100% - 40px)",
    ),
    children=[
        # Left column: scrollable checklist of all entities.
        html.Div(
            children=[
                html.H4("Select countries:", style=dict(marginBottom="10px")),
                dcc.Checklist(
                    id='country-selector',
                    options=[dict(label=country, value=country) for country in all_countries],
                    value=default_countries,
                    labelStyle=dict(display='block', color='#000'),
                    style=dict(
                        overflowY="auto",
                        height="calc(100vh - 100px)",
                        border="1px solid #ccc",
                        padding="10px",
                        backgroundColor="#f9f9f9",
                    ),
                ),
            ],
            style=dict(width="25%", marginRight="20px"),
        ),

        # Right column: one tab per figure, each holding a single graph that
        # is filled in by its own callback.
        html.Div(
            children=[
                dcc.Tabs([
                    dcc.Tab(label=tab_label, children=[dcc.Graph(id=graph_id)])
                    for tab_label, graph_id in (
                        ('Dominant Source', 'dominant-source-map'),
                        ('Source Comparison', 'source-comparison'),
                        ('Time Series', 'time-series'),
                    )
                ]),
            ],
            style=dict(width="75%", height="calc(100vh - 100px)"),
        ),
    ],
)

@app.callback(
    Output('dominant-source-map', 'figure'),
    Input('country-selector', 'value')
)
def update_dominant_map(selected_countries):
    """Build a choropleth of the dominant electricity source per selected country.

    Args:
        selected_countries: Current value of the 'country-selector' checklist;
            a list of entity names, possibly empty or None.

    Returns:
        A Plotly figure: a titled empty figure when nothing is selected or no
        rows match, otherwise a choropleth coloured by 'Dominant_Source' for
        the most recent year present among the selected countries.
    """
    # Guard first: Series.isin rejects None, and the sibling callbacks show the
    # same prompt for an empty selection.
    if not selected_countries:
        return px.scatter(title="Please select at least one country")

    # Use the latest year available for the selection (as the source-comparison
    # callback does) instead of the latest year in the whole dataset, so the map
    # is not empty when the selected countries lack data for that global year.
    selected = df[df['Entity'].isin(selected_countries)]
    latest_year = selected['Year'].max()
    filtered = selected[selected['Year'] == latest_year].copy()

    if filtered.empty:
        return px.scatter(title="No data available for selected countries")

    fig = px.choropleth(
        filtered,
        locations='Entity',
        locationmode='country names',
        color='Dominant_Source',
        hover_name='Entity',
        # ':.1f' is a valid d3-format specifier; the previous ':.1f%' was not.
        hover_data={f'{col}_pct': ':.1f' for col in electricity_cols},
        color_discrete_map={
            'Coal': 'black',
            'Gas': 'orange',
            'Oil': 'brown',
            'Nuclear': 'red',
            'Hydro': 'blue',
            'Wind': 'green',
            'Solar': 'yellow',
            'Bioenergy': 'darkgreen'
        },
        title=f'Dominant Electricity Source by Country ({latest_year})',
        projection='natural earth'
    )

    fig.update_layout(
        margin={"r":0,"t":40,"l":0,"b":0},
        height=600
    )
    return fig

@app.callback(
    Output('source-comparison', 'figure'),
    Input('country-selector', 'value')
)
def update_source_comparison(selected_countries):
    """Build a stacked bar chart of each selected country's electricity mix.

    Args:
        selected_countries: Current value of the 'country-selector' checklist;
            a list of entity names, possibly empty or None.

    Returns:
        A Plotly figure: a titled empty figure when nothing is selected or no
        rows match, otherwise stacked bars of the per-source percentage shares
        for the most recent year present among the selected countries.
    """
    if not selected_countries:
        return px.scatter(title="Please select at least one country")

    filtered = df[df['Entity'].isin(selected_countries)].copy()
    latest_year = filtered['Year'].max()
    filtered = filtered[filtered['Year'] == latest_year]

    if filtered.empty:
        return px.scatter(title="No data available for selected countries")

    # Long format: one row per (entity, source) pair for the stacked bars.
    melted = filtered.melt(
        id_vars=['Entity'],
        value_vars=[f'{col}_pct' for col in electricity_cols],
        var_name='Source',
        value_name='Percentage'
    )
    # Reduce '<Source>_Electricity_TWh_pct' to '<Source>'. Removing only '_pct'
    # left the '_Electricity_TWh' part in place, so the labels never matched the
    # keys of color_discrete_map and the intended colours were not applied.
    melted['Source'] = (
        melted['Source']
        .str.replace('_pct', '', regex=False)
        .str.replace('_Electricity_TWh', '', regex=False)
    )

    fig = px.bar(
        melted,
        x='Entity',
        y='Percentage',
        color='Source',
        title=f'Electricity Generation by Country ({latest_year})',
        labels={'Percentage': 'Percentage of Total Generation'},
        color_discrete_map={
            'Coal': 'black',
            'Gas': 'orange',
            'Oil': 'brown',
            'Nuclear': 'red',
            'Hydro': 'blue',
            'Wind': 'green',
            'Solar': 'yellow',
            'Bioenergy': 'darkgreen'
        }
    )

    fig.update_layout(
        barmode='stack',
        height=600,
        xaxis_title='Country',
        yaxis_title='Percentage (%)',
        yaxis=dict(range=[0, 100])
    )
    return fig
@app.callback(
    Output('time-series', 'figure'),
    Input('country-selector', 'value')
)
def update_time_series(selected_countries):
    """Build a line chart of total electricity generation over time.

    Args:
        selected_countries: Current value of the 'country-selector' checklist.

    Returns:
        A Plotly figure: a titled empty figure when nothing is selected or no
        rows match, otherwise one line per entity of 'Total_Electricity'
        against 'Year'.
    """
    if not selected_countries:
        return px.scatter(title="Please select at least one country")

    in_selection = df['Entity'].isin(selected_countries)
    chosen = df.loc[in_selection].copy()

    if chosen.empty:
        return px.scatter(title="No data available for selected countries")

    # Hover shows the identifying columns, the total, and every per-source share.
    share_columns = [f'{col}_pct' for col in electricity_cols]
    hover_columns = ['Entity', 'Year', 'Total_Electricity', *share_columns]

    fig = px.line(
        chosen,
        x='Year',
        y='Total_Electricity',
        color='Entity',
        title='Total Electricity Generation Over Time',
        labels={'Total_Electricity': 'Electricity Generation (TWh)'},
        hover_data=hover_columns,
    )

    fig.update_layout(
        height=600,
        xaxis_title='Year',
        yaxis_title='Total Generation (TWh)',
        hovermode='x unified',
    )

    return fig


# Start the electricity-analysis app server on port 8053 with Dash debug mode disabled.
app.run(debug=False, port=8053)