### Section 1: Setup & Data Cleaning

In [48]:
# Setup & Data Cleaning
from dash import Dash, dcc, html, Input, Output
import plotly.express as px
import pandas as pd

# Read the pre-filtered dataset produced earlier in the pipeline.
df = pd.read_csv("../data/generated/filtered_dataset.csv")

# Give the awkwardly named source columns (stray spaces, '%' signs) tidy
# identifiers. `rename` skips keys that are not present, so confirm the new
# names in the preview at the end of this cell.
df = df.rename(
    columns={
        'GDP ': 'GDP_per_capita',
        'InternetUsage%': 'Internet_Usage_Pct',
        'Electricity _access%': 'Electricity_Access_Pct',
        'CO2_Emissions': 'CO2_Emissions_Tonnes',
    }
)

# Countries ticked by default in every dashboard's checklist.
default_countries = [
    "Armenia",
    "Georgia",
    "Azerbaijan",
    "United States",
    "United Kingdom",
    "Germany",
    "Russia",
    "Japan",
    "China",
]

# Every distinct entity in the data, alphabetically, for the checklist options.
all_countries = list(df["Entity"].unique())
all_countries.sort()

# Preview the first rows to verify the load and the renames.
df.head(n=5)

Unnamed: 0,Entity,Year,GDP_per_capita,Urban_pop,Rural_pop,Pop_PostSec,Pop_Usec,Pop_Lsec,Pop_Primary,Pop_IncompPrim,...,Bioenergy_Electricity_TWh,Solar_Electricity_TWh,Wind_Electricity_TWh,Hydro_Electricity_TWh,Nuclear_Electricity_TWh,Oil_Electricity_TWh,Gas_Electricity_TWh,Coal_Electricity_TWh,Electricity_Access_Pct,CO2_Emissions_Tonnes
0,United Arab Emirates,1970,,174555.0,60944.0,10900.0,25200.0,15300.0,13700.0,22400.0,...,,,,,,,,,,15234912.0
1,United Arab Emirates,1971,,207506.0,87604.0,,,,,,...,,,,,,,,,,21166928.0
2,United Arab Emirates,1972,,248822.0,105899.0,,,,,,...,,,,,,,,,,23434944.0
3,United Arab Emirates,1973,,298560.0,115772.0,,,,,,...,,,,,,,,,,30605392.0
4,United Arab Emirates,1974,,356461.0,117482.0,,,,,,...,,,,,,,,,,31301552.0


### 📘 Section 2: GDP vs Internet Usage

In [43]:
# Complete cases only (GDP per capita and internet usage both present),
# restricted to years strictly after 1990 and strictly before 2023.
gdp_inet_df_filtered = df[['Entity', 'Year', 'GDP_per_capita', 'Internet_Usage_Pct']].dropna()
gdp_inet_df_filtered = gdp_inet_df_filtered[
    gdp_inet_df_filtered['Year'].gt(1990) & gdp_inet_df_filtered['Year'].lt(2023)
]

# Dash application for this section.
app = Dash(__name__)
app.title = "GDP vs Internet Usage"

# Two-column flex page: country checklist on the left, graph on the right.
app.layout = html.Div(
    children=[
        # Left column: scrollable checklist with one entry per country.
        html.Div(
            children=[
                html.H4("Select countries:", style=dict(marginBottom="10px")),
                dcc.Checklist(
                    id='country-selector',
                    options=[dict(label=name, value=name) for name in all_countries],
                    value=default_countries,
                    labelStyle=dict(display='block', color='#000'),
                    style=dict(
                        overflowY="auto",
                        height="calc(100vh - 100px)",
                        border="1px solid #ccc",
                        padding="10px",
                        backgroundColor="#f9f9f9",
                    ),
                ),
            ],
            style=dict(width="25%", marginRight="20px"),
        ),

        # Right column: the graph whose figure is supplied by the callback.
        html.Div(
            children=[
                dcc.Graph(
                    id='scatter-graph',
                    style=dict(height="calc(100% - 100px)", width="100%"),
                ),
            ],
            style=dict(width="75%", height="calc(100vh - 100px)"),
        ),
    ],
    style=dict(
        display="flex",
        flexDirection="row",
        backgroundColor="#ffffff",
        color="#000000",
        fontFamily="Arial, sans-serif",
        padding="20px",
        height="auto",
        width="calc(100% - 40px)",
    ),
)

@app.callback(
    Output('scatter-graph', 'figure'),
    Input('country-selector', 'value')
)
def update_graph(selected_countries):
    """Build the animated GDP-per-capita vs internet-usage scatter.

    Args:
        selected_countries: Values currently ticked in the
            ``country-selector`` checklist; may be empty or ``None``.

    Returns:
        A Plotly figure. With nothing selected this is a placeholder with
        fixed axis ranges; otherwise it is a scatter animated by ``Year``
        with one colour per ``Entity`` and a custom year slider.
    """
    # None and an empty list are both falsy, so one check covers "nothing selected".
    if not selected_countries:
        fig = px.scatter(
            x=[0], y=[0],
            title='GDP per Capita vs Internet Usage Over Time (No countries selected)'
        )
        fig.update_layout(
            xaxis=dict(range=[0, 100], title='Internet Usage (%)'),
            yaxis=dict(range=[0, 100000], title='GDP per Capita'),
            height=600
        )
        return fig

    filtered = gdp_inet_df_filtered[gdp_inet_df_filtered["Entity"].isin(selected_countries)].copy()
    all_years = sorted(filtered['Year'].unique().tolist())
    # 10% headroom above the largest GDP value; fall back to a fixed range if no rows match.
    overall_max_gdp = filtered['GDP_per_capita'].max() * 1.1 if not filtered.empty else 100000

    fig = px.scatter(
        filtered,
        x='Internet_Usage_Pct',
        y='GDP_per_capita',
        animation_frame='Year',
        color='Entity',
        # Plotly Express orders frames by first appearance in the data, which is
        # not chronological when the first country lacks the earliest years.
        # Forcing the order keeps playback and the initial frame in step with
        # the sorted slider defined below.
        category_orders={'Year': all_years},
        title=f'GDP per Capita vs Internet Usage Over Time ({len(selected_countries)} countries)',
        hover_data=['Entity', 'Year', 'GDP_per_capita', 'Internet_Usage_Pct']  # Show these values on hover
    )

    # Uniform, outlined markers; every trace is listed in the legend.
    # (No `size=` column is mapped here, so a constant size is the whole story.)
    fig.update_traces(
        marker=dict(
            size=15,
            line=dict(width=1, color='DarkSlateGrey')
        ),
        selector=dict(mode='markers'),
        showlegend=True
    )

    fig.update_layout(
        margin=dict(l=40, r=40, t=50, b=40),
        xaxis_title='Internet Usage (%)',
        yaxis_title='GDP per Capita',
        legend_title='Country',
        height=600,  # Fixed height in pixels
        xaxis=dict(
            range=[0, 100],  # Usage is a percentage, so pin the axis to 0-100
            tickmode='linear',
            dtick=10  # Tick every 10%
        ),
        yaxis=dict(
            range=[0, overall_max_gdp]
        ),
        hoverlabel=dict(
            bgcolor="white",
            font_size=12,
            font_family="Arial"
        ),
        # Replaces the slider Plotly Express generates, to use a "Year: " prefix
        # and custom transition timing.
        sliders=[{
            "active": 0,
            "yanchor": "top",
            "xanchor": "left",
            "currentvalue": {
                "font": {"size": 16},
                "prefix": "Year: ",
                "visible": True,
                "xanchor": "right"
            },
            "transition": {"duration": 300, "easing": "cubic-in-out"},
            "pad": {"b": 10, "t": 50},
            "len": 0.9,
            "x": 0.1,
            "y": 0,
            "steps": [
                {
                    # Frames are addressed by name, and frame names are strings.
                    "args": [[str(year)], {
                        "frame": {"duration": 300, "redraw": True},
                        "mode": "immediate",
                    }],
                    "label": str(year),
                    "method": "animate"
                } for year in all_years
            ]
        }]
    )

    # Pin the y-axis range on every frame so it does not rescale during playback.
    for frame in fig.frames:
        frame.layout.yaxis.range = [0, overall_max_gdp]

    return fig


# Start the Dash server for the `app` defined above, with debug mode disabled,
# when this code runs as the main module.
if __name__ == "__main__":
    app.run(debug=False)


### Section 3: GDP vs Education level 

In [154]:
# Population counts per education-attainment level.
edu_cols = ['Pop_PostSec', 'Pop_Usec', 'Pop_Lsec', 'Pop_Primary', 'Pop_IncompPrim', 'Pop_None']

# Complete cases only: a row missing GDP, internet usage or any attainment
# count is dropped.
gdp_edu_df_filtered = df[['Entity', 'Year', 'GDP_per_capita', 'Internet_Usage_Pct', *edu_cols]].dropna()

# Row-wise total across all attainment levels.
gdp_edu_df_filtered['Total_Edu_Pop'] = gdp_edu_df_filtered[edu_cols].sum(axis=1)

# Each attainment level as a percentage of that total, stored as '<level>_Pct'.
gdp_edu_df_filtered = gdp_edu_df_filtered.assign(**{
    f'{level}_Pct': gdp_edu_df_filtered[level] / gdp_edu_df_filtered['Total_Edu_Pop'] * 100
    for level in edu_cols
})

# Min-max scale internet usage using the minimum and maximum of this frame.
gdp_edu_df_filtered['Internet_Usage_Pct_Normalized'] = (
    gdp_edu_df_filtered['Internet_Usage_Pct']
    .sub(gdp_edu_df_filtered['Internet_Usage_Pct'].min())
    .div(
        gdp_edu_df_filtered['Internet_Usage_Pct'].max()
        - gdp_edu_df_filtered['Internet_Usage_Pct'].min()
    )
)

# Sanity checks: raw vs normalized usage, the resulting column names, and the
# primary-education share next to GDP.
print(gdp_edu_df_filtered[['Internet_Usage_Pct', 'Internet_Usage_Pct_Normalized']].head())
print(gdp_edu_df_filtered.columns)
print(gdp_edu_df_filtered[['Pop_Primary_Pct', 'GDP_per_capita']].head())



    Internet_Usage_Pct  Internet_Usage_Pct_Normalized
20            0.000000                       0.000000
25            0.102939                       0.001029
30           23.625300                       0.236253
35           40.000000                       0.400000
40           68.000000                       0.680000
Index(['Entity', 'Year', 'GDP_per_capita', 'Internet_Usage_Pct', 'Pop_PostSec',
       'Pop_Usec', 'Pop_Lsec', 'Pop_Primary', 'Pop_IncompPrim', 'Pop_None',
       'Total_Edu_Pop', 'Pop_PostSec_Pct', 'Pop_Usec_Pct', 'Pop_Lsec_Pct',
       'Pop_Primary_Pct', 'Pop_IncompPrim_Pct', 'Pop_None_Pct',
       'Internet_Usage_Pct_Normalized'],
      dtype='object')
    Pop_Primary_Pct  GDP_per_capita
20        10.013532      108057.040
25        10.675331      101412.086
30        11.205493       92547.230
35        11.613132       89986.130
40        11.503301       68799.930


In [180]:
# Dash application for this section.
app = Dash(__name__)
app.title = "GDP vs Education level"

# Two-column flex page: country checklist on the left, graph on the right.
app.layout = html.Div(
    children=[
        # Left column: scrollable checklist with one entry per country.
        html.Div(
            children=[
                html.H4("Select countries:", style=dict(marginBottom="10px")),
                dcc.Checklist(
                    id='country-selector',
                    options=[dict(label=name, value=name) for name in all_countries],
                    value=default_countries,
                    labelStyle=dict(display='block', color='#000'),
                    style=dict(
                        overflowY="auto",
                        height="calc(100vh - 100px)",
                        border="1px solid #ccc",
                        padding="10px",
                        backgroundColor="#f9f9f9",
                    ),
                ),
            ],
            style=dict(width="25%", marginRight="20px"),
        ),

        # Right column: the graph whose figure is supplied by the callback.
        html.Div(
            children=[
                dcc.Graph(
                    id='scatter-graph',
                    style=dict(height="calc(100% - 100px)", width="100%"),
                ),
            ],
            style=dict(width="75%", height="calc(100vh - 100px)"),
        ),
    ],
    style=dict(
        display="flex",
        flexDirection="row",
        backgroundColor="#ffffff",
        color="#000000",
        fontFamily="Arial, sans-serif",
        padding="20px",
        height="auto",
        width="calc(100% - 40px)",
    ),
)

@app.callback(
    Output('scatter-graph', 'figure'),
    Input('country-selector', 'value')
)
def update_graph(selected_countries):
    """Build the animated GDP-per-capita vs primary-education bubble chart.

    Marker size encodes ``Internet_Usage_Pct``.

    Args:
        selected_countries: Values currently ticked in the
            ``country-selector`` checklist; may be empty or ``None``.

    Returns:
        A Plotly figure. With nothing selected this is a placeholder with
        fixed axis ranges; otherwise it is a scatter animated by ``Year``
        with one colour per ``Entity``.
    """
    # None and an empty list are both falsy, so one check covers "nothing selected".
    if not selected_countries:
        fig = px.scatter(
            x=[0], y=[0],
            title='GDP per Capita vs Education level'
        )
        fig.update_layout(
            xaxis=dict(range=[-10, 100], title='Primary Education (%)'),
            yaxis=dict(range=[0, 100000], title='GDP per Capita'),
            height=600
        )
        return fig

    filtered = gdp_edu_df_filtered[gdp_edu_df_filtered["Entity"].isin(selected_countries)].copy()
    all_years = sorted(filtered['Year'].unique().tolist())
    # 10% headroom above the largest GDP value; fall back to a fixed range if no rows match.
    overall_max_gdp = filtered['GDP_per_capita'].max() * 1.1 if not filtered.empty else 100000

    fig = px.scatter(
        filtered,
        x='Pop_Primary_Pct',
        y='GDP_per_capita',
        size="Internet_Usage_Pct",
        animation_frame='Year',
        color='Entity',
        # Plotly Express orders frames by first appearance in the data; force
        # chronological order so playback and the slider run oldest to newest.
        category_orders={'Year': all_years},
        title=f'GDP per Capita vs Primary Education Level ({len(selected_countries)} countries)',
        hover_data=['Entity', 'Year', 'GDP_per_capita', 'Pop_Primary_Pct']
    )

    # Outline the markers only. Setting a constant marker size here would
    # overwrite the per-point sizes derived from `size=` above, so the size
    # is deliberately left alone. Points with 0% usage get zero-size markers.
    fig.update_traces(
        marker=dict(
            line=dict(width=1, color='DarkSlateGrey')
        ),
        selector=dict(mode='markers'),
        showlegend=True
    )

    fig.update_layout(
        margin=dict(l=40, r=40, t=50, b=40),
        xaxis_title='Primary Education (% of population)',
        yaxis_title='GDP per Capita',
        legend_title='Country',
        height=600,
        xaxis=dict(
            range=[-10, 100],  # Starts below zero, leaving room left of 0%
            tickmode='linear',
            dtick=10
        ),
        yaxis=dict(
            range=[0, overall_max_gdp]
        ),
        hoverlabel=dict(
            bgcolor="white",
            font_size=12,
            font_family="Arial"
        )
    )

    # Pin the y-axis range on every frame so it does not rescale during playback.
    for frame in fig.frames:
        frame.layout.yaxis.range = [0, overall_max_gdp]

    return fig


# Start the Dash server for the `app` defined above, with debug mode disabled,
# when this code runs as the main module.
if __name__ == "__main__":
    app.run(debug=False) 

In [168]:
### Section 4: GDP vs Education level (marker size: electricity access)

In [152]:
# Population counts per education-attainment level.
edu_cols = ['Pop_PostSec', 'Pop_Usec', 'Pop_Lsec', 'Pop_Primary', 'Pop_IncompPrim', 'Pop_None']

# Complete cases only: a row missing GDP, electricity access or any attainment
# count is dropped.
# NOTE(review): this rebinds `gdp_edu_df_filtered` to a frame that has no
# `Internet_Usage_Pct` column. The Section 3 callback reads this global by name
# and sizes markers by that column, so it would presumably fail if it is still
# being served after this cell runs; consider a distinct variable name.
gdp_edu_df_filtered = df[['Entity', 'Year', 'GDP_per_capita', 'Electricity_Access_Pct', *edu_cols]].dropna()

# Row-wise total across all attainment levels.
gdp_edu_df_filtered['Total_Edu_Pop'] = gdp_edu_df_filtered[edu_cols].sum(axis=1)

# Each attainment level as a percentage of that total, stored as '<level>_Pct'.
gdp_edu_df_filtered = gdp_edu_df_filtered.assign(**{
    f'{level}_Pct': gdp_edu_df_filtered[level] / gdp_edu_df_filtered['Total_Edu_Pop'] * 100
    for level in edu_cols
})

# Sanity checks: the resulting column names, and the primary-education share
# next to GDP.
print(gdp_edu_df_filtered.columns)
print(gdp_edu_df_filtered[['Pop_Primary_Pct', 'GDP_per_capita']].head())


Index(['Entity', 'Year', 'GDP_per_capita', 'Electricity_Access_Pct',
       'Pop_PostSec', 'Pop_Usec', 'Pop_Lsec', 'Pop_Primary', 'Pop_IncompPrim',
       'Pop_None', 'Total_Edu_Pop', 'Pop_PostSec_Pct', 'Pop_Usec_Pct',
       'Pop_Lsec_Pct', 'Pop_Primary_Pct', 'Pop_IncompPrim_Pct',
       'Pop_None_Pct'],
      dtype='object')
    Pop_Primary_Pct  GDP_per_capita
20        10.013532      108057.040
25        10.675331      101412.086
30        11.205493       92547.230
35        11.613132       89986.130
40        11.503301       68799.930


In [150]:
# Dash application for this section.
app = Dash(__name__)
app.title = "GDP vs Education level"

# Two-column flex page: country checklist on the left, graph on the right.
app.layout = html.Div(
    children=[
        # Left column: scrollable checklist with one entry per country.
        html.Div(
            children=[
                html.H4("Select countries:", style=dict(marginBottom="10px")),
                dcc.Checklist(
                    id='country-selector',
                    options=[dict(label=name, value=name) for name in all_countries],
                    value=default_countries,
                    labelStyle=dict(display='block', color='#000'),
                    style=dict(
                        overflowY="auto",
                        height="calc(100vh - 100px)",
                        border="1px solid #ccc",
                        padding="10px",
                        backgroundColor="#f9f9f9",
                    ),
                ),
            ],
            style=dict(width="25%", marginRight="20px"),
        ),

        # Right column: the graph whose figure is supplied by the callback.
        html.Div(
            children=[
                dcc.Graph(
                    id='scatter-graph',
                    style=dict(height="calc(100% - 100px)", width="100%"),
                ),
            ],
            style=dict(width="75%", height="calc(100vh - 100px)"),
        ),
    ],
    style=dict(
        display="flex",
        flexDirection="row",
        backgroundColor="#ffffff",
        color="#000000",
        fontFamily="Arial, sans-serif",
        padding="20px",
        height="auto",
        width="calc(100% - 40px)",
    ),
)

@app.callback(
    Output('scatter-graph', 'figure'),
    Input('country-selector', 'value')
)
def update_graph(selected_countries):
    """Build the animated GDP-per-capita vs primary-education bubble chart.

    Marker size encodes ``Electricity_Access_Pct``.

    Args:
        selected_countries: Values currently ticked in the
            ``country-selector`` checklist; may be empty or ``None``.

    Returns:
        A Plotly figure. With nothing selected this is a placeholder with
        fixed axis ranges; otherwise it is a scatter animated by ``Year``
        with one colour per ``Entity`` and a custom year slider.
    """
    # None and an empty list are both falsy, so one check covers "nothing selected".
    if not selected_countries:
        fig = px.scatter(
            x=[0], y=[0],
            title='GDP per Capita vs Education level'
        )
        fig.update_layout(
            xaxis=dict(range=[-10, 100], title='Primary Education (%)'),
            yaxis=dict(range=[0, 100000], title='GDP per Capita'),
            height=600
        )
        return fig

    filtered = gdp_edu_df_filtered[gdp_edu_df_filtered["Entity"].isin(selected_countries)].copy()
    all_years = sorted(filtered['Year'].unique().tolist())
    # 10% headroom above the largest GDP value; fall back to a fixed range if no rows match.
    overall_max_gdp = filtered['GDP_per_capita'].max() * 1.1 if not filtered.empty else 100000

    fig = px.scatter(
        filtered,
        x='Pop_Primary_Pct',
        y='GDP_per_capita',
        size="Electricity_Access_Pct",
        animation_frame='Year',
        color='Entity',
        # Plotly Express orders frames by first appearance in the data; force
        # chronological order so playback and the initial frame match the
        # sorted slider defined below.
        category_orders={'Year': all_years},
        title=f'GDP per Capita vs Primary Education Level ({len(selected_countries)} countries)',
        hover_data=['Entity', 'Year', 'GDP_per_capita', 'Pop_Primary_Pct']
    )

    # Outline the markers only. Setting a constant marker size here would
    # overwrite the per-point sizes derived from `size=` above, so the size
    # is deliberately left alone.
    fig.update_traces(
        marker=dict(
            line=dict(width=1, color='DarkSlateGrey')
        ),
        selector=dict(mode='markers'),
        showlegend=True
    )

    fig.update_layout(
        margin=dict(l=40, r=40, t=50, b=40),
        xaxis_title='Primary Education (% of population)',
        yaxis_title='GDP per Capita',
        legend_title='Country',
        height=600,
        xaxis=dict(
            range=[-10, 100],  # Starts below zero, leaving room left of 0%
            tickmode='linear',
            dtick=10
        ),
        yaxis=dict(
            range=[0, overall_max_gdp]
        ),
        hoverlabel=dict(
            bgcolor="white",
            font_size=12,
            font_family="Arial"
        ),
        # Replaces the slider Plotly Express generates, to use a "Year: " prefix
        # and custom transition timing.
        sliders=[{
            "active": 0,
            "yanchor": "top",
            "xanchor": "left",
            "currentvalue": {
                "font": {"size": 16},
                "prefix": "Year: ",
                "visible": True,
                "xanchor": "right"
            },
            "transition": {"duration": 300, "easing": "cubic-in-out"},
            "pad": {"b": 10, "t": 50},
            "len": 0.9,
            "x": 0.1,
            "y": 0,
            "steps": [
                {
                    # Frames are addressed by name, and frame names are strings.
                    "args": [[str(year)], {
                        "frame": {"duration": 300, "redraw": True},
                        "mode": "immediate",
                    }],
                    "label": str(year),
                    "method": "animate"
                } for year in all_years
            ]
        }]
    )

    # Pin the y-axis range on every frame so it does not rescale during playback.
    for frame in fig.frames:
        frame.layout.yaxis.range = [0, overall_max_gdp]

    return fig


# Start the Dash server for the `app` defined above, with debug mode disabled,
# when this code runs as the main module.
if __name__ == "__main__":
    app.run(debug=False)