In [33]:
import altair as alt

# Remote CSV that backs the chart
data_url = 'https://raw.githubusercontent.com/Noura-23/Noura-23.github.io/refs/heads/main/Project_data/chart3_final_data.csv'

# Sub-regions offered by the dropdown, in display order
sub_region_names = [
    'Northern Africa',
    'Western Africa',
    'Eastern Africa',
    'Middle Africa',
    'Southern Africa',
]

# Select widget: the leading None option is the one labelled "All Regions"
region_select = alt.binding_select(
    options=[None, *sub_region_names],
    labels=['All Regions', *sub_region_names],
    name='Select Region: ',
)

# Point selection on the 'sub-region' field, driven by the widget above
region_dropdown = alt.selection_point(fields=['sub-region'], bind=region_select)

# Base chart: the raw data, filtered by the region dropdown (no derived fields)
base = alt.Chart(data_url).transform_filter(region_dropdown)

# X channel: GDP per capita on a log scale with hand-picked tick values
scatter_x = alt.X(
    'GDP per capita, PPP:Q',
    title='GDP per capita, PPP (log scale)',
    # Upper bound of the domain was widened so that Mauritius is included
    scale=alt.Scale(type='log', domain=[1500, 35000]),
    axis=alt.Axis(
        values=[2000, 5000, 10000, 20000, 30000],
        gridOpacity=0.3,
        labelAngle=0,
        labelFontSize=11,
        format='$,',
        ticks=False,
        titleFontWeight='bold',
        titlePadding=6,
    ),
)

# Y channel: residential tariff in $/kWh on a fixed linear domain
scatter_y = alt.Y(
    'tariffs:Q',
    title='Residential Electricity Tariff ($/kWh)',
    axis=alt.Axis(
        values=[0, 0.05, 0.10, 0.15, 0.20, 0.25],
        ticks=False,
        labelFontSize=11,
        labelPadding=5,
        titleFontWeight='bold',
        titlePadding=6,
        gridOpacity=0.3,
        format='$.2f',
    ),
    scale=alt.Scale(domain=[0, 0.27]),
)

# Color channel: one fixed color per sub-region, with a vertical legend on the right
scatter_color = alt.Color(
    'sub-region:N',
    scale=alt.Scale(
        domain=['Northern Africa', 'Western Africa', 'Eastern Africa', 'Middle Africa', 'Southern Africa'],
        range=['#E57373', '#FFA726', '#FFD54F', '#81C784', '#64B5F6'],
    ),
    legend=alt.Legend(
        title='Sub-region',
        orient='right',
        direction='vertical',
        offset=10,
        labelFontSize=11,
    ),
)

# Size channel: bubble area follows Population; its legend is suppressed
scatter_size = alt.Size(
    'Population:Q',
    scale=alt.Scale(range=[30, 400]),
    legend=None,
)

# Fields shown when hovering over a bubble
scatter_tooltip = [
    alt.Tooltip('name:N', title='Country'),
    alt.Tooltip('GDP per capita, PPP:Q', format='$,.0f', title='GDP per capita, PPP'),
    alt.Tooltip('tariffs:Q', format='$.3f', title='Tariff ($/kWh)'),
]

# Scatter plot layer; the dropdown parameter is attached to this layer.
# NOTE(review): the Size encoding presumably takes precedence over the
# mark-level size=120 -- confirm whether that property is still needed.
scatter = base.mark_circle(size=120, opacity=0.85).encode(
    x=scatter_x,
    y=scatter_y,
    color=scatter_color,
    size=scatter_size,
    tooltip=scatter_tooltip,
).add_params(region_dropdown)

# Regression trend-line layer (dashed grey line drawn over the scatter).
#
# Built from `base`, so it sees the same region-filtered rows as the scatter
# layer; the filter runs before the regression transform.
#
# The x axis is logarithmic, so the fit uses method='log'
# (tariffs = a + b * ln(GDP per capita)), which is a straight line on that
# axis. A least-squares line in linear GDP is not straight on a log axis, so
# the default linear method would draw a segment that misstates the model.
regression = base.mark_line(
    color='#666666',
    strokeWidth=2.5,
    strokeDash=[5, 5],
    opacity=0.5
).transform_regression(
    'GDP per capita, PPP',
    'tariffs',
    method='log',
    # Fixed x-range over which the trend line is generated, regardless of
    # which region is currently selected.
    extent=[1490, 18600],
).encode(
    # Same scales as the scatter layer so both layers share one set of axes
    x=alt.X('GDP per capita, PPP:Q').scale(
        type='log',
        domain=[1500, 35000]
    ),
    y=alt.Y('tariffs:Q').scale(
        domain=[0, 0.27]
    )
)

# Title and source line, left-anchored to the plot group
chart_title = alt.Title(
    text='Residential Electricity Price vs GDP per capita, PPP',
    subtitle='Source: World Bank (2024), Global Petrol Prices (June 2025)',
    subtitleFontStyle='italic',
    subtitleFontSize=11,
    anchor='start',
    frame='group',
    offset=10,
    fontSize=14,
)

# Stack the trend line on top of the scatter, size the view, and drop the
# view border
chart = alt.layer(scatter, regression).properties(
    width=400,
    height=350,
    title=chart_title,
).configure_view(stroke='transparent')

chart.display()