# Urbanization & Life Quality — Plotly Visuals
Interactive, polished replicas of the existing static charts in `faseeh/visualizations`. Run from the project root or this folder; adjust the data path in the next cell if needed.


In [10]:
from pathlib import Path
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler, MinMaxScaler

# Locate the cleaned CSV by probing the working directory and its two nearest ancestors,
# so the notebook runs unchanged from the project root or from a nested folder.
cwd = Path.cwd()
search_roots = (cwd, cwd.parent, cwd.parent.parent)
candidate_paths = (
    root / "data_cleaned" / "combined_urbanization_life_quality_2008_2020.csv"
    for root in search_roots
)

# First existing candidate wins; None signals that nothing was found.
DATA_FILE = next((path for path in candidate_paths if path.exists()), None)

if DATA_FILE is None:
    raise FileNotFoundError("combined_urbanization_life_quality_2008_2020.csv not found. Adjust DATA_FILE manually.")

print(f"Using data file: {DATA_FILE}")
df_raw = pd.read_csv(DATA_FILE)
df_raw.head()


Using data file: d:\Study Stuff\Semester\Projects\5th Semester\DV\DataVisualization_LifeQualityWithIncreasingUrbanPopulation\data_cleaned\combined_urbanization_life_quality_2008_2020.csv


Unnamed: 0,Country,Country_Code,Year,total_pop,pop_dens_sq_km,urban_pop_perc,rural_pop_perc,elect_access_pop,ren_energy_cons_perc,clean_fuel_tech_cook_pop,...,ongoing conflict,militarisation,"Agriculture, forestry, and fishing, value added (% of GDP)",Adjusted savings: natural resources depletion (% of GNI),Adjusted savings: net forest depletion (% of GNI),Access to electricity (% of population),Adjusted savings: energy depletion (% of GNI),Adjusted savings: carbon dioxide damage (% of GNI),Access to clean fuels and technologies for cooking (% of population),"Agriculture, forestry, and fishing, value added (annual % growth)"
0,Armenia,ARM,2008,3087100.0,108.433439,63.64,36.36,99.2,6.4,94.9,...,2.029,2.233,17.0,0.201529,0.201529,99.2,0.0,1.236159,94.9,3.0
1,Armenia,ARM,2009,3066000.0,107.692308,63.54,36.46,99.3,7.8,95.6,...,2.041,2.233,19.0,0.347525,0.327672,99.3,0.0,1.38291,95.6,5.922554
2,Armenia,ARM,2010,3044800.0,106.947664,63.44,36.56,99.8,9.4,96.1,...,2.071,2.264,16.9,1.580881,0.284005,99.8,0.0,1.276213,96.1,-15.905777
3,Armenia,ARM,2011,3027900.0,106.354057,63.34,36.66,99.5,8.0,96.6,...,2.071,2.168,22.0,1.916095,0.438088,99.5,0.0,1.403063,96.6,14.0
4,Armenia,ARM,2012,3024100.0,106.220583,63.24,36.76,99.5,6.6,96.8,...,2.071,2.096,17.912682,0.9759,0.339469,99.5,0.0,1.609445,96.8,9.5


In [11]:
def add_clusters(df: pd.DataFrame):
    """Attach 2-cluster labels (rich vs developing) based on urbanization/electricity means.

    Each country is summarised by the mean of ``urban_pop_perc`` and
    ``elect_access_pop`` over all of its rows. The standardised means are split
    into two groups with KMeans (fixed ``random_state`` so reruns agree), and
    the cluster with the higher average urbanization is treated as the "rich" one.

    Parameters
    ----------
    df : pd.DataFrame
        Must contain the columns ``Country``, ``urban_pop_perc`` and
        ``elect_access_pop``. It is not modified.

    Returns
    -------
    (pd.DataFrame, cluster id)
        A copy of ``df`` with two extra columns, plus the id of the cluster
        labelled as rich:

        * ``Cluster`` - KMeans cluster id of the row's country, NaN when the
          country could not be clustered.
        * ``Cluster_Label`` - ``'Rich/Stable'``, ``'Developing/Volatile'`` or
          ``'Unclassified'`` (country had no usable feature means).
    """
    features = ['urban_pop_perc', 'elect_access_pop']

    # Countries whose mean is NaN for either feature cannot be clustered and drop out here.
    temp = df.groupby('Country')[features].mean().dropna()
    scaler = StandardScaler()
    temp['Cluster'] = KMeans(n_clusters=2, random_state=42, n_init=10).fit_predict(
        scaler.fit_transform(temp[features])
    )
    rich_id = temp.groupby('Cluster')['urban_pop_perc'].mean().idxmax()

    df_out = df.copy()
    df_out['Cluster'] = df_out['Country'].map(temp['Cluster'])
    df_out['Cluster_Label'] = np.where(
        df_out['Cluster'].eq(rich_id), 'Rich/Stable', 'Developing/Volatile'
    )
    # A country that was dropped above has a NaN cluster; a plain "!= rich_id" test
    # would silently file it under 'Developing/Volatile', so mark it explicitly instead.
    df_out.loc[df_out['Cluster'].isna(), 'Cluster_Label'] = 'Unclassified'
    return df_out, rich_id


# Label every row with its country's cluster, then preview the key columns of the first rows.
df, rich_cluster = add_clusters(df_raw)
print(df.head()[['Country', 'Year', 'Cluster_Label']])


   Country  Year Cluster_Label
0  Armenia  2008   Rich/Stable
1  Armenia  2009   Rich/Stable
2  Armenia  2010   Rich/Stable
3  Armenia  2011   Rich/Stable
4  Armenia  2012   Rich/Stable


## 1) Structural Transformation — Urbanization vs Agriculture
Interactive scatter with linear trend; hover shows country/year.


In [12]:
y_col = 'Agriculture, forestry, and fishing, value added (% of GDP)'

# Human-readable names for the axes and the legend/hover entries.
axis_labels = {
    'urban_pop_perc': 'Urban Population (%)',
    y_col: 'Agriculture Value Added (% of GDP)',
    'Cluster_Label': 'Economic Cluster',
}

fig1 = px.scatter(
    df,
    x='urban_pop_perc',
    y=y_col,
    color='Cluster_Label',
    hover_data=['Country', 'Year'],
    opacity=0.7,
    color_discrete_sequence=px.colors.qualitative.Vivid,
    labels=axis_labels,
    title='Structural Transformation: Urbanization vs. Agriculture',
)

# Degree-1 least-squares fit over every row that has both values (all clusters pooled),
# evaluated on 100 evenly spaced points spanning the observed urbanization range.
clean1 = df[['urban_pop_perc', y_col]].dropna().sort_values('urban_pop_perc')
urban_share = clean1['urban_pop_perc']
coef = np.polyfit(urban_share, clean1[y_col], deg=1)
xs = np.linspace(urban_share.min(), urban_share.max(), num=100)

trend_trace = go.Scatter(
    x=xs,
    y=np.polyval(coef, xs),
    mode='lines',
    name='Trend',
    line={'color': 'black', 'dash': 'dash'},
)
fig1.add_trace(trend_trace)

fig1.update_layout(template='plotly_white', legend_title_text='Economic Cluster')
fig1.show()


## 3) Energy Paradox — Renewables vs Clean Cooking
Bubble size encodes urban population share; the dashed line marks the 95% clean-cooking access threshold (the "modern standard").


In [None]:
# Columns that position and size each bubble. px.scatter rejects NaN in the `size`
# column with a ValueError, and a row missing x or y has no point to draw anyway,
# so incomplete rows are removed before plotting.
fig3_cols = ['ren_energy_cons_perc', 'clean_fuel_tech_cook_pop', 'urban_pop_perc']

fig3 = px.scatter(
    df.dropna(subset=fig3_cols),
    x='ren_energy_cons_perc',
    y='clean_fuel_tech_cook_pop',
    size='urban_pop_perc',
    color='Cluster_Label',
    hover_data=['Country', 'Year'],
    size_max=40,
    opacity=0.75,
    # Same palette as the urbanization-vs-agriculture scatter so the charts read as a set.
    color_discrete_sequence=px.colors.qualitative.Vivid,
    labels={
        'ren_energy_cons_perc': 'Renewable Energy Consumption (%)',
        'clean_fuel_tech_cook_pop': 'Access to Clean Cooking Fuels (%)',
        'urban_pop_perc': 'Urban Population (%)',
        # Without this entry the hover text shows the raw column name.
        'Cluster_Label': 'Economic Cluster'
    },
    title='The Energy Paradox: Renewables vs. Clean Cooking'
)

# Reference line at 95% clean-cooking access, with its caption placed just above it.
fig3.add_hline(y=95, line_dash='dash', line_color='firebrick', opacity=0.6)
fig3.add_annotation(x=80, y=97, text='Modern Standard (>95%)', showarrow=False, font=dict(color='firebrick'))
fig3.update_layout(legend_title_text='Economic Cluster', template='plotly_white')
fig3.show()


## 4) Fast Modernizers — Agriculture Drop After Urbanization Surge
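Top 10 fastest-urbanizing countries (largest rise in urban population share, 2008 → 2020); bars show the percentage-point change in agriculture's share of GDP over the same period.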


In [None]:
ag_col = 'Agriculture, forestry, and fishing, value added (% of GDP)'
cols = ['urban_pop_perc', ag_col]

# One snapshot per endpoint year, keyed by country so the two frames align on subtraction.
start = df.loc[df['Year'] == 2008].set_index('Country')
end = df.loc[df['Year'] == 2020].set_index('Country')

# 2020 minus 2008; a country missing either endpoint yields NaN and is dropped.
delta = end[cols].dropna().subtract(start[cols], fill_value=np.nan).dropna()

# Rank by urbanization gain, keep the ten largest, and give the columns display names.
ag_change_label = 'Δ Agriculture share (p.p.)'
display_names = {'urban_pop_perc': 'Δ Urban pop (p.p.)', ag_col: ag_change_label}
fastest = (
    delta
    .sort_values('urban_pop_perc', ascending=False)
    .head(10)
    .rename(columns=display_names)
)

fig4 = px.bar(
    fastest,
    x=ag_change_label,
    y=fastest.index,
    orientation='h',
    color=ag_change_label,
    color_continuous_scale=px.colors.diverging.RdBu,
    color_continuous_midpoint=0,
    labels={'y': 'Country'},
    title='Did Rapid Urbanization Kill Agriculture? (Change 2008 → 2020)',
)

# Zero line separates countries whose agriculture share grew from those where it shrank.
fig4.add_vline(x=0, line_color='black', line_width=1)
fig4.update_layout(
    template='plotly_white',
    xaxis_title='Change in Agriculture Share of GDP (p.p.)',
    yaxis_title='Top 10 Fastest Urbanizing Countries',
)
fig4.show()
