In [16]:
# --- Step 1: Import Libraries ---
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
import folium
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
import time
import os
import warnings

warnings.filterwarnings("ignore")

In [18]:
# --- Step 2: Load and Merge Data ---
print("--- Step 2: Loading and Merging Data ---")
df = pd.read_csv("../1_datasets/final_datasets/clean_merged_data.csv")
coords_url = "https://raw.githubusercontent.com/google/dspl/master/samples/google/canonical/countries.csv"
coords = pd.read_csv(coords_url)
coords = coords[["name", "latitude", "longitude"]].rename(columns={"name": "Country"})
df_merged = pd.merge(df, coords, on="Country", how="left")
df_merged.dropna(subset=["latitude", "longitude"], inplace=True)
print("✅ Data merged successfully.")

--- Step 2: Loading and Merging Data ---
✅ Data merged successfully.


In [19]:
# --- Step 3: Feature Selection ---
print("--- Step 3: Feature Selection ---")
features = df_merged[["SDI", "PM2.5", "All-cause DALYs"]]
scaler = StandardScaler()
scaled = scaler.fit_transform(features)

--- Step 3: Feature Selection ---


In [20]:
# --- Step 4: Clustering ---
print("--- Step 4: Clustering ---")
kmeans = KMeans(n_clusters=4, random_state=42, n_init=10)
df_merged["Cluster"] = kmeans.fit_predict(scaled)
print(df_merged[["Country", "Year", "Cluster"]].head())

--- Step 4: Clustering ---
    Country  Year  Cluster
0  Botswana  2010        3
1  Botswana  2011        3
2  Botswana  2012        3
3  Botswana  2013        3
4  Botswana  2014        3


In [21]:
# --- Step 5: Creating World Map ---
print("--- Step 5: Creating World Map ---")
colors = ["red", "blue", "green", "purple"]
m = folium.Map(location=[20, 0], zoom_start=2, tiles="cartodbpositron")
for _, row in df_merged.iterrows():
    folium.CircleMarker(
        location=[row["latitude"], row["longitude"]],
        radius=5,
        color=colors[row["Cluster"] % len(colors)],
        fill=True,
        fill_color=colors[row["Cluster"] % len(colors)],
        fill_opacity=0.7,
        tooltip=folium.Tooltip(
            f"<b>Country:</b> {row['Country']}<br>"
            f"<b>Year:</b> {row['Year']}<br>"
            f"<b>Cluster:</b> {row['Cluster']}<br>"
            f"<b>SDI:</b> {row['SDI']}<br>"
            f"<b>PM2.5:</b> {row['PM2.5']}<br>"
            f"<b>DALYs:</b> {row['All-cause DALYs']}"
        ),
    ).add_to(m)

--- Step 5: Creating World Map ---


In [22]:
# --- Step 6: Add Legend ---
legend_html = """
<div style="position: fixed; 
     bottom: 50px; left: 50px; width: 260px; height: 140px; 
     border:2px solid grey; z-index:9999; font-size:14px;
     background-color:white; padding:10px;">
<b>🗺️ Cluster Legend</b><br>
🔴 <b>Red</b>: Cluster 0 — Low SDI, high PM2.5 and DALYs<br>
🔵 <b>Blue</b>: Cluster 1 — Mixed profile<br>
🟢 <b>Green</b>: Cluster 2 — High SDI, low PM2.5 and DALYs<br>
🔸 <b>Purple</b>: Cluster 3 — Unique small nations
</div>
"""
m.get_root().html.add_child(folium.Element(legend_html))

map_file = "world_map_clusters.html"
m.save(map_file)
print(f"✅ HTML saved: {map_file}")

✅ HTML saved: world_map_clusters.html


In [23]:
# --- Step 7: Saving World Map as PNG ---
print("--- Step 7: Saving World Map as PNG ---")

map_file = "world_map_clusters.html"
map_png = "world_map_clusters.png"

options = Options()
options.headless = True
options.add_argument("--window-size=1200,800")
options.add_argument("--no-sandbox")
options.add_argument("--disable-gpu")

# Use Service to avoid deprecated 'executable_path'
service = Service(ChromeDriverManager().install())
driver = webdriver.Chrome(service=service, options=options)

try:
    map_path = "file://" + os.path.abspath(map_file)
    driver.get(map_path)
    time.sleep(5)  # Give time for map to load
    driver.save_screenshot(map_png)
    print(f"✅ Map screenshot saved as: {map_png}")
except Exception as e:
    print(f"❌ Error saving screenshot: {e}")
finally:
    driver.quit()

--- Step 7: Saving World Map as PNG ---
✅ Map screenshot saved as: world_map_clusters.png


# Step 8: Cluster Map Interpretation

The map shows the results of **K-Means clustering** of countries based on:

- **Sociodemographic Index (SDI)**
- **Air pollution (PM2.5)**
- **Health burden (DALYs)**

Each color represents a different cluster of countries with similar profiles.

---

### 🎨 Cluster Color Legend

| Color    | Cluster | Description                                                                                   |
|----------|---------|-----------------------------------------------------------------------------------------------|
| 🔴 Red   | 0       | Countries with **low SDI**, **high PM2.5**, and **high health burden (DALYs)** — worst conditions |
| 🔵 Blue  | 1       | Countries with **developing or mixed profiles** — moderate SDI and PM2.5                      |
| 🟢 Green | 2       | **Highly developed countries** — high SDI, low pollution, and low health burden                |
| 🟣 Purple| 3       | **Geographically isolated or small island nations**, often with unique patterns or limited data |
