<a href="https://colab.research.google.com/github/Mickymick23/GEOG5003M/blob/main/report.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
### import required packages, data and images

## packages

# install non-native packages
!pip install mapclassify # colab users only

# import packages
import pandas as pd # for data manipulation
import geopandas as gpd # to manipulate geospatial data
import seaborn as sns # for statistical data visualisation
import matplotlib.pyplot as plt # for plotting control
from matplotlib.ticker import FixedLocator, FixedFormatter # control axis ticks and labels
from matplotlib.offsetbox import OffsetImage, AnnotationBbox # to embed icons
from sklearn import cluster # to cluster the data
from sklearn.cluster import KMeans # add the kmeans clustering aglorithm
from sklearn.metrics import silhouette_score # to measure cluster seperability
from PIL import Image # open images
import urllib.request # to read URLs
import numpy as np # for numerical computing

## data

# base location of the project's data files on GitHub
data_url='https://github.com/Mickymick23/GEOG5003M/raw/refs/heads/main/data'

# world country boundaries (shapefile), read into a GeoDataFrame
countries=gpd.read_file(f'{data_url}/world-administrative-boundaries.shp')

# 2020 metrics of the Estes Weighted Index of Social Progress (csv), read into a DataFrame
WISP=pd.read_csv(f'{data_url}/WISP.csv')

In [None]:
### data exploration and pre-processing

## exploration

# get an understanding of the data by:

# seeing the first five rows for general grasp
print(countries.head())
print(WISP.head())

# analysing the summary statistics (counts, mean, standard deviation, quartiles, etc)
print(countries.describe())
print(WISP.describe())

# identifying the unexpected country code values that are not unique
print(countries['iso_3166_1_'].value_counts())
print(countries[countries['iso_3166_1_'].isin(['PT', 'PS'])])

# displaying:
  # - the different columns headings (variables)
  # - total rows
  # - the non-null count of each column
  # - each column's data type
# info() writes its report directly and returns None, so it is not wrapped in print()
# (wrapping it would add a stray 'None' line to the output)
countries.info()
WISP.info()

# seeing the null values of each column
print(countries.isnull().sum())
print(WISP.isnull().sum())

# creating box plots to identify any outliers (points drawn beyond the whiskers),
# dropping the 'year' column to visualise better
WISP.drop(columns='year').boxplot()
plt.xticks(rotation=90)
plt.show()

# identifying mismatched ISO 3166-1 country codes
mismatched_WISP=WISP[~WISP['country_code'].isin(countries['iso_3166_1_'])]
mismatched_countries=countries[~countries['iso_3166_1_'].isin(WISP['country_code'])]
# display all rows for these two listings only, leaving the global display option untouched
with pd.option_context('display.max_rows', None):
    print(mismatched_WISP[['country_code', 'country']])
    print(mismatched_countries[['iso_3166_1_', 'name']])

## pre-processing

# restore Namibia's country code, which is present in both dataframes but missing here
# NOTE(review): this fills every missing country code with 'NA' - assumes Namibia is the only row affected
WISP['country_code']=WISP['country_code'].fillna('NA')

# keep only the WISP rows whose code is a known country code
# (drops regional entries, e.g. European Union, North America)
is_country=WISP['country_code'].isin(countries['iso_3166_1_'])
WISP=WISP[is_country]

# give the Azores their own code so they do not duplicate Portugal's data
azores=countries['name']=='Azores Islands'
countries.loc[azores, 'iso_3166_1_']='PT (Azores Islands)'

# left-join the WISP metrics onto every boundary using the country codes
WISP_index=pd.merge(
    countries,
    WISP,
    how='left',
    left_on='iso_3166_1_',
    right_on='country_code'
)

# discard unnecessary columns and tidy the trailing underscore of the code column
WISP_index=(
    WISP_index
    .drop(columns=['iso3', 'status', 'color_code', 'french_shor', 'year'])
    .rename(columns={'iso_3166_1_': 'iso_3166_1'})
)

# check CRS
print(WISP_index.crs)

# reproject the dataset to EPSG:3857 for global fit
WISP_index=WISP_index.to_crs(epsg=3857)

In [None]:
### exploratory analysis and modelling: cluster creation

## exploration

# the ten WISP subindexes used throughout the analysis
subindex_list=[
    'education', 'health', 'women_status', 'defence_effort', 'economic',
    'demography', 'environmental', 'social_chaos', 'cultural_cohesion', 'welfare_effort'
]

# Spearman's rank correlation matrix to gauge relationships between the subindexes
spearman_corr=WISP_index[subindex_list].corr(method='spearman')
print(spearman_corr)

# geospatially visualise each subindex:
# draw one choropleth map per subindex in the list
for subindex in subindex_list:
    # produce a plot (only the axis is needed)
    _, ax=plt.subplots(1, 1, figsize=(10, 10))
    WISP_index.plot(
        # colour countries by the current subindex
        column=subindex,
        # show the legend
        legend=True,
        # define the legend palette
        cmap='RdBu',
        # use the predefined axis
        ax=ax,
        # decrease the legend size
        legend_kwds={'shrink': 0.3}
    )
    # title each map: remove underscores, capitalise and add a suffix
    # (set on the map axis explicitly rather than relying on pyplot's current axis)
    ax.set_title(subindex.replace('_',' ').title()+' Subindex')
    # remove axes
    ax.set_axis_off()
    # display plot
    plt.show()
########## above code block adapted from GEOG5003M  Week 7, https://github.com/FrancescaPontin/GEOG5003M/blob/main/notebooks/Week_7_Spatial_Analysis.ipynb ##########

# identify number of clusters using the elbow method:
# model input: the subindex columns of the rows without missing values, as a NumPy array
X=WISP_index[subindex_list].dropna().values
# candidate cluster counts: 1 to 19 (the end of the range is excluded)
K=range(1,20)
# fit one k-means model per candidate count (fixed random state) and
# record its inertia, i.e. the sum of squared distances to the cluster centres
sum_of_squared_distances=[
    cluster.KMeans(n_clusters=k, init='random', random_state=123).fit(X).inertia_
    for k in K
]
# plot the sum of squared distances against the number of clusters using crosses
plt.plot(K, sum_of_squared_distances, 'bx-')
# make x labels integer
plt.xticks(K)
# assign appropriate titles/labels
plt.title('Elbow Method For Optimal k')
plt.xlabel('# of Clusters (k)')
plt.ylabel('Sum of Squared Distances')
# display plot
plt.show()
########## above code block adapted from GEOG5003M  Week 7, https://github.com/FrancescaPontin/GEOG5003M/blob/main/notebooks/Week_7_Spatial_Analysis.ipynb ##########

# expand on elbow method using silhouette score:
# candidate cluster counts: 2 to 9 (the end of the range is excluded)
K=range(2, 10)
# scores are collected here, one per candidate count
silhouette_scores=[]
for n_clusters in K:
    # fit a k-means model (fixed random state) on the rows without missing values
    km=KMeans(n_clusters=n_clusters, init='random', random_state=123).fit(X)
    # score how well separated the resulting clusters are and store it
    silhouette_scores.append(silhouette_score(X, km.labels_))
# plot silhouette scores against the number of clusters using crosses
plt.plot(K, silhouette_scores, 'bx-')
# assign appropriate titles/labels
plt.title('Silhouette Score vs. # of Clusters')
plt.xlabel('# of Clusters (k)')
plt.ylabel('Silhouette Score')
# display plot
plt.show()

## modelling - create clusters

# fit the final K Means model with 6 clusters
km6=cluster.KMeans(n_clusters=6, init='random', random_state=123).fit(X)

# write the labels back to the main dataframe, only on the rows that were
# clustered (those with no missing subindex); the other rows stay without a cluster
complete_rows=WISP_index[subindex_list].dropna().index
WISP_index.loc[complete_rows, 'Cluster']=km6.labels_

In [None]:
### exploratory analysis and interpretation: cluster categorisation

## exploration

# pairplot to see how each cluster behaves in relation to another:
# every subindex is compared with every other, coloured by cluster
pairplot_data=WISP_index[subindex_list + ['Cluster']]
sns.pairplot(pairplot_data, hue='Cluster', palette='Dark2')

# map out the clusters to gain geographical insight:
# produce a plot
fig, ax=plt.subplots(1, figsize=(9, 9))
# colour countries by cluster, treating the labels as discrete groups, with a legend
WISP_index.plot(column='Cluster', categorical=True, legend=True, ax=ax)
# remove axis
ax.set_axis_off()
# display plot
plt.show()

# numerically visualise cluster medians by subindex:
# median value of every subindex within each cluster, with 'Cluster' as a regular column
WISP_index_clusters_median=(
    WISP_index
    .groupby('Cluster')[subindex_list]
    .median()
    .reset_index()
)
print(WISP_index_clusters_median)

# graphically visualise cluster medians by subindex:
# reshape from wide to long format (one row per cluster/subindex pair), naming the new columns
WISP_index_melted=WISP_index_clusters_median.melt(
    id_vars='Cluster',
    value_vars=subindex_list,
    var_name='Subindex Name',
    value_name='Median Subindex'
)
# horizontal bar chart with one facet row per cluster
sns.catplot(
    data=WISP_index_melted,
    # make it a bar chart
    kind='bar',
    # facet by cluster
    row='Cluster',
    # assign x and y variables
    x='Median Subindex',
    y='Subindex Name',
    # colour by median value of subindex, using an interpretable palette
    hue='Median Subindex',
    palette='autumn',
    # adjust sizes
    aspect=4
)
########## code adapted from GEOG5003M  Week 7, https://github.com/FrancescaPontin/GEOG5003M/blob/main/notebooks/Week_7_Spatial_Analysis.ipynb ##########

## interpretation - name clusters

# descriptive name chosen for each cluster label
cluster_names={
    0: 'Socially Stalled Powerhouses',
    1: 'Well Established Reformers',
    2: 'War Scarred Societies',
    3: 'Hard Security States',
    4: 'Eco Friendly Agrarians',
    5: 'Progressive Path Finders'
}

# start from an empty description column in the main dataframe
WISP_index['cluster_description']=''

# fill in the description of every clustered row
for label, description in cluster_names.items():
    WISP_index.loc[WISP_index['Cluster']==label, 'cluster_description']=description

# rows that were never clustered have no cluster label
WISP_index.loc[WISP_index['Cluster'].isna(), 'cluster_description']='Without Data'

In [None]:
### non-spatial visualisation - Median Subindex Values of Clusters within Estes Weighted Index of Social Progress

## import icons and define lists

# icon image of each subindex, hosted in the project's GitHub repository
icon_base_url='https://github.com/Mickymick23/GEOG5003M/raw/refs/heads/main/images'
# file names are listed explicitly because several do not match their subindex key
icon_files={
    'education': 'education-icon.png',
    'health': 'health-icon.png',
    'women_status': 'women-status-icon.png',
    'defence_effort': 'defense-effort-icon.png',
    'economic': 'economics-icon.png',
    'demography': 'demography-icon.png',
    'environmental': 'environment-icon.png',
    'social_chaos': 'social-chaos-icon.png',
    'cultural_cohesion': 'cultural-diversity-icon.png',
    'welfare_effort': 'welfare-effort-icon.png'
}
# subindex name -> full icon URL
icon_dict={
    subindex: f'{icon_base_url}/{filename}'
    for subindex, filename in icon_files.items()
}

# bar colour of each subindex, chosen to match its icon
subindex_colors=dict(
    education='#364A99',          # dark blue
    health='#DB2525',             # red
    women_status='#EB8DC6',       # pink
    defence_effort='#000000',     # black
    economic='#F5C602',           # yellow
    demography='#F7BB6D',         # peach
    environmental='#A2DE9B',      # light green
    social_chaos='#FF7F00',       # orange
    cultural_cohesion='#9B50A6',  # purple
    welfare_effort='#B7DAED'      # light blue
)

## exploration and pre-processing

# identify smallest three outlier values across all subindexes
# (printed explicitly: a bare expression in the middle of a cell is not displayed)
print(WISP_index_melted['Median Subindex'].nsmallest(3))

# merge cluster names from main dataframe to the melted dataframe;
# the names are de-duplicated first so the merge adds no duplicate rows
cluster_names=WISP_index[['Cluster', 'cluster_description']].drop_duplicates()
WISP_index_melted_merged=WISP_index_melted.merge(
    cluster_names,
    on='Cluster',
    how='left'
)

# shift data values up by 10 so bars start from a common baseline, then clip at 0:
# any median below -10 (the extreme outlier) is truncated to the baseline
WISP_index_melted_merged['Median Clipped']=(WISP_index_melted_merged['Median Subindex'] + 10).clip(lower=0)

## plotting

# create a categorical plot: one bar chart per cluster, one bar per subindex
g=sns.catplot(
    # add data
    data=WISP_index_melted_merged,
    # create subplots for each cluster
    col='cluster_description',
    # organise subplots into two rows (or three columns)
    col_wrap=3,
    # adjust the width/height ratio
    aspect=0.75,
    # assign x and y variables
    x='Subindex Name',
    y='Median Clipped',
    # make it a bar chart
    kind='bar',
    # colour by subindex
    hue='Subindex Name',
    # assign colours based on WISP subindex icons
    palette=subindex_colors,
    # include legend to identify subindex
    legend=True
)

# organise main text elements:
# add a title (g.figure is the public handle of the underlying matplotlib figure)
g.figure.suptitle(
    # define title
    'Median Subindex Values of Clusters within Estes Weighted Index of Social Progress',
    # adjust font size
    fontsize=14,
    # move slightly higher
    y=1.02,
    # set bold text
    fontweight='bold'
)
# remove x axis label
g.set_xlabels('')
# remove repeated y axis labels
for ax in g.axes.flat:
    ax.set_ylabel('')
# add singular y axis label to avoid clutter
g.figure.text(
    # set x and y
    -0.01, 0.4,
    # set name
    'Median Subindex Value',
    # rotate 90 degrees
    rotation='vertical',
    # adjust size
    fontsize=11)

# adjust legend:
# use the grid's public legend property rather than the private _legend attribute
legend=g.legend
# move legend to right hand side of plot
legend.set_bbox_to_anchor((1.194, 0.5))
# assign legend title
legend.set_title('Subindex Name')
# make title bold
legend.get_title().set_fontweight('bold')
# remove underscores and capitalise subindexes in legend
for txt in legend.texts:
    txt.set_text(txt.get_text().replace('_', ' ').title())

########## ChatGPT 4.1 ##########
# The following prompt was used in ChatGPT to make it so that tick values could be adjusted manually:
# “How do I adjust the tick values manually?"
# An example was given which was adapted into the following code.

# adjust ticks:
# the plotted values were shifted up by 10, so every subplot's y tick labels are shifted back down
for ax in g.axes.flat:
    # current tick positions and the labels they should display
    tick_positions=ax.get_yticks()
    shifted_labels=[f'{position - 10:.1f}' for position in tick_positions]
    # lock the positions, then attach the shifted labels to them
    ax.yaxis.set_major_locator(FixedLocator(tick_positions))
    ax.yaxis.set_major_formatter(FixedFormatter(shifted_labels))
    # remove x axis ticks and labels
    ax.tick_params(bottom=False, labelbottom=False)
    # keep only the cluster name in the subplot title
    facet_title=ax.get_title()
    ax.set_title(facet_title.replace('cluster_description = ', ''))

########## ChatGPT 4.1 ##########
# The following prompt was used in ChatGPT with regards to handling the icons:
# “How do I replace bar chart x-axis labels with custom labels for each bar and add icons next to the legend labels?"
# Examples were given for each task which were adapted significantly into the following code.

# download every icon once and keep it as a NumPy array:
# each icon is reused in all subplots and next to the legend, so fetching it a single time
# avoids repeated requests; the 'with' block closes each HTTP response after reading
icon_images={}
for subindex, icon_url in icon_dict.items():
    with urllib.request.urlopen(icon_url) as response:
        icon_images[subindex]=np.array(Image.open(response))

# add icons to replace x axis labels:
# loop through each subplot
for ax in g.axes.flat:
    # get cluster name of the subplot
    # (not stored as 'cluster', which would overwrite the imported sklearn module)
    cluster_name=ax.get_title()
    # filter data relevant to specific cluster
    cluster_data=WISP_index_melted_merged[WISP_index_melted_merged['cluster_description']==cluster_name]
    # match bars with rows of data so that icons can be placed
    for bar, (_, row) in zip(ax.patches, cluster_data.iterrows()):
        # retrieve icon via its subindex name; skip bars without an icon
        icon=icon_images.get(row['Subindex Name'])
        if icon is None:
            continue
        # horizontal centre of the bar
        x=bar.get_x() + bar.get_width() / 2
        ab=AnnotationBbox(
            # adjust size
            OffsetImage(icon, zoom=0.24),
            # assign location
            (x, 0.025),
            # interpret x as data (the subindex) and y as a fractional distance from axis
            xycoords=('data', 'axes fraction'),
            # remove border
            frameon=False
        )
        # add image to subplot
        ax.add_artist(ab)

# add icons next to legend for reference:
# loop through the list of subindexes, whose order the legend offsets below rely on
for i, key in enumerate(subindex_list):
    # skip subindexes without an icon
    icon=icon_images.get(key)
    if icon is None:
        continue
    ab=AnnotationBbox(
        # adjust size
        OffsetImage(icon, zoom=0.2),
        # give coords relative to the ith legend label
        (1.053, 0.585 - i * 0.021),
        # locate relative to entire figure
        xycoords='figure fraction',
        # remove frame
        frameon=False
    )
    # add image next to the legend
    g.figure.add_artist(ab)

# add footnote marker:
# loop through each subplot
for ax in g.axes.flat:
    # only the 'Hard Security States' subplot contains the truncated bar
    if ax.get_title()=='Hard Security States':
        # add marker
        ax.text(
            # set x, y (data coordinates) and the marker with its size, colour and weight
            2.85, 2,
            '!',
            fontsize=18,
            color='red',
            fontweight='bold'
        )

# add footnote and footnote reference marker:
# footnote reference marker - set x, y, marker, size, weight, colour
g.figure.text(
    1.02, 0.35,
    '!',
    fontsize=12,
    fontweight='bold',
    color='red'
)
# footnote text - set x, y, text, size, colour
g.figure.text(
    1.03, 0.293,
    'Note: The "Hard Security States" \ncluster contains an extreme outlier of \n-59.6 for '
    '"Defense Effort". It has been \ntruncated to -10 to maintain clarity \nof the plot.',
    fontsize=9,
    color='grey'
)

# make sure elements do not overlap
plt.tight_layout()

# save plot; bbox_inches='tight' expands the saved area to include the legend, icons and
# footnote, which sit beyond the right edge of the figure and would otherwise be cropped
g.figure.savefig('wisp_non_spatial_visualisation.png', bbox_inches='tight')

# show the plot
plt.show()

In [None]:
### spatial visualisation - Distribution of Clusters within Estes Weighted Index of Social Progress

## define lists

# colourblind-friendly colours for the map, one per cluster description
map_colours=[
    '#F0E442',  # yellow
    '#CC79A7',  # reddish purple
    '#009E73',  # bluish green
    '#E69F00',  # orange
    '#D55E00',  # vermillion
    '#0072B2',  # blue
    '#999999'   # grey
]

## pre-processing

# remove the 'iso_3166_1' and 'Cluster' columns, which are not needed on the map
unused_columns=['iso_3166_1', 'Cluster']
WISP_index = WISP_index.drop(columns=unused_columns).copy()

# rename columns for neatness, except geometry which is used for mapping:
def col_rename(col):
    """Return the display name of a dataframe column.

    'geometry' is returned unchanged, 'wisp' becomes the acronym 'WISP', and
    any other name has its underscores replaced by spaces and each word
    capitalised (e.g. 'women_status' -> 'Women Status').
    """
    # names that must not go through the generic title-casing
    special_names = {'geometry': 'geometry', 'wisp': 'WISP'}
    if col in special_names:
        return special_names[col]
    return col.replace('_', ' ').title()
# rename every column of the dataframe using the rename function
WISP_index = WISP_index.rename(columns=col_rename)

# define columns to use on the popups (everything except the geometry)
popup_columns=[col for col in WISP_index.columns if col !='geometry']

## plotting

# build the interactive map
# (named cluster_map so that Python's builtin map() is not overwritten)
cluster_map=WISP_index.explore(
    # colour by cluster description
    column='Cluster Description',
    # treat as categorical for colouring
    categorical=True,
    # use predefined colourblind palette
    cmap=map_colours,
    # adjust base map tiles
    tiles='CartoDB Positron',
    # show relevant columns
    popup=popup_columns,
    # adjust zoom so that repeated countries won't appear
    zoom_start=3
)

# save to html file
cluster_map.save('spatial_visualisation.html')

# open the map
cluster_map

# Note: the interactive map would be embedded elsewhere, and the following title and contextual introduction would be added via html
# Title - Distribution of Clusters within Estes Weighted Index of Social Progress
# Introduction - The following markdown cell contains the spatial distribution of defined clusters. Hover over or click on a country to see its different subindex values

#GEOG5003M - Project

##Introduction

Modern measures of development such as the Human Development Index (HDI) use national averages like life expectancy, GDP per capita and education years. These indicators often obscure the individual socioeconomic experience, with national averages failing to capture inequalities within countries. Before the HDI grew in popularity, in the 1970s Richard J Estes established the Weighted Index of Social Progress (WISP) (MIQOLS, 2020), focusing on individual experience rather than national figures.

This project aims to determine whether Estes’  WISP meaningfully represents human experience by creating clusters beyond the standard ‘developed, developing and underdeveloped’ categorisation, helping answer the following research question:

Is Estes’ Weighted Index of Social Progress an Effective Indicator of Human Experience?

##Intended Audience

The intended audience for a human development project is vast, with academics, the public, policy makers and business stakeholders all having use cases - a more nuanced understanding of human development could support decisions across many fields. For example, NGOs focused on health or education may wish to know the areas where significant social deficiencies lie, despite the economic success of a country. Microfinance organisations may want to identify finance-poor areas that are not conflict-ridden. Potential migrants hoping to start a new life may wish to consult the index to check whether a country's economic advancement is matched by social wellbeing, since an economically advanced society may still be socially regressive.

The first visualisation shows how each cluster is formed. The second shows global patterns spatially, revealing trends in how countries group together based on social indicators rather than economic averages.

##Justification of Investigation, Modelling and Interpretation

##Data Sourcing and Pre-Processing

WISP and country boundary data were sourced from open-source locations to ensure reproducibility. Initial exploration revealed several key data issues that could impact the validity and reliability of the analysis. These were addressed using standardisation (common format) and normalisation (minimised redundancy) techniques (DataCalculus, 2025).

Column headers were generally uniform - lowercase, with underscores - except for one, which was renamed. Redundant columns were removed. Mismatched country codes were identified. Non-country entries like "North America" or "Developed Countries" were removed from the data. Namibia’s code "NA" had been converted to a null and was reverted to string form.

Generally, territories and microstates lacked WISP data, but were retained in the spatial dataset and later greyed out on the map for completeness. Duplicate country codes, like Portugal and the Azores sharing ‘PT’, were resolved by changing the code of the Azores to ‘PT (Azores Islands)’. The cleaned data was merged using country codes, and the CRS was adjusted for global fit.

##Cluster Creation – Exploratory Analysis and Modelling

So that relationships among WISP subindices can be assessed, Spearman’s rank correlation was used—preferable to Pearson’s due to its robustness against outliers seen in the data (Analytics Yogi, 2025). The spatial distribution of each subindex was mapped globally for initial insight.

In order to cluster the data, the Elbow Method was first used to identify the point at which increasing cluster numbers yielded diminishing returns. As expected, a distinct "elbow" suggested a three-cluster solution, echoing traditional development categories. However, this project’s goal was to go beyond such simplifications.

Kodinariya and Makwana (2013) recommend combining the Elbow Method with the Silhouette Score to find a more reliable cluster count. The Silhouette Score, which balances intra-cluster cohesion with inter-cluster separation, revealed a plateau after the first drop from three clusters, with another drop after six. This led to the selection of six clusters.

Murphy (2012) proposes that a K-Means cluster model allows for unsupervised machine learning, i.e. without needing predefined labels: it’s the clusters that will actually later define the labels. A K-Means model was then processed with six clusters.

##Cluster Categorisation – Exploratory Analysis and Interpretation

A pair-plot of subindices revealed clear trends. One small cluster stood out, made up of just Libya and Eritrea. Whilst this might suggest over-clustering, these countries’ outlier - military spending - is extremely high, justifying their separation. Eritrea, for example, spent 35.46% of its GDP on military expenditure in 2020 (countryeconomy.com, 2025), compared to the global average of 2.4% (SIPRI, 2021). A unique case like this warrants its own category regarding authoritarian governance and militarisation.

Other clusters also revealed certain patterns. One grouped similar war-torn nations like Afghanistan, Iraq, Somalia, Sudan, and Syria. Unexpectedly, Brazil appeared in the most advanced cluster, while China was clustered with less developed countries - despite both being developing BRICS economies. This distinction illustrates the value of WISP - while China outperforms economically, Brazil outperforms socially, particularly on women’s rights, military presence and environmental progress. It highlights how development trajectories differ even among nations of similar economic status. While China’s growth is likely state-led and based on infrastructure investment, Brazil’s likely stems more from civil rights and environmental initiatives.

These results clearly support the idea that WISP captures more complex and meaningful social patterns that GDP-based indices miss. Cluster labels were assigned based on their median subindex values.

##Non-Spatial Visualisation

The first visualisation is a faceted bar chart showing subindex medians by cluster. It helps to understand each group’s characteristics and interpret the spatial map.

Pre-processing involved identifying the most significant outliers: -59.6 and -8.35. The extreme value distorted scale and was truncated with a footnote for user context. The remaining data was adjusted by adding 10 to all values, allowing bars to sit along the x-axis. The y-axis was extended below zero, justified by the fact that all data were z-score standardised and therefore only meaningful in comparative terms.

The number of bars meant that colouring them with the colourblind palette became an issue due to similar shades. To circumvent this, other visual cues could be used, like icons (Medium, 2021). This approach improved clarity for colourblind users while enhancing general visual comprehension. Since they weren’t relying solely on shades, users could match icons and colours, avoiding the need to frequently cross-reference a legend.

##Spatial Visualisation

The second visualisation is an interactive, colour-coded world map showing each country’s cluster. A colourblind palette from Wong (Nichols, 2025) was used for accessibility and clear distinction among all six clusters. Using a colourblind-friendly palette also helped differentiate this visualisation from the bar chart avoiding colour duplication.

Interactive elements such as pop-ups provide specific country subindex data for greater context. Due to minor misalignments between shapefile and basemap borders, a low-contrast basemap was used to reduce visual inconsistencies, though they are not significant.

Although a static map was considered for clarity and text elements, the interactive version added more value by allowing users to explore subindex scores. Instead, contextual notes and a legend were added in the command cell.

##Conclusion

This project has seen the full data science process (Gupta, 2022) – collecting high-quality, open-access historical data; cleaning it to ensure common format and minimised redundancy for analysis; performing exploratory data visualisation and correlation analysis and adopting multiple techniques to determine cluster numbers; modelling using a K-Means clustering algorithm; and creating visualisations to support interpretation and deployment of data.
All outputs adhered to the FAIR data principles (OpenAIRE, 2025) via the use of consistent comments, links, citations and references, and well-structured code. The code could be reused to explore WISP data across decades, enabling temporal studies of social development offering insights into whether development is increasing, decreasing, or staying the same, or whether it is sustained or cyclical.

Although WISP is less popular today, overshadowed by GDP-focused indices like HDI, its long history and emphasis on social progress give it a distinct use case. It offers a deeper understanding of human development, capturing dimensions that GDP metrics miss - such as governance, freedom, and social equality.

To answer the research question: Is WISP an effective indicator of human experience? This project suggests it is. The clusters reveal clear social patterns often hidden by economic measures, and visualisations confirm WISP’s ability to emphasise nuanced social differences. Its continued use could help provide better-informed decisions across policy, migration, finance, and academic research.

While no single index can perfectly grasp human development, WISP offers a rare perspective by valuing social equality, governance, and cohesion as much as economic metrics. It emphasises lived experience in a world where metrics are defined by averages and totals.

(1354 words)

##References

Analytics Yogi. 2025. Pearson vs Spearman: Choosing the Right Correlation Coefficient. [Online]. [Accessed 30 July 2025]. Available from: https://vitalflux.com/pearson-vs-spearman-choosing-the-right-correlation-coefficient/

countryeconomy.com. 2025. Eritrea - General government expenditure. [Online]. [Accessed 30 July 2025]. Available from: https://countryeconomy.com/government/expenditure/eritrea

DataCalculus. 2025. Best Practices for Data Merging in Business Intelligence and Data Analytics. [Online]. [Accessed 30 July 2025]. Available from: https://datacalculus.com/en/knowledge-hub/data-analytics/data-cleaning-and-preprocessing/best-practices-for-data-merging/

Gupta, S. 2022. Data Science Process: A Beginner’s Guide in Plain English. [Online]. [Accessed 30 July 2025]. Available from: https://www.springboard.com/blog/data-science/data-science-process/

Kodinariya, T. M., & Makwana, P. R.  2013. Review on determining the number of clusters in K-means clustering. International Journal of Advance Research in Computer Science and Management Studies [Online], 1(6), 90–95. [Accessed 30 July 2025]. Available from: https://www.researchgate.net/publication/313554124_Review_on_Determining_of_Cluster_in_K-means_Clustering

Medium. 2021. Seven guiding principles for accessible data visualizations. [Online]. [Accessed 30 July 2025]. Available from: https://medium.com/%40joelbethell/7-principles-for-accessible-data-visualizations-bf0b1dbd054e

MIQOLS. 2020. Estes Weighted Index of Social Progress. MIQOLS. [Online]. [Accessed 20 July 2025]. Available from: https://www.miqols.org/toolbox/isp.html

Murphy, K. P.  2012. Machine Learning: A Probabilistic Perspective. MIT Press.

Nichols, D. 2025. Coloring for Colorblindness. [Online]. [Accessed 30 July 2025]. Available from: https://davidmathlogic.com/colorblind/#%23648FFF-%23785EF0-%23DC267F-%23FE6100-%23FFB000

OpenAIRE. 2025. How to make your data FAIR. [Online]. [Accessed 30 July 2025]. Available from: https://www.openaire.eu/how-to-make-your-data-fair

opendatasoft. 2024. World Administrative Boundaries - Countries and Territories. World Food Programme (UN agency). [Online]. [Accessed 20 July 2025]. Available from: https://public.opendatasoft.com/explore/dataset/world-administrative-boundaries/information/

SIPRI. 2021. SIPRI Yearbook 2021 8. Military expenditure. [Online]. [Accessed 30 July 2025]. Available from: https://www.sipri.org/yearbook/2021/08

#Acknowledgements
I acknowledge the use of ChatGPT-4.1 (OpenAI, https://chat.openai.com/) to support the development of code, specifically to learn how to adjust tick labels and place icons.