In [None]:
# Data taken and modified from http://databank.worldbank.org/data/home.aspx
# country codes from https://github.com/datasets/country-codes/blob/master/data/country-codes.csv
# svg from https://en.wikipedia.org/wiki/File:BlankMap-World6-Equirectangular.svg

In [None]:
import numpy as np
import csv

# Read the lookup from country name to lower-cased ISO 3166-1 alpha-2 code.
# newline='' is what the csv module expects for files handed to its readers.
with open('country_codes.csv', newline='') as csv_file:
    reader = csv.DictReader(csv_file)
    country_codes = {line['name']: line['ISO3166-1-Alpha-2'].lower() for line in reader}

# Read the raw data rows, keeping only those whose third column is a known
# country name.
clist = []
with open('data.csv', newline='') as csv_file:
    reader = csv.reader(csv_file)
    next(reader, None)  # skip the file's own header row (tolerates an empty file)
    for line in reader:
        # csv.reader yields [] for blank lines; guard before indexing column 2.
        if len(line) > 2 and line[2] in country_codes:
            clist.append(line)

# Positions (within each assembled country entry) that are written out, and
# the column titles used for them in the output file.
cols = [0, 1, 4, 12, 14, 15, 16, 19, 21]
header = ['Country Name', 'Country Code', 'GDP per capita (US$)', 'Internet users (per 100 people)', 'Life expectancy at birth (years)', 'Adult literacy rate (age 15+, %)', 'Unemployment (%)', 'CO2 emissions (tons per capita)', 'Population (Million)']
# Entry position that is rescaled before writing (labelled 'Population (Million)').
population_col = 21

# Organize data: one entry per country code, laid out as
# [name, code, value_0, value_1, ...] where the values come from line[4:].
country_dict = {}
for line in clist:
    cc = country_codes[line[2]]
    if cc not in country_dict:
        try:
            # First row for this country: empty cells become NaN.
            country_dict[cc] = [line[2], cc] + [float(x) if x != '' else np.nan for x in line[4:]]
        except ValueError:
            # A non-numeric cell: show the offending values and stop
            # processing the remaining rows.
            print(line[4:])
            break
        continue
    # Further rows for an already known country overwrite the stored name and
    # every value for which this row has a non-empty cell.
    entry = country_dict[cc]
    entry[0] = line[2]
    for position, cell in enumerate(line[4:], start=2):
        if cell != '':
            entry[position] = float(cell)

# newline='' keeps csv.writer from emitting blank lines between rows on
# platforms that translate '\n' on write.
with open('world_data.csv', 'w', newline='') as outfile:
    writer = csv.writer(outfile)
    writer.writerow(header)
    for entry in country_dict.values():
        # pre-scale population!
        entry[population_col] /= 1000000.0
        writer.writerow([entry[i] for i in cols])


In [None]:
from IPython.display import SVG, display_svg
from xml.etree import cElementTree as ET

def create_colored_map(color_mapping, scaling=0.7, display=True):
    """
    Takes a color mapping to create a world map with the specified
    colors.
    For example:

    mapping = {'de': [1, 0, 0],
               'fr': [0, 1, 0]}
    create_colored_map(mapping)

    will create a worldmap and display it with Germany ('de')
    colored red and France ('fr') colored green.
    Colors need to be iterables containing R G B values ranging
    from 0 (dark) to 1 (bright). Values outside that range are
    clamped to it so that the generated CSS color stays valid.

    The map template is read from the file 'map.svg' in the working
    directory. The template used for the map can be found here:
    https://en.wikipedia.org/wiki/File:BlankMap-World6-Equirectangular.svg

    Args:
        color_mapping   A color mapping between country codes and colors.
                        Country codes are lower-cased and used as CSS
                        class selectors.
        scaling         Scales the map's width and height by this factor.
        display         If True, the image is displayed, if False, it is
                        returned.
    Returns:
        The svg image if display=False. Else nothing.
    """
    svg_namespace = 'http://www.w3.org/2000/svg'

    def color_css(color_mapping):
        # One CSS rule per country: ".<code> {fill: #rrggbb !important;}".
        tmpl = '.{0} {{fill: #{1:0>2x}{2:0>2x}{3:0>2x} !important;}}'
        # Clamp to [0, 1] first: anything else would format as more than two
        # hex digits (or with a minus sign) and yield an invalid color.
        scale = lambda x : [int(255 * min(max(i, 0.0), 1.0)) for i in x]
        return '\n' + '\n'.join([tmpl.format(country.lower(), *scale(color)) for country, color in color_mapping.items()])

    tree = ET.parse('map.svg')
    root = tree.getroot()
    style_element = root.find('{%s}style' % svg_namespace)
    if style_element is None:
        # Template without a top-level <style>: add one to hold the rules.
        style_element = ET.Element('{%s}style' % svg_namespace)
        root.insert(0, style_element)
    # An empty <style/> has text None; treat it as an empty stylesheet.
    style_element.text = (style_element.text or '') + color_css(color_mapping)

    root.attrib['height'] = str(float(root.attrib['height']) * scaling)
    root.attrib['width'] = str(float(root.attrib['width']) * scaling)
    # ElementTree serializes the SVG namespace with an 'ns0:' prefix; strip it
    # from the markup before handing it to IPython.
    svg = SVG(data=ET.tostring(root).decode('UTF-8').replace('ns0:',''))
    if display:
        display_svg(svg)
    else:
        return svg


# Example for the colored map. (This causes problems with the coloring later on...)
# mapping = {'de': [1, 0, 0], 
#            'fr': [0, 1, 0], 
#            'us': [0, 0, 1]}
# create_colored_map(mapping, scaling=0.4)

In [None]:
import sys
import numpy as np
from scipy.spatial.distance import cdist
import matplotlib.pyplot as plt

class SelfOrganizingMap:
    """
    A two-dimensional self-organizing map.

    The map is a grid of node vectors (``self.nodes``, shape
    ``(rows, cols, n_features)``) that is initialised with uniform random
    values and then pulled towards randomly drawn samples of ``data``.

    Args:
        data        Array-like of samples; the last axis holds the features.
        map_size    (rows, cols) of the node grid.
        seed        Optional seed. If given, the map uses its own
                    ``np.random.RandomState(seed)`` so that initialisation
                    and training are reproducible. If None (default), NumPy's
                    global random state is used, as before.
    """

    def __init__(self, data, map_size=(20,20), seed=None):
        self.data = np.asarray(data)
        # The np.random module and RandomState share the rand/randint API.
        self._rng = np.random if seed is None else np.random.RandomState(seed)
        self.nodes = self._rng.rand(map_size[0], map_size[1], self.data.shape[-1])

    def theta(self, u, v, step, max_steps):
        """
        Neighbourhood weight between grid positions ``u`` and ``v``.

        A Gaussian of the Euclidean distance between ``u`` and ``v`` whose
        radius starts at the larger map side and shrinks exponentially with
        ``step``. ``u`` and ``v`` broadcast against each other along all but
        the last axis, so ``v`` may be a whole ``(rows, cols, 2)`` grid of
        coordinates, in which case a ``(rows, cols)`` array is returned.
        """
        max_shape = max(self.nodes.shape[0:2])
        # log(1) == 0 for a 1x1 map; an infinite time constant keeps the
        # radius fixed instead of dividing by zero.
        time_constant = max_steps / np.log(max_shape) if max_shape > 1 else np.inf
        r = max_shape * np.exp(-step / time_constant)
        distance = np.linalg.norm(np.atleast_1d(u - v), axis=-1)
        return np.exp(-distance ** 2 / (2 * r ** 2))

    def alpha(self, step, max_steps):
        """
        Learning rate for ``step``: starts at 0.1 and decays towards zero.

        ``step`` must be smaller than ``max_steps``; the expression divides
        by ``max_steps - step``.
        """
        return 0.1 * np.exp(-step / (max_steps - step))

    def organize(self, steps):
        """
        Train the map for ``steps`` iterations.

        Each iteration draws one random sample, finds its best matching node
        and moves every node towards the sample, weighted by the
        neighbourhood function and the learning rate.

        Raises:
            ValueError  If the map holds no data samples.
        """
        sample_count = len(self.data)
        if sample_count == 0:
            raise ValueError('cannot organize a map without data samples')

        rows, cols = self.nodes.shape[0:2]
        # grid[r, c] == [r, c]: the coordinates of every node, built once.
        grid = np.moveaxis(np.indices((rows, cols)), 0, -1)

        for step in range(steps):
            X = self.data[self._rng.randint(0, sample_count)]
            best_match = self.get_best_matching_index(X)
            # Per-node update strength; alpha depends only on the step, so it
            # is evaluated once per iteration rather than once per node.
            strength = self.theta(best_match, grid, step, steps) * self.alpha(step, steps)
            self.nodes += strength[..., np.newaxis] * (X - self.nodes)

    def get_best_matching_index(self, X):
        """
        Return the (row, col) index, as an array, of the node closest to the
        sample ``X`` according to ``cdist``'s default metric.
        """
        reshaped_nodes = self.nodes.reshape(-1, self.nodes.shape[-1])
        distances = cdist(X[np.newaxis], reshaped_nodes)
        best_matching_1D_index = np.argmin(distances)
        return np.array(np.unravel_index(best_matching_1D_index, self.nodes.shape[0:2]))

    def __getitem__(self, key):
        """Index directly into the node array, e.g. ``som[row, col]``."""
        return self.nodes[key]

    def plot(self):
        """
        Draw the first three components of every node vector as an image on
        the current matplotlib axes (presumably meant as RGB channels).
        """
        plt.imshow(self.nodes[:,:,0:3], interpolation='none')

In [None]:
%matplotlib inline
import csv
import itertools
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import Imputer as Imputer, scale as scale

# Seed NumPy's global random state so that map initialisation and sample
# selection give the same result on every "Restart & Run All".
RANDOM_SEED = 0
np.random.seed(RANDOM_SEED)

# Generate color data: every RGB combination of the levels 0, 0.5 and 1.
colors = np.array(list(itertools.product([0, 0.5, 1], repeat=3)))

# Read world data, skipping the header row and any blank rows.
with open('world_data.csv', 'r', newline='') as in_file:
    reader = csv.reader(in_file)
    next(reader, None)
    input_data = [row for row in reader if row]
# First two columns identify the country (name, code); the rest are numeric.
countries = np.array([[d[0], d[1]] for d in input_data], dtype=str)
country_data = np.array([d[2:] for d in input_data], dtype=float)
# Fill in missing values per column, then standardize each column.
# NOTE(review): `Imputer` was removed from sklearn.preprocessing in newer
# scikit-learn releases (replaced by sklearn.impute.SimpleImputer) - confirm
# the pinned version before re-running.
country_data = scale(Imputer(axis=0).fit_transform(country_data), axis=0)

map_size = (20, 20)
steps = 200

# Create color map.
som_colors = SelfOrganizingMap(colors, map_size)
som_colors.organize(steps)

# Create country map.
som_countries = SelfOrganizingMap(country_data, map_size)
som_countries.organize(steps)

# Show both trained maps side by side.
plt.figure('SOM')
plt.subplot(121).set_title('Colors')
som_colors.plot()
plt.subplot(122).set_title('Countries')
som_countries.plot()

In [None]:
# Give every country the color stored at the same grid position in the color
# map as the country's best matching node in the country map.
mapping = {}
for (_, code), features in zip(countries, country_data):
    row, col = som_countries.get_best_matching_index(features)
    mapping[code] = som_colors[row, col]

# Render the world map with the resulting colors.
create_colored_map(mapping)