# Get city locations from .svg file

* Take the original map image (e.g., USA_map.jpg)
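* Using a vector graphics package (e.g., Inkscape) add circular markers at each city location, placing all of them in a layer labelled "Cities" (the script only reads circles inside that layer)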
* Give each city marker a title using the format City:city_name (e.g., City:Chicago); the script reads the name from the marker's `<title>` element
* Save as .svg file
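* This script finds the locations of these circles in the .svg file and saves them to city_locations.json as a dictionary mapping each city name to its (x, y) position, expressed as fractions of the page width and height (y measured from the bottom of the page)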

In [1]:
# import modules, prepare path
import json
from pprint import pprint
import os

# The relative data path below only resolves when the notebook is run from the
# "bin" directory. Compare for equality: the previous `not in 'bin'` was a
# substring test, so a basename such as '', 'b' or 'in' passed the guard.
if os.path.basename(os.getcwd()) != 'bin':
    raise EnvironmentError('Need to ensure that cwd is "bin"')

# folder that holds the annotated map (.svg) and receives the output .json
data_dir = os.path.join('..', 'data', 'USA')

In [12]:
# find cities in .svg file
#
# The file is scanned line by line (it is not parsed as XML), so this relies
# on the .svg being written with one attribute per line.


def _quoted_value(line, key):
    """Return the text inside the double quotes that follow `key` in `line`.

    `key` includes the trailing '=' (e.g. 'cx='), so the value starts one
    character after it, just past the opening quote.

    Raises ValueError if the closing quote is not on the same line.
    """
    start = line.find(key) + len(key) + 1
    end = line.find('"', start)
    if end == -1:
        raise ValueError('no closing quote after %r in line: %r' % (key, line))
    return line[start:end]


in_cities = False  # True while reading lines that belong to the "Cities" layer
city_locations = {}  # city name -> (x, y), each a fraction of the page size

# Start from "no translation" and "no radius" so that a file without a
# transform or radius line does not leave these names undefined for the
# circle handling below and for the cell that prints them.
translate_x = 0.0
translate_y = 0.0
rx = 0.0
ry = 0.0
radius = (rx, ry)

# state of the circle currently being read
name = ''
cx = 0
cy = 0

# `with` closes the file even if one of the float() conversions fails
with open(os.path.join(data_dir, 'USA_map_with_cities.svg'), 'r') as f:

    for line in f:  # loop over lines in file

        if 'viewBox=' in line:
            # extract size of page from viewBox="min-x min-y width height";
            # the numbers may be separated by spaces and/or commas
            nums = _quoted_value(line, 'viewBox=').replace(',', ' ').split()
            # Keep width and height only. Splitting (rather than slicing up
            # to the next space) keeps the last character of the height,
            # which has no trailing space to search for.
            dims = [float(n) for n in nums[2:4]]

        elif 'label="Cities"' in line:
            # found a city block of text
            in_cities = True

        elif in_cities and '</g>' in line:
            # end of city block of text
            in_cities = False

        elif in_cities:
            if 'translate(' in line:
                # circles are shifted in x and y,
                # e.g. transform="translate(0,-61.094493)"
                i = line.find('translate(') + len('translate(')
                args = line[i:line.find(')', i)].replace(',', ' ').split()
                translate_x = float(args[0]) / dims[0]
                # SVG treats a missing second argument as zero
                translate_y = (float(args[1]) if len(args) > 1 else 0.0) / dims[1]

            if '<circle' in line:
                # found a circle block
                name = ''
                cx = 0
                cy = 0

            elif '</circle>' in line:
                # end of circle block
                radius = (rx, ry)
                # The translation is added to x. y has been flipped
                # (1 - cy/height) so that it grows upwards, which turns the
                # added translation into a subtraction.
                city_locations[name] = (cx + translate_x, cy - translate_y)

            elif 'cx=' in line:
                # x position, as a fraction of the page width
                cx = float(_quoted_value(line, 'cx=')) / dims[0]

            elif 'cy=' in line:
                # y position, as a fraction of the page height, measured
                # from the bottom (SVG measures y from the top)
                cy = 1 - float(_quoted_value(line, 'cy=')) / dims[1]

            elif 'City:' in line:
                # city name: the text between 'City:' and the closing title tag
                i = line.find('City:') + 5
                j = line.find('</title>')
                name = line[i:j]

            elif 'r=' in line:
                # circle radius, normalised separately by width and height
                r = float(_quoted_value(line, 'r='))
                rx = r / dims[0]
                ry = r / dims[1]

In [13]:
# write the city -> (x, y) dictionary to disk as sorted, indented JSON
out_path = os.path.join(data_dir, 'city_locations.json')
with open(out_path, 'w') as fp:
    fp.write(json.dumps(city_locations, sort_keys=True, indent=4))

In [14]:
# show the parsed locations, then the normalised radius and translation
pprint(city_locations)
for label, values in (('radius = ', (radius,)),
                      ('translate = ', (translate_x, translate_y))):
    print(label, *values)

{'Atlanta': (0.7805523828125, 0.3668687598828697),
 'Boston': (0.94517291015625, 0.7935577642752563),
 'Calgary': (0.234723779296875, 0.8714704143484626),
 'Charleston': (0.87193072265625, 0.3564106925329429),
 'Chicago': (0.68394244140625, 0.5964232693997071),
 'Dallas': (0.55454796875, 0.22097883308931188),
 'Denver': (0.390276162109375, 0.4510562122986823),
 'Duluth': (0.56361603515625, 0.6858397408491947),
 'El Paso': (0.377720390625, 0.18542142459736466),
 'Helena': (0.33272876953125, 0.6774732693997072),
 'Houston': (0.59535431640625, 0.1629365344070277),
 'Kansas City': (0.554896708984375, 0.47720138067349926),
 'Las Vegas': (0.20751951171875, 0.3354946310395316),
 'Little Rock': (0.623256083984375, 0.3443839136163982),
 'Los Angeles': (0.14474048828125, 0.25026140995607615),
 'Miami': (0.90366900390625, 0.1252875007320644),
 'Montreal': (0.875418486328125, 0.8777451727672035),
 'Nashville': (0.731026708984375, 0.418636168374817),
 'New Orleans': (0.68603509765625, 0.18019233235