In [1]:
import cStringIO, csv, json, numpy, pandas, re, urllib2, urlparse

In [2]:
def read_csv(url):
    """Fetch the CSV at ``url`` and return it as a pandas DataFrame of strings.

    A plain Google Sheets URL (host ``docs.google.com``, path not already
    ending in ``export``, no query string) is rewritten into a CSV export URL:
    the ``#fragment`` and a trailing ``/edit`` are removed and
    ``/export?format=csv`` is appended.  When the fragment selects a tab
    (``#gid=123``) the gid is carried over to the export URL so that tab is
    exported rather than the spreadsheet's first tab.  Any other URL is
    fetched unchanged.

    Every cell is read as text (``dtype=str``) and empty cells stay ``''``
    instead of becoming NaN (``na_filter=False``).
    """
    # If this is a raw google sheets URL, turn it into an export URL
    parsed = urlparse.urlparse(url)
    if parsed.netloc == 'docs.google.com' and parsed.path.split('/')[-1] != 'export' and not parsed.query:
        # Remember the tab selector before the fragment is thrown away.
        gid = re.search(r'(?:^|&)gid=(\d+)', parsed.fragment)
        url = re.sub(r'#.*$', '', url)
        url = re.sub(r'/edit$', '', url)
        url += '/export?format=csv'
        if gid:
            url += '&gid=' + gid.group(1)
    response = urllib2.urlopen(url)
    try:
        # read_csv consumes the whole response here (no chunking), so it is
        # safe to close the connection as soon as it returns.
        # `str` is the same object the deprecated alias `numpy.str` pointed to.
        return pandas.read_csv(response, dtype=str, na_filter=False, encoding='utf-8')
    finally:
        response.close()

### Read city name to lat lon mapping from
https://docs.google.com/spreadsheets/d/1dhkpOdy6vP7UjcoxlX48okNapYr9qS7EuzNH6IB8cFA


In [3]:
city2latlon_url = 'https://docs.google.com/spreadsheets/d/1dhkpOdy6vP7UjcoxlX48okNapYr9qS7EuzNH6IB8cFA'
city2latlon = {}

for city_record in read_csv(city2latlon_url).values:
    (city, lat, lon) = city_record
    city = city.strip()
    city2latlon[city] = {'lat':float(lat), 'lon':float(lon)}
print 'Read %d city locations from %s' % (len(city2latlon), city2latlon_url)

assert 'Seattle' in city2latlon

Read 15 city locations from https://docs.google.com/spreadsheets/d/1dhkpOdy6vP7UjcoxlX48okNapYr9qS7EuzNH6IB8cFA


### Read country name to alpha3 mappings from

https://docs.google.com/spreadsheets/d/11Q9Gq-BNTElOsA4u5BdIcM8CWG-eWRT8c3I_8AQ03CM/edit#gid=0

In [4]:
countries_sheet_url = 'https://docs.google.com/spreadsheets/d/11Q9Gq-BNTElOsA4u5BdIcM8CWG-eWRT8c3I_8AQ03CM'

countries_sheet_data = read_csv(countries_sheet_url)

print 'Read %d entities from country sheet %s' % (len(countries_sheet_data), countries_sheet_url)

country_aliases = {}

for place in countries_sheet_data.values:
    alpha3 = place[1].strip()
    for alias in place[1:]:
        if len(alias):
            country_aliases[alias.lower().strip()] = alpha3

# Returns None if name isn't in the country list
# Returns '' if name is in the list but there's no code (typically a named region)
def map_country_to_alpha3(name):
    """Look up the code registered in ``country_aliases`` for ``name``.

    The name is lower-cased and stripped of surrounding whitespace before the
    lookup, which is the same normalization applied to the keys of
    ``country_aliases``.

    Returns the stored code (possibly ``''``), or None when the name is not
    registered or is not a string.
    """
    try:
        return country_aliases[name.lower().strip()]
    except (KeyError, AttributeError):
        # KeyError: unknown name.  AttributeError: name has no .lower()
        # (e.g. None).  Anything else is a real error and must propagate.
        return None

# Smoke checks against the loaded sheet: a short lower-case alias and a
# non-ASCII (unicode) name must both resolve to their codes.
assert map_country_to_alpha3('af') == 'AFG'
assert map_country_to_alpha3(u'Åland Islands') == 'ALA'

Read 335 entities from country sheet https://docs.google.com/spreadsheets/d/11Q9Gq-BNTElOsA4u5BdIcM8CWG-eWRT8c3I_8AQ03CM


### Read CSV from file to translate
http://docs.google.com/spreadsheet/pub?key=pyj6tScZqmEcjeKHnZq6RIg&output=csv

In [5]:
# Returns {'name':name and optional 'lat':lat, 'lon':lon}
# Returns None if no translation found
# Returns {} if we should omit place from output
def lookup_placename(name):
    """Translate a place name into an output record.

    Countries take precedence over cities:
      * known country with a code   -> {'name': <code>}
      * known country without code  -> {} (caller should omit the place)
      * known city                  -> {'name': name, 'lat': ..., 'lon': ...}
      * anything else               -> None
    """
    code = map_country_to_alpha3(name)

    if code is None:
        # Not in the country list; fall back to the city table.
        if name not in city2latlon:
            return None
        location = city2latlon[name]
        return {'name': name, 'lat': location['lat'], 'lon': location['lon']}

    if code == '':
        # Listed, but there is no code for it.
        return {}

    return {'name': code}
    
# Smoke checks against the loaded sheets: a country resolves to its code only,
# a city keeps its name and gains coordinates.
assert lookup_placename('France') == {'name':'FRA'}
assert lookup_placename('Seattle') == {'name': 'Seattle', 'lat': 47.606209, 'lon': -122.332071}

In [8]:
def translate_csv(url):
    out = cStringIO.StringIO()
    writer = csv.writer(out)
    
    data = read_csv(url)
    # print 'Read %d rows from %s' % (len(data.values), url)

    header = list(data.columns.values)
    
    insert_latlon = not (header[1][0:3].lower() == 'lat' and header[2][0:3].lower() == 'lon')
    
    print 'insert_latlon is ', insert_latlon
    if insert_latlon:
        # Insert lat, lon columns
        header = header[0:1] + ['lat', 'lon'] + header[1:]
    
    writer.writerow(header)

    for row in data.values:
        row = list(row)
        placename = row[0].strip()
        
        if insert_latlon:
            row = row[0:1] + ['',''] + row[1:]

        # Only bother to look up placename if lat and lon are empty
        # Otherwise, trust lat and lon and export name as-is
        if row[1].strip() == '' and row[2].strip() == '':
            place = lookup_placename(placename)
        
            if place == None or place == {}:
                # No mapping, but send it through anyway in case we want to chart it
                place = {'name':placename}
        
            row[0] = place['name']
        
            if row[1].strip() == '' and row[2].strip() == '' and 'lat' in place:
                row[1] = place['lat']
                row[2] = place['lon']
            
        writer.writerow(row)
            
    return out.getvalue() 
  
# cities test    
# print translate_csv('https://docs.google.com/spreadsheets/d/1VK53HFXOENP435dwIQQvl7tgE0hPS10OYeuSPqqFmtw/edit#gid=1090282465')

# print translate_csv('http://docs.google.com/spreadsheet/pub?key=pyj6tScZqmEcjeKHnZq6RIg&output=csv')
# print translate_placenames('http://docs.google.com/spreadsheet/pub?key=phAwcNAVuyj1NHPC9MyZ9SQ&output=csv')
# translate_placenames('http://docs.google.com/spreadsheet/pub?key=phAwcNAVuyj0XOoBL_n5tAQ&output=csv')
# translate_placenames('http://docs.google.com/spreadsheet/pub?key=0ArfEDsV3bBwCdERNZmlfUGM5YVE3bmEwODdlRDFqSkE&output=csv')
# translate_placenames('http://data.cmucreatelab.org/earthtime/LuxembourgIncomeStudy/Gini_Coefficient.csv')
# translate_placenames('https://data.cmucreatelab.org/earthtime/IRENA/Solar.Electricity_capacity_MW.csv')
# translate_placenames('https://data.cmucreatelab.org/earthtime/IRENA/Wind.Electricity_capacity_MW.csv')
# translate_placenames('http://docs-proxy.cmucreatelab.org/spreadsheets/d/1RnCeiV0MnvGncVCoNT0LO_XXAt7Z1E60nOw9-hjTdWo/export?format=csv&id=1RnCeiV0MnvGncVCoNT0LO_XXAt7Z1E60nOw9-hjTdWo&gid=1043704810')

# translate_csv('https://docs.google.com/spreadsheets/d/1sropxj7sFcFfEXjcJvD0cfRY8UqEDz4r6rN_u210gyQ/edit#gid=217701898')

In [15]:
# translate_csv('https://docs.google.com/spreadsheets/d/1ETKhZHrnLIYE8DPpM5i6mO3nSpp5J4BvL14SZcN8-eM/edit')

insert_latlon is  False


"County Code,Latitude,Longitude,1953,1963,1964,1981,1982,1989,1990,1999,2000,2009,2010\r\nTaiwan,3.312572272,120.1059634,0,0,0,0,0,0,0,0,0,,\r\nZhuhai Xian,1.97623917,113.2944379,126146,126146,98324,98324,0,0,0,0,0,,\r\nNansha Qun Dao,0.936411491,112.3649771,0,0,0,0,0,0,0,0,0,,\r\nXisha Qun Dao,6.041370007,112.5277672,0,0,0,0,0,0,0,0,0,,\r\nZhongsha Qun Dao,5.226630309,117.7243281,0,0,0,0,0,0,0,0,0,,\r\nBeijing Shi,9.9119647152931),116.0223991,2768149,2768149,0,0,5258214,5258214,7362426,7362426,0,,\r\nTianjing Shi,9.001214558,117.3802569,2693831,2693831,0,0,0,0,0,0,0,,\r\nJingxing Xian,7.970807928,114.0495668,170866,170866,197642,197642,288596,288596,318298,318298,0,,\r\nZhending Xian,.14907343994985),114.5727979,301418,301418,0,0,0,0,0,0,0,,\r\nLuancheng Xian,7.852527768,114.6336796,183403,183403,180337,180337,277904,277904,330098,330098,0,,\r\nGaoyi Xian,7.565085536,114.6023434,79731,79731,99065,99065,134854,134854,164925,164925,0,,\r\nZanhuang Xian,7.584953708,114.2774472,97836,9783