In [76]:
import cStringIO, csv, json, numpy, pandas, urllib2, urlparse

In [77]:
def read_csv(url):
    """Download a CSV document and return it as a pandas DataFrame of strings.

    A bare Google Sheets URL (host ``docs.google.com``, no query string, last
    path segment not already ``export``) is rewritten into its CSV export URL
    first.  A trailing ``/edit`` segment is dropped, and a ``#gid=...``
    fragment is carried over as a query parameter, because a fragment is never
    sent to the server and anything appended after it would be ignored.

    Every cell is read as a string and empty cells stay ``''`` rather than
    becoming NaN (``dtype=str``, ``na_filter=False``).

    Raises whatever ``urllib2.urlopen`` or ``pandas.read_csv`` raise on
    network or parse failure.
    """
    # Local import so this cell does not depend on an edit to the import cell.
    import contextlib

    parsed = urlparse.urlparse(url)
    segments = parsed.path.rstrip('/').split('/')
    if parsed.netloc == 'docs.google.com' and segments[-1] != 'export' and not parsed.query:
        if segments[-1] == 'edit':
            # .../d/<id>/edit -> .../d/<id>
            segments.pop()
        export_query = 'format=csv'
        if parsed.fragment.startswith('gid='):
            # Keep the sheet selection that the browser URL carried in its fragment.
            export_query += '&' + parsed.fragment
        url = urlparse.urlunparse(
            (parsed.scheme, parsed.netloc, '/'.join(segments) + '/export', '', export_query, ''))

    # urllib2 responses are not context managers in Python 2; closing() makes
    # sure the connection is released even if parsing fails.
    with contextlib.closing(urllib2.urlopen(url)) as response:
        return pandas.read_csv(response, dtype=str, na_filter=False, encoding='utf-8')

### Read city name to lat/lon mapping from
https://docs.google.com/spreadsheets/d/1dhkpOdy6vP7UjcoxlX48okNapYr9qS7EuzNH6IB8cFA


In [78]:
city2latlon_url = 'https://docs.google.com/spreadsheets/d/1dhkpOdy6vP7UjcoxlX48okNapYr9qS7EuzNH6IB8cFA'
city2latlon = {}

for city_record in read_csv(city2latlon_url).values:
    (city, lat, lon) = city_record
    city = city.strip()
    city2latlon[city] = {'lat':float(lat), 'lon':float(lon)}
print 'Read %d city locations from %s' % (len(city2latlon), city2latlon_url)

assert 'Seattle' in city2latlon

Read 15 city locations from https://docs.google.com/spreadsheets/d/1dhkpOdy6vP7UjcoxlX48okNapYr9qS7EuzNH6IB8cFA


### Read country name to alpha3 mappings from

https://docs.google.com/spreadsheets/d/11Q9Gq-BNTElOsA4u5BdIcM8CWG-eWRT8c3I_8AQ03CM/edit#gid=0

In [79]:
countries_sheet_url = 'https://docs.google.com/spreadsheets/d/11Q9Gq-BNTElOsA4u5BdIcM8CWG-eWRT8c3I_8AQ03CM'

countries_sheet_data = read_csv(countries_sheet_url)

print 'Read %d entities from country sheet %s' % (len(countries_sheet_data), countries_sheet_url)

country_aliases = {}

for place in countries_sheet_data.values:
    alpha3 = place[1].strip()
    for alias in place[1:]:
        if len(alias):
            country_aliases[alias.lower().strip()] = alpha3

# Returns None if name isn't in the country list
# Returns '' if name is in the list but there's no code (typically a named region)
def map_country_to_alpha3(name):
    """Look up the alpha3 code registered for a country name or alias.

    The lookup is case-insensitive: ``name`` is lower-cased before it is
    looked up in ``country_aliases``.  A ``name`` that has no ``lower()``
    method (for example ``None``) yields ``None``, as before.

    Unlike a bare ``except:``, this does not hide unrelated failures: if
    ``country_aliases`` has not been built yet, the ``NameError`` propagates
    instead of silently turning every lookup into ``None``.
    """
    try:
        key = name.lower()
    except AttributeError:
        # Not a string-like value; treat it as "not in the list".
        return None
    return country_aliases.get(key)

# Spot-check the alias table: a lower-case two-letter alias and a non-ASCII name.
for sample_name, expected_alpha3 in [('af', 'AFG'), (u'Åland Islands', 'ALA')]:
    assert map_country_to_alpha3(sample_name) == expected_alpha3

Read 335 entities from country sheet https://docs.google.com/spreadsheets/d/11Q9Gq-BNTElOsA4u5BdIcM8CWG-eWRT8c3I_8AQ03CM


### Read the CSV file to translate from
http://docs.google.com/spreadsheet/pub?key=pyj6tScZqmEcjeKHnZq6RIg&output=csv

In [80]:
def translate_placename(name):
    """Translate a place name into the key used in the output CSV.

    Countries take priority: if ``map_country_to_alpha3(name)`` returns
    anything other than ``None`` (an alpha3 code, or ``''`` for a place that
    should be omitted from the output), that value is returned unchanged.
    Otherwise a name found in ``city2latlon`` is rendered as ``'lat|lon'``
    with ten decimal places each.  Returns ``None`` when there is no
    translation.
    """
    country_code = map_country_to_alpha3(name)
    if country_code is not None:
        return country_code

    if name not in city2latlon:
        return None

    location = city2latlon[name]
    return '%.10f|%.10f' % (location['lat'], location['lon'])

    
# Spot-check both translation paths: country -> alpha3, city -> 'lat|lon'.
for sample_place, expected_translation in [
    ('France', 'FRA'),
    ('Seattle', '47.6062090000|-122.3320710000'),
]:
    assert translate_placename(sample_place) == expected_translation

In [81]:
def translate_placenames(url):
    """Fetch a CSV and return it as CSV text with its first column translated.

    The header row is copied through unchanged.  For each data row, the
    first cell is stripped and passed to ``translate_placename``; the row is
    written with that translation in place of the first cell.  Rows whose
    translation is ``None`` (no mapping) or ``''`` (deliberately omitted) are
    dropped.

    Returns the resulting CSV as a byte string; non-ASCII text is UTF-8
    encoded.
    """
    def encode_row(cells):
        # Python 2's csv module cannot write non-ASCII unicode objects, so
        # encode them to UTF-8 byte strings before handing them to the writer.
        return [cell.encode('utf-8') if isinstance(cell, unicode) else cell
                for cell in cells]

    out = cStringIO.StringIO()
    writer = csv.writer(out)

    data = read_csv(url)

    writer.writerow(encode_row(data.columns.values))

    for place in data.values:
        translated = translate_placename(place[0].strip())

        # None: the place has no mapping.  '': the place is to be ignored.
        if translated is None or translated == '':
            continue

        writer.writerow(encode_row([translated] + list(place)[1:]))

    return out.getvalue()
            
print translate_placenames('http://docs.google.com/spreadsheet/pub?key=pyj6tScZqmEcjeKHnZq6RIg&output=csv')
# print translate_placenames('http://docs.google.com/spreadsheet/pub?key=phAwcNAVuyj1NHPC9MyZ9SQ&output=csv')
# translate_placenames('http://docs.google.com/spreadsheet/pub?key=phAwcNAVuyj0XOoBL_n5tAQ&output=csv')
# translate_placenames('http://docs.google.com/spreadsheet/pub?key=0ArfEDsV3bBwCdERNZmlfUGM5YVE3bmEwODdlRDFqSkE&output=csv')
# translate_placenames('http://data.cmucreatelab.org/earthtime/LuxembourgIncomeStudy/Gini_Coefficient.csv')
# translate_placenames('https://data.cmucreatelab.org/earthtime/IRENA/Solar.Electricity_capacity_MW.csv')
# translate_placenames('https://data.cmucreatelab.org/earthtime/IRENA/Wind.Electricity_capacity_MW.csv')
# translate_placenames('http://docs-proxy.cmucreatelab.org/spreadsheets/d/1RnCeiV0MnvGncVCoNT0LO_XXAt7Z1E60nOw9-hjTdWo/export?format=csv&id=1RnCeiV0MnvGncVCoNT0LO_XXAt7Z1E60nOw9-hjTdWo&gid=1043704810')

GINI index,1978,1979,1980,1981,1982,1983,1984,1985,1986,1987,1988,1989,1990,1991,1992,1993,1994,1995,1996,1997,1998,1999,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010
ABK,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
AFG,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,27.82,,
ALB,,,,,,,,,,,,,,,,,,,,29.12,,,,,28.15,,31.09,33.03,,,34.51,,
DZA,,,,,,,,,,,40.19,,,,,,,35.33,,,,,,,,,,,,,,,
ASM,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
AND,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
AGO,,,,,,,,,,,,,,,,,,,,,,,58.64,,,,,,,,,,
AIA,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
ATG,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
ARG,,,,,,,,,42.79,45.28,,,,46.61,45.48,44.89,45.96,48.91,49.52,49.11,50.74,49.81,51.11,53.36,53.79,54.72,50.18,49.28,47.72,47.37,46.26,46.13,44.49
ARM,,,,,,,,,,,,,,,,,,,44.42,,,36.01,,36.22,35.66,33.8,37.79,36.16,32.84,30.23,30.86,,
ABW,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
AUS,,,,,,,,,,,,,,,,,35.19,,,,,,,,,,,,,,,,
AUT,,,,,,,,,,,,,,,,,,,,,,,29.15,,,,,,,,,,
AZE,,,,,,,,,,,,,,,,,,34.96,,,,,,36.5,,,,,,,33.71,,
BHS,,,,,,,,,,,,,,,,,,,,,,