In [1]:
import pandas as pd
import plotly.plotly as py
import plotly.tools as tls

In [4]:
df = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/2014_us_cities.csv')
print(df.shape)
df['pop']=df['pop']/1000
df.head()

(3228, 4)


Unnamed: 0,name,pop,lat,lon
0,New York,8287.238,40.730599,-73.986581
1,Los Angeles,3826.423,34.053717,-118.242727
2,Chicago,2705.627,41.875555,-87.624421
3,Houston,2129.784,29.758938,-95.367697
4,Philadelphia,1539.313,39.952335,-75.163789


In [22]:
## Create a new column called text that will read well in HTML
## Note that <br> in HTML will create a new line
## Convert the population to "in thousands" and as a string
## Cat all of this together
## This will be part of the pop-up text on the map generated
df['text'] = 'City: ' + df['name'] + '<br>Population: ' + (df['pop']).astype(str)+' thousand'
print(df.head())
#print(df.iloc[0:1,2:3])

            name       pop        lat         lon  \
0      New York   8287.238  40.730599  -73.986581   
1   Los Angeles   3826.423  34.053717 -118.242727   
2       Chicago   2705.627  41.875555  -87.624421   
3       Houston   2129.784  29.758938  -95.367697   
4  Philadelphia   1539.313  39.952335  -75.163789   

                                                text  
0   City: New York <br>Population: 8287.238 thousand  
1  City: Los Angeles <br>Population: 3826.423 tho...  
2    City: Chicago <br>Population: 2705.627 thousand  
3    City: Houston <br>Population: 2129.784 thousand  
4  City: Philadelphia <br>Population: 1539.313 th...  


In [26]:
## The dataset is already sorted by population size
## The limits and colors will be used as bubbles on the map
## and as part of the legend.
## The first element in limits is (0,2) which are the top two
## highest population cities. (3,10) are the next 7 highest
## population citites, etc. 
limits = [(0,2),(3,10),(11,50),(51,100),(101,3000)]
colors = ["rgb(100,116,217)","rgb(255,65,54)","rgb(255,133,27)","grey","lightgrey"]
cities = []
scale = 5

##This builds the list of cities[]
for i in range(len(limits)):
    lim = limits[i]
    df_sub = df[lim[0]:lim[1]]
    city = dict(
        type = 'scattergeo',
        locationmode = 'USA-states',
        lon = df_sub['lon'],
        lat = df_sub['lat'],
        text = df_sub['text'],
        marker = dict(
            size = df_sub['pop']/scale,
            color = colors[i],
            line = dict(width=0.5, color='rgb(40,40,40)'),
            sizemode = 'area'
        ),
        ##This is Python3 formatting
        ## RE: https://docs.python.org/3/library/string.html
        ## Here, {0} will be lim[0] and {1} will be lim[1]
        ##name = '{0} - {1}'.format(lim[0],lim[1])
        ## I am changing this to a better legend
        ## The iloc is an index location in the dataframe
        name="{0} - {1} thousand".format(int(df.iloc[lim[1],1]), int(df.iloc[lim[0],1]))
        #df[lim[0],'pop']/1e6).astype(str))
        )
    cities.append(city)

In [27]:
layout = dict(
        title = '2014 US city populations<br>(Click legend to toggle traces)<br>Yi Li',
        showlegend = True,
        geo = dict(
            scope='usa',
            projection=dict( type='albers usa' ),
            showland = True,
            landcolor = 'rgb(217, 217, 217)',
            subunitwidth=1,
            countrywidth=1,
            subunitcolor="rgb(255, 255, 255)",
            countrycolor="rgb(255, 255, 255)"
        ),
    )

##To make this plotly map, we needed a proper dataset
## and a layout
## The above code created both
fig = dict( data=cities, layout=layout )
py.plot( fig, validate=False, filename='d3-bubble-map-populations' )
##This creates a direct link to the vis
tls.get_embed('https://plot.ly/~GULily/503')


'<iframe id="igraph" scrolling="no" style="border:none;" seamless="seamless" src="https://plot.ly/~GULily/503.embed" height="525" width="100%"></iframe>'