In [1]:
import plotly.plotly as py
from plotly.graph_objs import *

In [9]:
# Two area traces over the same x positions; `fill` selects the plotly fill
# mode used for each trace ('tozeroy' for the first, 'tonexty' for the second).
trace0 = Scatter(x=[1, 2, 3, 4], y=[10, 15, 13, 17], fill='tozeroy')
trace1 = Scatter(x=[1, 2, 3, 4], y=[16, 5, 11, 9], fill='tonexty')

# Bundle both traces and render the chart inline under the name 'basic-area'.
data = Data([trace0, trace1])
py.iplot(data, filename='basic-area')

In [163]:
import plotly.plotly as py
import pandas as pd

# Load the 2014 US city table (columns used below: name, pop, lat, lon).
df = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/2014_us_cities.csv')

# Hover label for every bubble: city name plus population in millions.
df['text'] = df['name'] + '<br>Population ' + (df['pop']/1e6).astype(str)+' million'

# Inclusive (first, last) row positions of each legend group.
# The bounds are treated as inclusive so that the groups are contiguous:
# slicing with an exclusive end (df[0:2], df[3:10], ...) silently dropped
# rows 2, 10 and 20 from the map. The last group now starts at 51 because
# row 50 already belongs to the (21, 50) group.
# NOTE(review): grouping by row position assumes the CSV is ordered by
# population, largest first -- confirm against the data source.
limits = [(0,2),(3,10),(11,20),(21,50),(51,3000)]
colors = ["rgb(0,116,217)","rgb(255,65,54)","rgb(133,20,75)","rgb(255,133,27)","rgb(255,220,0)"]
cities = []
scale = 50000  # marker size = population / scale

for lim, color in zip(limits, colors):
    first, last = lim
    # +1 because the slice end is exclusive; slicing past the last row is safe.
    df_sub = df.iloc[first:last + 1]
    city = dict(
        type = 'scattergeo',
        locationmode = 'USA-states',
        lon = df_sub['lon'],
        lat = df_sub['lat'],
        text = df_sub['text'],
        marker = dict(
            size = df_sub['pop']/scale,
            # sizemode is a marker attribute; at trace level it is not a
            # scattergeo attribute (validate=False lets it through unchecked).
            sizemode = 'diameter',
            color = color,
            line = dict(width = 2,color = 'black')
        ),
        name = '{0} - {1}'.format(first, last) )
    cities.append(city)

layout = dict(
        title = '2014 US city populations<br>(Click legend to toggle traces)',
        showlegend = True,
        geo = dict(
            scope='usa',
            projection=dict( type='albers usa' ),
            showland = True,
            landcolor = 'rgb(217, 217, 217)',
            subunitwidth=1,
            countrywidth=1,
            subunitcolor="rgb(255, 255, 255)",
            countrycolor="rgb(255, 255, 255)"
        ),
    )

fig = dict( data=cities, layout=layout )
py.iplot( fig, validate=False, filename='d3-bubble-map-populations' )

In [164]:
df.head()

Unnamed: 0,name,pop,lat,lon,text
0,New York,8287238,40.730599,-73.986581,New York <br>Population 8.287238 million
1,Los Angeles,3826423,34.053717,-118.242727,Los Angeles <br>Population 3.826423 million
2,Chicago,2705627,41.875555,-87.624421,Chicago <br>Population 2.705627 million
3,Houston,2129784,29.758938,-95.367697,Houston <br>Population 2.129784 million
4,Philadelphia,1539313,39.952335,-75.163789,Philadelphia <br>Population 1.539313 million


In [165]:
# Load the LEED project directory export from the working directory.
# header=3 tells pandas that the column names are on the fourth line of the file.
leed_data = pd.read_csv(
    'PublicLEEDProjectDirectory.csv',
    encoding="ISO-8859-1",
    header=3,
    low_memory=False,
)

In [166]:
# Count how many LEED directory rows carry each distinct 'City' value.
city_data = leed_data['City'].value_counts()

In [167]:
# Keep only the first 500 entries of the per-city counts (positional slice).
small_city_data = city_data.iloc[:500]

In [168]:
# Keep only the first 500 rows of the city population table (positional slice).
small_long_data = df.iloc[:500]

In [169]:
# Convert the counts Series into a one-column DataFrame so its columns can be
# renamed and it can be merged with the population table.
small_city_data = small_city_data.to_frame()

In [170]:
# Move the current row index into a regular 'index' column and renumber rows.
small_long_data=small_long_data.reset_index()

In [171]:
small_long_data.head()

Unnamed: 0,index,name,pop,lat,lon,text
0,0,New York,8287238,40.730599,-73.986581,New York <br>Population 8.287238 million
1,1,Los Angeles,3826423,34.053717,-118.242727,Los Angeles <br>Population 3.826423 million
2,2,Chicago,2705627,41.875555,-87.624421,Chicago <br>Population 2.705627 million
3,3,Houston,2129784,29.758938,-95.367697,Houston <br>Population 2.129784 million
4,4,Philadelphia,1539313,39.952335,-75.163789,Philadelphia <br>Population 1.539313 million


In [172]:
# Label the single column of the counts frame.
# NOTE(review): this column holds project counts, not city names; the label is
# only transient because both columns are renamed again after reset_index().
small_city_data.columns = ['name']

In [173]:
small_city_data.head()

Unnamed: 0,name
Confidential,23503
Washington,1537
Dallas,945
Houston,879
Chicago,865


In [174]:
# Turn the city-name index into a regular column and renumber the rows.
small_city_data = small_city_data.reset_index()

In [175]:
# Final labels: the city name and the number of LEED projects counted for it.
small_city_data.columns = ['name', "number of projects"]

In [176]:
small_city_data.head()

Unnamed: 0,name,number of projects
0,Confidential,23503
1,Washington,1537
2,Dallas,945
3,Houston,879
4,Chicago,865


In [149]:
small_long_data.head()

Unnamed: 0,index,name,pop,lat,lon,text,number of projects,population_adjusted
0,23,Washington,620427,38.894955,-77.036646,Washington <br>Number of Projects 1537,1537,24.773261
1,8,Dallas,1219399,32.776196,-96.796899,Dallas <br>Number of Projects 945,945,7.749719
2,3,Houston,2129784,29.758938,-95.367697,Houston <br>Number of Projects 879,879,4.127179
3,2,Chicago,2705627,41.875555,-87.624421,Chicago <br>Number of Projects 865,865,3.197041
4,0,New York,8287238,40.730599,-73.986581,New York <br>Number of Projects 816,816,0.984647


In [177]:
small_city_data[small_city_data['name'] == "Dallas"]

Unnamed: 0,name,number of projects
2,Dallas,945


In [178]:
# Diagnostic lookup: the literal deliberately ends with a space, because the
# city names in the population table carry a trailing space ("Chicago ").
small_long_data[small_long_data['name'] == "Chicago "]

Unnamed: 0,index,name,pop,lat,lon,text
2,2,Chicago,2705627,41.875555,-87.624421,Chicago <br>Population 2.705627 million


In [179]:
# Append a single trailing space to every city name so the values line up with
# the space-terminated names in the population table.
small_city_data['new_name'] = small_city_data['name'] + " "

In [180]:
small_city_data[small_city_data['new_name'] == "Chicago "]

Unnamed: 0,name,number of projects,new_name
4,Chicago,865,Chicago


In [181]:
# Keep the project count and the space-padded name; drop the unpadded 'name'.
small_city_data = small_city_data[['number of projects', 'new_name']]

In [182]:
# Call the padded column 'name' again so it can be used as the merge key.
small_city_data.columns = ['number of projects', 'name']

In [183]:
small_city_data.head()

Unnamed: 0,number of projects,name
0,23503,Confidential
1,1537,Washington
2,945,Dallas
3,879,Houston
4,865,Chicago


In [184]:
# Inner-join the population rows with the per-city project counts on 'name'.
# NOTE(review): the join key is the city name alone, so cities that share a
# name are conflated -- e.g. the Portland row in this notebook's output has
# pop 66147 but 600 projects. Confirm before trusting per-capita figures.
df = pd.merge(small_long_data, small_city_data, how='inner', on='name')

In [185]:
# Order cities by project count, largest first.
# DataFrame.sort() has been removed from pandas; sort_values() is its
# replacement (available since pandas 0.17) and takes the same arguments here.
df = df.sort_values(['number of projects'], ascending=False)

In [188]:
df.shape

(225, 7)

In [209]:
df.head()

Unnamed: 0,index,name,pop,lat,lon,text,number of projects,population_adjusted
24,23,Washington,620427,38.894955,-77.036646,Washington <br>Number of Projects 1537,1537,24.773261
8,8,Dallas,1219399,32.776196,-96.796899,Dallas <br>Number of Projects 945,945,7.749719
3,3,Houston,2129784,29.758938,-95.367697,Houston <br>Number of Projects 879,879,4.127179
2,2,Chicago,2705627,41.875555,-87.624421,Chicago <br>Number of Projects 865,865,3.197041
0,0,New York,8287238,40.730599,-73.986581,New York <br>Number of Projects 816,816,0.984647


In [3]:

# Hover label for every bubble: city name plus its LEED project count.
df['text'] = df['name'] + '<br>Number of Projects ' + (df['number of projects']).astype(str)

# Inclusive (first, last) row positions of each legend group. `df` must
# already be sorted by 'number of projects' (descending) for the groups and
# their legend ranges to be meaningful.
# The bounds are treated as inclusive: slicing with an exclusive end dropped
# rows 5, 12, 20 and 30 from the map, and the legend read its upper value from
# a row that was not part of the group.
limits = [(0,5),(6,12),(13,20),(21,30),(31,120)]
colors = ["rgb(0, 255, 181)","rgb(255, 224, 0)","rgb(30, 111, 255)","rgb(235, 108, 72)","rgb(93, 0, 169)"]
cities = []
scale = 15  # marker size = number of projects / scale

for lim, color in zip(limits, colors):
    first = lim[0]
    # Clamp to the last available row so a short frame cannot raise IndexError.
    last = min(lim[1], len(df) - 1)
    if first > last:
        continue  # nothing left for this group
    df_sub = df.iloc[first:last + 1]
    # Look the column up by name rather than by magic position 6.
    counts = df_sub['number of projects']
    city = dict(
        type = 'scattergeo',
        locationmode = 'USA-states',
        lon = df_sub['lon'],
        lat = df_sub['lat'],
        text = df_sub['text'],
        marker = dict(
            size = counts/scale,
            # sizemode is a marker attribute; at trace level it is not a
            # scattergeo attribute (validate=False lets it through unchecked).
            sizemode = 'diameter',
            color = color,
            line = dict(width = 2,color = 'black')
        ),
        # Legend shows the counts of the first and last city in the group.
        name = 'LEED Projects: {0} - {1}'.format(counts.iloc[0],
                                            counts.iloc[-1]))
    cities.append(city)

layout = dict(
        title = 'Total LEED Certified Projects<br>(Click legend to toggle traces)',
        showlegend = True,
        geo = dict(
            scope='usa',
            projection=dict( type='albers usa' ),
            showland = True,
            landcolor = 'rgb(217, 217, 217)',
            subunitwidth=1,
            countrywidth=1,
            subunitcolor="rgb(255, 255, 255)",
            countrycolor="rgb(255, 255, 255)"
        ),
    )

fig = dict( data=cities, layout=layout )
py.iplot( fig, validate=False, filename='d3-bubble-map-leed' )

NameError: name 'df' is not defined

TODO: First make one map using all of the projects (raw counts), then make a second one adjusted for population.

In [189]:
df.head()

Unnamed: 0,index,name,pop,lat,lon,text,number of projects
24,23,Washington,620427,38.894955,-77.036646,Washington <br>Number of Projects 1537,1537
8,8,Dallas,1219399,32.776196,-96.796899,Dallas <br>Number of Projects 945,945
3,3,Houston,2129784,29.758938,-95.367697,Houston <br>Number of Projects 879,879
2,2,Chicago,2705627,41.875555,-87.624421,Chicago <br>Number of Projects 865,865
0,0,New York,8287238,40.730599,-73.986581,New York <br>Number of Projects 816,816


In [190]:
# LEED projects per 10,000 residents: (project count / population) * 10000.
projects_per_resident = df['number of projects'].div(df['pop'])
df['population_adjusted'] = projects_per_resident.mul(10000)

In [191]:
df.head()

Unnamed: 0,index,name,pop,lat,lon,text,number of projects,population_adjusted
24,23,Washington,620427,38.894955,-77.036646,Washington <br>Number of Projects 1537,1537,24.773261
8,8,Dallas,1219399,32.776196,-96.796899,Dallas <br>Number of Projects 945,945,7.749719
3,3,Houston,2129784,29.758938,-95.367697,Houston <br>Number of Projects 879,879,4.127179
2,2,Chicago,2705627,41.875555,-87.624421,Chicago <br>Number of Projects 865,865,3.197041
0,0,New York,8287238,40.730599,-73.986581,New York <br>Number of Projects 816,816,0.984647


In [201]:
# Order cities by projects per 10,000 residents, largest first.
# DataFrame.sort() has been removed from pandas; sort_values() is its
# replacement (available since pandas 0.17) and takes the same arguments here.
df_pop_adj_sort = df.sort_values(['population_adjusted'], ascending=False)

In [202]:
df_pop_adj_sort.head()

Unnamed: 0,index,name,pop,lat,lon,text,number of projects,population_adjusted
28,460,Portland,66147,45.520247,-122.674195,Portland <br>Number of Projects 600,600,90.707062
12,444,Jacksonville,68161,30.332184,-81.655651,Jacksonville <br>Number of Projects 282,282,41.372632
212,439,Santa Fe,68736,35.687,-105.9378,Santa Fe <br>Number of Projects 281,281,40.881052
152,420,Wilmington,70978,39.745947,-75.546589,Wilmington <br>Number of Projects 248,248,34.940404
89,113,Grand Rapids,189054,42.963241,-85.667864,Grand Rapids <br>Number of Projects 484,484,25.601151


In [207]:

# Hover label for every bubble: city name plus projects per 10,000 residents.
# The label previously printed the raw project count under the "per 10000"
# caption; it now shows the population-adjusted value (rounded to 2 decimals).
df_pop_adj_sort['text'] = (
    df_pop_adj_sort['name']
    + '<br>Number of Projects per 10000 '
    + df_pop_adj_sort['population_adjusted'].round(2).astype(str)
)

# Inclusive (first, last) row positions of each legend group, counted on the
# frame sorted by 'population_adjusted' (descending).
# The bounds are treated as inclusive: slicing with an exclusive end dropped
# rows 5, 12, 20 and 30 from the map, and the legend read its upper value from
# a row that was not part of the group.
limits = [(0,5),(6,12),(13,20),(21,30),(31,120)]
colors = ["rgb(0,116,217)","rgb(255,65,54)","rgb(133,20,75)","rgb(255,133,27)","rgb(255,220,0)"]
cities = []
scale = 1  # marker size = projects per 10,000 residents / scale

for lim, color in zip(limits, colors):
    first = lim[0]
    # Clamp to the last available row so a short frame cannot raise IndexError.
    last = min(lim[1], len(df_pop_adj_sort) - 1)
    if first > last:
        continue  # nothing left for this group
    df_sub = df_pop_adj_sort.iloc[first:last + 1]
    # Look the column up by name rather than by magic position 7.
    rates = df_sub['population_adjusted']
    city = dict(
        type = 'scattergeo',
        locationmode = 'USA-states',
        lon = df_sub['lon'],
        lat = df_sub['lat'],
        text = df_sub['text'],
        marker = dict(
            size = rates/scale,
            # sizemode is a marker attribute; at trace level it is not a
            # scattergeo attribute (validate=False lets it through unchecked).
            sizemode = 'diameter',
            color = color,
            line = dict(width = 2,color = 'black')
        ),
        # Legend shows the (truncated) rates of the first and last city in the group.
        name = 'Projects/10,000 Residents: {0} - {1}'.format(int(rates.iloc[0]),
                                            int(rates.iloc[-1])))
    cities.append(city)

layout = dict(
        title = 'Population Adjusted LEED Certified Projects<br>(Click legend to toggle traces)',
        showlegend = True,
        geo = dict(
            scope='usa',
            projection=dict( type='albers usa' ),
            showland = True,
            landcolor = 'rgb(217, 217, 217)',
            subunitwidth=1,
            countrywidth=1,
            subunitcolor="rgb(255, 255, 255)",
            countrycolor="rgb(255, 255, 255)"
        ),
    )

fig = dict( data=cities, layout=layout )
py.iplot( fig, validate=False, filename='d3-bubble-map-leed_adjusted' )