In [1]:
import plotly.plotly as py
from plotly.graph_objs import *

In [9]:
# Basic filled-area chart: two traces drawn over the same x positions.
# The first uses plotly's 'tozeroy' fill mode, the second 'tonexty'.
trace0 = Scatter(x=[1, 2, 3, 4], y=[10, 15, 13, 17], fill='tozeroy')
trace1 = Scatter(x=[1, 2, 3, 4], y=[16, 5, 11, 9], fill='tonexty')
data = Data([trace0, trace1])

# Render the figure inline; `filename` is handed to plotly unchanged.
py.iplot(data, filename='basic-area')

In [10]:
import plotly.plotly as py
import pandas as pd

# Load the 2014 US city populations dataset; the columns used below are
# `name`, `pop`, `lat` and `lon`.
df = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/2014_us_cities.csv')

# Hover text for each bubble: city name plus population in millions.
df['text'] = df['name'] + '<br>Population ' + (df['pop']/1e6).astype(str)+' million'

# Half-open positional row ranges, one per legend group. They are used as
# slice bounds (`df[start:stop]`, stop excluded), so each range must start
# where the previous one stopped; otherwise the rows in between are never
# plotted. The earlier (0,2),(3,10),(11,20),(21,50) bounds silently dropped
# rows 2, 10 and 20.
limits = [(0,2),(2,10),(10,20),(20,50),(50,3000)]
colors = ["rgb(0,116,217)","rgb(255,65,54)","rgb(133,20,75)","rgb(255,133,27)","rgb(255,220,0)"]
cities = []
scale = 50000  # divisor turning a raw population into a marker size

for lim, color in zip(limits, colors):
    df_sub = df[lim[0]:lim[1]]
    city = dict(
        type = 'scattergeo',
        locationmode = 'USA-states',
        lon = df_sub['lon'],
        lat = df_sub['lat'],
        text = df_sub['text'],
        # NOTE(review): plotly documents `sizemode` as a `marker` attribute;
        # at trace level it is presumably ignored (validation is disabled in
        # the iplot call below) - confirm before relying on it.
        sizemode = 'diameter',
        marker = dict(
            size = df_sub['pop']/scale,
            color = color,
            line = dict(width = 2,color = 'black')
        ),
        # Legend label as a 1-based, inclusive range of row positions,
        # e.g. rows [2:10] are shown as "3 - 10".
        name = '{0} - {1}'.format(lim[0] + 1, lim[1]) )
    cities.append(city)

layout = dict(
        title = '2014 US city populations<br>(Click legend to toggle traces)',
        showlegend = True,
        geo = dict(
            scope='usa',
            projection=dict( type='albers usa' ),
            showland = True,
            landcolor = 'rgb(217, 217, 217)',
            subunitwidth=1,
            countrywidth=1,
            subunitcolor="rgb(255, 255, 255)",
            countrycolor="rgb(255, 255, 255)"
        ),
    )

# One scattergeo trace per group, all sharing the layout above.
fig = dict( data=cities, layout=layout )
py.iplot( fig, validate=False, filename='d3-bubble-map-populations' )

In [68]:
# Display the first 300 rows of the city table.
df.iloc[:300]

Unnamed: 0_level_0,pop,lat,lon,text
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
New York,8287238,40.730599,-73.986581,New York <br>Population 8.287238 million
Los Angeles,3826423,34.053717,-118.242727,Los Angeles <br>Population 3.826423 million
Chicago,2705627,41.875555,-87.624421,Chicago <br>Population 2.705627 million
Houston,2129784,29.758938,-95.367697,Houston <br>Population 2.129784 million
Philadelphia,1539313,39.952335,-75.163789,Philadelphia <br>Population 1.539313 million
Phoenix,1465114,33.446768,-112.075672,Phoenix <br>Population 1.465114 million
San Antonio,1359174,29.424600,-98.495141,San Antonio <br>Population 1.359174 million
San Diego,1321016,32.717421,-117.162771,San Diego <br>Population 1.321016 million
Dallas,1219399,32.776196,-96.796899,Dallas <br>Population 1.219399 million
San Jose,971495,37.343850,-121.883135,San Jose <br>Population 0.971495 million


In [37]:
# Read the LEED project directory export. `header=3` takes the column names
# from the fourth line of the file (zero-based index 3), and the bytes are
# decoded as ISO-8859-1.
leed_data = pd.read_csv(
    'PublicLEEDProjectDirectory.csv',
    encoding="ISO-8859-1",
    header=3,
    low_memory=False,
)

In [40]:
# Count how many rows each city has in the LEED export; `value_counts`
# returns the counts ordered from most to least frequent.
city_data = leed_data.loc[:, 'City'].value_counts()

In [72]:
# Keep only the first 300 entries of the per-city counts.
small_city_data = city_data.iloc[:300]

In [73]:
# First 300 rows of the city table (name, population, coordinates, hover text).
small_long_data = df.iloc[:300]

In [80]:
# Turn the Series of per-city counts into a one-column DataFrame; the city
# names remain in the index for now.
# NOTE: this rebinds the same name, so the cell is only meant to be run once
# per kernel session.
small_city_data = small_city_data.to_frame()

In [76]:
# Move the current index into a regular column and restore a default
# integer index.
# NOTE(review): the outputs shown in this notebook have `name` as the index
# before this step and as a column after it, so `df` was presumably indexed
# by `name` in a cell that is not shown here - confirm.
small_long_data=small_long_data.reset_index()

In [81]:
# Relabel the single (counts) column as 'name'. This is an interim label:
# the columns are relabelled again once the index has been reset.
small_city_data.columns = ['name']

In [84]:
# Move the city names out of the index into an ordinary column, leaving a
# default integer index.
small_city_data = small_city_data.reset_index()

In [85]:
# Final column labels: the former index (city names) becomes 'name' and the
# counts become 'number of projects'.
small_city_data.columns = ['name', "number of projects"]

In [83]:
# Preview the first five rows of the city table.
small_long_data.head(5)

Unnamed: 0,name,pop,lat,lon,text
0,New York,8287238,40.730599,-73.986581,New York <br>Population 8.287238 million
1,Los Angeles,3826423,34.053717,-118.242727,Los Angeles <br>Population 3.826423 million
2,Chicago,2705627,41.875555,-87.624421,Chicago <br>Population 2.705627 million
3,Houston,2129784,29.758938,-95.367697,Houston <br>Population 2.129784 million
4,Philadelphia,1539313,39.952335,-75.163789,Philadelphia <br>Population 1.539313 million


In [87]:
# Preview the five cities with the most LEED projects.
small_city_data.head(5)

Unnamed: 0,name,number of projects
0,Confidential,23503
1,Washington,1537
2,Dallas,945
3,Houston,879
4,Chicago,865


In [93]:
# First attempt at joining project counts to city coordinates.
# The inner join comes back empty: the names in `small_long_data` carry a
# trailing space (e.g. "Chicago "), so no key matches exactly.
pd.merge(small_city_data, small_long_data, on='name')

Unnamed: 0,name,number of projects,pop,lat,lon,text


In [98]:
# Check how "Dallas" is spelled on the LEED side (no trailing space).
small_city_data.loc[small_city_data['name'] == "Dallas"]

Unnamed: 0,name,number of projects
2,Dallas,945


In [101]:
# The city table only matches when the trailing space is included.
small_long_data.loc[small_long_data['name'] == "Chicago "]

Unnamed: 0,name,pop,lat,lon,text
2,Chicago,2705627,41.875555,-87.624421,Chicago <br>Population 2.705627 million


In [102]:
# Append a trailing space to every LEED city name so it matches the
# spelling used in small_long_data['name'].
small_city_data['new_name'] = small_city_data['name'] + " "

In [104]:
# Confirm the padded name now matches the city table's spelling.
small_city_data.loc[small_city_data['new_name'] == "Chicago "]

Unnamed: 0,name,number of projects,new_name
4,Chicago,865,Chicago


In [105]:
# Keep only the project count and the space-padded name, dropping the
# original unpadded 'name' column.
small_city_data = small_city_data[['number of projects', 'new_name']]

In [107]:
# Relabel the padded column as 'name' so it can be used as the merge key
# against small_long_data.
small_city_data.columns = ['number of projects', 'name']

In [110]:
# Join city coordinates/population with the LEED project counts on the
# (space-padded) city name. This is an inner join, so only cities present in
# both frames are kept. Note that this rebinds `df`.
df = pd.merge(small_long_data, small_city_data, on='name')

In [123]:
# Order cities from most to fewest LEED projects; the plotting cell slices
# `df` by row position and relies on this ordering.
# `DataFrame.sort` was removed in pandas 0.20; `sort_values` is its
# replacement and takes the same `ascending` flag.
df = df.sort_values(['number of projects'], ascending=False)

In [124]:
# Preview the five cities with the most LEED projects after sorting.
df.head(5)

Unnamed: 0,name,pop,lat,lon,text,number of projects
23,Washington,620427,38.894955,-77.036646,Washington <br>Number of Projects 1537,1537
8,Dallas,1219399,32.776196,-96.796899,Dallas <br>Number of Projects 945,945
3,Houston,2129784,29.758938,-95.367697,Houston <br>Number of Projects 879,879
2,Chicago,2705627,41.875555,-87.624421,Chicago <br>Number of Projects 865,865
0,New York,8287238,40.730599,-73.986581,New York <br>Number of Projects 816,816


In [134]:

# Hover text for each bubble: city name plus its LEED project count.
df['text'] = df['name'] + '<br>Number of Projects ' + (df['number of projects']).astype(str)

# Half-open positional row ranges, one per legend group. They are used as
# slice bounds (`df[start:stop]`, stop excluded), so each range must start
# where the previous one stopped; the earlier (0,5),(6,12),(13,20),(21,30)
# bounds silently dropped rows 5, 12, 20 and 30.
# Assumes `df` is already sorted by 'number of projects', descending.
limits = [(0,5),(5,12),(12,20),(20,30),(30,120)]
colors = ["rgb(0,116,217)","rgb(255,65,54)","rgb(133,20,75)","rgb(255,133,27)","rgb(255,220,0)"]
cities = []
scale = 15  # divisor turning a project count into a marker size

for lim, color in zip(limits, colors):
    df_sub = df[lim[0]:lim[1]]
    if df_sub.empty:
        # `df` has fewer rows than this range starts at: nothing to draw,
        # and no first/last value to build a legend label from.
        continue
    projects = df_sub['number of projects']
    city = dict(
        type = 'scattergeo',
        locationmode = 'USA-states',
        lon = df_sub['lon'],
        lat = df_sub['lat'],
        text = df_sub['text'],
        # NOTE(review): plotly documents `sizemode` as a `marker` attribute;
        # at trace level it is presumably ignored (validation is disabled in
        # the iplot call below) - confirm before relying on it.
        sizemode = 'diameter',
        marker = dict(
            size = projects/scale,
            color = color,
            line = dict(width = 2,color = 'black')
        ),
        # Label the group with the project counts of its first and last rows.
        # These are read from the slice itself, by column name, so the label
        # cannot index past the end of `df` or pick up a row outside the group.
        name = 'LEED Projects: {0} - {1}'.format(projects.iloc[0],
                                            projects.iloc[-1]))
    cities.append(city)

layout = dict(
        title = 'Total LEED Certified Projects<br>(Click legend to toggle traces)',
        showlegend = True,
        geo = dict(
            scope='usa',
            projection=dict( type='albers usa' ),
            showland = True,
            landcolor = 'rgb(217, 217, 217)',
            subunitwidth=1,
            countrywidth=1,
            subunitcolor="rgb(255, 255, 255)",
            countrycolor="rgb(255, 255, 255)"
        ),
    )

# One scattergeo trace per group, all sharing the layout above.
fig = dict( data=cities, layout=layout )
py.iplot( fig, validate=False, filename='d3-bubble-map-leed' )

In [132]:
# Scalar lookup by position: second row, sixth column.
df.iat[1, 5]

945

TODO: Do one map with all of the cities, then do one that is population-adjusted.