## Permits for Arlington

In [1]:
import pandas as pd

In [2]:
def get_browser_page(url):

    from selenium import webdriver
    from selenium.webdriver.chrome.options import Options
    import time
    
    chrome_options = Options()
    chrome_options.add_argument("--start-maximized");

    browser = webdriver.Chrome("/usr/local/bin/chromedriver",options = chrome_options) 
    browser.get(url)

    browser.find_element_by_xpath('//input[@value = "Search"]').click()
    time.sleep(5)

    try:
        browser.find_element_by_partial_link_text('Show all').click()
        time.sleep(5)
    except:
        print("No show all button")

        
    page     =  browser . page_source
    permits  =  pd . read_html ( page )
    
    browser . close ( )
    
    return permits
        
    
def extract_ArlingtonMA_permits(year):
    import numpy as np
    #from datetime import datetime

    url = 'http://arlserver.town.arlington.ma.us/BuildingPermits/Select.pl?permittype=&issue=1/1/{year}&issue_thru=12/31/{year}'.format(year=year)
    
    permits = get_browser_page(url)

    permits = permits[1]   
    permits.columns = ['streetNum'] + list(permits.columns[1:])

    
    permits[['streetNum','Address']] = permits[['streetNum','Address']].ffill().replace(np.nan,' ', regex=True)
    #permits['asof']  =  datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    
    return permits


def permits_get_solar_norm(permits):

    mask = ~pd.isnull(permits.Description) & permits.Description.str.contains('Solar|solar')

    permits = permits[mask].copy()
    
    permits.Address=permits.Address.str.upper()
    
    for col in ['streetNum','Owner']:
        permits[col]=permits[col].astype(str)
    
    permits = permits[permits.Issued!='2001']  #85 	Oakland Ave 	2001 	B 	R 	23 	remodel kitchen 	Trinity Baptist Church 	David 	7000.0 	105.0 
    permits['Issued']=pd.to_datetime(permits['Issued'],format='%m-%d-%Y').dt.strftime('%Y-%m-%d')

    permits=permits.sort_values('Issued').reset_index(drop=True)

    if permits.Value.dtype==type(str):
        permits.loc[permits.Value.str.contains(' '),'Value']='0'
        permits.Value=permits.Value.astype(float)

    if permits.Fee.dtype==type(str):
        permits.loc[permits.Fee.str.contains(' '),'Fee']='0'
        permits.Fee=permits.Fee.astype(float)

    permits['installer'] = permits['Contractor'].str.lower().str.replace(' |;|,','', regex=True)
    permits['streetNum']  = permits['streetNum'].str.replace('\#','-', regex=True)
    
    permits = permits_norm(permits)

    summary = permits.groupby("installer").agg(
        {"installer":len,
         "Value":sum,
         "Fee":sum,
        }
    )
    summary.columns = ['installs','value','fee']
    
    aggregates = permits.groupby(['streetNum','Address']).agg(
    {
        "Permit":list,
        "Description":list,
        "Contractor":list,
        "Owner":max,
        "Value":sum,
        "Fee":sum,
        "Issued":[list,min]
    }).reset_index()

    aggregates.columns = ['streetNum','address','permits','descriptions','contractor','owner','permit_value','permit_fee','issued','effective_date']
    

    return aggregates, summary


def permits_norm(permits) :

    permits.Address=permits.Address . str . replace ( 'PARK$','PK', regex=True) \
                    . str . replace ( ' WAY$',' WY', regex=True) \
                    . str . replace ( '^BRATTLE TER$','BRATTLE TERR', regex=True) \
                    . str . replace ( '^APACHE TRL$','APACHE TR', regex=True) \
                    . str . replace ( '^FARMER CIR$|^FARMER\'S CIR$','FARMER`S CIR', regex=True) \
                    . str . replace ( '^GRANDVIEW RD$','GRAND VIEW RD', regex=True) \
                    . str . replace ( '^LOMBARD TER$','LOMBARD TERR', regex=True) \
                    . str . replace ( '^MOUNT VERNON ST$','MT. VERNON ST', regex=True) \
                    . str . replace ( '^N UNION ST$','NORTH UNION ST', regex=True) \
                    . str . replace ( '^PARK AVENUE EXT$','PARK AVE EXT', regex=True) \
                    . str . replace ( '^UPLAND RD$|^UPLAND RD W$','UPLAND RD WEST', regex=True) \
                    . str . replace ( '^WALNUT TER$','WALNUT TERR', regex=True) \
                    . str . replace ( '^WYMAN TER$','WYMAN TERR', regex=True) \
                    . str . replace ( '^SUMMER STREET PL$','SUMMER ST PL', regex=True) \
                    . str . replace ( '^RUSSELL TER$','RUSSELL TERR', regex=True) \
                    . str . replace ( '^LORRAINE TER$','LORRAINE TERR', regex=True) \
                    . str . replace ( '^LOWELL STREET PL$','LOWELL ST PL', regex=True) \
                    . str . replace ( '^PARK STREET PL$','PARK ST PL', regex=True) \
                    . str . replace ( '^WEST COURT TE$','WEST COURT TERR', regex=True) \
                    . str . replace ( '^LEE TER$','LEE TERR', regex=True) #\
#                     . str . replace ( '^$','', regex=True) \
#                     . str . replace ( '^$','', regex=True) \
    
    mask = (permits.Address == 'WASHINGTON ST') & (permits.streetNum=='73')
    permits.loc[mask,'streetNum'] = '71'
    mask = (permits.Address == 'CENTRAL ST') & (permits.streetNum=='10')
    permits.loc[mask,'streetNum'] = '8'
    mask = (permits.Address == 'ADAMS ST') & (permits.streetNum=='56-1')
    permits.loc[mask,'streetNum'] = '56'
    mask = (permits.Address == 'HERBERT RD') & (permits.streetNum=='84')
    permits.loc[mask,'streetNum'] = '82'
    mask = (permits.Address == 'BOWDOIN ST') & (permits.streetNum=='13')
    permits.loc[mask,'streetNum'] = '11'
    mask = (permits.Address == 'ALBERMARLE ST') & (permits.streetNum=='16')
    permits.loc[mask,'streetNum'] = '14'
    mask = (permits.Address == 'APPLETON PL') & (permits.streetNum=='377')
    permits.loc[mask,'Address'] = 'APPLETON ST'
    mask = (permits.Address == 'UPLAND RD WEST') & ((permits.streetNum=='11')|(permits.streetNum=='39'))
    permits.loc[mask,'Address'] = 'UPLAND RD'
    mask = ((permits.Address == 'UPLAND RD')|(permits.Address == 'UPLAND RD W')) & ((permits.streetNum=='45'))
    permits.loc[mask,'Address'] = 'UPLAND RD WEST'
    mask = (permits.Address == 'MASS AVE') & ((permits.streetNum=='626'))
    permits.loc[mask,'streetNum'] = '616'
    mask = (permits.Address == 'MASS AVE') & ((permits.streetNum=='869'))
    permits.loc[mask,'streetNum'] = '855'
    mask = (permits.Address == 'BROADWAY') & ((permits.streetNum=='37'))
    permits.loc[mask,'streetNum'] = '33'
    mask = (permits.Address == 'BROADWAY') & ((permits.streetNum=='297'))
    permits.loc[mask,'streetNum'] = '295'
    mask = (permits.Address == 'BROADWAY') & ((permits.streetNum=='144'))
    permits.loc[mask,'streetNum'] = '142'
    mask = (permits.Address == 'PARK AVE EXT') & ((permits.streetNum=='83')|(permits.streetNum=='82')|(permits.streetNum=='85'))
    permits.loc[mask,'streetNum'] = '99'
    mask = (permits.Address == 'NORTH UNION ST') & ((permits.streetNum=='60'))
    permits.loc[mask,'Address'] = 'EVERETT ST'
    permits.loc[mask,'streetNum'] = '187'
    
    return permits

In [None]:
## for single year
# permits = extract_ArlingtonMA_permits(2020)

## for multiple years
permits = pd.DataFrame()
for year in range(2000,2022):
    permits = permits.append(extract_ArlingtonMA_permits(year))

solar_permits, contractor_leaders = permits_get_solar_norm(permits)
print(contractor_leaders.sort_values('installs',ascending=False)[0:30].to_markdown())

In [6]:

data_dir = '/data/code/web/truepersons/data/public/US/MA/Arlington/permits/'
## Save

# solar_permits, contractor_leaders = permits_get_solar_norm(permits)
# solar_permits.to_csv(data_dir+'solar_permits.tsv',sep='\t',index=False)
# permits.to_csv(data_dir+'permits.tsv',sep='\t',index=False)

## Restore

solar_permits  =  pd . read_csv ( data_dir  +  'solar_permits.tsv' , sep = '\t' )
permits        =  pd . read_csv ( data_dir  +  'permits.tsv' , sep = '\t' )

In [11]:
solar_permits[solar_permits.effective_date<='2018-07-31']

Unnamed: 0,streetNum,address,permits,descriptions,contractor,owner,permit_value,permit_fee,issued,effective_date
0,1,ANDREWS WY,"[74, 114]","['Wire solar system', 'install 22 solar panels']","['J palmeri', 'Go Green Industries']",Meredith & Joseph Zong,18000.0,380.0,"['2012-02-02', '2012-02-02']",2012-02-02
1,1,CROSS ST,"[364, 483, 718, 470]","['Roof mounted solar.', 'Roof mounted solar.',...","['Knox Electric', 'Trinity Solar', 'Trinity So...",Kresl Timothy B,15000.0,490.0,"['2017-04-27', '2017-04-27', '2019-05-23', '20...",2017-04-27
2,1,FARMER`S CIR,"[1169, 1432]","['roof mounted solar', 'roof mounted solar']","['Astrum Solar', 'Astrum Solar']",,29000.0,820.0,"['2016-11-21', '2016-11-21']",2016-11-21
3,1,PIONEER RD,"[411, 328, 184, 231]","['solar panels', 'install 9 solar pnels', 'Roo...","['Vivent', 'Vivent solar', 'Trinity Solar', 'T...",Meadows Scott T & Amy L,18267.0,589.0,"['2014-04-30', '2014-04-30', '2017-03-06', '20...",2014-04-30
6,10,CHEVIOT RD,[228],['Solar panels.'],['J. Constintine'],Schack Von David,10766.0,318.0,['2013-02-27'],2013-02-27
...,...,...,...,...,...,...,...,...,...,...
1164,97,OVERLOOK RD,[235],['install 13 solar electric panels'],['solar city'],Meltzer Colin &,6000.0,180.0,['2015-04-07'],2015-04-07
1165,98,RIDGE ST,"[560, 799]","['9.5 KW solar PV system', '30 solar panels on...","['Sunbug', 'Sunbug']",Egler Lesley &,32602.0,919.0,"['2016-06-13', '2016-06-13']",2016-06-13
1167,99,COLLEGE AVE,"[85, 62]","['install 21 rooftop solar electric panels', '...","['Solar Flair Energy INC', 'J. Constintine']",Elledge Christopher & Meghan,20317.0,519.0,"['2013-01-22', '2013-01-22']",2013-01-22
1169,99,PARK AVE EXT,"[852, 1283]","['wire solar panels on roof', 'install solar p...","['G& B Elect', 'BMC Engineering']",Town of Arlington,250000.0,0.0,"['2015-08-19', '2015-09-03']",2015-08-19


In [8]:
%matplotlib widget
import matplotlib.pyplot as plt

summ = permits.groupby(permits.Issued.str[-4:]).agg({
    'Issued':len,
    'Value':sum,
    'Fee':sum
})
plt.figure(figsize=(6, 4), dpi=150)
plt.ylabel('Permitted Value')

#ax1 = summ["Issued"].plot(color='blue', grid=True, label='Issued')
ax2 = summ["Value"].plot(color='red', grid=True, label='Value')
ax3 = summ["Fee"].plot(color='green', grid=True, secondary_y=True, label='Fees')

#h1, l1 = ax1.get_legend_handles_labels()
h2, l2 = ax2.get_legend_handles_labels()
h3, l3 = ax3.get_legend_handles_labels()
plt.xlabel('Fiscal Year')


plt.legend(h2+h3, l2+l3, loc=2)
plt.show()


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [12]:
print('Permits,{installs},{first},{last}'.format(installs=len(solar_permits),first=solar_permits.effective_date.min(),last=solar_permits.effective_date.max()))

Permits,1172,2003-08-06,2021-11-19


In [14]:
def get_massgis_structures_assessor_shapefiles(town_code=10):
    import geopandas as gpd

    structures_url = 'http://download.massgis.digital.mass.gov/shapefiles/structures/structures_poly_{town_code}.zip'
    structures = gpd.read_file(structures_url . format ( town_code = town_code )).to_crs("EPSG:4326")

    ##fails, see note below on geometry field all None
    assessor_url = 's3+zip://s3.us-east-1.amazonaws.com/download.massgis.digital.mass.gov/shapefiles/l3parcels/L3_SHP_M010_ARLINGTON.zip!M010TaxPar_CY21_FY21.shp'

    ##manual file download and merge shape and database file together
    assessor_url = '/data/L3/L3_SHP_M010_Arlington/M010TaxPar_CY21_FY21.shp'
    assessor_shp = gpd.read_file(assessor_url).to_crs("EPSG:4326")

    assessor_url = '/data/L3/L3_SHP_M010_Arlington/M010Assess_CY21_FY21.dbf'
    assessor_dbf = gpd.read_file(assessor_url)  ## messed up geometry field all None
    assessor_dbf = assessor_dbf.drop('geometry',axis=1).copy()

    combo = assessor_shp.merge(assessor_dbf,on=['TOWN_ID','LOC_ID'],how='outer',indicator=True)
    assessor = combo[combo._merge=='both'].reset_index(drop=True)   ##63 no matches; water, roads, etc.
    assessor = assessor.drop('_merge',axis=1).copy()

    return assessor, structures





In [17]:
def merge_gis_permits(solar_permits, assessor, structures):
    
    assessor, structures = get_massgis_structures_assessor_shapefiles(town_code=10)
    
    solar_systems = solar_permits.merge(assessor,right_on=['ADDR_NUM', 'FULL_STR'],left_on=['streetNum','address'],how='outer',indicator=True)

    solar_systems_wo_geometry = solar_systems[solar_systems._merge=='left_only']

    solar_systems = gpd.GeoDataFrame(solar_systems[solar_systems._merge=='both']).to_crs("EPSG:4326").reset_index(drop=True)

    solar_systems['tokenId'] = np.int64

    for idx in range(len(solar_systems)):

        mask = structures . intersects ( solar_systems . loc [ idx , 'geometry' ] )   ##intersects Vs within finds all structures

        if len ( structures [ mask ] ) > 0 :
            foo = structures[mask][structures[mask].AREA_SQ_FT==structures[mask].AREA_SQ_FT.max()]

            center    =  ( foo . loc [ mask , 'geometry' ] ) .to_crs('+proj=cea') . centroid . to_crs(foo.crs)
            solar_systems.loc[idx,'geometry'] =  ( foo . geometry ) . values ## NB centroid returns lon / lat
        else:
            print(idx,'failed')  ##using centroid of plot instead of structure

NameError: name 'gpd' is not defined