In [65]:
## SET UP
import pandas as pd
import altair as alt
import folium, os
## Show every column whenever a DataFrame is displayed.
pd.set_option('display.max_columns', None)

## Years covered by the look-back analysis. They are kept as strings in a
## one-column frame so they can be cross-joined and merged on 'YEAR'.
years = pd.DataFrame({'YEAR': ['2016', '2017', '2018', '2019']})


## ASSESSMENT DATA
## Load the raw assessment roll, keeping every column as text.
asmt = pd.read_csv('/home/dan/Python/QueenCityCounts/llrd_code/data/2019-2020_Assessment_Roll.csv', dtype=object)
## Source column -> short name used in the rest of the notebook.
## Only these columns are kept, in this order.
asmt_columns = {
    'PRINT KEY': 'SBL',
    'PROPERTY CLASS': 'PROP_TYPE',
    'NEIGHBORHOOD': 'NBHD',
    'HOUSE NUMBER': 'NUMBER',
    'STREET': 'STREET',
}
asmt = asmt[list(asmt_columns)].drop_duplicates().rename(columns=asmt_columns)
## Full address is "<house number> <street>"; missing parts are rendered as the text 'nan'.
asmt['ADDRESS'] = [
    ' '.join((str(number), str(street)))
    for number, street in zip(asmt['NUMBER'], asmt['STREET'])
]


## CODE VIOLATIONS DATA
## Load the raw violations export, keeping every column as text.
vios = pd.read_csv('/home/dan/Python/QueenCityCounts/llrd_code/data/Code_Violations.csv', dtype=object)
## Keep one row per (date, violation id, address); the id column is what gets counted below.
vios = vios[['DATE', 'UNIQUEKEY', 'ADDRESS']].drop_duplicates().rename(columns={'UNIQUEKEY': 'VIOLATIONS'})
## Keep only the text before the first space of the timestamp, then parse it as a date.
vios['DATE'] = pd.to_datetime(vios['DATE'].map(lambda stamp: str(stamp).split(' ')[0]))
## The year is stored as text so it matches the string entries in `years`.
vios['YEAR'] = [str(day.year) for day in vios['DATE']]
## Split each address at its spaces: first token is the house number, the rest is the street.
address_tokens = vios['ADDRESS'].map(lambda address: str(address).split(' '))
vios['NUMBER'] = address_tokens.map(lambda tokens: tokens[0])
vios['STREET'] = address_tokens.map(lambda tokens: ' '.join(tokens[1:]))
## Number of violations per house number, street and year, as a flat frame.
vios = pd.pivot_table(
    vios,
    index=['NUMBER', 'STREET', 'YEAR'],
    values=['VIOLATIONS'],
    aggfunc='count',
).reset_index()


## RENTAL PROPERTY DATA
## read in raw csv (every column kept as text)
rent = pd.read_csv('/home/dan/Python/QueenCityCounts/llrd_code/data/Rental_Registry.csv', dtype=object)
## select required columns only, and do some clean up
rent = rent[['Print Key', 'Address', 'License Status', 'Issued Datetime', 'Expiration Datetime']].drop_duplicates()
## keep active licenses only; .copy() makes this an independent frame so the
## column assignments below do not raise SettingWithCopyWarning on a filtered slice
rent = rent[rent['License Status']=='Active'].copy()
## split each address at its first space: house number, then the rest as the street
rent['NUMBER'] = rent['Address'].apply(lambda x: str(x).split(' ')[0])
rent['STREET'] = rent['Address'].apply(lambda x: ' '.join(str(x).split(' ')[1:]))
rent = rent.rename(columns={'License Status':'STATUS','Issued Datetime':'ISSUED',
                            'Expiration Datetime':'EXPIRES','Print Key':'SBL','Address':'ADDRESS'})
rent['ISSUED'] = pd.to_datetime(rent['ISSUED'])
rent['EXPIRES'] = pd.to_datetime(rent['EXPIRES'])
## flag column carried into the join: every row left here is an active rental
rent['IS_RENTAL'] = 1


# JOIN ASSESSMENT ROLL, RENTAL REGISTRY AND VIOLATIONS
# a constant key on both frames turns the inner merge into a cross join
years = years.assign(key=1)
asmt = asmt.assign(key=1)
# duplicate asmt dataframe for each year
df = asmt.merge(years, on='key',how='inner').drop(columns=['key','ADDRESS'])
# asmt <- rental (SBL is the key)
# note: this assumes every residence on the rental registry has always been a rental,
# back to the start of the analysis period
df = df.merge(rent[['SBL', 'IS_RENTAL']].drop_duplicates(), on='SBL', how='left')
# properties with no registry match are not rentals.
# Assign the result back: fillna(inplace=True) on a column selection is chained
# assignment, which pandas deprecates and which has no effect under Copy-on-Write.
df['IS_RENTAL'] = df['IS_RENTAL'].fillna(0)
# asmt+rental <- vios (on number, street, and year, as keys)
df = df.merge(vios, on=['NUMBER','STREET','YEAR'], how='left')
# property-years with no matching violations row had zero violations
df['VIOLATIONS'] = df['VIOLATIONS'].fillna(0)
# sort dataframe (which now is asmt*year+rental+vios) to get same properties together, and reset index
df = df.sort_values(['YEAR','NUMBER','STREET'], ascending=True).reset_index(drop=True)


# EXPLORATORY ANALYSIS
# in each neighborhood, what percent of 2019 SBLs whose property class starts with '4'
# are on the rental registry?
# NOTE(review): the citation analysis further down selects residences with class '2',
# not '4' -- confirm which property class is intended here.
# .str[0] yields NaN for a missing PROP_TYPE, so such rows are simply excluded
# instead of raising inside a lambda.
is_2019_class4 = (df['YEAR']=='2019') & (df['PROP_TYPE'].str[0]=='4')
# SBL -> number of distinct non-null SBLs, IS_RENTAL -> sum of the rental flags;
# columns='YEAR' gives the result two-level columns
pt1 = pd.pivot_table(df[is_2019_class4],
                     index='NBHD',columns='YEAR', values=['SBL','IS_RENTAL'],
                     aggfunc={'SBL':(lambda x: len(x.dropna().unique())),'IS_RENTAL':sum})
pt1['PCT'] = pt1['IS_RENTAL']/pt1['SBL']
pt1 = pt1.rename(columns={'SBL':'HOUSES'})
# highest rental share first
pt1 = pt1.sort_values('PCT',ascending=False)

# in each neighborhood, who gets more citations: rentals or non-rentals?
# residences are the SBLs whose property class starts with '2';
# .str[0] yields NaN for a missing PROP_TYPE, so such rows are simply excluded
is_class2 = df['PROP_TYPE'].str[0]=='2'
# one row per (neighborhood, SBL), summed over every analysis year
per_sbl = (
    df.loc[is_class2, ['SBL','NBHD','IS_RENTAL','VIOLATIONS']]
      .groupby(['NBHD','SBL'], as_index=False)[['IS_RENTAL','VIOLATIONS']]
      .sum()
)
# collapse the sums to flags: any rental year / any violation -> 'YES'
per_sbl['IS_RENTAL'] = per_sbl['IS_RENTAL'].apply(lambda x: 'NO' if x==0.0 else 'YES')
per_sbl['VIOLATIONS'] = per_sbl['VIOLATIONS'].apply(lambda x: 'NO' if x==0.0 else 'YES')
# number of SBLs per (neighborhood, rental flag), split by citation flag.
# Missing combinations count as 0 rather than NaN, so a group with no cited
# (or no uncited) SBLs still gets correct shares and a correct COUNT.
sbl_counts = (
    per_sbl.groupby(['NBHD','IS_RENTAL','VIOLATIONS']).size()
           .unstack('VIOLATIONS', fill_value=0)
           .reindex(columns=['NO','YES'], fill_value=0)
           .rename_axis(columns=None)
           .rename(columns={'NO':'NO_CIT_RAW','YES':'CIT_RAW'})
)
pt2 = sbl_counts.reset_index()
pt2['COUNT'] = pt2['NO_CIT_RAW'] + pt2['CIT_RAW']
# shares of SBLs without / with at least one citation
pt2['NO_CIT'] = pt2['NO_CIT_RAW']/pt2['COUNT']
pt2['CIT'] = pt2['CIT_RAW']/pt2['COUNT']
# same column order as before; columns keep their numeric dtypes
pt2 = pt2[['NBHD','IS_RENTAL','NO_CIT_RAW','CIT_RAW','NO_CIT','CIT','COUNT']]
pt2

Unnamed: 0,NBHD,IS_RENTAL,NO_CIT_RAW,CIT_RAW,NO_CIT,CIT,COUNT
0,Allentown,NO,486,22,0.956693,0.0433071,508
1,Allentown,YES,99,6,0.942857,0.0571429,105
2,Black Rock,NO,457,94,0.829401,0.170599,551
3,Black Rock,YES,210,160,0.567568,0.432432,370
4,Broadway Fillmore,NO,1678,360,0.823356,0.176644,2038
...,...,...,...,...,...,...,...
67,Upper West Side,YES,505,367,0.579128,0.420872,872
68,West Hertel,NO,597,64,0.903177,0.096823,661
69,West Hertel,YES,153,68,0.692308,0.307692,221
70,West Side,NO,1019,268,0.791764,0.208236,1287


In [67]:
# SANITY CHECKS
# How many distinct class-2 SBLs in Central Park are not on the rental registry?
# (presumably a cross-check against the Central Park / IS_RENTAL == 'NO' row of pt2)
# The subset gets its own name so the joined frame `df` is left intact and the
# cells that build on it can be re-run.
central_park_oo = df[
    (df['NBHD']=='Central Park')
    & (df['PROP_TYPE'].str[0]=='2')
    & (df['IS_RENTAL']==0.0)
]
len(central_park_oo['SBL'].unique())


1467

In [49]:
# DATA VISUALIZATIONS
# charts
# Reshape pt2 to long form for Altair: one row per neighborhood, residence type
# (TYPE: 'RENT' for registry rentals, 'OO' otherwise) and citation status
# (STATUS: 'NO_CIT' / 'CIT'), with the share in PERCENT. Missing values become 0.
alt_pt2 = (
    pt2.drop(columns=['CIT_RAW','NO_CIT_RAW'])
       .melt(id_vars=['NBHD','IS_RENTAL','COUNT'], var_name='STATUS', value_name='PERCENT')
       .assign(TYPE=lambda long: ['RENT' if flag=='YES' else 'OO' for flag in long['IS_RENTAL']])
       .drop(columns=['IS_RENTAL'])
       .fillna(0)
)

In [86]:
def _count_label(nbhd_rows, residence_type):
    """Return the thousands-separated COUNT for one residence type ('OO' or 'RENT').

    nbhd_rows is the slice of alt_pt2 for a single neighborhood. Returns '0'
    when the neighborhood has no row of that type, instead of failing on an
    empty selection.
    """
    counts = nbhd_rows.loc[nbhd_rows['TYPE'] == residence_type, 'COUNT']
    if counts.empty:
        return '0'
    # take the scalar explicitly: int() on a Series is deprecated in pandas
    return format(int(counts.iloc[0]), ',d')


# only the "share that received a citation" rows are plotted
cit_rows = alt_pt2[alt_pt2['STATUS']=='CIT']

# one bar chart per neighborhood: citation share for owner-occupied (OO) vs rental (RENT)
charts = {}
for nbhd in alt_pt2['NBHD'].unique():
    tmp = cit_rows[cit_rows['NBHD']==nbhd]
    # two-line title: neighborhood name, then the number of SBLs behind each bar
    title = [nbhd, 'OO: ' + _count_label(tmp, 'OO') + ', RENT: ' + _count_label(tmp, 'RENT')]
    chart = alt.Chart(tmp[['NBHD','STATUS','TYPE','PERCENT']], title = title).mark_bar().encode(
        x=alt.X('TYPE', sort=['OO','RENT'], title = 'type of residence'),
        y=alt.Y('PERCENT:Q', axis=alt.Axis(format='%', title = '% that have received a citation'), scale=alt.Scale(domain=(0, 1))),
        color=alt.Color('STATUS:N', legend = None,
                        scale=alt.Scale(
                            domain=['NO_CIT','CIT'],
                            range=['lightblue','red'])),
        order=alt.Order('TYPE')
    )
    charts[nbhd] = chart

# Layout of the saved page: each inner list is one row of charts.
# A neighborhood that is not listed here still gets a chart in `charts`
# but is left out of the page.
chart_grid = [
    ['Allentown', 'Black Rock', 'Broadway Fillmore', 'Central', 'Central Park'],
    ['Delavan Grider', 'Ellicott', 'Elmwood Bidwell', 'Elmwood Bryant', 'Fillmore-Leroy'],
    ['First Ward', 'Fruit Belt', 'Genesee-Moselle', 'Grant-Amherst', 'Hamlin Park'],
    ['Hopkins-Tifft', 'Kaisertown', 'Kenfield', 'Kensington-Bailey', 'Lovejoy'],
    ['Lower West Side', 'Masten Park', 'MLK Park', 'North Park', 'Parkside'],
    ['Pratt-Willert', 'Riverside', 'Schiller Park', 'Seneca Babcock', 'Seneca-Cazenovia'],
    ['South Park', 'University Heights', 'Upper West Side', 'West Hertel', 'West Side'],
]
alt.vconcat(
    *[alt.hconcat(*[charts[name] for name in row]) for row in chart_grid]
).save('/home/dan/Python/QueenCityCounts/llrd_code/charts.html')

In [114]:
# map
# Both paths are built from the project directory so this cell does not depend
# on the kernel's working directory.
# NOTE(review): assumes Neighborhoods.geojson sits in the same data/ folder as
# the CSV inputs read earlier -- confirm.
project_dir = '/home/dan/Python/QueenCityCounts/llrd_code'
geojson = os.path.join(project_dir, 'data', 'Neighborhoods.geojson')
m = folium.Map([42.900155, -78.8485], zoom_start=12)
# neighborhood outlines; the tooltip shows the 'nbhdname' property without a field label
folium.GeoJson(
    geojson,
    tooltip=folium.GeoJsonTooltip(fields=['nbhdname'], labels=False),
).add_to(m)

m.save(os.path.join(project_dir, 'map.html'))