In [2]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

### Below are functions for cleaning up strings and converting data types. They were devised and implemented during a previous iteration of scraping:

In [3]:
# Characters / character patterns stripped from each column before type
# conversion. To remove something else, add it to the relevant list in the
# same format. Patterns are removed in list order.
# "\xa0" is the non-breaking space character.
remove_chars_Assesement = ["$", ",", "\xa0"]
remove_chars_Taxes = remove_chars_Assesement + [" **"]
remove_chars_Year = ["\xa0", " *"]
remove_chars_Address = ["\xa0"]

def Year_Cleaner(df, col, chars):
    """Strip unwanted substrings from a string column and convert it to int.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing the column to clean.
    col : str
        Name of the column; every value must be a string.
    chars : iterable of str
        Substrings removed from each value, applied in the given order.

    Returns
    -------
    pandas.Series
        Integer Series carrying the same index as ``df[col]``, so assigning
        it back to ``df`` stays row-aligned even when ``df`` does not have a
        default 0..n-1 index (e.g. after ``dropna`` or a row filter).

    Raises
    ------
    ValueError
        If a cleaned value cannot be parsed as an integer.
    """
    column = df[col]
    result = []
    for value in column:
        for char in chars:
            value = value.replace(char, '')
        result.append(value)
    # Reuse the source index: a bare pd.Series(result) would be numbered
    # 0..n-1 and misalign on assignment to a frame with gaps in its index.
    return pd.Series(result, index=column.index).astype(int)

def Annual_Assesement_Cleaner(df, col, chars):
    """Strip unwanted substrings from a string column and convert it to float.

    Values are parsed as integers first and only then cast to float, so a
    cleaned value that is not a whole number raises instead of being accepted.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing the column to clean.
    col : str
        Name of the column; every value must be a string.
    chars : iterable of str
        Substrings removed from each value, applied in the given order.

    Returns
    -------
    pandas.Series
        Float Series carrying the same index as ``df[col]``, so assigning it
        back to ``df`` stays row-aligned even when ``df`` does not have a
        default 0..n-1 index.

    Raises
    ------
    ValueError
        If a cleaned value cannot be parsed as an integer.
    """
    column = df[col]
    result = []
    for value in column:
        for char in chars:
            value = value.replace(char, '')
        result.append(value)
    # Reuse the source index: a bare pd.Series(result) would be numbered
    # 0..n-1 and misalign on assignment to a frame with gaps in its index.
    return pd.Series(result, index=column.index).astype(int).astype(float)

def Taxes_and_Fees_Cleaner(df, col, chars):
    """Strip unwanted substrings from a tax column and convert it to float.

    The placeholder sentence used when taxes are not yet published is first
    replaced by ``'0'``. After stripping, any value that is exactly the
    string ``'0'`` becomes NaN; other values are parsed as floats.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing the column to clean.
    col : str
        Name of the column; every value must be a string.
    chars : iterable of str
        Substrings removed from each value, applied in the given order.

    Returns
    -------
    pandas.Series
        Float Series carrying the same index as ``df[col]``, so assigning it
        back to ``df`` stays row-aligned even when ``df`` does not have a
        default 0..n-1 index (e.g. after ``dropna``).

    Raises
    ------
    ValueError
        If a cleaned value cannot be parsed as a float.
    """
    column = df[col]
    result = []
    for value in column:
        value = value.replace('Taxes and fees will be published after Council adopts the budget in May. ', '0')
        for char in chars:
            value = value.replace(char, '')
        result.append(value)
    # Reuse the source index: a bare pd.Series(result) would be numbered
    # 0..n-1 and misalign on assignment to a frame with gaps in its index.
    result_series = pd.Series(result, index=column.index).replace('0', np.nan).astype(float)
    return result_series

def Address_Obj_to_string(df, col, chars):
    """Strip unwanted substrings from a string column.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing the column to clean.
    col : str
        Name of the column; every value must be a string.
    chars : iterable of str
        Substrings removed from each value, applied in the given order.

    Returns
    -------
    pandas.Series
        Series of cleaned strings carrying the same index as ``df[col]``, so
        assigning it back to ``df`` stays row-aligned even when ``df`` does
        not have a default 0..n-1 index.
    """
    column = df[col]
    result = []
    for value in column:
        for char in chars:
            value = value.replace(char, '')
        result.append(value)
    # Reuse the source index: a bare pd.Series(result) would be numbered
    # 0..n-1 and misalign on assignment to a frame with gaps in its index.
    return pd.Series(result, index=column.index)


In [4]:
#df1 = pd.DataFrame(pd.read_csv('Land_Building_Table.csv')).drop(columns='Unnamed: 0').rename(columns={'A_Assesement' : 'year'})
#df1.set_index(['Num', 'year'], inplace=True)
#df1

#### The cell below cleans df2 and df3 by:
- Dropping extraneous columns
- Renaming columns to be more descriptive
- Converting object columns to numeric dtypes (ints and floats) or cleaned strings where appropriate
#### df2 and df3 are the outdated datasets

In [5]:
# df2: read the table, drop the saved index column, rename the levy-year
# column to 'year', then clean each column with its matching cleaner.
df2 = (
    pd.read_csv('New_table.csv')
    .drop(columns='Unnamed: 0')
    .rename(columns={'A_Levy_Year': 'year'})
    .assign(
        year=lambda frame: Year_Cleaner(frame, 'year', chars=remove_chars_Year),
        A_Annual_Assesement=lambda frame: Annual_Assesement_Cleaner(
            frame, 'A_Annual_Assesement', chars=remove_chars_Assesement
        ),
        A_Taxes_and_Fees=lambda frame: Taxes_and_Fees_Cleaner(
            frame, 'A_Taxes_and_Fees', chars=remove_chars_Taxes
        ),
    )
)
# Discard rows whose assessment or tax value is exactly 0 (NaN taxes are kept,
# because NaN == 0 is False).
df2 = df2.drop(
    index=df2.index[(df2['A_Annual_Assesement'] == 0) | (df2['A_Taxes_and_Fees'] == 0)]
)
print(df2)

# df3: read the table, drop the saved index column, rename the account-number
# column to 'Num', and strip unwanted characters from the address column.
df3 = (
    pd.read_csv('Table.csv')
    .drop(columns='Unnamed: 0')
    .rename(columns={'A_Account_Num': 'Num'})
    .assign(
        A_Properties=lambda frame: Address_Obj_to_string(
            frame, 'A_Properties', chars=remove_chars_Address
        )
    )
)
df3

             Num  year  A_Annual_Assesement  A_Taxes_and_Fees
7       50299400  2023            4395607.0           2410.80
8       50299400  2022            4395607.0           2296.00
9       50299400  2021            4395607.0           1960.00
10      50299400  2020            4395607.0           1120.00
11      50299400  2019            4395607.0           1120.00
...          ...   ...                  ...               ...
501496  50469430  2015              56650.0            590.86
501497  50469430  2014              56650.0            590.86
501498  50469430  2013              56650.0            588.03
501499  50469430  2012              56650.0            565.37
501500  50469430  2011              56650.0            565.37

[484577 rows x 4 columns]


Unnamed: 0,A_Properties,Num,A_SQFT
0,3000 S 28TH ST,50299410,1
1,3102 S 28TH ST,50299400,1
2,"3200 S 28TH ST, UNIT 101",50466800,1090
3,"3200 S 28TH ST, UNIT 102",50466810,1280
4,"3200 S 28TH ST, UNIT 201",50466820,700
...,...,...,...
39456,400 YOAKUM PY,50469430,0
39457,450 YOAKUM PY,50469440,1
39458,3630 ZABRISKIE DR,26312010,1
39459,3634 ZABRISKIE DR,26312020,1


# This is the code used to load the CSV output of our web scraper, clean it, merge it with the older tables, and save the result

In [6]:
# Load the scraper output and drop rows with any missing value. The index is
# reset afterwards so the cleaned tax column aligns row-for-row when assigned
# back, regardless of which rows dropna() removed.
New_Df = (
    pd.read_csv("Final_Table.csv")
    .drop(columns='Unnamed: 0')
    .dropna()
    .reset_index(drop=True)
)
New_Df['propTaxTotal'] = Taxes_and_Fees_Cleaner(New_Df, 'propTaxTotal', chars=remove_chars_Assesement)
New_Df['Num'] = New_Df['num']
# The cleaner turns '0' values into NaN; this second dropna removes those rows.
New_Df = New_Df.dropna().drop(columns='num')

# Left-join onto df2, then inner-join df3. No `on=` is given, so pandas joins
# on whichever column names the frames have in common.
# NOTE(review): presumably the shared key is 'Num' (and possibly 'year') - confirm.
AllFinanceDf = pd.merge(New_Df, df2, how='left').merge(df3)

# Expose the address under FULL_ADDS and remove the original column. The drop
# result must be assigned: DataFrame.drop returns a new frame by default.
AllFinanceDf['FULL_ADDS'] = AllFinanceDf['A_Properties']
AllFinanceDf = AllFinanceDf.drop(columns='A_Properties')

AllFinanceDf.to_csv('AllFinanceDf.csv')

In [7]:
import pandas as pd
import geopandas as gpd
import folium
# Load the cleaned data and expose the ZIP code as a string column named 'ZIP',
# the same column name and dtype given to geo_df below.
cleaneddata = pd.read_csv('Cleaned_Data.csv').drop(columns='Unnamed: 0')
cleaneddata = cleaneddata.rename(columns={'Zip5': 'ZIP'})
cleaneddata['ZIP'] = cleaneddata['ZIP'].astype(int).astype(str)

# Load the geometries; their NAME property is renamed to ZIP and cast to str.
geo_df = gpd.read_file('map_no_alex.geojson')
geo_df = geo_df.rename(columns={'NAME': 'ZIP'})
geo_df['ZIP'] = geo_df['ZIP'].astype(str)

# Keep only 2008, then average each numeric column per ZIP.
# numeric_only=True stops pandas >= 2.0 from raising TypeError if any
# non-numeric column is still present.
# NOTE(review): the columns of Cleaned_Data.csv are not visible here - confirm.
cleaneddata = cleaneddata.loc[cleaneddata['year'] == 2008]
cleaneddata = cleaneddata.groupby("ZIP").mean(numeric_only=True).reset_index()
cleaneddata['A_Annual_Assesement'] = cleaneddata['A_Annual_Assesement'].astype(int)

In [8]:

import folium
# Attach the per-ZIP values to the geometries; used by the tooltip layer below.
merged = pd.merge(geo_df, cleaneddata, how='left')

# Base map
m = folium.Map(zoom_start=12.1, location=[38.8048, -77.0469])

# Choropleth layer: each ZIP shape is coloured by its A_Taxes_and_Fees value
choropleth = folium.Choropleth(
    geo_data=geo_df,
    data=cleaneddata,
    columns=['ZIP', 'A_Taxes_and_Fees'],
    key_on='feature.properties.ZIP',
    name='choropleth',
    legend_name='Property Taxes Paid',
    fill_color='RdYlGn',
    fill_opacity=0.7,
    line_opacity=1,
)
choropleth.add_to(m)


def _invisible_style(feature):
    """Style every feature as transparent so only the tooltip is visible."""
    return {'fillColor': 'transparent', 'color': 'transparent'}


# Tooltip layer: a transparent GeoJson overlay that shows the ZIP code and
# the assessment value on hover
zip_tooltip = folium.features.GeoJsonTooltip(
    fields=['ZIP', 'A_Annual_Assesement'],
    aliases=['ZIP Code:', 'Average Assesement Value:'],
    localize=True,
)
folium.features.GeoJson(
    merged,
    style_function=_invisible_style,
    tooltip=zip_tooltip,
).add_to(m)

# Render the finished map
display(m)