In [2]:
import pandas as pd
import numpy as np
from sklearn_extra.cluster import KMedoids
import geopandas as gpd

In [None]:
# Load the feature table. The first column is dropped by position to form the
# clustering matrix -- presumably that column is GEOID; TODO confirm against X.csv.
Xo = pd.read_csv('X.csv')
X = Xo.iloc[:, 1:]

# Partition the rows into 8 clusters with k-medoids (PAM, BUILD initialisation,
# Euclidean distance).
kmedoids = KMedoids(n_clusters=8, method='pam', init='build', metric='euclidean')
kmedoids.fit(X)

clusters = kmedoids.labels_
medoids = kmedoids.cluster_centers_

# One row per input row: the tract identifier next to its cluster label.
# Built from a dict (instead of transposing a list of lists) so each column
# keeps its own dtype; 'Cluster' stays integer rather than becoming object.
# The '1400000US' prefix is stripped so the ids can be joined to the
# shapefile's GEOID column below.
clusters_df = pd.DataFrame({
    'GEOID': Xo['GEOID'].astype(str).str.replace('1400000US', '', regex=False),
    'Cluster': clusters,
})

# Tract geometries from the 2020 TIGER/Line file for state FIPS 17, restricted
# to county FIPS 031.
census_tracts = gpd.read_file('shapefile/tl_2020_17_tract.shp')
cook_county_tracts = census_tracts[census_tracts['COUNTYFP'] == '031']

# Inner join (the merge default): tracts without a matching GEOID in
# clusters_df are dropped and therefore never appear on the map.
plot_df = cook_county_tracts.merge(clusters_df, on='GEOID')

In [4]:
# Fill colour per cluster, indexed by the 0-based cluster label; the map layers
# and the legend display these as "Cluster 1" .. "Cluster 8".
cluster_colors = [
    '#20b46c',  # cluster 1
    '#98d414',  # cluster 2
    '#d05c3c',  # cluster 3
    '#1f8090',  # cluster 4
    '#f8c41c',  # cluster 5
    '#e08126',  # cluster 6
    '#a45d95',  # cluster 7
    '#0c2c7a',  # cluster 8
]

In [5]:
import folium
from folium import Element
from folium.plugins import GroupedLayerControl

# Basemap: CartoDB "light_nolabels" raster tiles plus the attribution string
# shown in the map corner.
attr = (
    '&copy; <a href="https://www.openstreetmap.org/copyright">OpenStreetMap</a> '
    'contributors, &copy; <a href="https://cartodb.com/attributions">CartoDB</a>'
)
tiles = "https://{s}.basemaps.cartocdn.com/light_nolabels/{z}/{x}/{y}.png"

m = folium.Map(
    location=[41.832015, -87.654070],
    tiles=tiles,
    attr=attr,
    zoom_start=10,
    layer_control=False,
    control_scale=False,
)

# Cluster labels as plain ints, computed once so that each layer below is a
# single filter instead of a full rescan of every tract per cluster.
cluster_labels = plot_df['Cluster'].astype(int)

# One FeatureGroup per cluster. The number of clusters follows the palette, so
# the layers and the legend cannot drift apart.
cluster_groups = []

for cluster_num, color in enumerate(cluster_colors):
    cluster_group = folium.FeatureGroup(name=f'Cluster {cluster_num + 1}')

    # `color` is bound as a default argument so that every layer keeps its own
    # colour even if the style function is invoked after the loop has moved on.
    def style_function(feature, color=color):
        """Return the borderless, semi-transparent fill style for one cluster."""
        return {
            'fillColor': color,
            'color': None,
            'weight': 0,
            'fillOpacity': 0.7
        }

    for geometry in plot_df.loc[cluster_labels == cluster_num, 'geometry']:
        # Each polygon is drawn as its own GeoJson layer; a MultiPolygon is
        # split into its parts and any other geometry type is skipped.
        if geometry.geom_type == 'Polygon':
            polygons = [geometry]
        elif geometry.geom_type == 'MultiPolygon':
            polygons = list(geometry.geoms)
        else:
            continue

        for poly in polygons:
            folium.GeoJson(poly, style_function=style_function).add_to(cluster_group)

    cluster_groups.append(cluster_group)

# Parent group holding every cluster layer; this is the overlay attached
# directly to the map.
all_groups = folium.FeatureGroup(name='All Clusters')

for cluster_group in cluster_groups:
    cluster_group.add_to(all_groups)

m.add_child(all_groups)

# Two controls: the standard layer control, plus a grouped control listing the
# individual cluster layers under a "Clusters" heading.
folium.LayerControl(collapsed=False,hideSingleBase=True).add_to(m)

GroupedLayerControl(
    groups={'Clusters': cluster_groups},
    collapsed=False
).add_to(m)

# Fixed-position "About" panel in the lower-left corner of the page.
about_section = folium.Element("""
    <div style="position: fixed;
                bottom: 50px; left: 10px; width: 300px; height: auto;
                background-color: white; z-index: 1000;
                padding: 10px; border: 1px solid lightgray;
                border-radius: 5px; font-family: Arial, sans-serif;
                font-size: 13px; box-shadow: 2px 2px 5px rgba(0,0,0,0.3);">
        <h4>About This Map</h4>
        <p>This map visualizes US Census Tracts clustered by investment quality. For more information about cluster characteristics, methodology, or data used, visit the
                                <a href="images/documentation.pdf" target="_blank">documentation</a>.</p>
        <p> Created by Carlos Soto csoto701@uchicago.edu or Carlos.Soto@exprealty.com </p>
        </div>                
""")

# Style overrides for the expanded Leaflet layer-control box.
css = """
<style>
    .leaflet-control-layers-expanded {
        font-size: 1.5em; /* Increase the font size */
        width: 120px; /* Adjust width if necessary */
        top: 50px;
    }
    .leaflet-control-layers-list{
    }
</style>
"""

# Legend: a fixed-position grid with one numbered colour swatch per cluster.
legend_html = """
<div style="
    position: fixed; 
    top: 430px; right: 10px; width: 245px; height: 120px; 
    background-color: white; border:2px solid grey; z-index:9999; 
    font-size:1.5em; padding: 10px; display: grid; 
    grid-template-columns: repeat(4, 50px); 
    grid-gap: 10px; text-align: center;">
"""

for i, color in enumerate(cluster_colors, start=1):
    legend_html += f"""
    <div style="
        width: 40px; height: 40px; 
        background-color: {color}; 
        display: flex; justify-content: center; align-items: center; 
        border: 1px solid black; color: white; font-weight: bold;">
        {i}
    </div>
    """

legend_html += "</div>"

# Inject the legend, the CSS overrides and the About panel into the page, then
# write the finished map to a standalone HTML file.
m.get_root().html.add_child(folium.Element(legend_html))
m.get_root().html.add_child(folium.Element(css))
m.get_root().html.add_child(about_section)

m.save('investment-cluster-map.html')

In [None]:
# Singular value decomposition of the feature matrix. Each row of Vt holds one
# weight per column of X.
U, E, Vt = np.linalg.svd(X)

# Display labels for the columns of X, matched to the loadings by position only.
# NOTE(review): the order is assumed to mirror the column order of X -- confirm
# against X.csv.
evars = [
    'median home value scaled',
    'avg household size scaled',
    '<18yrs scaled',
    'married household scaled',
    'child scaled',
    'nonrelatives scaled',
    'other relatives scaled',
    'new births scaled',
    '<9th grade scaled',
    '9th-12th grade scaled',
    'college graduate scaled',
    'grad graduate scaled',
    'total pop scaled',
    'median household income scaled',
    'in labor force scaled',
    'not in labor force scaled',
    'median rent scaled',
    'occupied_scaled',
    'vacant_scaled',
    'owner occupied scaled',
    'renter occupied scaled',
]

# A length mismatch would either mislabel the loadings or raise part-way
# through the loop, so fail early with a clear message instead.
if len(evars) != Vt.shape[1]:
    raise ValueError(
        f"evars has {len(evars)} labels but X has {Vt.shape[1]} columns"
    )

# For every row of Vt, describe its five largest-magnitude loadings
# (largest first) as "<weight><label> + <weight><label> + ...".
vts = []
for vt in Vt:
    top_indices = np.argsort(np.abs(vt))[-5:][::-1]
    vts.append(" + ".join([f"{round(vt[i],2)}{evars[i]}" for i in top_indices]))

# Print the first five rows; slicing keeps this safe when Vt has fewer than five.
for summary in vts[:5]:
    print(summary)

-0.33college graduate scaled + -0.31grad graduate scaled + -0.3median household income scaled + -0.28median rent scaled + 0.289th-12th grade scaled
-0.4renter occupied scaled + 0.4owner occupied scaled + 0.3married household scaled + 0.3child scaled + 0.29avg household size scaled
-0.54not in labor force scaled + 0.34<9th grade scaled + 0.34in labor force scaled + 0.26nonrelatives scaled + 0.24avg household size scaled
0.42occupied_scaled + -0.42vacant_scaled + -0.38<18yrs scaled + -0.32new births scaled + -0.27child scaled
0.57new births scaled + -0.36avg household size scaled + -0.36<9th grade scaled + -0.35nonrelatives scaled + -0.29median home value scaled
