In [1]:
import pandas as pd
import numpy as np
import requests
from datetime import datetime
import re
from matplotlib import pyplot as plt

from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans

from scipy.cluster.hierarchy import dendrogram, linkage
from scipy.cluster.hierarchy import fcluster

In [2]:
import plotly.figure_factory as ff

In [3]:
import dash_leaflet as dl

In [4]:
import sys
sys.path.append('..')

In [5]:

from config import cc_api_key
import folium

In [6]:
from utils.leg_api import generate_cc_df,start_hierarchical,plot_denogram,get_votes,cluster,make_base_map,find_close_votes

In [20]:
from dash import Dash, html, dcc, callback, Output, Input,State

In [23]:
import dash_bootstrap_components as dbc

In [8]:
import geopandas as gpd

In [9]:
from dash_extensions.javascript import assign,arrow_function

In [10]:
import json

In [37]:
def find_close_votes():
    df = get_votes()
    vote_counts = df.groupby(['VoteEventItemId', 'VoteValueName']).size()
    pivot_table = vote_counts.unstack(fill_value=0)
    pivot_table = pivot_table.reset_index()

    pivot_table['anti'] = pivot_table['Negative'] + pivot_table['Abstain']
    pivot_table['total'] = pivot_table['Affirmative'] + pivot_table['anti']
    pivot_table['ratio'] = pivot_table['anti'] / pivot_table['Affirmative']

    pivot_table = pivot_table[pivot_table['ratio'].notna()]
    pivot_table.replace([np.inf, -np.inf], np.nan, inplace=True)

    full = pivot_table[pivot_table['total'] > 45]
    top_ratio = full.sort_values('ratio', ascending=False).head(100)

    ratio_ids = top_ratio['VoteEventItemId'].tolist()
    full_ratio = df[df['VoteEventItemId'].isin(ratio_ids)]

    fr_pivot = full_ratio.pivot_table(index='VotePersonName', columns='VoteEventItemId', values='VoteValueId')
    fr_pivot.dropna(axis=1, how='any', inplace=True)

    return fr_pivot

In [14]:
def get_votes():
    all_votes = []

    CM_RAW = requests.get(url="https://webapi.legistar.com/v1/nyc/Bodies/1/OfficeRecords/?$filter=OfficeRecordStartDate+eq+datetime'{}'&token={}".format(SESS_BEGIN, cc_api_key))
    CM_RAW = CM_RAW.json()
    for CM in CM_RAW:
        PERSON_DATA = requests.get(url="https://webapi.legistar.com/v1/nyc/Persons/{}/?&token={}".format(CM["OfficeRecordPersonId"], cc_api_key))
        CM_PERSONAL_DATA = PERSON_DATA.json()
    
        VOTES = requests.get(url="https://webapi.legistar.com/v1/nyc/Persons/{}/votes/?$filter=VoteLastModifiedUtc+gt+datetime'{}'&token={}".format(CM_PERSONAL_DATA["PersonId"], SESS_BEGIN, cc_api_key))
        VOTES_JSON = VOTES.json()
    
        all_votes.extend(VOTES_JSON)

    VOTER = pd.DataFrame(all_votes)
    return VOTER

In [5]:
TODAY = datetime.today()
SESS_BEGIN = TODAY.replace(year=TODAY.year - ((TODAY.year % 4) - 2), month=1, day=1).strftime("%Y-%m-%d") if (TODAY.year % 4) >= 2 else TODAY.replace(year=TODAY.year - ((TODAY.year % 4) + 2), month=1, day=1).strftime("%Y-%m-%d")
SESS_END = "{}-{}-{}".format(int(SESS_BEGIN.split("-")[0]) + 3, 12, 31)

In [9]:
vote_raw = get_votes()

In [26]:
close_votes = find_close_votes(vote_raw)

In [6]:
CM_RAW = requests.get(url="https://webapi.legistar.com/v1/nyc/Bodies/1/OfficeRecords/?$filter=OfficeRecordStartDate+eq+datetime'{}'&token={}".format(SESS_BEGIN, cc_api_key))

In [7]:
all_votes = []

CM_RAW = CM_RAW.json()
for CM in CM_RAW:
    VOTES = requests.get(url="https://webapi.legistar.com/v1/nyc/Persons/{}/votes/?$filter=VoteLastModifiedUtc+gt+datetime'{}'&token={}".format(CM["OfficeRecordPersonId"], SESS_BEGIN, cc_api_key))
    VOTES_JSON = VOTES.json()
    
    all_votes.extend(VOTES_JSON)

# Convert the list of votes into a DataFrame
VOTER = pd.DataFrame(all_votes)

In [20]:
vote_counts = VOTER.groupby(['VoteEventItemId', 'VoteValueName']).size()
pivot_table = vote_counts.unstack(fill_value=0)
pivot_table = pivot_table.reset_index()

In [9]:
pivot_table['anti'] = pivot_table['Negative'] + pivot_table['Abstain']
pivot_table['total'] = pivot_table['Affirmative'] + pivot_table['anti']
pivot_table['ratio'] = pivot_table['anti'] / pivot_table['Affirmative']

In [11]:
pivot_table = pivot_table[pivot_table['ratio'].notna()]
pivot_table.replace([np.inf, -np.inf], np.nan, inplace=True)

In [12]:
full = pivot_table[pivot_table['total'] > 45]
top_ratio = full.sort_values('ratio', ascending=False).head(100)

In [9]:
# Initialize the app
app = Dash(__name__)

In [10]:
cc_df = generate_cc_df()
votes_df = find_close_votes()

In [16]:
def scale_and_standardize(df):

    one_hot = pd.get_dummies(df, columns=df.columns)
    scaler = StandardScaler()
    scaled_df = scaler.fit_transform(one_hot)
    return scaled_df

In [18]:
scaled_df = scale_and_standardize(votes_df)

In [17]:
def hierarchical_cluster(df):
    Z = linkage(df, 'ward')
    return Z


In [98]:
def start_hierarchical(close_votes):
    scaled_df = scale_and_standardize(close_votes)
    Z = hierarchical_cluster(scaled_df)
    return Z

In [24]:
gdf = gpd.read_file('../shapefiles/nycc_22a')

In [12]:
Z = start_hierarchical(votes_df)

In [114]:
clusters = cluster(Z,votes_df,40)

In [96]:
def make_base_map(df,CM_DATA,gdf):
    gdfe = gdf.to_crs(epsg=4326)
    df = df.reset_index()
    master = pd.merge(CM_DATA, df, left_on='OfficeRecordFullName', right_on='VotePersonName', how='left')
    master = master[['OfficeRecordFullName', 'District', 'h_cluster']]
    master_map = pd.merge(gdfe, master, left_on='CounDist', right_on='District', how='left')

    return master_map

In [115]:
clusters_coords = make_base_map(clusters,cc_df,gdf)

In [116]:
centroid = clusters_coords.geometry.centroid
mean_lat, mean_lon = centroid.y.mean(), centroid.x.mean()


Geometry is in a geographic CRS. Results from 'centroid' are likely incorrect. Use 'GeoSeries.to_crs()' to re-project geometries to a projected CRS before this operation.




In [125]:
clusters_coords['h_cluster'].value_counts()

h_cluster
2    36
1     8
3     6
4     1
Name: count, dtype: int64

In [117]:
app = Dash(__name__)

In [118]:
geojson = json.loads(clusters_coords.to_json())

In [108]:
style = {
    'weight': 2, 
    'opacity': 1, 
    'color': 'white',  # Line color
    'dashArray': '3', 
    'fillOpacity': 0.7
}

In [120]:
def generate_colorscale(num_clusters):
    cmap = plt.get_cmap('tab20')  # Or any other suitable colormap
    return [cmap(i / num_clusters) for i in range(num_clusters)]

In [121]:
@app.callback(
    Output('geojson', 'hideout'),
    [Input('submit-button', 'n_clicks')],
    [State('num-clusters-input', 'value')]
)
def update_map_style(n_clicks, num_clusters):
    if not callback_context.triggered:
        raise PreventUpdate
    colorscale = generate_colorscale(num_clusters)
    classes = list(range(1, num_clusters + 1))
    return {'classes': classes, 'colorscale': colorscale, 'style': style}

In [122]:
style_handle = assign("""function(feature){
    const h_cluster = feature.properties.h_cluster;
    let color = '#FFEDA0';  // Default color

    if (h_cluster === 1) {
        color = '#FED976';  // Color for h_cluster 1
    } else if (h_cluster === 2) {
        color = '#FC4E2A';  // Color for h_cluster 2
    } // Add more conditions as needed

    return { fillColor: color, weight: 1, opacity: 1, color: 'black', fillOpacity: 0.7 };
}""")


In [123]:
app = Dash(__name__)

centroid = clusters_coords.geometry.centroid
mean_lat, mean_lon = centroid.y.mean(), centroid.x.mean()


app.layout = html.Div([
    dl.Map(center=[mean_lat, mean_lon], zoom=12, children=[
        dl.TileLayer(),
        dl.GeoJSON(data=geojson, style=style_handle, 
                     hideout={'classes': [], 'colorscale': [], 'style': style}, id="geojson")
    ], style={'width': '100%', 'height': '50vh'}),
])



Geometry is in a geographic CRS. Results from 'centroid' are likely incorrect. Use 'GeoSeries.to_crs()' to re-project geometries to a projected CRS before this operation.




In [116]:
app.layout = html.Div([
    
    html.Button('Generate Dendrogram', id='dendrogram-button', n_clicks=0),
    dcc.Graph(id='dendrogram'),
    dcc.Input(id='num-clusters-input', type='number', placeholder='Enter number of clusters'),
    html.Button('Show Clusters on Map', id='cluster-map-button', n_clicks=0),
    dl.Map(
        [dl.TileLayer(url='https://{s}.tile.openstreetmap.org/{z}/{x}/{y}.png')],  
        id='map',
        center=[40.7128, -74.0060],  
        zoom=6,
        style={'height': '50vh'}
    ),
])

@app.callback(
    Output('dendrogram', 'figure'),
    Input('dendrogram-button', 'n_clicks'),
    prevent_initial_call=True
)
def generate_dendrogram(n_clicks):
    if n_clicks > 0: 
        Z = start_hierarchical(votes_df)
    return ff.create_dendrogram(Z)

@app.callback(
    Output('map', 'children'),
    [Input('cluster-map-button', 'n_clicks')],
    [State('num-clusters-input', 'value')],
    prevent_initial_call=True
)
def show_clusters_on_map(n_clicks, threshold):
    if n_clicks > 0 and threshold:
        Z = start_hierarchical(votes_df)
        clusters = cluster(Z, votes_df, threshold)
        clusters_coords = make_base_map(clusters, cc_df, gdf)
        tile_layer = dl.TileLayer(url='https://{s}.tile.openstreetmap.org/{z}/{x}/{y}.png')
        markers = [dl.Marker(position=coord) for coord in clusters_coords]
        return [tile_layer] + markers
    return [dl.TileLayer(url='https://{s}.tile.openstreetmap.org/{z}/{x}/{y}.png')]

Preserving functional map

In [127]:
app = Dash(__name__)

In [170]:
geojson_data = json.loads(clusters_coords.to_json())

In [129]:
style_handle = assign("""function(feature){
    const h_cluster = feature.properties.h_cluster;
    let color = '#FFEDA0';  // Default color

    if (h_cluster === 1) {
        color = '#FED976';  // Color for h_cluster 1
    } else if (h_cluster === 2) {
        color = '#FC4E2A';  // Color for h_cluster 2
    } // Add more conditions as needed

    return { fillColor: color, weight: 1, opacity: 1, color: 'black', fillOpacity: 0.7 };
}""")


In [130]:
centroid = clusters_coords.geometry.centroid
mean_lat, mean_lon = centroid.y.mean(), centroid.x.mean()

app.layout = html.Div([
    dl.Map(center=[mean_lat, mean_lon], zoom=12, children=[
        dl.TileLayer(),
        dl.GeoJSON(data=geojson, style=style_handle, id="geojson")
    ], style={'width': '100%', 'height': '50vh'}),
])


Geometry is in a geographic CRS. Results from 'centroid' are likely incorrect. Use 'GeoSeries.to_crs()' to re-project geometries to a projected CRS before this operation.




In [None]:
app.run_server(mode='inline')

End

Improvement 

In [228]:
def generate_colorscale(num_clusters):
    cmap = plt.get_cmap('tab20')
    colorscale = [
        "rgba({},{},{},{})".format(int(r * 255), int(g * 255), int(b * 255), a)
        for r, g, b, a in [cmap(i / num_clusters) for i in range(num_clusters)]
    ]
    return colorscale

In [249]:
app = Dash(__name__)

In [250]:
num_clusters = 5
colors = generate_colorscale(num_clusters)
style = dict(weight=2, opacity=1, color='white', dashArray='3', fillOpacity=0.7)

In [231]:
colors

['rgba(31,119,180,1.0)',
 'rgba(44,160,44,1.0)',
 'rgba(148,103,189,1.0)',
 'rgba(227,119,194,1.0)',
 'rgba(188,189,34,1.0)']

In [251]:
style_handle = assign("""function(feature, context){
    const {num_clusters, colorscale, style} = context.hideout;  // get properties from hideout
    const value = feature.properties.h_cluster;   // get the cluster number
    for (let i = 0; i < num_clusters; ++i) {      // iterate over the number of clusters
        if (value === i) {                        // compare with the cluster index
            style.fillColor = colorscale[i];      // set the fill color according to the index
            break;                                // break after setting the color
        }
    }
    // Debugging log
    console.log('Feature ID: ' + feature.id + ', Cluster: ' + value + ', Color: ' + style.fillColor);
    return style;
}""")


In [252]:
geojson = dl.GeoJSON(data=geojson_data,  
                     style=style_handle,  
                     hideout=dict(colorscale=colors, num_clusters = num_clusters, style=style),
                     id="geojson")

In [253]:
centroid = clusters_coords.geometry.centroid
mean_lat, mean_lon = centroid.y.mean(), centroid.x.mean()

app.layout = html.Div([
    dl.Map(center=[mean_lat, mean_lon], zoom=12, children=[
        dl.TileLayer(),
        geojson
    ], style={'width': '100%', 'height': '50vh'}),
])


Geometry is in a geographic CRS. Results from 'centroid' are likely incorrect. Use 'GeoSeries.to_crs()' to re-project geometries to a projected CRS before this operation.




End

In [282]:
app = Dash(__name__)

In [278]:
def generate_colorscale(num_clusters, geojson_data):
    cmap = plt.get_cmap('tab20')
    colorscale = [
        {
            "rgba": "rgba({},{},{},{})".format(int(r * 255), int(g * 255), int(b * 255), a),
            "OfficeRecordFullName": feature["properties"]["OfficeRecordFullName"],
            "District": feature["properties"]["District"],
            "Cluster": feature["properties"]["h_cluster"],
        }
        for i, (r, g, b, a) in enumerate(cmap(i / num_clusters) for i in range(num_clusters))
        for feature in geojson_data["features"]
    ]
    return colorscale


In [299]:
app = Dash(__name__)
num_clusters = 5
colorscale = generate_colorscale(num_clusters,geojson_data)

centroid = clusters_coords.geometry.centroid
mean_lat, mean_lon = centroid.y.mean(), centroid.x.mean()

hover_info = html.Div(
    id="hover-info",
    style={
        "position": "absolute",
        "top": "10px",
        "right": "10px",
        "zIndex": "1000",
        "background-color": "white", 
        "padding": "10px",  
        "border": "1px solid #ccc", 
        "border-radius": "5px",  
        "box-shadow": "0 0 5px rgba(0, 0, 0, 0.2)" 
    }
)

app.layout = html.Div([
    dl.Map(center=[mean_lat, mean_lon], zoom=12, children=[
        dl.TileLayer(),
        geojson
    ], style={'width': '100%', 'height': '50vh'}),
    hover_info
])

@app.callback(
    Output("hover-info", "children"),
    Input("geojson", "hoverData")
)
def update_hover_info(hover_data):
    if hover_data is not None:
        properties = hover_data["properties"]
        office_name = properties.get("OfficeRecordFullName", "N/A") 
        district = properties.get("District", "N/A") 
        cluster_number = properties.get("h_cluster", "N/A") 

        return [
            html.H4("District Information"),
            html.P(f"Council Member: {office_name}"),
            html.P(f"District: {district}"),
            html.P(f"Cluster: {cluster_number}")
        ]
    return []




Geometry is in a geographic CRS. Results from 'centroid' are likely incorrect. Use 'GeoSeries.to_crs()' to re-project geometries to a projected CRS before this operation.




In [None]:
app.run_server(mode='inline')

Attempt to add user input 

In [321]:
def generate_colorscale(num_clusters, geojson_data):
    cmap = plt.get_cmap('tab20')
    colorscale = [
        "rgba({},{},{},{})".format(int(r * 255), int(g * 255), int(b * 255), a)
        for r, g, b, a in (cmap(i / num_clusters) for i in range(num_clusters))
    ]
    return colorscale


In [322]:
app = Dash(__name__)

initial_threshold = 40  # Adjust this based on what makes sense for your data

# Generate initial clusters and GeoJSON data
clusters = cluster(Z, votes_df, initial_threshold)
clusters_coords = make_base_map(clusters, cc_df, gdf)
geojson_data = json.loads(clusters_coords.to_json())
num_clusters = 5  
colorscale = generate_colorscale(num_clusters, geojson_data)

centroid = clusters_coords.geometry.centroid
mean_lat, mean_lon = centroid.y.mean(), centroid.x.mean()

app.layout = html.Div([
    dcc.Slider(
        id='cluster-threshold-slider',
        min=0,
        max=100,  # Adjust max value based on your data's scale
        step=1,
        value=40,  # Initial value
        marks={i: str(i) for i in range(0, 101, 10)},  # Adjust marks according to your needs
    ),
    html.Div(id='slider-output-container'),  # Optional: Displays chosen threshold
    dl.Map(center=[mean_lat, mean_lon], zoom=12, children=[
        dl.TileLayer(),
        dl.GeoJSON(data=geojson_data,  
                     style=style_handle,  
                     hideout=dict(colorscale=colorscale, num_clusters = num_clusters, style=style),
                     id="geojson")
    ], style={'width': '100%', 'height': '50vh'}),
    hover_info
])

@app.callback(
    Output("geojson", "data"),
    [Input("cluster-threshold-slider", "value")]
)
def update_output(value):
    clusters = cluster(Z, votes_df, value)
    num_clusters = clusters['h_cluster'].nunique()
    clusters_coords = make_base_map(clusters, cc_df, gdf)
    new_geojson_data = json.loads(clusters_coords.to_json())
    colorscale = generate_colorscale(num_clusters, new_geojson_data)
    print(f"Updated GeoJSON Data: {str(geojson_data)[:500]}")
    print(f"Updated GeoJSON Data: {str(colorscale)[:500]}")
    return new_geojson_data




Geometry is in a geographic CRS. Results from 'centroid' are likely incorrect. Use 'GeoSeries.to_crs()' to re-project geometries to a projected CRS before this operation.




In [None]:
app.run_server(mode='inline')

In [1]:
app = Dash(__name__)

initial_threshold = 40  
clusters = cluster(Z, votes_df, initial_threshold)
clusters_coords = make_base_map(clusters, cc_df, gdf)
geojson_data = json.loads(clusters_coords.to_json())
num_clusters = 5  
colorscale = generate_colorscale(num_clusters, geojson_data)

centroid = clusters_coords.geometry.centroid
mean_lat, mean_lon = centroid.y.mean(), centroid.x.mean()

hover_info = html.Div(
    id="hover-info",
    style={
        "position": "absolute",
        "top": "10px",
        "right": "10px",
        "zIndex": "1000",
        "background-color": "white", 
        "padding": "10px",  
        "border": "1px solid #ccc", 
        "border-radius": "5px",  
        "box-shadow": "0 0 5px rgba(0, 0, 0, 0.2)" 
    }
)

def create_dendrogram(Z):
    fig = ff.create_dendrogram(Z, orientation='bottom')
    fig.update_layout(width=800, height=400) 
    return fig
dendrogram_fig = create_dendrogram(Z)

app.layout = html.Div([
        html.Div([
        html.P("Text Here Does It work?."),
    ], style={'padding': '20px'}),
    html.Div([
        dcc.Graph(
            id='dendrogram-plot',
            figure=dendrogram_fig
        )
    ]),
    dcc.Slider(
        id='cluster-threshold-slider',
        min=0,
        max=100,  
        step=1,
        value=40,  
        marks={i: str(i) for i in range(0, 101, 10)},  
    ),
    html.Div(id='slider-output-container'),  
    html.Div([
        dl.Map(center=[mean_lat, mean_lon], zoom=12, children=[
            dl.TileLayer(),
            dl.GeoJSON(data=geojson_data,  
                         style=style_handle,  
                         hideout=dict(colorscale=colorscale, num_clusters=num_clusters, style=style),
                         id="geojson")
        ], style={'width': '100%', 'height': '50vh'}),

        hover_info  
    ], style={'position': 'relative'}),  
])


@app.callback(
    Output("geojson", "data"),
    [Input("cluster-threshold-slider", "value")]
)
def update_output(value):
    clusters = cluster(Z, votes_df, value)
    num_clusters = clusters['h_cluster'].nunique()
    clusters_coords = make_base_map(clusters, cc_df, gdf)
    new_geojson_data = json.loads(clusters_coords.to_json())
    colorscale = generate_colorscale(num_clusters, new_geojson_data)
    return new_geojson_data

@app.callback(
    Output("hover-info", "children"),
    Input("geojson", "hoverData")
)
def update_hover_info(hover_data):
    if hover_data is not None:
        properties = hover_data["properties"]
        office_name = properties.get("OfficeRecordFullName", "N/A") 
        district = properties.get("District", "N/A") 
        cluster_number = properties.get("h_cluster", "N/A") 

        return [
            html.H4("District Information"),
            html.P(f"Council Member: {office_name}"),
            html.P(f"District: {district}"),
            html.P(f"Cluster: {cluster_number}")
        ]
    return []


NameError: name 'Dash' is not defined

In [None]:
app.run_server(mode='inline')

In [29]:
def generate_colorscale(num_clusters, geojson_data):
    cmap = plt.get_cmap('viridis')
    colorscale = [
        "rgba({},{},{},{})".format(int(r * 255), int(g * 255), int(b * 255), a)
        for r, g, b, a in (cmap(i / num_clusters) for i in range(num_clusters))
    ]
    print(f"Generated colorscale: {colorscale}")
    return colorscale

style_handle = assign("""function(feature, context){
    const {num_clusters, colorscale, style} = context.hideout;  // get properties from hideout
    const value = feature.properties.h_cluster;   // get the cluster number
    for (let i = 0; i < num_clusters; ++i) {      // iterate over the number of clusters
        if (value === i + 1) {                        // compare with the cluster index
            style.fillColor = colorscale[i];      // set the fill color according to the index
            break;                                // break after setting the color
        }
    }
    // Debugging log
    console.log('Feature ID: ' + feature.id + ', Cluster: ' + value + ', Color: ' + style.fillColor);
    return style;
}""")


In [14]:
cc_df = generate_cc_df()
votes_df = find_close_votes()
gdf = gpd.read_file('../shapefiles/nycc_22a')
Z = start_hierarchical(votes_df)

In [11]:
votes_df = find_close_votes()
cc_df = generate_cc_df()
gdf = gpd.read_file('../shapefiles/nycc_22a')
Z = start_hierarchical(votes_df)
clusters = cluster(Z, votes_df, 40)
clusters_coords = make_base_map(clusters, cc_df, gdf)

In [25]:
clusters = cluster(Z, votes_df, 20)
clusters_coords = make_base_map(clusters, cc_df, gdf)

In [17]:
clusters_coords['h_cluster'].value_counts()

h_cluster
2    36
1     8
3     6
4     1
Name: count, dtype: int64

In [26]:

app = Dash(__name__)

initial_threshold = 30  
clusters = cluster(Z, votes_df, initial_threshold)
clusters_coords = make_base_map(clusters, cc_df, gdf)
geojson_data = json.loads(clusters_coords.to_json())
num_clusters = 15 
colorscale = generate_colorscale(num_clusters, geojson_data)
style = dict(weight=2, opacity=1, color='white', dashArray='3', fillOpacity=0.7)

centroid = clusters_coords.geometry.centroid
mean_lat, mean_lon = centroid.y.mean(), centroid.x.mean()

hover_info = html.Div(
    id="hover-info",
    style={
        "position": "absolute",
        "top": "10px",
        "right": "10px",
        "zIndex": "1000",
        "background-color": "white", 
        "padding": "10px",  
        "border": "1px solid #ccc", 
        "border-radius": "5px",  
        "box-shadow": "0 0 5px rgba(0, 0, 0, 0.2)" 
    }
)

def create_dendrogram(Z):
    fig = ff.create_dendrogram(Z, orientation='bottom')
    fig.update_layout(width=1000, height=600) 
    return fig
dendrogram_fig = create_dendrogram(Z)

app.layout = html.Div([
        html.Div([
        html.P("The application takes voting data from the New York City Councils “Legistar” API and clusters council members by the way that they vote."), 
        html.P("The method of clustering user here is known as “Hierarchical Clustering.” The graph you see below is known as a “dendrogram” and is used in the process of hierarchical clustering. You can think of the top of the y-axis as one cluster, everyone on the City Council, and the bottom as 51 separate clusters in which each council member gets their own cluster. Where you are on the y-axis determines the numbers of clusters. For example, at the very top of the dendrogram we see two lines, which means two clusters. The line on the left is attached of a very small group of individuals at the bottom, these are the councils Republicans. As you go further down, those clusters further subdivide as we get more specific."),  
        html.P("The numbers on the y-axis correspond to the numbers on the slider above the map. Move the slider to change the number of clusters and see how the map changes."), 
    ], style={'padding': '20px','textAlign': 'center'}),
    html.Div([
        dcc.Graph(
            id='dendrogram-plot',
            figure=dendrogram_fig
        )
    ]),
    dcc.Slider(
        id='cluster-threshold-slider',
        min=0,
        max=100,  
        step=1,
        value=40,  
        marks={i: str(i) for i in range(0, 101, 10)},  
    ),
    html.Div(id='slider-output-container'),  
    html.Div([
        dl.Map(center=[mean_lat, mean_lon], zoom=12, children=[
            dl.TileLayer(),
            dl.GeoJSON(data=geojson_data,  
                         style=style_handle,  
                         hideout=dict(colorscale=colorscale, num_clusters=num_clusters, style=style),
                         id="geojson")
        ], style={'width': '100%', 'height': '50vh'}),

        hover_info  
    ], style={'position': 'relative'}),  
])


@app.callback(
    Output("geojson", "data"),
    [Input("cluster-threshold-slider", "value")]
)
def update_output(value):
    clusters = cluster(Z, votes_df, value)
    num_clusters = clusters['h_cluster'].nunique()
    print(f"Updated number of clusters: {num_clusters}")

    clusters_coords = make_base_map(clusters, cc_df, gdf)
    new_geojson_data = json.loads(clusters_coords.to_json())
    properties_summary = [
        {key: feature['properties'][key] for key in ['OfficeRecordFullName', 'District', 'h_cluster']}
        for feature in new_geojson_data['features']
    ]
    print(f"Sample of updated GeoJSON data: {properties_summary[:5]}")

    colorscale = generate_colorscale(num_clusters, new_geojson_data)
    print(f"Updated colorscale: {colorscale}")

    return new_geojson_data

@app.callback(
    Output("hover-info", "children"),
    Input("geojson", "hoverData")
)
def update_hover_info(hover_data):
    if hover_data is not None:
        properties = hover_data["properties"]
        office_name = properties.get("OfficeRecordFullName", "N/A") 
        district = properties.get("District", "N/A") 
        cluster_number = properties.get("h_cluster", "N/A") 

        return [
            html.H4("District Information"),
            html.P(f"Council Member: {office_name}"),
            html.P(f"District: {district}"),
            html.P(f"Cluster: {cluster_number}")
        ]
    return []

Generated colorscale: ['rgba(68,1,84,1.0)', 'rgba(72,25,107,1.0)', 'rgba(70,47,124,1.0)', 'rgba(64,67,135,1.0)', 'rgba(56,86,139,1.0)', 'rgba(48,103,141,1.0)', 'rgba(41,120,142,1.0)', 'rgba(35,136,141,1.0)', 'rgba(30,152,138,1.0)', 'rgba(34,167,132,1.0)', 'rgba(53,183,120,1.0)', 'rgba(83,197,103,1.0)', 'rgba(121,209,81,1.0)', 'rgba(165,218,53,1.0)', 'rgba(210,225,27,1.0)']



Geometry is in a geographic CRS. Results from 'centroid' are likely incorrect. Use 'GeoSeries.to_crs()' to re-project geometries to a projected CRS before this operation.




Test new structure to fix color issue

In [30]:
app = Dash(__name__,external_stylesheets=[dbc.themes.CYBORG])

initial_threshold = 40  
clusters = cluster(Z, votes_df, initial_threshold)
clusters_coords = make_base_map(clusters, cc_df, gdf)
geojson_data = json.loads(clusters_coords.to_json())
num_clusters = 5  
colorscale = generate_colorscale(num_clusters, geojson_data)
style = dict(weight=2, opacity=1, color='white', dashArray='3', fillOpacity=0.7)

centroid = clusters_coords.geometry.centroid
mean_lat, mean_lon = centroid.y.mean(), centroid.x.mean()

hover_info = html.Div(
    id="hover-info",
    style={
        "position": "absolute",
        "top": "10px",
        "right": "10px",
        "zIndex": "1000",
        "background-color": "white", 
        "padding": "10px",  
        "border": "1px solid #ccc", 
        "border-radius": "5px",  
        "box-shadow": "0 0 5px rgba(0, 0, 0, 0.2)" 
    }
)

def create_dendrogram(Z):
    fig = ff.create_dendrogram(Z, orientation='bottom')
    fig.update_layout(width=1000, height=600) 
    return fig
dendrogram_fig = create_dendrogram(Z)

app.layout = html.Div([
        html.Div([
        html.P("This application takes voting data from the New York City Councils “Legistar” API and clusters council members by the way that they vote. Beacuse most votes are fairly lopsided, this application only looks at the most competitive votes. "), 
        html.P("The method of clustering used here is known as “Hierarchical Clustering.” The graph you see below is called a “dendrogram” and is used in the process of hierarchical clustering. You can think of the top of the y-axis as one cluster, everyone on the City Council, and the bottom as around 25 separate clusters in which each council member is paired with their nearest neighbor(s). The location on the y-axis determines the numbers of clusters. For example, at the very top of the dendrogram we see two lines, which means two clusters. The line on the left is attached of a very small group of individuals at the bottom, these are the council's most conservative members. As you go further down, those clusters further subdivide as we get more specific."),  
        html.P("The numbers on the y-axis correspond to the numbers on the slider above the map. Move the slider to change the number of clusters and see how the map changes. You can also hover over the map to see the district number, council member, and cluster number."), 
    ], style={'font-family': 'Georgia','padding': '20px','textAlign': 'center'}),
    html.Div([
        dcc.Graph(
            id='dendrogram-plot',
            figure=dendrogram_fig
        )
    ]),
    dcc.Slider(
        id='cluster-threshold-slider',
        min=0,
        max=100,  
        step=1,
        value=40,  
        marks={i: str(i) for i in range(0, 101, 10)},  
    ),
    html.Div(id='slider-output-container'),  
    html.Div([
        dl.Map(center=[mean_lat, mean_lon], zoom=12, children=[
            dl.TileLayer(),
            dl.GeoJSON(data=geojson_data,  
                         style=style_handle,  
                         hideout=dict(colorscale=colorscale, num_clusters=num_clusters, style=style),
                         id="geojson")
        ], style={'width': '100%', 'height': '50vh'}),

        hover_info  
    ], style={'position': 'relative'}),  
])


@app.callback(
    Output("geojson", "data"),
    [Input("cluster-threshold-slider", "value")]
)
def update_output(value):
    clusters = cluster(Z, votes_df, value)
    num_clusters = clusters['h_cluster'].nunique()
    clusters_coords = make_base_map(clusters, cc_df, gdf)
    new_geojson_data = json.loads(clusters_coords.to_json())
    colorscale = generate_colorscale(num_clusters, new_geojson_data)
    return new_geojson_data

@app.callback(
    Output("hover-info", "children"),
    Input("geojson", "hoverData")
)
def update_hover_info(hover_data):
    if hover_data is not None:
        properties = hover_data["properties"]
        office_name = properties.get("OfficeRecordFullName", "N/A") 
        district = properties.get("District", "N/A") 
        cluster_number = properties.get("h_cluster", "N/A") 

        return [
            html.H4("District Information"),
            html.P(f"Council Member: {office_name}"),
            html.P(f"District: {district}"),
            html.P(f"Cluster: {cluster_number}")
        ]
    return []


@app.callback(
    Output('geojson', 'hideout'), 
    [Input("cluster-threshold-slider", "value")]
)
def update_hideout(value):

    clusters = cluster(Z, votes_df,value)
    num_clusters = clusters['h_cluster'].nunique()
    clusters_coords = make_base_map(clusters, cc_df, gdf)
    new_geojson_data = json.loads(clusters_coords.to_json())
    colorscale = generate_colorscale(num_clusters, new_geojson_data)
    
    new_hideout = {
        'colorscale': colorscale,
        'num_clusters': num_clusters,
        'style' : style
    }


    return new_hideout


Generated colorscale: ['rgba(68,1,84,1.0)', 'rgba(64,67,135,1.0)', 'rgba(41,120,142,1.0)', 'rgba(34,167,132,1.0)', 'rgba(121,209,81,1.0)']



Geometry is in a geographic CRS. Results from 'centroid' are likely incorrect. Use 'GeoSeries.to_crs()' to re-project geometries to a projected CRS before this operation.




In [31]:
app.run_server(mode='inline')

Generated colorscale: ['rgba(68,1,84,1.0)', 'rgba(58,82,139,1.0)', 'rgba(32,144,140,1.0)', 'rgba(94,201,97,1.0)']
Generated colorscale: ['rgba(68,1,84,1.0)', 'rgba(58,82,139,1.0)', 'rgba(32,144,140,1.0)', 'rgba(94,201,97,1.0)']
Generated colorscale: ['rgba(68,1,84,1.0)', 'rgba(71,18,101,1.0)', 'rgba(72,35,116,1.0)', 'rgba(69,52,127,1.0)', 'rgba(64,67,135,1.0)', 'rgba(58,82,139,1.0)', 'rgba(52,94,141,1.0)', 'rgba(46,107,142,1.0)', 'rgba(41,120,142,1.0)', 'rgba(36,132,141,1.0)', 'rgba(32,144,140,1.0)', 'rgba(30,155,137,1.0)', 'rgba(34,167,132,1.0)', 'rgba(47,179,123,1.0)', 'rgba(68,190,112,1.0)', 'rgba(94,201,97,1.0)', 'rgba(121,209,81,1.0)', 'rgba(154,216,60,1.0)', 'rgba(189,222,38,1.0)', 'rgba(223,227,24,1.0)']
Generated colorscale: ['rgba(68,1,84,1.0)', 'rgba(71,18,101,1.0)', 'rgba(72,35,116,1.0)', 'rgba(69,52,127,1.0)', 'rgba(64,67,135,1.0)', 'rgba(58,82,139,1.0)', 'rgba(52,94,141,1.0)', 'rgba(46,107,142,1.0)', 'rgba(41,120,142,1.0)', 'rgba(36,132,141,1.0)', 'rgba(32,144,140,1.0)', '