In [1]:
import pandas as pd
import numpy as np
import requests
from datetime import datetime
import re
from matplotlib import pyplot as plt

from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans

from scipy.cluster.hierarchy import dendrogram, linkage
from scipy.cluster.hierarchy import fcluster

In [2]:
import plotly.figure_factory as ff

In [3]:
import dash_leaflet as dl

In [4]:
import sys
sys.path.append('..')

In [5]:

from config import cc_api_key
import folium

In [6]:
from utils.leg_api import generate_cc_df,start_hierarchical,plot_denogram,get_votes,cluster,make_base_map,find_close_votes

In [7]:
from dash import Dash, html, dash_table, dcc, callback, Output, Input,State

In [8]:
import geopandas as gpd

In [239]:
from dash_extensions.javascript import assign,arrow_function

In [31]:
import json

In [37]:
def find_close_votes():
    df = get_votes()
    vote_counts = df.groupby(['VoteEventItemId', 'VoteValueName']).size()
    pivot_table = vote_counts.unstack(fill_value=0)
    pivot_table = pivot_table.reset_index()

    pivot_table['anti'] = pivot_table['Negative'] + pivot_table['Abstain']
    pivot_table['total'] = pivot_table['Affirmative'] + pivot_table['anti']
    pivot_table['ratio'] = pivot_table['anti'] / pivot_table['Affirmative']

    pivot_table = pivot_table[pivot_table['ratio'].notna()]
    pivot_table.replace([np.inf, -np.inf], np.nan, inplace=True)

    full = pivot_table[pivot_table['total'] > 45]
    top_ratio = full.sort_values('ratio', ascending=False).head(100)

    ratio_ids = top_ratio['VoteEventItemId'].tolist()
    full_ratio = df[df['VoteEventItemId'].isin(ratio_ids)]

    fr_pivot = full_ratio.pivot_table(index='VotePersonName', columns='VoteEventItemId', values='VoteValueId')
    fr_pivot.dropna(axis=1, how='any', inplace=True)

    return fr_pivot

In [14]:
def get_votes():
    all_votes = []

    CM_RAW = requests.get(url="https://webapi.legistar.com/v1/nyc/Bodies/1/OfficeRecords/?$filter=OfficeRecordStartDate+eq+datetime'{}'&token={}".format(SESS_BEGIN, cc_api_key))
    CM_RAW = CM_RAW.json()
    for CM in CM_RAW:
        PERSON_DATA = requests.get(url="https://webapi.legistar.com/v1/nyc/Persons/{}/?&token={}".format(CM["OfficeRecordPersonId"], cc_api_key))
        CM_PERSONAL_DATA = PERSON_DATA.json()
    
        VOTES = requests.get(url="https://webapi.legistar.com/v1/nyc/Persons/{}/votes/?$filter=VoteLastModifiedUtc+gt+datetime'{}'&token={}".format(CM_PERSONAL_DATA["PersonId"], SESS_BEGIN, cc_api_key))
        VOTES_JSON = VOTES.json()
    
        all_votes.extend(VOTES_JSON)

    VOTER = pd.DataFrame(all_votes)
    return VOTER

In [5]:
TODAY = datetime.today()
SESS_BEGIN = TODAY.replace(year=TODAY.year - ((TODAY.year % 4) - 2), month=1, day=1).strftime("%Y-%m-%d") if (TODAY.year % 4) >= 2 else TODAY.replace(year=TODAY.year - ((TODAY.year % 4) + 2), month=1, day=1).strftime("%Y-%m-%d")
SESS_END = "{}-{}-{}".format(int(SESS_BEGIN.split("-")[0]) + 3, 12, 31)

In [9]:
vote_raw = get_votes()

In [26]:
close_votes = find_close_votes(vote_raw)

In [6]:
CM_RAW = requests.get(url="https://webapi.legistar.com/v1/nyc/Bodies/1/OfficeRecords/?$filter=OfficeRecordStartDate+eq+datetime'{}'&token={}".format(SESS_BEGIN, cc_api_key))

In [7]:
all_votes = []

CM_RAW = CM_RAW.json()
for CM in CM_RAW:
    VOTES = requests.get(url="https://webapi.legistar.com/v1/nyc/Persons/{}/votes/?$filter=VoteLastModifiedUtc+gt+datetime'{}'&token={}".format(CM["OfficeRecordPersonId"], SESS_BEGIN, cc_api_key))
    VOTES_JSON = VOTES.json()
    
    all_votes.extend(VOTES_JSON)

# Convert the list of votes into a DataFrame
VOTER = pd.DataFrame(all_votes)

In [20]:
vote_counts = VOTER.groupby(['VoteEventItemId', 'VoteValueName']).size()
pivot_table = vote_counts.unstack(fill_value=0)
pivot_table = pivot_table.reset_index()

In [9]:
pivot_table['anti'] = pivot_table['Negative'] + pivot_table['Abstain']
pivot_table['total'] = pivot_table['Affirmative'] + pivot_table['anti']
pivot_table['ratio'] = pivot_table['anti'] / pivot_table['Affirmative']

In [11]:
pivot_table = pivot_table[pivot_table['ratio'].notna()]
pivot_table.replace([np.inf, -np.inf], np.nan, inplace=True)

In [12]:
full = pivot_table[pivot_table['total'] > 45]
top_ratio = full.sort_values('ratio', ascending=False).head(100)

In [9]:
# Initialize the app
app = Dash(__name__)

In [10]:
cc_df = generate_cc_df()
votes_df = find_close_votes()

In [16]:
def scale_and_standardize(df):

    one_hot = pd.get_dummies(df, columns=df.columns)
    scaler = StandardScaler()
    scaled_df = scaler.fit_transform(one_hot)
    return scaled_df

In [18]:
scaled_df = scale_and_standardize(votes_df)

In [17]:
def hierarchical_cluster(df):
    Z = linkage(df, 'ward')
    return Z


In [98]:
def start_hierarchical(close_votes):
    scaled_df = scale_and_standardize(close_votes)
    Z = hierarchical_cluster(scaled_df)
    return Z

In [24]:
gdf = gpd.read_file('../shapefiles/nycc_22a')

In [12]:
Z = start_hierarchical(votes_df)

In [114]:
clusters = cluster(Z,votes_df,40)

In [96]:
def make_base_map(df,CM_DATA,gdf):
    gdfe = gdf.to_crs(epsg=4326)
    df = df.reset_index()
    master = pd.merge(CM_DATA, df, left_on='OfficeRecordFullName', right_on='VotePersonName', how='left')
    master = master[['OfficeRecordFullName', 'District', 'h_cluster']]
    master_map = pd.merge(gdfe, master, left_on='CounDist', right_on='District', how='left')

    return master_map

In [97]:
clusters

VoteEventItemId,380057,380071,380199,380210,380466,380467,380558,380559,381530,382089,...,394512,394513,394523,394657,394758,395002,395006,395643,395647,h_cluster
VotePersonName,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Adrienne E. Adams,15.0,15.0,15.0,15.0,15.0,15.0,15.0,15.0,15.0,15.0,...,15.0,15.0,15.0,15.0,15.0,15.0,15.0,15.0,15.0,22
Alexa Avilés,11.0,15.0,11.0,15.0,4.0,4.0,4.0,4.0,15.0,15.0,...,15.0,15.0,12.0,15.0,12.0,15.0,15.0,15.0,11.0,38
Althea V. Stevens,15.0,15.0,15.0,15.0,15.0,15.0,15.0,15.0,15.0,15.0,...,15.0,15.0,15.0,11.0,15.0,15.0,15.0,15.0,15.0,17
Amanda Farías,15.0,15.0,15.0,15.0,15.0,15.0,15.0,15.0,15.0,15.0,...,15.0,15.0,15.0,15.0,15.0,15.0,15.0,15.0,15.0,19
Ari Kagan,15.0,12.0,15.0,12.0,15.0,15.0,15.0,15.0,15.0,12.0,...,12.0,12.0,15.0,12.0,15.0,12.0,12.0,12.0,15.0,4
Carlina Rivera,15.0,15.0,15.0,15.0,15.0,15.0,15.0,15.0,15.0,15.0,...,15.0,15.0,15.0,15.0,15.0,15.0,15.0,15.0,15.0,33
Carmen N. De La Rosa,15.0,15.0,15.0,15.0,15.0,15.0,15.0,15.0,15.0,15.0,...,15.0,15.0,15.0,15.0,15.0,15.0,15.0,15.0,11.0,20
Charles Barron,12.0,15.0,12.0,15.0,15.0,15.0,15.0,15.0,15.0,15.0,...,15.0,15.0,12.0,15.0,12.0,15.0,15.0,15.0,15.0,34
Chi A. Ossé,11.0,15.0,11.0,15.0,15.0,15.0,15.0,15.0,15.0,15.0,...,15.0,15.0,15.0,15.0,15.0,15.0,15.0,15.0,11.0,8
Christopher Marte,15.0,15.0,15.0,15.0,15.0,15.0,15.0,15.0,15.0,15.0,...,15.0,15.0,15.0,15.0,15.0,15.0,15.0,15.0,15.0,19


In [115]:
clusters_coords = make_base_map(clusters,cc_df,gdf)

In [116]:
centroid = clusters_coords.geometry.centroid
mean_lat, mean_lon = centroid.y.mean(), centroid.x.mean()


Geometry is in a geographic CRS. Results from 'centroid' are likely incorrect. Use 'GeoSeries.to_crs()' to re-project geometries to a projected CRS before this operation.




In [125]:
clusters_coords['h_cluster'].value_counts()

h_cluster
2    36
1     8
3     6
4     1
Name: count, dtype: int64

In [117]:
app = Dash(__name__)

In [118]:
geojson = json.loads(clusters_coords.to_json())

In [108]:
style = {
    'weight': 2, 
    'opacity': 1, 
    'color': 'white',  # Line color
    'dashArray': '3', 
    'fillOpacity': 0.7
}

In [120]:
def generate_colorscale(num_clusters):
    cmap = plt.get_cmap('tab20')  # Or any other suitable colormap
    return [cmap(i / num_clusters) for i in range(num_clusters)]

In [121]:
@app.callback(
    Output('geojson', 'hideout'),
    [Input('submit-button', 'n_clicks')],
    [State('num-clusters-input', 'value')]
)
def update_map_style(n_clicks, num_clusters):
    if not callback_context.triggered:
        raise PreventUpdate
    colorscale = generate_colorscale(num_clusters)
    classes = list(range(1, num_clusters + 1))
    return {'classes': classes, 'colorscale': colorscale, 'style': style}

In [122]:
style_handle = assign("""function(feature){
    const h_cluster = feature.properties.h_cluster;
    let color = '#FFEDA0';  // Default color

    if (h_cluster === 1) {
        color = '#FED976';  // Color for h_cluster 1
    } else if (h_cluster === 2) {
        color = '#FC4E2A';  // Color for h_cluster 2
    } // Add more conditions as needed

    return { fillColor: color, weight: 1, opacity: 1, color: 'black', fillOpacity: 0.7 };
}""")


In [123]:
app = Dash(__name__)

centroid = clusters_coords.geometry.centroid
mean_lat, mean_lon = centroid.y.mean(), centroid.x.mean()


app.layout = html.Div([
    dl.Map(center=[mean_lat, mean_lon], zoom=12, children=[
        dl.TileLayer(),
        dl.GeoJSON(data=geojson, style=style_handle, 
                     hideout={'classes': [], 'colorscale': [], 'style': style}, id="geojson")
    ], style={'width': '100%', 'height': '50vh'}),
])



Geometry is in a geographic CRS. Results from 'centroid' are likely incorrect. Use 'GeoSeries.to_crs()' to re-project geometries to a projected CRS before this operation.




In [116]:
app.layout = html.Div([
    html.Button('Generate Dendrogram', id='dendrogram-button', n_clicks=0),
    dcc.Graph(id='dendrogram'),
    dcc.Input(id='num-clusters-input', type='number', placeholder='Enter number of clusters'),
    html.Button('Show Clusters on Map', id='cluster-map-button', n_clicks=0),
    dl.Map(
        [dl.TileLayer(url='https://{s}.tile.openstreetmap.org/{z}/{x}/{y}.png')],  
        id='map',
        center=[40.7128, -74.0060],  
        zoom=6,
        style={'height': '50vh'}
    ),
])

@app.callback(
    Output('dendrogram', 'figure'),
    Input('dendrogram-button', 'n_clicks'),
    prevent_initial_call=True
)
def generate_dendrogram(n_clicks):
    if n_clicks > 0: 
        Z = start_hierarchical(votes_df)
    return ff.create_dendrogram(Z)

@app.callback(
    Output('map', 'children'),
    [Input('cluster-map-button', 'n_clicks')],
    [State('num-clusters-input', 'value')],
    prevent_initial_call=True
)
def show_clusters_on_map(n_clicks, threshold):
    if n_clicks > 0 and threshold:
        Z = start_hierarchical(votes_df)
        clusters = cluster(Z, votes_df, threshold)
        clusters_coords = make_base_map(clusters, cc_df, gdf)
        tile_layer = dl.TileLayer(url='https://{s}.tile.openstreetmap.org/{z}/{x}/{y}.png')
        markers = [dl.Marker(position=coord) for coord in clusters_coords]
        return [tile_layer] + markers
    return [dl.TileLayer(url='https://{s}.tile.openstreetmap.org/{z}/{x}/{y}.png')]

Preserving functional map

In [127]:
app = Dash(__name__)

In [170]:
geojson_data = json.loads(clusters_coords.to_json())

In [129]:
style_handle = assign("""function(feature){
    const h_cluster = feature.properties.h_cluster;
    let color = '#FFEDA0';  // Default color

    if (h_cluster === 1) {
        color = '#FED976';  // Color for h_cluster 1
    } else if (h_cluster === 2) {
        color = '#FC4E2A';  // Color for h_cluster 2
    } // Add more conditions as needed

    return { fillColor: color, weight: 1, opacity: 1, color: 'black', fillOpacity: 0.7 };
}""")


In [130]:
centroid = clusters_coords.geometry.centroid
mean_lat, mean_lon = centroid.y.mean(), centroid.x.mean()

app.layout = html.Div([
    dl.Map(center=[mean_lat, mean_lon], zoom=12, children=[
        dl.TileLayer(),
        dl.GeoJSON(data=geojson, style=style_handle, id="geojson")
    ], style={'width': '100%', 'height': '50vh'}),
])


Geometry is in a geographic CRS. Results from 'centroid' are likely incorrect. Use 'GeoSeries.to_crs()' to re-project geometries to a projected CRS before this operation.




In [None]:
app.run_server(mode='inline')

End

Improvement 

In [228]:
def generate_colorscale(num_clusters):
    cmap = plt.get_cmap('tab20')
    colorscale = [
        "rgba({},{},{},{})".format(int(r * 255), int(g * 255), int(b * 255), a)
        for r, g, b, a in [cmap(i / num_clusters) for i in range(num_clusters)]
    ]
    return colorscale

In [240]:
app = Dash(__name__)

In [230]:
num_clusters = 5
colors = generate_colorscale(num_clusters)
style = dict(weight=2, opacity=1, color='white', dashArray='3', fillOpacity=0.7)

In [231]:
colors

['rgba(31,119,180,1.0)',
 'rgba(44,160,44,1.0)',
 'rgba(148,103,189,1.0)',
 'rgba(227,119,194,1.0)',
 'rgba(188,189,34,1.0)']

In [232]:
style_handle = assign("""function(feature, context){
    const {num_clusters, colorscale, style} = context.hideout;  // get properties from hideout
    const value = feature.properties.h_cluster;   // get the cluster number
    for (let i = 0; i < num_clusters; ++i) {      // iterate over the number of clusters
        if (value === i) {                        // compare with the cluster index
            style.fillColor = colorscale[i];      // set the fill color according to the index
            break;                                // break after setting the color
        }
    }
    // Debugging log
    console.log('Feature ID: ' + feature.id + ', Cluster: ' + value + ', Color: ' + style.fillColor);
    return style;
}""")


In [233]:
geojson = dl.GeoJSON(data=geojson_data,  
                     style=style_handle,  
                     hideout=dict(colorscale=colors, num_clusters = num_clusters, style=style),
                     id="geojson")

In [234]:
centroid = clusters_coords.geometry.centroid
mean_lat, mean_lon = centroid.y.mean(), centroid.x.mean()

app.layout = html.Div([
    dl.Map(center=[mean_lat, mean_lon], zoom=12, children=[
        dl.TileLayer(),
        geojson
    ], style={'width': '100%', 'height': '50vh'}),
])


Geometry is in a geographic CRS. Results from 'centroid' are likely incorrect. Use 'GeoSeries.to_crs()' to re-project geometries to a projected CRS before this operation.




In [None]:
app.run_server(mode='inline')

End

In [241]:
app.layout = html.Div([
    dl.Map([dl.TileLayer(), dl.GeoJSON(data=geojson_data,
                                       style=style_handle,
                                       hoverStyle=arrow_function(dict(weight=5, color='#666', dashArray='')),
                                       id="geojson")],
           style={'width': '100%', 'height': '50vh'}),
    html.Pre(id="info")  # Placeholder for displaying feature info
])

@app.callback(Output("info", "children"), Input("geojson", "hoverData"))
def display_click_info(feature):
    if feature is not None:
        # Update this to show the properties you're interested in
        return f"Office: {feature['properties']['OfficeRecordFullName']}\nDistrict: {feature['properties']['District']}"
    return "Click on a feature"