In [17]:
from pathlib import Path

import geopandas as gpd
import pandas as pd
import pydeck as pdk
from branca.colormap import LinearColormap
from config import ICON_URL, geyser_hexes, sunset_hexes
from srai.regionalizers import geocode_to_region_gdf
from tqdm import tqdm

In [6]:
# Build the index of all generated maps: one turnout entry per city followed by
# one entry for every unordered pair of candidates.
city_data = pd.read_csv("../input_data/nec_urls.csv")

pairs_ordered = []

for CITY in city_data["city"].sort_values():
    h3_votes_data = pd.read_json(
        f"../output_data/{CITY}_h3_votes_data.json", orient="records"
    )

    # Candidate columns: everything except "h3" and columns containing "_".
    candidates_names = [
        column
        for column in h3_votes_data.columns
        if not (column == "h3" or "_" in column)
    ]

    # Candidates ranked by their summed column values, largest first.
    ordered_candidates = (
        h3_votes_data[candidates_names].sum().sort_values(ascending=False)
    )
    ranked_names = list(ordered_candidates.index)

    # Upper-cased first letter of every word of the name, e.g. "BRUSKI Rafał Piotr" -> "BRP".
    initials = {
        name: "".join(word.upper()[0] for word in name.split())
        for name in ranked_names
    }

    city_name = city_data.loc[city_data["city"] == CITY].iloc[0]["city_name"]

    pairs_ordered.append((city_name, "Frekwencja", f"{CITY}_turnout.html", True))

    # Only the first pair of a city (the two top-ranked candidates) is flagged as bold.
    is_first = True
    for position, first in enumerate(ranked_names):
        for second in ranked_names[position + 1 :]:
            candidate_0 = initials[first]
            candidate_1 = initials[second]

            # The file name always carries the initials in ascending string order.
            if candidate_0 < candidate_1:
                low, high = candidate_0, candidate_1
            else:
                low, high = candidate_1, candidate_0

            pairs_ordered.append(
                (
                    city_name,
                    f"{first} vs {second}",
                    f"{CITY}_{low}_{high}.html",
                    is_first,
                )
            )
            is_first = False

# Bold entries first; the original insertion order is preserved within each group.
pairs_df = pd.DataFrame(
    pairs_ordered, columns=["city", "url_name", "url", "bold"]
).reset_index()
pairs_df = pairs_df.sort_values(by=["bold", "index"], ascending=[False, True])
pairs_df

Unnamed: 0,index,city,url_name,url,bold
0,0,Białystok,Frekwencja,bia_turnout.html,True
1,1,Białystok,TRUSKOLASKI Tadeusz vs DĘBOWSKI Henryk,bia_DH_TT.html,True
11,11,Bydgoszcz,Frekwencja,byd_turnout.html,True
12,12,Bydgoszcz,BRUSKI Rafał Piotr vs SCHREIBER Łukasz Paweł,byd_BRP_SŁP.html,True
18,18,Gdańsk,Frekwencja,gda_turnout.html,True
...,...,...,...,...,...
271,271,Zielona Góra,KUBICKI Janusz Krzysztof vs JASIŃSKI Janusz Piotr,zie_JJP_KJK.html,False
272,272,Zielona Góra,KUBICKI Janusz Krzysztof vs HRESZCZYK Adam Piotr,zie_HAP_KJK.html,False
273,273,Zielona Góra,MAĆKOWIAK Grzegorz Przemysław vs JASIŃSKI Janu...,zie_JJP_MGP.html,False
274,274,Zielona Góra,MAĆKOWIAK Grzegorz Przemysław vs HRESZCZYK Ada...,zie_HAP_MGP.html,False


In [19]:
# One dropdown <li> per map; rows flagged as bold get the city name wrapped in <b>.
hrefs = [
    (
        f'<li><a class="dropdown-item" href="./{row.url}"><b>{row.city}</b> - {row.url_name}</a></li>'
        if row.bold
        else f'<li><a class="dropdown-item" href="./{row.url}">{row.city} - {row.url_name}</a></li>'
    )
    for _, row in pairs_df.iterrows()
]

# Bootstrap dropdown that is later embedded in the description box of every map.
html_text = f"""
<div class="dropdown">
  <button class="btn btn-secondary dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false">
    Zmień mapę
  </button>
  <ul class="dropdown-menu" style="max-height: 25vw; overflow: auto;">
    {''.join(hrefs)}
  </ul>
</div>
"""

select_path = Path("../output_data/select.html")
select_path.write_text(html_text)

32779

In [27]:
def generate_map_for_candidates(pair, full_data_geo, area, city_name):
    """Render a head-to-head map for two candidates and save it as an HTML file.

    The map colours every Voronoi region by the difference between the two
    candidates' vote shares (``pair[0]`` minus ``pair[1]``) and makes regions
    with a larger absolute difference more opaque.

    Note: the output file name is built from the module-level ``CITY`` variable
    (the loop variable of the calling cell), not from an argument. The function
    also reads ``ICON_URL`` and ``geyser_hexes`` from the notebook's imports and
    the previously written ``../output_data/select.html``.

    Args:
        pair: Sequence of two candidate names. For each name a ``"<name>_%"``
            column must exist in ``full_data_geo``; the values are treated as
            fractions (they are multiplied by 100 for display).
        full_data_geo: Frame with the columns ``"voronoi"`` (region geometry),
            ``"point"`` (point geometry), ``"idx"`` (iterable of district
            numbers), ``"turnout_%"`` and the two percent columns. It is not
            modified.
        area: Data for the city outline layer (passed to pydeck unchanged).
        city_name: Human-readable city name used in the map title.

    Returns:
        None. Writes ``../output_data/{CITY}_{A}_{B}.html`` where ``A`` and
        ``B`` are the candidates' initials in ascending string order, matching
        the file names listed in the map-switcher dropdown.
    """
    percent_pair = [f"{col}_%" for col in pair]

    # Work on a copy: the percent columns are replaced by formatted strings
    # below and must not leak into the caller's frame, which is reused for the
    # next pair of candidates.
    sub_gdf = gpd.GeoDataFrame(full_data_geo.copy(), geometry="voronoi")

    sub_gdf["diff"] = sub_gdf[percent_pair[0]] - sub_gdf[percent_pair[1]]
    sub_gdf["diff_str"] = sub_gdf["diff"].apply(lambda x: f"{(x * 100.0):.2f}")

    abs_diff = sub_gdf["diff"].abs()
    max_abs_diff = abs_diff.max()
    # Guard against a zero (or all-NaN) maximum, which would otherwise put NaN
    # into the alpha expression of the fill colour.
    if max_abs_diff > 0:
        sub_gdf["diff_abs_scale"] = abs_diff / max_abs_diff
    else:
        sub_gdf["diff_abs_scale"] = 0.0

    # From here on the percent columns hold display strings for the tooltip.
    for col in percent_pair:
        sub_gdf[col] = sub_gdf[col].apply(lambda x: f"{(x * 100.0):.2f}")

    sub_gdf["turnout_str"] = sub_gdf["turnout_%"].apply(lambda x: f"{(x * 100.0):.2f}")

    # Clamp the range so both sides of zero always have a non-empty interval.
    min_diff = min(sub_gdf["diff"].min(), -0.01)
    max_diff = max(sub_gdf["diff"].max(), 0.01)

    # Seven colour stops: three below zero, zero itself, three above zero.
    min_steps = [min_diff, min_diff * 2 / 3, min_diff / 3]
    max_steps = [max_diff / 3, max_diff * 2 / 3, max_diff]
    full_steps = [*min_steps, 0, *max_steps]

    cmap = LinearColormap(
        geyser_hexes,
        index=[round(x, 3) for x in full_steps],
        vmin=round(min_diff, 3),
        vmax=round(max_diff, 3),
    )

    sub_gdf["rgb"] = sub_gdf["diff"].apply(cmap.rgb_bytes_tuple)
    sub_gdf["okw_html"] = sub_gdf["idx"].apply(
        lambda x: ", ".join(map(str, x))
    )

    pt_data = pd.DataFrame(
        dict(
            lon=[pt.x for pt in full_data_geo.point],
            lat=[pt.y for pt in full_data_geo.point],
        )
    )

    view = pdk.data_utils.compute_view(pt_data)

    area_contour = pdk.Layer(
        "GeoJsonLayer",
        area,
        opacity=0.8,
        pickable=True,
        stroked=True,
        filled=False,
        get_line_width=15,
        line_width_min_pixels=1,
        get_line_color=[0, 0, 0, 128],
    )

    geojson = pdk.Layer(
        "GeoJsonLayer",
        sub_gdf[
            [
                "voronoi",
                *percent_pair,
                "okw_html",
                "diff_str",
                "rgb",
                "diff_abs_scale",
                "turnout_str",
            ]
        ],
        opacity=0.8,
        pickable=True,
        stroked=True,
        filled=True,
        get_line_width=5,
        line_width_min_pixels=1,
        get_fill_color="[rgb[0], rgb[1], rgb[2], 25 + (230 * diff_abs_scale)]",
        get_line_color=[0, 0, 0, 25],
    )

    icon_data = {
        "url": ICON_URL,
        "width": 242,
        "height": 242,
        "anchorY": 242,
    }

    pt_data["icon_data"] = [icon_data for _ in pt_data.index]

    icon_layer = pdk.Layer(
        opacity=0.8,
        type="IconLayer",
        pickable=False,
        data=pt_data,
        get_icon="icon_data",
        get_size=1,
        size_scale=10,
        size_min_pixels=10,
        size_max_pixels=120,
        get_position=["lon", "lat"],
    )

    # Same colour stops as `cmap`, expressed in percent for the legend.
    legend_cmap = LinearColormap(
        geyser_hexes,
        index=[round(x * 100, 2) for x in full_steps],
        vmin=round(min_diff * 100, 2),
        vmax=round(max_diff * 100, 2),
    )

    select_text = Path("../output_data/select.html").read_text()

    r = pdk.Deck(
        layers=[area_contour, geojson, icon_layer],
        initial_view_state=view,
        map_style="road",
        tooltip={
            "html": f"""
            <b>Obwodowa komisja wyborcza nr: {{okw_html}}</b> <br/>
            <b>{pair[0]}:</b> {{{percent_pair[0]}}}% <br/>
            <b>{pair[1]}:</b> {{{percent_pair[1]}}}% <br/>
            <b>Różnica:</b> {{diff_str}}% <br/>
            <b>Frekwencja:</b> {{turnout_str}}% <br/>
            """,
            "style": {"color": "white", "font-family": '"Open Sans", sans-serif'},
        },
        description=f"""
        <div style='font-family: "Open Sans", sans-serif;'>
        <h4>{city_name}: {pair[0]} vs {pair[1]}</h4>
        Różnica w % <br/>
        <span style="font-size: 0.8em">Wartości ujemne - przewaga {pair[1]} <br/> Wartości dodatnie - przewaga {pair[0]}</span> <br/>
        {legend_cmap._repr_html_()} <br/>
        <span style="font-size: 0.8em">(Obszary obwodów są przybliżone z wykorzystaniem diagramu Voronoia)</span><br/>
        {select_text}<br/>
        <span style="font-size: 0.8em">Autor: Kamil Raczycki (<a href="https://github.com/RaczeQ" target="_blank">GitHub</a>, <a href="https://www.linkedin.com/in/raczyckikamil/" target="_blank">LinkedIn</a>) <br/> Repozytorium: <a href="https://github.com/RaczeQ/wybory-2024-mapy-miast" target="_blank">https://github.com/RaczeQ/wybory-2024-mapy-miast</a></span>
        </div>
        """,
    )

    candidate_0 = "".join([x.upper()[0] for x in pair[0].split()])
    candidate_1 = "".join([x.upper()[0] for x in pair[1].split()])
    # The dropdown index links to files named with the initials in ascending
    # order, so the file must be written under that same name regardless of
    # the order in which the pair was passed in.
    first_initials, second_initials = sorted([candidate_0, candidate_1])
    file_name = f"../output_data/{CITY}_{first_initials}_{second_initials}.html"

    r.to_html(file_name)

    # Post-process the generated page: extend its stylesheet and load the web
    # font and Bootstrap (needed by the dropdown embedded in the description).
    # NOTE(review): assumes pydeck writes the HTML file as UTF-8 - confirm.
    with open(file_name, "r", encoding="utf-8") as in_file:
        buf = in_file.readlines()

    with open(file_name, mode="w", encoding="utf-8") as f:
        for line in buf:
            if "</style>" in line and "description" not in line:
                # The font is loaded with a <link> tag: a CSS @import placed
                # after other rules is ignored by browsers.
                line = (
                    """
                    .deck-json-description-box {
                        z-index: 2 !important;
                        overflow: visible !important;
                    }
                    """
                    + line
                    + """
                    <link rel="stylesheet" href='https://fonts.googleapis.com/css2?family=Open+Sans:ital@0;1&display=swap' />
                    <link rel="stylesheet" href='https://cdn.jsdelivr.net/npm/bootstrap@5.3.3/dist/css/bootstrap.min.css' />
                    <script src='https://cdn.jsdelivr.net/npm/bootstrap@5.3.3/dist/js/bootstrap.bundle.min.js'></script>
                    """
                )
            f.write(line)

In [28]:
def generate_map_for_turnout(full_data_geo, area, city_name):
    """Render a voter-turnout map for one city and save it as an HTML file.

    Every Voronoi region is coloured by its turnout on a fixed 0-1 scale and
    made more opaque the closer its turnout is to the highest turnout found in
    the data.

    Note: the output file name is built from the module-level ``CITY`` variable
    (the loop variable of the calling cell), not from an argument. The function
    also reads ``ICON_URL`` and ``sunset_hexes`` from the notebook's imports and
    the previously written ``../output_data/select.html``.

    Args:
        full_data_geo: Frame with the columns ``"voronoi"`` (region geometry),
            ``"point"`` (point geometry), ``"idx"`` (iterable of district
            numbers), ``"turnout_%"`` (treated as a fraction; multiplied by 100
            for display), ``"voters_voted"`` and ``"voters_total"``. It is not
            modified.
        area: Data for the city outline layer (passed to pydeck unchanged).
        city_name: Human-readable city name used in the map title.

    Returns:
        None. Writes ``../output_data/{CITY}_turnout.html``.
    """
    # Work on a copy so the helper columns added below never leak into the
    # caller's frame, which is reused for the candidate maps.
    sub_gdf = gpd.GeoDataFrame(full_data_geo.copy(), geometry="voronoi")

    sub_gdf["turnout_str"] = sub_gdf["turnout_%"].apply(lambda x: f"{(x * 100.0):.2f}")
    cmap = LinearColormap(
        sunset_hexes,
        vmin=0,
        vmax=1,
    )

    sub_gdf["rgb"] = sub_gdf["turnout_%"].apply(cmap.rgb_bytes_tuple)
    sub_gdf["okw_html"] = sub_gdf["idx"].apply(
        lambda x: ", ".join(map(str, x))
    )

    max_turnout = sub_gdf["turnout_%"].max()

    # Opacity scale relative to the highest turnout. It is stored in its own
    # column only; "turnout_%" keeps the real turnout values. The guard avoids
    # NaN/inf in the alpha expression when the maximum is zero or missing.
    if max_turnout > 0:
        sub_gdf["turnout_scale"] = sub_gdf["turnout_%"] / max_turnout
    else:
        sub_gdf["turnout_scale"] = 0.0

    # City-wide turnout weighted by the number of voters, not a mean of ratios.
    avg_turnout = sub_gdf["voters_voted"].sum() / sub_gdf["voters_total"].sum()

    pt_data = pd.DataFrame(
        dict(
            lon=[pt.x for pt in full_data_geo.point],
            lat=[pt.y for pt in full_data_geo.point],
        )
    )

    view = pdk.data_utils.compute_view(pt_data)

    area_contour = pdk.Layer(
        "GeoJsonLayer",
        area,
        opacity=0.8,
        pickable=True,
        stroked=True,
        filled=False,
        get_line_width=15,
        line_width_min_pixels=1,
        get_line_color=[0, 0, 0, 128],
    )

    geojson = pdk.Layer(
        "GeoJsonLayer",
        sub_gdf[
            [
                "voronoi",
                "okw_html",
                "rgb",
                "turnout_scale",
                "turnout_str",
            ]
        ],
        opacity=0.8,
        pickable=True,
        stroked=True,
        filled=True,
        get_line_width=5,
        line_width_min_pixels=1,
        get_fill_color="[rgb[0], rgb[1], rgb[2], 25 + (230 * turnout_scale)]",
        get_line_color=[0, 0, 0, 25],
    )

    icon_data = {
        "url": ICON_URL,
        "width": 242,
        "height": 242,
        "anchorY": 242,
    }

    pt_data["icon_data"] = [icon_data for _ in pt_data.index]

    icon_layer = pdk.Layer(
        opacity=0.8,
        type="IconLayer",
        pickable=False,
        data=pt_data,
        get_icon="icon_data",
        get_size=1,
        size_scale=10,
        size_min_pixels=10,
        size_max_pixels=120,
        get_position=["lon", "lat"],
    )

    # Same colours as `cmap`, expressed in percent for the legend.
    legend_cmap = LinearColormap(
        sunset_hexes,
        vmin=0,
        vmax=100,
    )

    select_text = Path("../output_data/select.html").read_text()

    r = pdk.Deck(
        layers=[area_contour, geojson, icon_layer],
        initial_view_state=view,
        map_style="road",
        tooltip={
            "html": """
            <b>Obwodowa komisja wyborcza nr: {okw_html}</b> <br/>
            <b>Frekwencja:</b> {turnout_str}% <br/>
            """,
            "style": {"color": "white", "font-family": '"Open Sans", sans-serif'},
        },
        description=f"""
        <div style='font-family: "Open Sans", sans-serif;'>
        <h4>{city_name}: frekwencja</h4>
        Wartość w % <br/>
        <span style="font-size: 0.8em">Maksymalna frekwencja: {100*max_turnout:.2f}% <br/>
        Średnia frekwencja {100*avg_turnout:.2f}%</span> <br/>
        {legend_cmap._repr_html_()} <br/>
        <span style="font-size: 0.8em">(Obszary obwodów są przybliżone z wykorzystaniem diagramu Voronoia)</span><br/>
        {select_text}<br/>
        <span style="font-size: 0.8em">Autor: Kamil Raczycki (<a href="https://github.com/RaczeQ" target="_blank">GitHub</a>, <a href="https://www.linkedin.com/in/raczyckikamil/" target="_blank">LinkedIn</a>) <br/> Repozytorium: <a href="https://github.com/RaczeQ/wybory-2024-mapy-miast" target="_blank">https://github.com/RaczeQ/wybory-2024-mapy-miast</a></span>
        </div>
        """,
    )

    file_name = f"../output_data/{CITY}_turnout.html"

    r.to_html(file_name)

    # Post-process the generated page: extend its stylesheet and load the web
    # font and Bootstrap (needed by the dropdown embedded in the description).
    # NOTE(review): assumes pydeck writes the HTML file as UTF-8 - confirm.
    with open(file_name, "r", encoding="utf-8") as in_file:
        buf = in_file.readlines()

    with open(file_name, mode="w", encoding="utf-8") as f:
        for line in buf:
            if "</style>" in line and "description" not in line:
                # The font is loaded with a <link> tag: a CSS @import placed
                # after other rules is ignored by browsers.
                line = (
                    """
                    .deck-json-description-box {
                        z-index: 2 !important;
                        overflow: visible !important;
                    }
                    """
                    + line
                    + """
                    <link rel="stylesheet" href='https://fonts.googleapis.com/css2?family=Open+Sans:ital@0;1&display=swap' />
                    <link rel="stylesheet" href='https://cdn.jsdelivr.net/npm/bootstrap@5.3.3/dist/css/bootstrap.min.css' />
                    <script src='https://cdn.jsdelivr.net/npm/bootstrap@5.3.3/dist/js/bootstrap.bundle.min.js'></script>
                    """
                )
            f.write(line)

In [30]:
# Generate the turnout map and every head-to-head candidate map for each city.
# The loop variable must stay named CITY: both generator functions read it as
# a global when building their output file names.
for CITY in tqdm(city_data["city"].sort_values()):
    polling_districts_data = pd.read_json(
        f"../output_data/{CITY}_polling_districts_data.json", orient="records"
    )
    h3_votes_data = pd.read_json(
        f"../output_data/{CITY}_h3_votes_data.json", orient="records"
    )
    geo_distinct_data = gpd.read_file(f"../output_data/{CITY}_geo_data.geojson")
    voronoi_regions = gpd.read_file(f"../output_data/{CITY}_voronoi_data.geojson")

    # Candidate columns: everything except "h3" and columns containing "_".
    candidates_names = [
        column
        for column in h3_votes_data.columns
        if not (column == "h3" or "_" in column)
    ]

    # Some inputs name the district number column "Numer_obwodu" instead of "idx".
    if "Numer_obwodu" in polling_districts_data.columns:
        polling_districts_data = polling_districts_data.rename(
            columns={"Numer_obwodu": "idx"}
        )

    # District numbers collected into one list per H3 cell.
    districts_per_h3 = polling_districts_data.groupby("h3")["idx"].apply(list)

    # Join votes with the point geometries, the Voronoi regions and the district lists.
    full_data_geo = h3_votes_data.merge(geo_distinct_data.reset_index(), on="h3")
    full_data_geo = full_data_geo.merge(
        voronoi_regions.reset_index(), left_on="h3", right_on="region_id"
    )
    full_data_geo = full_data_geo.merge(districts_per_h3, on="h3")
    # The two merged geometry columns get suffixed; give them meaningful names.
    full_data_geo = full_data_geo.rename(
        columns={"geometry_x": "point", "geometry_y": "voronoi"}
    )

    # Single lookup of this city's metadata row.
    city_row = city_data[city_data["city"] == CITY].iloc[0]
    area = geocode_to_region_gdf(city_row["geocode"])
    city_name = city_row["city_name"]

    generate_map_for_turnout(full_data_geo, area, city_name)

    # Every unordered pair of candidates, in column order.
    for position, first_candidate in enumerate(candidates_names):
        for second_candidate in candidates_names[position + 1 :]:
            pair = [first_candidate, second_candidate]
            generate_map_for_candidates(pair, full_data_geo, area, city_name)

100%|██████████| 18/18 [00:51<00:00,  2.85s/it]
