In [20]:
import warnings
warnings.filterwarnings('ignore')

import geopandas as gpd
from shapely.geometry import MultiPolygon, Polygon
import pandas as pd

In [21]:
# Load the precinct layer from disk into a GeoDataFrame; later cells split it
# by its "State" column.
gdf = gpd.read_file("all_states_precincts_cname_neighbor_demo_OG_anomalous.json")

In [22]:
# Separate the Virginia precincts from every other state's precincts.
is_va = gdf["State"] == "va"
va = gdf[is_va]
rest = gdf[~is_va]

In [23]:
# Peek at the first Virginia row to check which columns are available.
va.head(1)

Unnamed: 0,County,Precinct #,Hillary Clinton/Dem,Donald J. Trump/Rep,Gary Johnson/Lib,Jill Stein/Grn,Other/Other,Total Votes,State,CName,...,Total Population,White,Black or African American,American Indian,Asian,Other Population,MissingVote,MissingDemo,VoteDemoConflict,geometry
9501,Accomack,1,486,637,16,8,4,2328,va,va-accomack-0001,...,3526,2336,906,12,20,246,,,,"POLYGON ((-75.66233 37.77053, -75.66244 37.770..."


In [24]:
# MultiPolygon parts with area < area_threshold are dropped as slivers.
# NOTE(review): the area is in squared units of the layer's CRS; the coordinates
# displayed above look like lon/lat degrees -- confirm that is intended.
area_threshold = 0.000001


def _drop_small_parts(geometry, min_area=area_threshold):
    """Return `geometry` with MultiPolygon parts smaller than `min_area` removed.

    Anything that is not a MultiPolygon (including a missing geometry) is
    returned unchanged. A MultiPolygon left with a single part is returned as
    that plain Polygon. If every part is below the threshold, the largest part
    is kept so the precinct does not end up with an empty geometry.
    """
    if geometry is None or geometry.geom_type != "MultiPolygon":
        return geometry
    # `.geoms` is the supported way to walk the parts; iterating the
    # MultiPolygon object itself was removed in Shapely 2.0.
    parts = list(geometry.geoms)
    kept = [part for part in parts if part.area >= min_area]
    if not kept and parts:
        kept = [max(parts, key=lambda part: part.area)]
    if len(kept) == 1:
        return kept[0]
    return MultiPolygon(kept)


cleaned_geometry = gpd.GeoSeries(
    [_drop_small_parts(geometry) for geometry in va.geometry],
    index=va.index,
    crs=va.crs,
)
# Not in-place: `va` becomes a new frame instead of a modified slice of `gdf`,
# and no temporary "new_geo" column is needed.
va = va.set_geometry(cleaned_geometry)

In [25]:
METER_COORD_SYSTEM = "EPSG:3857"
GEO_COORD_SYSTEM = "EPSG:4326"
ENLARGE_SIZE = 30.48  # 30.48 meters = 100 feet


def generate_neighbors(gdf):
    """Recompute the "Neighbors" column of every precinct, state by state.

    Two precincts of the same state are neighbors when one precinct's shape,
    enlarged by ENLARGE_SIZE, intersects the other precinct's original shape.

    Parameters
    ----------
    gdf : GeoDataFrame
        Must carry a CRS plus "State" and "CName" columns. It is not modified.

    Returns
    -------
    GeoDataFrame
        All states concatenated with a fresh 0..n-1 index, reprojected to
        GEO_COORD_SYSTEM. "Neighbors" holds "<CName>,<neighbor CName>" pairs
        joined by "; " (an empty string when there are none). Only the original
        geometry is returned; the enlarged shapes are never stored as a column.
    """
    gdf = gdf.to_crs(METER_COORD_SYSTEM)  # buffer distances are given in meters
    stateGDF_with_neighbors = []
    for stateID in gdf["State"].unique().tolist():
        # Copy so the new column is written to an independent frame, not a slice.
        cur_state_gdf = gdf[gdf["State"] == stateID].copy()
        cnames = cur_state_gdf["CName"]
        # Vectorised buffer of every precinct in the state at once.
        enlarged = cur_state_gdf.geometry.buffer(ENLARGE_SIZE)

        neighbor_strings = []
        for cur_CName, cur_geometry in zip(cnames, cur_state_gdf.geometry):
            neighbors = cnames[enlarged.intersects(cur_geometry)].tolist()
            # A precinct normally intersects its own buffer; guard the removal
            # because an empty geometry intersects nothing, itself included.
            if cur_CName in neighbors:
                neighbors.remove(cur_CName)
            # "100,101; 100,102; ..."
            neighbor_strings.append(
                "; ".join(cur_CName + "," + neighbor for neighbor in neighbors)
            )

        cur_state_gdf["Neighbors"] = neighbor_strings
        stateGDF_with_neighbors.append(cur_state_gdf)

    merged_gdf = pd.concat(stateGDF_with_neighbors, ignore_index=True)
    return merged_gdf.to_crs(GEO_COORD_SYSTEM)  # back to geographic coordinates

In [26]:
# Recombine the cleaned Virginia precincts with the other states; the index is
# rebuilt as 0..n-1 so Virginia's rows no longer keep their original labels.
all_state = pd.concat([va, rest], ignore_index=True)

In [29]:
# Inspect the first two rows of the recombined frame.
all_state.head(2)

Unnamed: 0,County,Precinct #,Hillary Clinton/Dem,Donald J. Trump/Rep,Gary Johnson/Lib,Jill Stein/Grn,Other/Other,Total Votes,State,CName,...,Total Population,White,Black or African American,American Indian,Asian,Other Population,MissingVote,MissingDemo,VoteDemoConflict,geometry
0,Accomack,1,486,637,16,8,4,2328,va,va-accomack-0001,...,3526,2336,906,12,20,246,,,,"POLYGON ((-75.66233 37.77053, -75.66244 37.770..."
1,Accomack,2,175,553,13,1,1,1500,va,va-accomack-0002,...,1522,900,492,4,7,112,,,VoteDemoConflict,"POLYGON ((-75.41651 37.93483, -75.41645 37.934..."


In [30]:
# Recompute the per-state neighbor lists on the recombined frame.
all_state_neighbors = generate_neighbors(all_state)

In [33]:
# Write the frame, including its recomputed "Neighbors" column, out as GeoJSON.
all_state_neighbors.to_file("all_states_precincts_cname_neighbor_demo_OG_anomalous_fixNeighborVA.json", driver="GeoJSON")

In [41]:
# Keep only the Virginia rows of the recomputed neighbor table.
is_va_row = all_state_neighbors["State"] == "va"
va_neighbors = all_state_neighbors.loc[is_va_row]

In [42]:
# Expand each precinct's "; "-separated Neighbors string into one row per pair.
# All records are collected first and the frame is built once:
# DataFrame.append in a loop is quadratic and was removed in pandas 2.0.
# A precinct with an empty Neighbors string still yields one row whose
# "Neighbor" value is "" (that is what "".split(";") produces).
neighbor_records = [
    {"CName": cname, "Neighbor": pair.strip()}
    for cname, neighbor_field in zip(va_neighbors["CName"], va_neighbors["Neighbors"])
    for pair in neighbor_field.split(";")
]
df = pd.DataFrame(neighbor_records, columns=["CName", "Neighbor"])

df.head()

Unnamed: 0,CName,Neighbor
0,va-accomack-0001,"va-accomack-0001,va-accomack-0010"
1,va-accomack-0001,"va-accomack-0001,va-accomack-0011"
2,va-accomack-0001,"va-accomack-0001,va-accomack-0013"
3,va-accomack-0001,"va-accomack-0001,va-accomack-0014"
4,va-accomack-0001,"va-accomack-0001,va-accomack-0015"


In [44]:
# Save the parsed Virginia neighbor pairs as CSV, without the row index.
df.to_csv("VA_Neighbors_Fixed_Parsed.csv", index=False)

11947