# 16. Identify possible map coverage errors (required) (sequence diagram not required)
- Identify the geographic area defined as the difference between the geographic area covered by the
state and the geographic area covered by the union of all precincts in the state. 
- Store the associated data in the errors data structure for subsequent addition to the DB. 
- The regions so identified are potential ghost precincts. 

In [1]:
# IPython shell escape: list the files in the notebook's working directory.
!ls

[1m[34mCounty[m[m            [1m[34mPrecinct[m[m          cleanState.ipynb  test_ghost.json
DetectGhost.ipynb [1m[34mState[m[m             stateDif.json


In [2]:
import geopandas as gpd
from shapely.geometry import Polygon, MultiPolygon

In [3]:
# Load the precinct layer from the parent directory into a GeoDataFrame
# (the file name suggests the merged Texas precincts).
precinctGDF = gpd.read_file("../TX_Precinct_Merged.json")

In [4]:
# Keep only the precinct number and the geometry; every other attribute
# column is discarded in place.
unneeded = list(precinctGDF.columns)
for keep in ("Precinct #", "geometry"):
    # list.remove raises ValueError if an expected column is missing.
    unneeded.remove(keep)
precinctGDF.drop(columns=unneeded, inplace=True)
precinctGDF.head(1)

Unnamed: 0,Precinct #,geometry
0,1,"POLYGON ((-95.60102 31.74519, -95.60095 31.745..."


In [5]:
# Load the state outline and strip it down to its geometry column.
stateGDF = gpd.read_file("State/TX_State.json")
non_geometry = list(stateGDF.columns)
non_geometry.remove("geometry")
stateGDF.drop(columns=non_geometry, inplace=True)
# Independent copy of the trimmed frame; stateGDF itself is left in the CRS
# it was read with (no reprojection happens here).
stateGDFcopy = stateGDF.copy()

# get the difference between the buffered state and the original state

In [6]:
# Build a thin ring just outside the state border: buffer the dissolved state
# outline by 60.96 m (200 ft) in EPSG:3857, reproject back to EPSG:4326 and
# subtract the original state.
# NOTE(review): EPSG:3857 distances are stretched away from the equator, so
# the on-the-ground buffer width is presumably a little under 200 ft -- confirm
# this is acceptable.
stateEnlarged = stateGDFcopy.to_crs("EPSG:3857").unary_union.buffer(60.96)
# The geometry is wrapped in a list so it is always stored as exactly one row.
# Passing the bare geometry only works when it happens to be an iterable
# multi-part geometry (Shapely < 2) and fails for a single Polygon.
stateEnlarged = gpd.GeoDataFrame({"geometry": [stateEnlarged]}, crs="EPSG:3857")
stateEnlarged = stateEnlarged.to_crs("EPSG:4326")
stateDif = gpd.overlay(stateEnlarged, stateGDF, how="difference")
# Persist the ring so it can be inspected outside the notebook.
stateDif.to_file("stateDif.json", driver="GeoJSON")

# get the difference between the state and the union of all precincts

In [7]:
# Dissolve all precinct geometries into a single geometry.  Note that the name
# precinctGDF is rebound here: from this point on it holds a bare shapely
# geometry, not a GeoDataFrame.
precinctGDF = precinctGDF.unary_union
# precinctGDF = precinctGDF.buffer(60.96)

In [8]:
# Wrap the dissolved precinct geometry in a one-row GeoDataFrame so it can be
# fed to gpd.overlay.  The geometry is placed in a list so that a multi-part
# result is always stored as a single row regardless of the Shapely version.
# The state's CRS is attached because the overlay against stateGDF already
# treats both layers as sharing one coordinate system.
gdf = gpd.GeoDataFrame({"geometry": [precinctGDF]}, index=[0], crs=stateGDF.crs)

In [10]:
# Area that belongs to the state but to no precinct: the candidate ghost
# regions described at the top of the notebook.
statePrecinctdif = gpd.overlay(stateGDF, gdf, how="difference")

In [11]:
# Keep an untouched copy of the raw difference (it is not referenced again in
# the cells visible here).
statePrecinctdifCopy = statePrecinctdif.copy()

In [12]:
# Write the raw state-minus-precincts difference to GeoJSON; later cells
# overwrite this same file with progressively cleaned versions.
statePrecinctdif.to_file("statePrecinctdif.json", driver="GeoJSON")

# split every MultiPolygon into individual Polygons

In [13]:
# Empty EPSG:4326 frame that the next cell fills row by row.
newgdf = gpd.GeoDataFrame(crs="EPSG:4326")
# Declare the two columns up front (with placeholder values) so that the
# row-wise .at assignments in the following cell have columns to write into.
newgdf["geometry"] = ""
newgdf["index"] = 0

In [14]:
# Flatten the difference layer so that every row of newgdf holds exactly one
# Polygon; the "index" column records each polygon's running number.
polygons = []
for g in statePrecinctdif["geometry"]:
    if g.geom_type == "MultiPolygon":
        # Use .geoms: iterating a MultiPolygon directly is deprecated in
        # Shapely 1.8 and removed in Shapely 2.0.
        polygons.extend(g.geoms)
    else:
        polygons.append(g)

for i, shape in enumerate(polygons):
    newgdf.at[i, "geometry"] = shape
    newgdf.at[i, "index"] = i
# The column was filled with raw shapely objects, so (re)register it as the
# frame's active geometry column.
newgdf.set_geometry("geometry", inplace=True)

In [15]:
# Preview the first exploded polygon.
newgdf.head(1)

Unnamed: 0,geometry,index
0,"POLYGON ((-93.51492 31.02570, -93.52231 31.020...",0.0


In [16]:
# Overwrite statePrecinctdif.json with the single-polygon-per-row version.
newgdf.to_file("statePrecinctdif.json", driver="GeoJSON")

# filter by area

In [None]:
# dif.to_file("RI_Ghost.json", driver="GeoJSON")

In [18]:
# Reproject to EPSG:3857 so that the .area values used by the filter in the
# next cell are in projected units rather than degrees.
newgdf = newgdf.to_crs("EPSG:3857")

In [19]:
# Discard polygons whose area is below the threshold and store the survivors
# in a new "filtered" column.
# 3716.1216 == 60.96 ** 2, i.e. a 200 ft x 200 ft square expressed in m^2.
# NOTE(review): areas are measured in EPSG:3857, which inflates area away from
# the equator, so the effective ground threshold is presumably somewhat
# smaller -- confirm this is acceptable.
a = 3716.1216
newgdf["filtered"] = ""
too_small = []  # row labels to remove once the scan is finished
for label, row in newgdf.iterrows():
    geom = row["geometry"]
    if geom.geom_type == "MultiPolygon":
        # Keep only the parts that are large enough.  .geoms is required
        # because direct MultiPolygon iteration was removed in Shapely 2.0.
        kept = MultiPolygon([part for part in geom.geoms if part.area >= a])
        if kept.is_empty:
            too_small.append(label)
        else:
            newgdf.at[label, "filtered"] = kept
    elif geom.area < a:
        too_small.append(label)
    else:
        newgdf.at[label, "filtered"] = geom
# Drop in one step instead of mutating the frame while it is being iterated.
newgdf.drop(index=too_small, inplace=True)

In [20]:
# Make the area-filtered shapes the frame's active geometry.  drop=True asks
# geopandas to delete the "filtered" column once it has been used as the
# geometry.
newgdf.set_geometry("filtered", drop=True, inplace=True)

In [21]:
# Reproject back to EPSG:4326 now that the area filtering is done.
newgdf = newgdf.to_crs("EPSG:4326")

In [22]:
# Overwrite statePrecinctdif.json again, this time with the area-filtered set.
newgdf.to_file("statePrecinctdif.json", driver="GeoJSON")

# drop polygons that are fully inside stateDif
- stateDif is the difference between the state buffered by 200 feet (60.96 m) and the original state

In [52]:
# Remove every candidate polygon that lies entirely within a piece of the
# border ring (stateDif); one "#" is printed per removed polygon as a simple
# progress indicator.
for _, ring_row in stateDif.iterrows():
    inside_ring = newgdf["geometry"].within(ring_row["geometry"])
    doomed = newgdf.index[inside_ring].to_list()
    for label in doomed:
        newgdf.drop(index=label, inplace=True)
        print("#", end="")

# delete unwanted polygons

In [53]:
def deletePoly(gdf, index):
    """Remove the row(s) labelled *index* from *gdf* in place.

    The frame is mutated directly and the very same object is returned, so
    the call can also be used on the right-hand side of an assignment.
    A label that is not present raises ``KeyError`` (pandas' default).
    """
    gdf.drop(labels=index, axis=0, inplace=True)
    return gdf

In [20]:
# Manually remove hand-picked polygons by row label.
# NOTE(review): the only definition of index_to_delete visible here is the
# commented-out line below, so this cell presumably raises NameError unless the
# list is defined elsewhere -- confirm the intended labels for this dataset
# before re-enabling it.
# index_to_delete = [46, 43, 44, 45, 39, 34, 33, 27, 26, 23, 14, 35, 38, 40, 42]
for i in index_to_delete:
    newgdf = deletePoly(newgdf, i)

# create key for ghost table

In [54]:
# Preview the first remaining polygon before ids are assigned.
newgdf.head(1)

Unnamed: 0,geometry,index
0,"POLYGON ((-93.51492 31.02570, -93.52231 31.020...",0.0


In [55]:
# Give every remaining polygon a sequential key of the form "tx-ghost-<n>",
# numbered from 0 in the frame's current row order (row labels may have gaps
# after the earlier drops, so they are not used for the key).
newgdf["id"] = ["tx-ghost-" + str(n) for n in range(len(newgdf))]
newgdf.head(2)

Unnamed: 0,geometry,index,id
0,"POLYGON ((-93.51492 31.02570, -93.52231 31.020...",0.0,tx-ghost-0
10,"POLYGON ((-93.52855 31.07060, -93.52896 31.069...",10.0,tx-ghost-1


In [56]:
# Write the final ghost-region layer (geometry, index, id) to GeoJSON.
newgdf.to_file("TX_Ghost.json", driver="GeoJSON")