In [None]:
import geopandas as gpd
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from scipy.optimize import curve_fit
from sklearn.preprocessing import StandardScaler

In [None]:
def polynomial(x, a, b, c):
    """Evaluate the quadratic ``a * x**2 + b * x + c`` at ``x``.

    ``x`` may be a scalar or any object supporting element-wise arithmetic
    (for example a NumPy array); ``a``, ``b`` and ``c`` are the quadratic,
    linear and constant coefficients.
    """
    quadratic_term = a * x**2
    linear_term = b * x
    return quadratic_term + linear_term + c

def gingles_test_race(working_directory, geojson, race_column, threshold=15):
    """
    Fits and plots party vote percentages against a group's population share.

    Reads precinct data from a GeoJSON file, computes the percentage of each
    precinct's total population given by ``race_column``, keeps precincts at
    or above ``threshold`` percent, fits a quadratic to the Democratic and
    Republican vote percentages, and shows a scatter plot with both fits.

    Parameters:
        working_directory (str): Directory containing the GeoJSON file.
        geojson (str): Name of the GeoJSON file.
        race_column (str): Column holding the population count of the group.
        threshold (float, optional): Minimum group share, in percent, for a
            precinct to be included. Defaults to 15.

    Returns:
        pd.DataFrame or None: Precinct keys, the race column name, and the
        race/vote percentages of the retained precincts, or None when the
        data cannot be fitted (an error message is printed in that case).
    """
    gdf = gpd.read_file(f"{working_directory}/{geojson}")

    # Mask non-positive populations so the division yields NaN instead of
    # inf: an inf would survive both the threshold filter and dropna below
    # and then make curve_fit reject the entire data set.
    total_pop = gdf["TOT_POP"].where(gdf["TOT_POP"] > 0)
    gdf["PCT_RACE"] = gdf[race_column] / total_pop * 100
    gdf = gdf[gdf["PCT_RACE"] >= threshold]
    gdf = gdf.dropna(subset=["PCT_RACE", "PCT_DEM", "PCT_REP"])

    # The quadratic model has three coefficients; the fit cannot be carried
    # out with fewer observations than parameters.
    if len(gdf) < 3:
        print("Error: Fewer than 3 data points meet the filtering criteria.")
        return None

    precinct_key = gdf["SRPREC_KEY"]
    x_data = gdf["PCT_RACE"]
    y_dem = gdf["PCT_DEM"]
    y_rep = gdf["PCT_REP"]

    try:
        popt_dem, _ = curve_fit(polynomial, x_data, y_dem)
        popt_rep, _ = curve_fit(polynomial, x_data, y_rep)
    except (ValueError, RuntimeError) as e:
        # ValueError: non-finite input; RuntimeError: minimization failed.
        print(f"Error in curve fitting: {e}")
        return None

    # Evaluate the fitted curves on a dense grid spanning the observed range.
    x_fit = np.linspace(x_data.min(), x_data.max(), 500)
    y_fit_dem = polynomial(x_fit, *popt_dem)
    y_fit_rep = polynomial(x_fit, *popt_rep)

    precinct_data = pd.DataFrame({
        "Precinct_key": precinct_key,
        "Race Column": race_column,
        "PCT_RACE": x_data,
        "PCT_DEM": y_dem,
        "PCT_REP": y_rep
    })

    plt.figure(figsize=(10, 6))
    plt.scatter(x_data, y_dem, color="blue", label="Democratic Votes", alpha=0.5)
    plt.scatter(x_data, y_rep, color="red", label="Republican Votes", alpha=0.5)
    plt.plot(x_fit, y_fit_dem, color="blue", label="Democratic Fit", linewidth=2)
    plt.plot(x_fit, y_fit_rep, color="red", label="Republican Fit", linewidth=2)
    plt.title(f"Vote Percentage for Democrat vs Republican by Percentage of {race_column} Voters in Precincts")
    plt.xlabel(f"Percentage (%) of {race_column} Voters")
    plt.ylabel("Vote Percentage (%)")
    plt.grid(color="gray", linestyle="--", linewidth=0.5)
    plt.legend()
    plt.show()

    return precinct_data

def gingles_test_income(working_directory, geojson_filename, region=None):
    """
    Performs a 2/3 Gingles test to analyze vote percentages by income.

    Reads precinct data from a GeoJSON file, keeps precincts with a positive
    ``TOT_HOUS21`` value (optionally restricted to one region), fits a
    quadratic to the Democratic and Republican vote percentages as a function
    of median income, and shows a scatter plot with both fitted curves.

    Parameters:
        working_directory (str): Directory containing the GeoJSON file.
        geojson_filename (str): Name of the GeoJSON file.
        region (str, optional): Region to filter ('Urban', 'Rural', or 'Suburban').
            When falsy, precincts are not filtered on the "Category" column.

    Returns:
        pd.DataFrame or None: Data with precinct keys, median income, and
        vote percentages, or None when the data cannot be fitted (an error
        message is printed in that case).
    """
    gdf = gpd.read_file(f"{working_directory}/{geojson_filename}")
    gdf = gdf[gdf["TOT_HOUS21"] > 0.0]
    if region:
        gdf = gdf[gdf["Category"] == region]

    # curve_fit rejects NaN input, so drop incomplete precincts up front
    # (the other Gingles helpers in this file do the same).
    gdf = gdf.dropna(subset=["MEDN_INC21", "PCT_DEM", "PCT_REP"])

    if gdf.empty:
        print("Error: No data points meet the filtering criteria.")
        return None

    # The quadratic model has three coefficients; the fit cannot be carried
    # out with fewer observations than parameters.
    if len(gdf) < 3:
        print("Error: Fewer than 3 data points meet the filtering criteria.")
        return None

    precinct_keys = gdf["SRPREC_KEY"]
    income_data = gdf["MEDN_INC21"]
    vote_dem = gdf["PCT_DEM"]
    vote_rep = gdf["PCT_REP"]

    try:
        popt_dem, _ = curve_fit(polynomial, income_data, vote_dem)
        popt_rep, _ = curve_fit(polynomial, income_data, vote_rep)
    except (ValueError, RuntimeError) as e:
        # ValueError: non-finite input; RuntimeError: minimization failed.
        print(f"Error in curve fitting: {e}")
        return None

    # Evaluate the fitted curves on a dense grid spanning the observed range.
    income_fit = np.linspace(income_data.min(), income_data.max(), 500)
    vote_fit_dem = polynomial(income_fit, *popt_dem)
    vote_fit_rep = polynomial(income_fit, *popt_rep)

    precinct_data = pd.DataFrame({
        "Precinct_key": precinct_keys,
        "MEDN_INC": income_data,
        "PCT_DEM": vote_dem,
        "PCT_REP": vote_rep,
    })

    plt.figure(figsize=(10, 6))
    plt.scatter(income_data, vote_dem, color="blue", label="Democratic Votes", alpha=0.5)
    plt.scatter(income_data, vote_rep, color="red", label="Republican Votes", alpha=0.5)
    plt.plot(income_fit, vote_fit_dem, color="blue", label="Democratic Fit", linewidth=2)
    plt.plot(income_fit, vote_fit_rep, color="red", label="Republican Fit", linewidth=2)
    plt.title("Vote Percentage by Median Income")
    plt.xlabel("Median Income")
    plt.ylabel("Vote Percentage (%)")
    plt.grid(color="gray", linestyle="--", linewidth=0.5)
    plt.legend()
    plt.show()

    return precinct_data


def gingles_test_race_income(working_directory, geojson_filename, race_column, threshold=15):
    """
    Fits and plots party vote percentages against a combined race/income metric.

    Reads precinct data from a GeoJSON file, computes the percentage of each
    precinct's total population given by ``race_column``, keeps precincts at
    or above ``threshold`` percent, standardizes median income and the race
    percentage, averages the two standardized values into one metric, fits a
    quadratic to the Democratic and Republican vote percentages as a function
    of that metric, and shows a scatter plot with both fitted curves.

    Parameters:
        working_directory (str): Directory containing the GeoJSON file.
        geojson_filename (str): Name of the GeoJSON file.
        race_column (str): Column holding the population count of the group.
        threshold (float, optional): Minimum group share, in percent, for a
            precinct to be included. Defaults to 15.

    Returns:
        pd.DataFrame or None: Precinct keys, the combined metric, and vote
        percentages of the retained precincts, or None when the data cannot
        be fitted (an error message is printed in that case).
    """
    gdf = gpd.read_file(f"{working_directory}/{geojson_filename}")

    # Mask non-positive populations so the division yields NaN instead of
    # inf: an inf would survive both the threshold filter and dropna below
    # and then make curve_fit reject the entire data set.
    total_pop = gdf["TOT_POP"].where(gdf["TOT_POP"] > 0)
    gdf["PCT_RACE"] = gdf[race_column] / total_pop * 100
    gdf = gdf[gdf["PCT_RACE"] >= threshold]
    # Explicit copy: columns are added below, and they should be written to
    # an independent frame rather than to a selection of the original data.
    gdf = gdf.dropna(subset=["PCT_RACE", "PCT_DEM", "PCT_REP", "MEDN_INC21"]).copy()

    if gdf.empty:
        print("Error: No data points meet the filtering criteria.")
        return None

    # The quadratic model has three coefficients; the fit cannot be carried
    # out with fewer observations than parameters.
    if len(gdf) < 3:
        print("Error: Fewer than 3 data points meet the filtering criteria.")
        return None

    # Standardize income and race percentages for linear calculation
    # (fit_transform refits the scaler on each call, so reusing it is safe).
    scaler = StandardScaler()
    gdf["income_scaled"] = scaler.fit_transform(gdf[["MEDN_INC21"]])
    gdf["race_scaled"] = scaler.fit_transform(gdf[["PCT_RACE"]])
    gdf["combined_metric"] = (gdf["income_scaled"] + gdf["race_scaled"]) / 2

    precinct_keys = gdf["SRPREC_KEY"]
    combined_metric = gdf["combined_metric"]
    vote_dem = gdf["PCT_DEM"]
    vote_rep = gdf["PCT_REP"]

    try:
        popt_dem, _ = curve_fit(polynomial, combined_metric, vote_dem)
        popt_rep, _ = curve_fit(polynomial, combined_metric, vote_rep)
    except (ValueError, RuntimeError) as e:
        # ValueError: non-finite input; RuntimeError: minimization failed.
        print(f"Error in curve fitting: {e}")
        return None

    # Evaluate the fitted curves on a dense grid spanning the observed range.
    metric_fit = np.linspace(combined_metric.min(), combined_metric.max(), 500)
    vote_fit_dem = polynomial(metric_fit, *popt_dem)
    vote_fit_rep = polynomial(metric_fit, *popt_rep)

    precinct_data = pd.DataFrame({
        "Precinct_key": precinct_keys,
        "PCT_RACE_INC": combined_metric,
        "PCT_DEM": vote_dem,
        "PCT_REP": vote_rep,
    })

    plt.figure(figsize=(10, 6))
    plt.scatter(combined_metric, vote_dem, color="blue", label="Democratic Votes", alpha=0.5)
    plt.scatter(combined_metric, vote_rep, color="red", label="Republican Votes", alpha=0.5)
    plt.plot(metric_fit, vote_fit_dem, color="blue", label="Democratic Fit", linewidth=2)
    plt.plot(metric_fit, vote_fit_rep, color="red", label="Republican Fit", linewidth=2)
    plt.title(f"Vote Percentage by {race_column} Voters and Median Income")
    plt.xlabel(f"Standardized Metric ({race_column} + Income)")
    plt.ylabel("Vote Percentage (%)")
    plt.grid(color="gray", linestyle="--", linewidth=0.5)
    plt.legend()
    plt.show()

    return precinct_data