In [None]:
import geopandas as gpd
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from scipy.optimize import curve_fit
from sklearn.preprocessing import StandardScaler

In [None]:
# polynomial function
def polynomial(x, a, b, c):
    """Evaluate the quadratic ``a * x**2 + b * x + c`` at ``x``.

    Used as the model function handed to ``curve_fit``; ``x`` may be a scalar
    or anything supporting ``**`` and ``*`` element-wise (e.g. a NumPy array).
    """
    quadratic_term = a * x**2
    linear_term = b * x
    return quadratic_term + linear_term + c

# normal gingles test
def gingles_test_race(working_directory, geojson, race_column, threshold=15):
    """Plot party vote share against one group's population share per precinct.

    Reads the file at ``working_directory + geojson``, derives ``PCT_RACE`` as
    ``race_column / TOT_POP * 100``, keeps precincts whose ``PCT_RACE`` is at
    least ``threshold``, fits a quadratic to ``PCT_DEM`` and to ``PCT_REP`` as
    functions of ``PCT_RACE``, and draws the scatter with both fitted curves.

    Args:
        working_directory: Path prefix; concatenated directly with ``geojson``
            (no separator is inserted).
        geojson: File name appended to ``working_directory``.
        race_column: Name of the population-count column to convert to a
            percentage of ``TOT_POP``.
        threshold: Minimum ``PCT_RACE`` (in percent) a precinct needs to be
            included. Defaults to 15.

    Returns:
        A DataFrame with ``Precinct_key``, ``Race Column``, ``PCT_RACE``,
        ``PCT_DEM`` and ``PCT_REP`` for the precincts used, or ``None`` when
        too few precincts remain or the curve fit fails.
    """
    gdf = gpd.read_file(working_directory + geojson)

    # A zero TOT_POP yields +/-inf (or NaN); inf would pass the threshold
    # filter and survive dropna, so normalise it to NaN first.
    pct_race = gdf[race_column] / gdf["TOT_POP"] * 100
    gdf["PCT_RACE"] = pct_race.replace([np.inf, -np.inf], np.nan)

    # Filter out precincts where the group's share is below the threshold.
    gdf = gdf[gdf["PCT_RACE"] >= threshold]
    gdf = gdf.dropna(subset=['PCT_RACE', 'PCT_DEM', 'PCT_REP'])

    # The quadratic has three free parameters, so fewer than three points
    # cannot be fitted.
    if len(gdf) < 3:
        print("Error: Fewer than 3 data points meet the filtering criteria; cannot fit a quadratic.")
        return None

    precinct_key = gdf['SRPREC_KEY']
    x_data = gdf['PCT_RACE']
    y_dem = gdf['PCT_DEM']
    y_rep = gdf['PCT_REP']

    try:
        popt_dem, _ = curve_fit(polynomial, x_data, y_dem)
        popt_rep, _ = curve_fit(polynomial, x_data, y_rep)
    except (ValueError, RuntimeError) as e:
        # curve_fit raises ValueError on bad input and RuntimeError when the
        # least-squares minimisation does not converge.
        print("Error in curve fitting:", e)
        return None

    x_fit = np.linspace(x_data.min(), x_data.max(), 500)
    y_fit_dem = polynomial(x_fit, *popt_dem)
    y_fit_rep = polynomial(x_fit, *popt_rep)

    precinct_data = pd.DataFrame({
        'Precinct_key': precinct_key,
        'Race Column': race_column,
        'PCT_RACE': x_data,
        'PCT_DEM': y_dem,
        'PCT_REP': y_rep
    })

    plt.figure(figsize=(10, 6))
    plt.scatter(x_data, y_dem, color='blue', label='Democratic Votes', alpha=0.05)
    plt.scatter(x_data, y_rep, color='red', label='Republican Votes', alpha=0.05)
    plt.plot(x_fit, y_fit_dem, color='blue', label='Democratic Fit', linewidth=2)
    plt.plot(x_fit, y_fit_rep, color='red', label='Republican Fit', linewidth=2)
    plt.title(f'Vote Percentage for Democrat vs Republican by Percentage of {race_column} Voters in Precincts')
    plt.xlabel(f'Percentage (%) of {race_column} Voters')
    plt.ylabel('Vote Percentage (%)')
    plt.grid(color='gray', linestyle='--', linewidth=0.5)
    plt.legend()
    # The original referenced plt.show without calling it, which is a no-op.
    plt.show()

    return precinct_data

# 2/3 gingles test for income
def gingles_test_income(working_directory, geojson, region=None):
    """Plot party vote share against ``MEDN_INC21`` per precinct.

    Reads the file at ``working_directory + geojson``, keeps rows with
    ``TOT_HOUS21 > 0``, optionally restricts them to one ``Category`` value,
    fits a quadratic to ``PCT_DEM`` and to ``PCT_REP`` as functions of
    ``MEDN_INC21``, and draws the scatter with both fitted curves.

    Args:
        working_directory: Path prefix; concatenated directly with ``geojson``
            (no separator is inserted).
        geojson: File name appended to ``working_directory``.
        region: ``None`` to use every row; ``'Urban'`` or ``'Rural'`` to keep
            only that ``Category``. Any other value selects ``'Suburban'``
            (kept as-is for backward compatibility).

    Returns:
        A DataFrame with ``Precinct_key``, ``MEDN_INC``, ``PCT_DEM`` and
        ``PCT_REP`` for the precincts used, or ``None`` when too few precincts
        remain or the curve fit fails.
    """
    gdf = gpd.read_file(working_directory + geojson)

    gdf = gdf[gdf["TOT_HOUS21"] > 0.0]

    if region is not None:
        category = region if region in ('Urban', 'Rural') else 'Suburban'
        gdf = gdf[gdf["Category"] == category]

    # curve_fit rejects NaN input, so drop incomplete rows the same way the
    # sibling gingles_* functions do.
    gdf = gdf.dropna(subset=['MEDN_INC21', 'PCT_DEM', 'PCT_REP'])

    # The quadratic has three free parameters, so fewer than three points
    # cannot be fitted.
    if len(gdf) < 3:
        print("Error: Fewer than 3 data points meet the filtering criteria; cannot fit a quadratic.")
        return None

    precinct_key = gdf['SRPREC_KEY']
    x_data = gdf['MEDN_INC21']
    y_dem = gdf['PCT_DEM']
    y_rep = gdf['PCT_REP']

    try:
        popt_dem, _ = curve_fit(polynomial, x_data, y_dem)
        popt_rep, _ = curve_fit(polynomial, x_data, y_rep)
    except (ValueError, RuntimeError) as e:
        # curve_fit raises ValueError on bad input and RuntimeError when the
        # least-squares minimisation does not converge.
        print("Error in curve fitting:", e)
        return None

    x_fit = np.linspace(x_data.min(), x_data.max(), 500)
    y_fit_dem = polynomial(x_fit, *popt_dem)
    y_fit_rep = polynomial(x_fit, *popt_rep)

    precinct_data = pd.DataFrame({
        'Precinct_key': precinct_key,
        'MEDN_INC': x_data,
        'PCT_DEM': y_dem,
        'PCT_REP': y_rep
    })

    plt.figure(figsize=(10, 6))
    plt.scatter(x_data, y_dem, color='blue', label='Democratic Votes', alpha=0.05)
    plt.scatter(x_data, y_rep, color='red', label='Republican Votes', alpha=0.05)
    plt.plot(x_fit, y_fit_dem, color='blue', label='Democratic Fit', linewidth=2)
    plt.plot(x_fit, y_fit_rep, color='red', label='Republican Fit', linewidth=2)
    plt.title('Vote Percentage for Democrat vs Republican by Average Income in Precincts')
    plt.xlabel('Average Income')
    plt.ylabel('Vote Percentage (%)')
    plt.grid(color='gray', linestyle='--', linewidth=0.5)
    plt.legend()
    # The original referenced plt.show without calling it, which is a no-op.
    plt.show()

    return precinct_data

# 2/3 gingles test for income and race
def gingles_test_race_income(working_directory, geojson, race_column, threshold=15):
    """Plot party vote share against a combined race/income metric per precinct.

    Reads the file at ``working_directory + geojson``, derives ``PCT_RACE`` as
    ``race_column / TOT_POP * 100``, keeps precincts whose ``PCT_RACE`` is at
    least ``threshold``, standardises ``MEDN_INC21`` and ``PCT_RACE`` with
    ``StandardScaler`` and averages the two into ``combined_metric``. A
    quadratic is then fitted to ``PCT_DEM`` and to ``PCT_REP`` as functions of
    that metric, and the scatter is drawn with both fitted curves.

    Args:
        working_directory: Path prefix; concatenated directly with ``geojson``
            (no separator is inserted).
        geojson: File name appended to ``working_directory``.
        race_column: Name of the population-count column to convert to a
            percentage of ``TOT_POP``.
        threshold: Minimum ``PCT_RACE`` (in percent) a precinct needs to be
            included. Defaults to 15.

    Returns:
        A DataFrame with ``Precinct_key``, ``Race Column``, ``PCT_RACE_INC``
        (the combined metric), ``PCT_DEM`` and ``PCT_REP`` for the precincts
        used, or ``None`` when too few precincts remain or the curve fit fails.
    """
    gdf = gpd.read_file(working_directory + geojson)

    # A zero TOT_POP yields +/-inf (or NaN); inf would pass the threshold
    # filter and survive dropna, so normalise it to NaN first.
    pct_race = gdf[race_column] / gdf["TOT_POP"] * 100
    gdf["PCT_RACE"] = pct_race.replace([np.inf, -np.inf], np.nan)

    # Filter out precincts where the group's share is below the threshold.
    gdf = gdf[gdf["PCT_RACE"] >= threshold]
    gdf = gdf.dropna(subset=['PCT_RACE', 'PCT_DEM', 'PCT_REP', 'MEDN_INC21'])

    if gdf.empty:
        print("Error: No data points meet the filtering criteria.")
        return None

    # The quadratic has three free parameters, so fewer than three points
    # cannot be fitted. (This replaces a MEDN_INC21 re-check that could never
    # fire after the dropna and empty check above.)
    if len(gdf) < 3:
        print("Error: Fewer than 3 data points meet the filtering criteria; cannot fit a quadratic.")
        return None

    # Put both inputs on a common scale before averaging them; the scaler is
    # re-fitted for each column.
    scaler = StandardScaler()
    gdf['income_scaled'] = scaler.fit_transform(gdf[['MEDN_INC21']])
    gdf['race_scaled'] = scaler.fit_transform(gdf[['PCT_RACE']])
    gdf['combined_metric'] = (gdf['income_scaled'] + gdf['race_scaled']) / 2

    precinct_key = gdf['SRPREC_KEY']
    x_data = gdf['combined_metric']
    y_dem = gdf['PCT_DEM']
    y_rep = gdf['PCT_REP']

    try:
        popt_dem, _ = curve_fit(polynomial, x_data, y_dem)
        popt_rep, _ = curve_fit(polynomial, x_data, y_rep)
    except (ValueError, RuntimeError) as e:
        # curve_fit raises ValueError on bad input and RuntimeError when the
        # least-squares minimisation does not converge.
        print("Error in curve fitting:", e)
        return None

    x_fit = np.linspace(x_data.min(), x_data.max(), 500)

    y_fit_dem = polynomial(x_fit, *popt_dem)
    y_fit_rep = polynomial(x_fit, *popt_rep)

    precinct_data = pd.DataFrame({
        'Precinct_key': precinct_key,
        'Race Column': race_column,
        'PCT_RACE_INC': x_data,
        'PCT_DEM': y_dem,
        'PCT_REP': y_rep
    })

    plt.figure(figsize=(10, 6))
    plt.scatter(x_data, y_dem, color='blue', label='Democratic Votes', alpha=0.05)
    plt.scatter(x_data, y_rep, color='red', label='Republican Votes', alpha=0.05)
    plt.plot(x_fit, y_fit_dem, color='blue', label='Democratic Fit', linewidth=2)
    plt.plot(x_fit, y_fit_rep, color='red', label='Republican Fit', linewidth=2)
    plt.title(f'Vote Percentage for Democrat vs Republican by Percentage of {race_column} Voters and Average Income in Precincts')
    plt.xlabel(f'Standarized Metric for {race_column} Voters and Average Income')
    plt.ylabel('Vote Percentage (%)')
    plt.grid(color='gray', linestyle='--', linewidth=0.5)
    plt.legend()
    # The original referenced plt.show without calling it, which is a no-op.
    plt.show()

    return precinct_data