In [None]:
import geopandas as gpd
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from scipy.optimize import curve_fit
from sklearn.preprocessing import StandardScaler

# Gingles Analysis
# Directory holding the input GeoJSON files. The analysis functions in this
# notebook build file paths as `working_directory + <file name>` (plain string
# concatenation), so the trailing slash must be kept.
# NOTE(review): this is an absolute, machine-specific path; adjust it before
# running the notebook on another machine.
working_directory = "/Users/stanleymui/Downloads/CSE 416 Preprocessing Data/"

In [None]:
def polynomial(x, a, b, c):
    """Evaluate the quadratic ``a*x**2 + b*x + c``.

    ``x`` may be a scalar or any object supporting elementwise arithmetic
    (for example a NumPy array or pandas Series); ``a``, ``b`` and ``c`` are
    the coefficients of the squared, linear and constant terms.
    """
    squared_term = a * x**2
    linear_term = b * x
    return squared_term + linear_term + c

def gingles_test_race(working_directory, geojson, race_column):
    """Plot precinct vote share against one group's population share and return the data.

    Reads ``working_directory + geojson``, computes ``PCT_RACE`` as
    ``race_column / TOT_POP * 100``, keeps precincts where that share is at
    least 15%, fits a quadratic (via ``polynomial``) to ``PCT_DEM`` and to
    ``PCT_REP`` as functions of ``PCT_RACE``, and shows both scatters together
    with their fitted curves.

    Parameters
    ----------
    working_directory : str
        Directory prefix. It is concatenated with ``geojson`` as-is, so it
        must end with a path separator.
    geojson : str
        File name of the precinct GeoJSON. The file must provide the columns
        ``TOT_POP``, ``PCT_DEM``, ``PCT_REP`` and ``race_column``.
    race_column : str
        Name of the population-count column for the group being analysed.

    Returns
    -------
    pandas.DataFrame
        One row per retained precinct with the columns ``Race Column``
        (the value of ``race_column``), ``PCT_RACE``, ``PCT_DEM`` and
        ``PCT_REP``.
    """
    gdf = gpd.read_file(working_directory + geojson)

    # A precinct with no recorded population has no meaningful share: the
    # division below would yield NaN or inf, and inf would slip past the 15%
    # filter and make curve_fit raise on non-finite input.
    gdf = gdf[gdf["TOT_POP"] > 0].copy()
    gdf["PCT_RACE"] = gdf[race_column] / gdf["TOT_POP"] * 100

    # Filter out precincts where pct race is less than 15%
    gdf = gdf[gdf["PCT_RACE"] >= 15]

    # curve_fit rejects NaN, so drop rows missing any value used in the fit.
    gdf = gdf.dropna(subset=['PCT_RACE', 'PCT_DEM', 'PCT_REP'])

    x_data = gdf['PCT_RACE']
    y_dem = gdf['PCT_DEM']
    y_rep = gdf['PCT_REP']

    # Independent quadratic fits for each party's vote share.
    popt_dem, _ = curve_fit(polynomial, x_data, y_dem)
    popt_rep, _ = curve_fit(polynomial, x_data, y_rep)

    # Evaluate the fitted curves on an evenly spaced grid over the observed range.
    x_fit = np.linspace(x_data.min(), x_data.max(), 500)
    y_fit_dem = polynomial(x_fit, *popt_dem)
    y_fit_rep = polynomial(x_fit, *popt_rep)

    # The scalar race_column is broadcast to every row of the result.
    precinct_data = pd.DataFrame({
        'Race Column': race_column,
        'PCT_RACE': x_data,
        'PCT_DEM': y_dem,
        'PCT_REP': y_rep
    })

    plt.figure(figsize=(10, 6))
    plt.scatter(x_data, y_dem, color='blue', label='Democratic Votes', alpha=0.05)
    plt.scatter(x_data, y_rep, color='red', label='Republican Votes', alpha=0.05)
    plt.plot(x_fit, y_fit_dem, color='blue', label='Democratic Fit', linewidth=2)
    plt.plot(x_fit, y_fit_rep, color='red', label='Republican Fit', linewidth=2)
    plt.title(f'Vote Percentage for Democrat vs Republican by Percentage of {race_column} Voters in Precincts')
    plt.xlabel(f'Percentage (%) of {race_column} Voters')
    plt.ylabel('Vote Percentage (%)')
    plt.grid(color='gray', linestyle='--', linewidth=0.5)
    plt.legend()
    # Must be *called*: a bare `plt.show` is only an attribute lookup and
    # renders nothing.
    plt.show()

    return precinct_data

race_columns = ["POP_WHT", "POP_ASN", "POP_HISLAT", "POP_BLK", "POP_AINDALK", "POP_HIPI", "POP_OTH", "POP_TWOMOR"]

# Collect the per-race frames and concatenate once. Growing a DataFrame with
# pd.concat inside the loop re-copies every earlier row on each iteration, and
# seeding the result with an empty DataFrame is deprecated in recent pandas.
per_race_frames = []
for race_col in race_columns:
    precinct_data = gingles_test_race(working_directory, "precinct_voting_demographic.geojson", race_col)
    per_race_frames.append(precinct_data)

all_precinct_data = pd.concat(per_race_frames, ignore_index=True)

print(all_precinct_data)

# Uncomment to write the combined table next to the input data.
# output_csv_path = working_directory + "precinct_voting_demographic_analysis.csv"
# all_precinct_data.to_csv(output_csv_path, index=False)

In [None]:
# 2/3 gingles test for income
def gingles_test_income(working_directory, geojson, region=None):
    """Plot precinct vote share against ``MEDN_INC21``, optionally for one region category.

    Reads ``working_directory + geojson``, keeps precincts with
    ``TOT_HOUS21 > 0``, optionally restricts them to one value of the
    ``Category`` column, fits a quadratic (via ``polynomial``) to ``PCT_DEM``
    and to ``PCT_REP`` as functions of ``MEDN_INC21``, and shows both scatters
    together with their fitted curves.

    Parameters
    ----------
    working_directory : str
        Directory prefix. It is concatenated with ``geojson`` as-is, so it
        must end with a path separator.
    geojson : str
        File name of the precinct GeoJSON. The file must provide the columns
        ``TOT_HOUS21``, ``MEDN_INC21``, ``PCT_DEM``, ``PCT_REP`` and, when
        ``region`` is given, ``Category``.
    region : str or None, optional
        ``'Urban'``, ``'Rural'`` or ``'Suburban'`` to analyse only that
        category; ``None`` (default) uses every precinct. Any other value is
        treated as ``'Suburban'`` for backward compatibility, with a warning.

    Returns
    -------
    None
        The function only draws a figure.
    """
    gdf = gpd.read_file(working_directory + geojson)

    gdf = gdf[gdf["TOT_HOUS21"] > 0.0]

    if region is not None:
        if region not in ("Urban", "Rural", "Suburban"):
            # Historically any unrecognized value selected the suburban subset
            # without notice; keep that result but make the fallback visible.
            import warnings
            warnings.warn(
                f"Unrecognized region {region!r}; falling back to 'Suburban'.",
                stacklevel=2,
            )
            region = "Suburban"
        gdf = gdf[gdf["Category"] == region]

    # curve_fit rejects NaN, so drop rows missing any value used in the fit.
    gdf = gdf.dropna(subset=['MEDN_INC21', 'PCT_DEM', 'PCT_REP'])

    x_data = gdf['MEDN_INC21']
    y_dem = gdf['PCT_DEM']
    y_rep = gdf['PCT_REP']

    # Independent quadratic fits for each party's vote share.
    popt_dem, _ = curve_fit(polynomial, x_data, y_dem)
    popt_rep, _ = curve_fit(polynomial, x_data, y_rep)

    # Evaluate the fitted curves on an evenly spaced grid over the observed range.
    x_fit = np.linspace(x_data.min(), x_data.max(), 500)
    y_fit_dem = polynomial(x_fit, *popt_dem)
    y_fit_rep = polynomial(x_fit, *popt_rep)

    plt.figure(figsize=(10, 6))
    plt.scatter(x_data, y_dem, color='blue', label='Democratic Votes', alpha=0.05)
    plt.scatter(x_data, y_rep, color='red', label='Republican Votes', alpha=0.05)
    plt.plot(x_fit, y_fit_dem, color='blue', label='Democratic Fit', linewidth=2)
    plt.plot(x_fit, y_fit_rep, color='red', label='Republican Fit', linewidth=2)
    plt.title('Vote Percentage for Democrat vs Republican by Average Income in Precincts')
    plt.xlabel('Average Income')
    plt.ylabel('Vote Percentage (%)')
    plt.grid(color='gray', linestyle='--', linewidth=0.5)
    plt.legend()
    # Must be *called*: a bare `plt.show` is only an attribute lookup and
    # renders nothing.
    plt.show()
    
# Run the income analysis on all precincts first (region=None), then once per
# region category.
for region in (None, 'Rural', 'Urban', 'Suburban'):
    gingles_test_income(working_directory, "california_precinct_merged.geojson", region)

In [None]:
# 2/3 gingles test for income race
def gingles_test_race_income(working_directory, geojson, race_column):
    """Plot precinct vote share against a combined race-share / income metric.

    Reads ``working_directory + geojson``, computes ``PCT_RACE`` as
    ``race_column / TOT_POP * 100`` and keeps precincts where that share is at
    least 15%. ``MEDN_INC21`` and ``PCT_RACE`` are each standardized with
    ``StandardScaler`` and averaged into ``combined_metric``. A quadratic (via
    ``polynomial``) is then fitted to ``PCT_DEM`` and to ``PCT_REP`` as
    functions of that metric, and both scatters are shown with their fitted
    curves.

    Parameters
    ----------
    working_directory : str
        Directory prefix. It is concatenated with ``geojson`` as-is, so it
        must end with a path separator.
    geojson : str
        File name of the precinct GeoJSON. The file must provide the columns
        ``TOT_POP``, ``MEDN_INC21``, ``PCT_DEM``, ``PCT_REP`` and
        ``race_column``.
    race_column : str
        Name of the population-count column for the group being analysed.

    Returns
    -------
    None
        The function only draws a figure.
    """
    gdf = gpd.read_file(working_directory + geojson)

    # A precinct with no recorded population has no meaningful share: the
    # division below would yield NaN or inf, and inf would slip past the 15%
    # filter and make curve_fit raise on non-finite input.
    gdf = gdf[gdf["TOT_POP"] > 0].copy()
    gdf["PCT_RACE"] = gdf[race_column] / gdf["TOT_POP"] * 100

    # Filter out precincts where pct race is less than 15%
    gdf = gdf[gdf["PCT_RACE"] >= 15]

    # MEDN_INC21 feeds the combined metric, so a missing income would turn the
    # metric into NaN and make curve_fit raise; drop those rows as well.
    # .copy() gives an independent frame so the column assignments below do
    # not write into a slice of the filtered data.
    gdf = gdf.dropna(subset=['PCT_RACE', 'MEDN_INC21', 'PCT_DEM', 'PCT_REP']).copy()

    # fit_transform refits the scaler on every call, so one instance can be
    # reused for both columns.
    scaler = StandardScaler()
    gdf['income_scaled'] = scaler.fit_transform(gdf[['MEDN_INC21']])
    gdf['race_scaled'] = scaler.fit_transform(gdf[['PCT_RACE']])
    gdf['combined_metric'] = (gdf['income_scaled'] + gdf['race_scaled']) / 2

    x_data = gdf['combined_metric']
    y_dem = gdf['PCT_DEM']
    y_rep = gdf['PCT_REP']

    # Independent quadratic fits for each party's vote share.
    popt_dem, _ = curve_fit(polynomial, x_data, y_dem)
    popt_rep, _ = curve_fit(polynomial, x_data, y_rep)

    # Evaluate the fitted curves on an evenly spaced grid over the observed range.
    x_fit = np.linspace(x_data.min(), x_data.max(), 500)
    y_fit_dem = polynomial(x_fit, *popt_dem)
    y_fit_rep = polynomial(x_fit, *popt_rep)

    plt.figure(figsize=(10, 6))
    plt.scatter(x_data, y_dem, color='blue', label='Democratic Votes', alpha=0.05)
    plt.scatter(x_data, y_rep, color='red', label='Republican Votes', alpha=0.05)
    plt.plot(x_fit, y_fit_dem, color='blue', label='Democratic Fit', linewidth=2)
    plt.plot(x_fit, y_fit_rep, color='red', label='Republican Fit', linewidth=2)
    plt.title(f'Vote Percentage for Democrat vs Republican by Percentage of {race_column} Voters and Average Income in Precincts')
    plt.xlabel(f'Standarized Metric for {race_column} Voters and Average Income')
    plt.ylabel('Vote Percentage (%)')
    plt.grid(color='gray', linestyle='--', linewidth=0.5)
    plt.legend()
    # Must be *called*: a bare `plt.show` is only an attribute lookup and
    # renders nothing.
    plt.show()

race_columns = ["POP_WHT", "POP_ASN", "POP_HISLAT", "POP_BLK", "POP_AINDALK", "POP_HIPI", "POP_OTH", "POP_TWOMOR"]

# gingles_test_race_income only draws a figure and has no return statement, so
# its result is not stored; binding it to `precinct_data` would just replace
# that variable with None.
for race_col in race_columns:
    gingles_test_race_income(working_directory, "california_precinct_merged.geojson", race_col)