In [71]:
!pip install deap



In [72]:
#Import necessary libraries
import pandas as pd
import geopandas as gpd
import numpy as np
from geopy.distance import great_circle
from deap import base, creator, tools, algorithms
import random

In [74]:
#Step 1: Load Population Data
pop_data = pd.read_csv("Mo_pop_Sim.csv")

#Randomly sample a small subset (0.01%) of the population data for efficiency
small_data = pop_data.sample(frac=SAMPLE_FRACTION, random_state=42)

#Extract population coordinates
pop_coords = small_data[['long', 'lat']].values

In [75]:
#Step 2: Load FQHC Data
fqhc_data = gpd.read_file("MO_2018_Federally_Qualified_Health_Center_Locations.shp")

#Extract FQHC coordinates
fqhc_coords = fqhc_data[['Longitude', 'Latitude']].values

In [76]:
#APPROACH 1: AVERAGE DISTANCE TO POPULATION CENTERS
def calc_avg_distance(fqhc_idx):
    """Calculates the average distance from an FQHC to all population locations."""
    #Extract population coordinates
    pop_coords = small_data[['lat', 'long']].values

    #Extract FQHC coordinates
    fqhc_coords = fqhc_data[['Latitude', 'Longitude']].values

    #fqhc_idx is now correctly passed as an argument to the function
    fqhc_coord = fqhc_coords[fqhc_idx]
    distances = [great_circle(fqhc_coord, pop_coord).meters for pop_coord in pop_coords]
    return np.mean(distances) / MILE_TO_METERS  # Convert to miles

#Compute the average distance for each FQHC
#The function is called with the index 'i' for each FQHC in the loop
fqhc_data["avg_distance"] = [calc_avg_distance(i) for i in range(len(fqhc_coords))]

In [78]:
#STEP 3: SELECT TOP FQHCs BASED ON DIFFERENT METRICS
#TOP 8 FQHCs BASED ON AVERAGE DISTANCE (CLOSER IS BETTER)
top_by_distance = fqhc_data.nsmallest(8, "avg_distance")[["OBJECTID", "Facility", "City", "avg_distance"]]
print("Top 8 FQHCs by Closeness to Residents:\n", top_by_distance)

#COMBINED SCORE: RANKING BOTH CRITERIA (ONLY DISTANCE INCLUDED)
fqhc_data["dist_rank"] = fqhc_data["avg_distance"].rank()
fqhc_data["combined_score"] = fqhc_data["dist_rank"] / len(fqhc_data)  # Adjusted for only one ranking criterion

#TOP 8 FQHCs BASED ON COMBINED SCORE
top_combined = fqhc_data.nsmallest(8, "combined_score")[["OBJECTID", "Facility", "City", "avg_distance"]]
print("Top 8 FQHCs by Combined Score:\n", top_combined)



Top 8 FQHCs by Closeness to Residents:
      OBJECTID                                           Facility  \
44         45  Community Health Center of Central Missouri - ...   
43         44  Community Health Center of Central Missouri - ...   
45         46  Community Health Center of Central Missouri (A...   
196       197  Community Health Center of Central Missouri St...   
11         12  Community Health Center of Central Missouri - ...   
42         43  Community Health Center of Central Missouri - ...   
47         48  Compass Health/Crider Health Center (Behaviora...   
50         51  Compass Health/Crider Health Center (Dental Se...   

               City  avg_distance  
44             Linn    106.018243  
43   Jefferson City    106.963568  
45   Jefferson City    106.963568  
196  Jefferson City    107.042886  
11           Fulton    108.943858  
42       California    110.033516  
47            Union    110.299238  
50            Union    110.444956  
Top 8 FQHCs by Combined

In [79]:
#STEP 4: OPTIMIZATION USING GENETIC ALGORITHM (GA)
#Define fitness function (minimize average distance)
def fitness_function(individual):
    """Computes the average distance for a given set of selected FQHCs."""
    total_distance = sum(calc_avg_distance(idx) for idx in individual)
    avg_distance = total_distance / len(individual)  # Compute average distance
    return avg_distance,

#Setup GA using DEAP
creator.create("FitnessMin", base.Fitness, weights=(-1.0,))
creator.create("Individual", list, fitness=creator.FitnessMin)

toolbox = base.Toolbox()
toolbox.register("indices", random.sample, range(len(fqhc_coords)), N_FQHC_SELECT)  # Random selection
toolbox.register("individual", tools.initIterate, creator.Individual, toolbox.indices)
toolbox.register("population", tools.initRepeat, list, toolbox.individual)

#Register Genetic Algorithm functions
toolbox.register("evaluate", fitness_function)
toolbox.register("mate", tools.cxTwoPoint)
toolbox.register("mutate", tools.mutShuffleIndexes, indpb=0.2)
toolbox.register("select", tools.selTournament, tournsize=3)

#Run the Genetic Algorithm
population = toolbox.population(n=50)  # Create population of 50
result = algorithms.eaSimple(population, toolbox, cxpb=0.7, mutpb=0.2, ngen=10, verbose=False)

#Extract best solution
best_individual = tools.selBest(population, k=1)[0]
best_fqhc = fqhc_data.iloc[best_individual][["OBJECTID", "Facility", "City", "avg_distance"]]
print("Optimal 8 FQHCs for Mental Health Services (Minimizing Distance):\n", best_fqhc)




Optimal 8 FQHCs for Mental Health Services (Minimizing Distance):
      OBJECTID                                           Facility  \
188       189  Your Community Health Center (Administration O...   
11         12  Community Health Center of Central Missouri - ...   
187       188                     Arthur Center Community Health   
186       187       Family Health Center East (Medical & Dental)   
43         44  Community Health Center of Central Missouri - ...   
42         43  Community Health Center of Central Missouri - ...   
42         43  Community Health Center of Central Missouri - ...   
84         85  Katy Trail Community Health - Prairie Hills Cl...   

               City  avg_distance  
188           Rolla    110.993816  
11           Fulton    108.943858  
187          Mexico    115.627008  
186        Columbia    111.730096  
43   Jefferson City    106.963568  
42       California    110.033516  
42       California    110.033516  
84       Versailles    112.41229

Rationale for Choosing Approach 1 (Average Closeness Score) & Assumptions

I chose Approach 1 (Average Closeness Score) because it prioritizes accessibility by ensuring that the selected Federally Qualified Health Centers (FQHCs) are as close as possible to the population. This method reduces the average distance people must travel, making it easier for them to access mental health services.

Assumptions in Computing Fitness:

1. Equal Demand for Mental Health Services: Every individual in the dataset has an equal likelihood of needing mental health services.
2. Distance is the Primary Accessibility Factor: Shorter distances directly translate to better access, ignoring other factors such as availability of transportation, socioeconomic barriers, or facility capacity.
3. No Variations in Facility Capabilities: All FQHCs have equal capacity to handle mental health patients, meaning they can all offer the same level of service.
4. Population Sampling is Representative: Considering a 0.01% sample of the population data for efficiency, the small sample accurately represents the entire population’s distribution.

By focusing on minimizing average distance, this method ensures that more people can access mental health services with the least amount of travel time.
Fitness Function - Metric & Optimization Objective Metric Computed The fitness function measures the average Haversine (great-circle) distance from each FQHC to all sampled residential locations.

Optimization Objective: To minimize travel distance, the goal is to select FQHCs with the lowest average distance to residences. The lower the fitness score, the better the FQHC site for mental health service placement. By minimizing the average distance, this approach ensures that the selected FQHCs are located as close as possible to the majority of the population, increasing accessibility and utilization of mental health services.