# Minimizing the Travel Distance of Pharmacy Students to Hospitals


## Introduction
   ### Each semester [TSU College of Pharmacy](http://www.tsu.edu/academics/colleges-and-schools/college-of-pharmacy-and-health-sciences/) sends graduate pharmacy students to Houston hospitals as part of their internship program. On average, the pharmacy program assigns more than three hundred students to the available locations across Houston. Although the program administration spends a long time finding the right location for each student, many students are sometimes assigned to locations far from their homes.
 ### The objective of this notebook is to offer a solution that minimizes the total distance students travel to reach their hospitals while avoiding long commutes. To solve the problem, DEAP, a Python library that includes genetic algorithm implementations, is used.
 ### The notebook workflow is as follows:
 * Import required libraries
 * Read csv files
 * Data wrangling
 * Create deap model
 * Create an objective function
 * Assign the objective function to the model
 * Run the model
 
### The full procedure can be found below.

In [None]:
### Uncomment any missing library

#! pip install --user numpy
#! pip install --user pandas
#! pip install --user deap

In [None]:
# Import required libraries
import pandas as pd
import numpy as np
from math import radians, cos, sin, asin, sqrt
import random
from deap import algorithms, base, creator, tools

In [None]:
# Read the CSV file containing the latitude and longitude of City of Houston zip codes
df_ZipCodeCoord = pd.read_csv('City_of_Houston_Zip_Codes.csv', index_col=0)
# head must be called (with parentheses) to preview the first rows; the bare
# attribute only displays the bound method object.
df_ZipCodeCoord.head()

In [None]:
# Re-index the coordinates table by its 'Zip_Code' column so that rows can be
# looked up directly by zip code
df_ZipCodeCoord = df_ZipCodeCoord.set_index('Zip_Code')
df_ZipCodeCoord

In [None]:
# Convert the table to a nested dictionary keyed first by column name and then
# by zip code, e.g. df_ZipCodeCoord_dict['Latitude'][zip_code] returns the
# latitude of that zip code
df_ZipCodeCoord_dict = df_ZipCodeCoord.to_dict()
df_ZipCodeCoord_dict

In [None]:
# Read the CSV file containing the zip code of each student
df_students = pd.read_csv('Students_Zip_Code.csv', index_col=0)
# head must be called (with parentheses) to preview the first rows; the bare
# attribute only displays the bound method object.
df_students.head()

In [None]:
# Read the CSV file containing the zip code of each hospital and its capacity
df_Hospitals = pd.read_csv('Hospitals_Zip_Code.csv', index_col=0)
# head must be called (with parentheses) to preview the first rows; the bare
# attribute only displays the bound method object.
df_Hospitals.head()

In [None]:
# Create a dataframe that contains the zip code of every position: a hospital
# with capacity k contributes its zip code k times.
# The two columns are iterated directly instead of using iterrows(), which
# builds a Series per row and may upcast integer columns to float (a float
# capacity would make range() fail).
list_zip_hosp = []
for zip_code, capacity in zip(df_Hospitals['Hospital_Zip_Code'],
                              df_Hospitals['Capacity']):
    list_zip_hosp.extend([zip_code] * int(capacity))

df_Hospitals_extended = pd.DataFrame(list_zip_hosp, columns=['Hosp_Zip_codes'])

In [None]:
# Create a new class named "FitnessMin" inheriting from "base.Fitness" with attribute "weights=(-1.0,)"
# The fitness is a measure of the quality of a solution.
creator.create("FitnessMin", base.Fitness, weights=(-1.0,)) # -1 -> minimization problem
# Create the "Individual" class: a list that carries a "FitnessMin" fitness attribute
creator.create("Individual", list, fitness=creator.FitnessMin)

In [None]:
# Create the toolbox on which the generators, the evaluation function and the
# genetic operators are registered
toolbox = base.Toolbox()

In [None]:
# Attribute generator: "index" draws len(df_students) distinct integers from
# range(len(df_students)) (replace=False), i.e. a random permutation of the
# student row positions.
# NOTE(review): the number of positions in df_Hospitals_extended is not
# consulted here — confirm it matches the number of students.
toolbox.register("index", np.random.choice, len(df_students), len(df_students), replace=False) # choose all spots

In [None]:
# Structure initializers
# "individual": build a creator.Individual from the values produced by toolbox.index
toolbox.register("individual", tools.initIterate, creator.Individual, toolbox.index)
# "population": build a list of individuals by repeatedly calling toolbox.individual
toolbox.register("population", tools.initRepeat, list, toolbox.individual)

In [None]:
# Sample individual: generate one individual and display it
ind = toolbox.individual()
ind

In [None]:
def total_distance(ind):
    """Return the summed travel cost of one student-to-position assignment.

    ``ind`` lists student row positions: the student at row ``ind[i]`` of
    ``df_students`` is paired with position ``i`` of
    ``df_Hospitals_extended``.

    Args:
        ind: Sequence of integer row positions into ``df_students``.

    Returns:
        The sum of ``distance(student_zip, hospital_zip)`` over all pairs
        (``0`` for an empty individual).

    Raises:
        IndexError: If there are fewer hospital positions than students
            in ``ind``.
    """
    # Extract both zip-code columns once as plain lists instead of doing a
    # chained ``.iloc[i][column]`` lookup per student; this function is the
    # hot path of the fitness evaluation.
    student_zips = df_students['Students_Zip_Code'].iloc[ind].tolist()
    hospital_zips = df_Hospitals_extended['Hosp_Zip_codes'].tolist()

    # zip() would silently truncate, so keep failing loudly when there are
    # not enough positions for every student.
    if len(student_zips) > len(hospital_zips):
        raise IndexError('fewer hospital positions than students to assign')

    return sum(distance(student_zip, hospital_zip)
               for student_zip, hospital_zip in zip(student_zips, hospital_zips))

In [None]:
def distance(zip1, zip2):
    """Return the squared haversine distance between two zip codes.

    The coordinates of each zip code are looked up in
    ``df_ZipCodeCoord_dict`` and passed through ``radians`` before the
    haversine formula is applied.

    Args:
        zip1: Zip code of the first location.
        zip2: Zip code of the second location.

    Returns:
        The haversine distance (computed with a radius of 6371 km)
        multiplied by itself, so that long commutes are penalized more
        than proportionally.

    Raises:
        KeyError: If a zip code is missing from ``df_ZipCodeCoord_dict``.
    """
    # FAA approved globe radius in km (radius of the earth)
    RADIUS = 6371

    # Import lat and lon of zip codes, both students and hospitals
    longitudes = df_ZipCodeCoord_dict['Longitude']
    latitudes = df_ZipCodeCoord_dict['Latitude']
    lng1, lat1 = radians(longitudes[zip1]), radians(latitudes[zip1])
    lng2, lat2 = radians(longitudes[zip2]), radians(latitudes[zip2])

    dlng = lng2 - lng1
    dlat = lat2 - lat1

    a = sin(dlat / 2) ** 2 + cos(lat1) * cos(lat2) * sin(dlng / 2) ** 2
    # Clamp to 1.0: floating-point rounding can push sqrt(a) marginally above
    # 1, which is outside the domain of asin.
    c = 2 * asin(min(1.0, sqrt(a)))

    dist = RADIUS * c

    # Multiply by distance to penalize long commutes
    return dist * dist

In [None]:
def eval_func(individual):
    """Score one individual for the minimization problem.

    The score is the value of ``total_distance(individual)`` plus a flat
    penalty of 1000000 for every repeated entry in ``individual``.

    Returns:
        A one-element tuple holding the score.
    """
    # 1 total distance -> minimum
    travel_cost = total_distance(individual)

    # 2 penalty: number of entries that duplicate another entry
    repeated = len(individual) - len(set(individual))

    return (travel_cost + repeated * 1000000,)

In [None]:
# Test the eval function on the sample individual
eval_func(ind)

In [None]:
# Add the eval function to the toolbox under the name "evaluate"
toolbox.register("evaluate", eval_func)

In [None]:
# Create the optimization requirements
toolbox.register("select", tools.selNSGA2)
# Individuals are permutations of student indices, so mate with a
# permutation-preserving operator (partially matched crossover). A plain
# two-point crossover swaps raw slices between parents and yields children
# with duplicated and missing indices.
toolbox.register("mate", tools.cxPartiallyMatched)
# tools.mutShuffleIndexes : Shuffle the attributes of the input individual and return the mutant.
toolbox.register("mutate", tools.mutShuffleIndexes, indpb=0.8)
# Hall of fame that keeps only the single best individual
hof = tools.HallOfFame(1)

In [None]:
# Optimization parameters
POP_SIZE = 200  # population size (also passed as mu and lambda_ to the algorithm)
MAX_GEN = 100  # number of generations (passed as ngen)
MUT_PROB = 0.2  # mutation probability (passed as mutpb)
CX_PROB = 0.8  # crossover probability (passed as cxpb)

In [None]:
# Generate the initial population of POP_SIZE individuals and display it
pop = toolbox.population(n=POP_SIZE)
pop

In [None]:
# Add statistical information to the output
# The statistics are computed on the fitness values of each individual
stats = tools.Statistics(lambda ind: ind.fitness.values)
# Report the average, minimum and maximum fitness
stats.register("avg", np.mean, axis=0) 
stats.register("min", np.min, axis=0)
stats.register("max", np.max, axis=0)

In [None]:
%%time 
# Run the case: evolve the population with the (mu + lambda) algorithm using
# the operators registered on the toolbox; the hall of fame and the statistics
# object are passed in, and the final population and the logbook are returned
result, log = algorithms.eaMuPlusLambda(pop, 
                                     toolbox, 
                                     mu=POP_SIZE, # The number of individuals to select for the next generation.
                                     lambda_= POP_SIZE, # The number of children to produce at each generation.
                                     cxpb= CX_PROB,
                                     mutpb= MUT_PROB, 
                                     halloffame=hof,
                                     stats= stats, 
                                     ngen= MAX_GEN,
                                     verbose= True)

In [None]:
# Display the population returned by the algorithm
result

In [None]:
# Display the hall of fame, which was created to keep the single best individual
hof