# Distance between school and nearest grocery

We'll take a brute-force approach and compare each school's location to every 
grocery store in the same city. For Austin, that works out to 39 grocery stores x 62 schools, or 2,418 comparisons of latitude and longitude. It's not efficient by a long shot, but the dataset is small enough that it runs in less than a second.

In [1]:
# This library is needed to get distance between two sets of coordinates

# !pip install geopy


In [16]:
import pandas as pd
import numpy as np
import geopy.distance as gp

In [17]:
# Load the coordinates for Austin schools, gathered earlier from U.S. Department
# of Education data combined with a Google API pull.
# read_csv already returns a DataFrame, so no extra pd.DataFrame() wrap is needed.

school_df = pd.read_csv("Austin_Coords.csv")

In [18]:
# Load the coordinates for Austin grocery stores from the earlier Google API pull.
# read_csv already returns a DataFrame, so no extra pd.DataFrame() wrap is needed.

grocery_df = pd.read_csv("Austin_groceries.csv")

In [19]:
school_df.head(2)

Unnamed: 0,School Name,Location,Percent in Poverty,Lat,Long,Google Place ID
0,Allison Elementary,Austin,92.0,30.168207,-97.81776,ChIJ04RBdWa3RIYRgJ7aGK5dgXM
1,Andrews Elementary School,Austin,91.02,30.317554,-97.679663,ChIJ6YaMBenJRIYRhrPJHKXhGBU


In [20]:
grocery_df.head(2)

Unnamed: 0,Store Name,Location,Lat,Long,Vicinity
0,H-E-B,Austin,30.216284,-97.830988,"6900 Brodie Ln, Austin"
1,H-E-B,Austin,30.197923,-97.786481,"600 W William Cannon Dr, Austin"


In [7]:
# Pull the schools' latitude and longitude columns out as NumPy arrays

school_lat, school_long = (np.array(school_df[axis]) for axis in ("Lat", "Long"))

In [8]:
# Pull the stores' latitude and longitude columns out as NumPy arrays

grocery_lat, grocery_long = (np.array(grocery_df[axis]) for axis in ("Lat", "Long"))

In [9]:
# Pair each latitude with its longitude as a (lat, long) tuple, the form the
# distance function used further down expects

school_coords = list(zip(school_lat, school_long))
store_coords = list(zip(grocery_lat, grocery_long))

In [10]:
# Pair every school name with its coordinates as (name, (lat, long)) so the
# matches can be double-checked later

school_name = school_df["School Name"]
schools = list(zip(school_name, school_coords))

In [11]:
# Bundle each store as (name, address, (lat, long)) so that everything about a
# store can be looked up by its position in the list

store_address = grocery_df["Vicinity"]
store_name = grocery_df["Store Name"]
stores = list(zip(store_name, store_address, store_coords))

In [12]:
# Sanity check of geopy.distance.distance: first school against first store, in miles

first_school, first_store = school_coords[0], store_coords[0]
print(gp.distance(first_school, first_store).miles)

3.4049041064242647


In [13]:
# Find the grocery store closest to each school by brute force: every school
# is compared against every store using geopy's distance function.
#
# Three parallel lists are built, one entry per school and in school order:
#   closest_store        - distance in miles to the nearest store
#   closest_store_index  - position of that store in store_coords / stores,
#                          so its name and address can be looked up later
#   school_index         - position of the school, kept only as a cross-check
#                          that the results line up with the school list

closest_store = []
closest_store_index = []
school_index = []

for school, school_location in enumerate(school_coords):

    # Start from infinity rather than a fixed search radius so that the first
    # store always becomes the initial candidate. Seeding with a cutoff such
    # as 10 miles would silently report "10 miles, store 0" for any school
    # whose nearest store is farther away than the cutoff.

    closest = float("inf")
    index = None

    # The inside loop takes one school and goes through every store

    for j, store_location in enumerate(store_coords):

        # This handy function from geopy gives the distance in miles

        distance = gp.distance(school_location, store_location).miles

        # Strict "<" keeps the first store found when two are equally close

        if distance < closest:
            closest = distance
            index = j

    # Record the winner for this school. index is still None only when there
    # were no stores at all to compare against.

    closest_store.append(closest)
    closest_store_index.append(index)
    school_index.append(school)
    
    
# print(closest_store,closest_store_index,school_index)    

In [14]:
# Flatten the search results into plain lists, one per output column and all in
# school order. The earlier lists hold tuples, which can't be dropped straight
# into dataframe columns.

close_school = []
close_store = []
store_address = []  # from here on this name holds the matched addresses, not the "Vicinity" column
distance_to_store = []

# zip walks the per-school sequences in step: the (name, coords) tuple for the
# school, the index of its closest store, and the distance to that store.

for school_entry, store, miles in zip(schools, closest_store_index, closest_store):

    # Look up the (name, address, coords) tuple of the closest store

    matched_store = stores[store]

    # First element of the school tuple is the school name

    close_school.append(school_entry[0])

    # First and second elements of the store tuple are its name and address

    close_store.append(matched_store[0])
    store_address.append(matched_store[1])

    # Distance found by the search, rounded to two decimals

    distance_to_store.append(round(miles, 2))

In [15]:
# Start from an empty dataframe; the result columns are attached to it next
df = pd.DataFrame()

In [16]:
# The new dataframe contains the schools in order, along with the closest store,
# its address, and its distance as the crow flies. It would be helpful to get the
# driving distance, but that may be hard to obtain programmatically.
# I know that, for example, the store nearest Allison Elementary, the first school
# on our list, is actually about a mile's drive away from the school. So the
# straight-line distance is deceiving: it tends to be shorter than the real
# driving distance.

columns = {
    "School": close_school,
    "Closest Store": close_store,
    "Store Address": store_address,
    "Distance in Miles": distance_to_store,
}

# Attach the columns one at a time, in the order listed above

for label, values in columns.items():
    df[label] = values

In [17]:
df.head()

Unnamed: 0,School,Closest Store,Store Address,Distance in Miles
0,Allison Elementary,H-E-B,"2110 W Slaughter Ln, Austin",0.66
1,Andrews Elementary School,H-E-B,"7112 Ed Bluestein Blvd #125, Austin",0.97
2,Becker Elementary School,H-E-B,"2400 S Congress Ave, Austin",0.84
3,Blackshear Elementary School,H-E-B,"2701 E 7th St, Austin",0.83
4,Blanton Elementary,H-E-B,"1801 E 51st St, Austin",0.63


In [18]:
# The next step will be to add the poverty rate to the dataframe. I'll look at 
# the school_df that we made earlier.

school_df.head(3)

Unnamed: 0,School Name,Location,Percent in Poverty,Lat,Long,Google Place ID
0,Allison Elementary,Austin,92.0,30.168207,-97.81776,ChIJ04RBdWa3RIYRgJ7aGK5dgXM
1,Andrews Elementary School,Austin,91.02,30.317554,-97.679663,ChIJ6YaMBenJRIYRhrPJHKXhGBU
2,Becker Elementary School,Austin,64.52,30.250336,-97.759642,ChIJbSvrueG0RIYRxmI7kMMuRAY


In [19]:
# Pull the poverty percentage column out of school_df as its own pandas Series
# (one value per school, in school_df's row order)

poverty = school_df["Percent in Poverty"]

In [20]:
# Now I'll add it to our dataframe. Assigning a Series lines values up by index
# label rather than by position, so this relies on df and school_df both
# carrying the default 0..n-1 index with the schools in the same order.

df["Percent in Poverty"] = poverty

In [21]:
df.head(3)

Unnamed: 0,School,Closest Store,Store Address,Distance in Miles,Percent in Poverty
0,Allison Elementary,H-E-B,"2110 W Slaughter Ln, Austin",0.66,92.0
1,Andrews Elementary School,H-E-B,"7112 Ed Bluestein Blvd #125, Austin",0.97,91.02
2,Becker Elementary School,H-E-B,"2400 S Congress Ave, Austin",0.84,64.52


In [22]:
# Write the finished table to disk so it can be analyzed; index=False keeps the
# row numbers out of the file.

df.to_csv("Austin_distance_to_stores.csv", index=False)

# Conclusion for now: 
It would be helpful to have the driving distance to get a true picture of how far away a grocery store is.

In [None]:
# Google distance matrix API

# https://maps.googleapis.com/maps/api/distancematrix/json?parameters


# use pipe | to separate coordinates

In [23]:
import gmaps
from config import gkey2
import requests
import json

In [24]:
# Reload the saved results. read_csv already returns a DataFrame, so it can be
# used directly without an extra pd.DataFrame() wrap.

driving_df = pd.read_csv("Austin_distance_to_stores.csv")
driving_df.head(2)

Unnamed: 0,School,Closest Store,Store Address,Distance in Miles,Percent in Poverty
0,Allison Elementary,H-E-B,"2110 W Slaughter Ln, Austin",0.66,92.0
1,Andrews Elementary School,H-E-B,"7112 Ed Bluestein Blvd #125, Austin",0.97,91.02


In [25]:
# Hard-coded sample pair for a first try at the API, to see if it works as expected

# Allison Elementary

lat, long = 30.168207, -97.817760

# HEB nearby

lat2, long2 = 30.175379, -97.825078

In [26]:
# Query parameters for the sample request: one origin, one destination,
# results in imperial units
params = dict(
    origins=f"{lat},{long}",
    key=gkey2,
    destinations=f"{lat2},{long2}",
    units="imperial",
)

# optional to use "mode" = "transit", then "transit_mode" = "bus"



In [27]:
base_url = "https://maps.googleapis.com/maps/api/distancematrix/json?"

In [31]:
# Call the API for the sample origin/destination pair.
# The timeout keeps the notebook from hanging if the service never answers, and
# raise_for_status surfaces HTTP errors instead of trying to parse an error page.
response = requests.get(base_url, params=params, timeout=10)
response.raise_for_status()
data = response.json()


In [36]:
# print(json.dumps(data, indent=4, sort_keys=True))

In [35]:
# Driving distance between Allison and its nearest grocery store: the response
# holds one row per origin and one element per destination, so the single
# origin/destination pair is at [0][0]

first_element = data["rows"][0]["elements"][0]
distance1 = first_element["distance"]["text"]
print(distance1)

1.0 mi


In [37]:
# Same sample pair, but asking for the trip by bus

params = dict(
    origins=f"{lat},{long}",
    key=gkey2,
    destinations=f"{lat2},{long2}",
    units="imperial",
    mode="transit",
    transit_mode="bus",
)

In [40]:
# Call the API again with the bus parameters.
# The timeout keeps the notebook from hanging if the service never answers, and
# raise_for_status surfaces HTTP errors instead of trying to parse an error page.
response = requests.get(base_url, params=params, timeout=10)
response.raise_for_status()
data2 = response.json()

In [41]:
# Empty lists to collect the bus distance and bus fare results
bus_distance, bus_fare = [], []

In [42]:
# Record the bus result for the sample pair.
# The "fare" entry is only included in a response when fare information is
# available for the route, so fall back to None instead of raising KeyError.
bus_element = data2["rows"][0]["elements"][0]
bus_distance.append(bus_element["distance"]["text"])
bus_fare.append(bus_element.get("fare", {}).get("text"))

In [43]:
print(bus_distance, bus_fare)

['1.3 mi'] ['$1.25']


In [None]:
# So Allison Elementary is 0.66 miles from the nearest store as the crow flies,
# 1.0 miles away by driving, and 1.3 miles away if riding the bus.