In [1]:
# Run this to install necessary packages and restart the kernel for osmnx import error to be resolved

# !pip install geopandas
# !pip install folium
# !pip install geopy
# !pip install taxicab
# !pip install osmnx==1.9.1
# !pip install --upgrade osmnx matplotlib numpy

Import all libraries

In [2]:
import warnings

warnings.simplefilter("ignore", category=FutureWarning)

import pandas as pd
import geopandas as gpd
import math
import folium
from folium import Choropleth, Circle, Marker
from folium.plugins import HeatMap, MarkerCluster
import requests 
import numpy as np
import random
from geopy.geocoders import Nominatim
from IPython.display import Image, IFrame 
from IPython.core.display import HTML 
from pandas import json_normalize
%matplotlib inline
import osmnx as ox
import networkx as nx
import matplotlib
import matplotlib as plt
import taxicab as tc
from IPython.display import clear_output
import csv

  from pandas.core.computation.check import NUMEXPR_INSTALLED
  from pandas.core import (


The objective of this script is to generate the data set we will use for the remainder of this project. 

This script will focus more on the implementation of the various libraries explored in the "Book 0 - Interactive Map", so please refer to that if you need more background info.

So what do we actually want our data set to have?

Well, we want a set of data points which map to various high-traffic locations for AGM to consider when thinking about where to open brick-and-motar locations and how to determine the routes autonomous trucks should be planned around. 

So we will want the following information:

    Location Name
    Location Longitude
    Location Latitude
    Location County
    Location Estimated Population
    Shortest path distance between each other location in the same county
    
Note: We are not using any import to get the exact number of people at each location and so we will be providing random numbers for this. 

First lets define the functions we will call to get some of our key features

In [4]:
# List of queries we want to consider when running get_locations()

company_names = [
    'Lawrence Livermore National Laboratory','Gap Inc.', 'Levi Strauss & Co.', 'Lucid Motors',
    'Rivian', 'Uber', 'Waymo', 'Zoox',
    '23andMe',
    'Chiron',   
    'Clorox', 'GoPro', 'Sephora', 
    'Chegg',    
    'Course Hero', 'Coursera', 'Khan Academy', 'Magoosh', 'Quizlet',
    'Udacity',
    'Apple Inc.', 'Applied Materials', 'Asus',
    'Fitbit', 'Fujitsu Computer Products of America',
    'Hitachi Data Systems', 'Integrated Device Technology', 'Intel',
    'Juniper Networks','Logitech',
    'Western Digital', 
    'Bloom Energy', 'Chevron',
    'SolarCity', 
    'Capcom U.S.A.', 'Crunchyroll', 'Electronic Arts',
    'Lucasfilm Animation', 'Netflix',
    'Pixar', 'Roblox Corporation', 'Sega of America', 'Skywalker Sound', 'Sony Interactive Entertainment', 'PlayStation',
    'Ubisoft', 'Zynga'
]


queries = ['College', 'Parks', 'University', 'BART Station']
queries.extend(company_names)

In [8]:
# Define specific centeral coordinates and radi for each of the 9 counties in the bay area

# counties = ['San Francisco', 'Solano', 'Santa Clara', 'Contra Costa', 'Alameda', 'Sonoma', 'Marin', 'Napa', 'San Mateo']
# coordinates = [[37.759344, -122.446452], [38.295717, -121.934735], [37.258336, -121.734320],[37.955321, -121.879418], 
#                [37.674112, -121.971002], [38.484955, -122.827157], [38.063311, -122.732092],[38.450523, -122.329816],
#               [37.466796, -122.391435]]
# radi = [8000, 18000, 38000, 28000, 40000, 30000, 26000, 28000, 28000]

# While we have information on all 9 Bay Area counties, BART operates in San Francisco, San Mateo, Alameda, Contra Costa, and Santa Clara
# Therefore, we are opting to only include them for this Proof of Concept

counties = ['San Francisco', 'Contra Costa', 'San Mateo', 'Alameda', 'Santa Clara']
coordinates = [[37.759344, -122.446452], [37.955321, -121.879418], 
                [37.466796, -122.391435], [37.674112, -121.971002], [37.258336, -121.734320]]
radi = [10000, 30000, 30000, 30000, 30000]


# Super useful resources:
# https://www.freemaptools.com/find-usa-cities-inside-radius.htm
# Google Maps

Verify the general areas map up

In [15]:
# Create a map

i = 4

print(counties[i])

m_1 = folium.Map(location=coordinates[i], tiles='openstreetmap', zoom_start=12)

# Display the map
m_1

Santa Clara


In [16]:
def get_locations(n):
    
    # This function return the top 'n' most populated areas for each county (matched with the county)
    
    # Lists to store the name, coordinates and county of each result
    
    r_names = []
    r_coordinates = []
    r_county = []
    r_formatted_address = []
    
    # Initialize URL for our request and provide your authorization key for foursquare
    url = "https://api.foursquare.com/v3/places/search"
    
    headers = {
        "Accept": "application/json",
        "Authorization": "fsq3NLgtd3t+WRQkrIsxCEF/vjPYC/RX8ytHHCzGVfTj7XU="
    }
    
    # For each county
    for i, county in enumerate(counties):
        # For each query
        for j, query in enumerate(queries):

            params = {
                "query": f"{query}",
                "ll": f"{coordinates[i][0]},{coordinates[i][1]}",
                "open_now": "false",
                "radius": f"{radi[i]}",
                "limit":50,
                "sort":"DISTANCE"
            }

            response = requests.request("GET", url, params=params, headers=headers)
            
            # For each result returned we specifically want the name and (long., Lat.) pair
            
            try:
                
                clear_output(wait=True)
                print(f"{i + 1} out of {len(counties)} counties | {j}/{len(queries)} of queries run")
                
#                 print(f"{coordinates[i][0]}, {coordinates[i][1]}")
#                 print(response.json())
#                 print(len(response.json()['results']))

                for item in response.json()['results']:

                    r_names.append(item['name'])
                    r_coordinates.append([item["geocodes"]['main']['latitude'], item["geocodes"]['main']['longitude']])
                    r_county.append(county)
                    r_formatted_address.append(item["location"]['formatted_address'])
                    
            except:
                
                continue
    
    print("\nDone!")
    
    return r_names, r_county, r_coordinates, r_formatted_address
                
            
r_names, r_county, r_coordinates, r_formatted_address = get_locations(50)

5 out of 5 counties | 50/51 of queries run

Done!


In [17]:
# Verify by hand that coordinates and addresses link to the corresponding name

for i, addr in enumerate(r_formatted_address):
    print(r_names[i], addr, r_coordinates[i])

Rooftop School - Upper Campus 500 Corbett Ave, San Francisco, CA 94114 [37.75751, -122.444699]
Rooftop Elementary School 443 Burnett Ave, San Francisco, CA 94131 [37.754811, -122.44325]
Psi Omega Housing tRust 101 Woodland Ave (Parnassus), San Francisco, CA 94117 [37.762367, -122.453371]
UCSF Blood Draw Lab at Parnassus 400 Parnassus Ave, San Francisco, CA 94143 [37.764114, -122.457416]
UCSF - School Of Nursing 2 Koret Way, San Francisco, CA 94143 [37.762525, -122.458969]
Millberry Fitness & Recreation Center 500 Parnassus Ave, San Francisco, CA 94143 [37.763804, -122.459332]
Millberry Union 500 Parnassus Ave, San Francisco, CA 94143 [37.76374, -122.458468]
Chinese Immersion School at De Avila 1250 Waller St, San Francisco, CA 94117 [37.769761, -122.444279]
Mission Dolores Gym 3371 16th St (Church), San Francisco, CA 94114 [37.764214, -122.4284]
Pacific Primary School 1500 Grove St (Baker), San Francisco, CA 94117 [37.775611, -122.441688]
USF - Kalmanovitz Hall 2130 Fulton St (Cole St)

TTR Data Recovery Services 1 Sansome St ( 1 Sansome St #3500), San Francisco, CA 94104 [37.790521, -122.400923]
Alpine Data Labs 1550 Bryant St, San Mateo, CA 94103 [37.767544, -122.410894]
Sterling Data Storage 601 Van Ness Ave, San Francisco, CA 94102 [37.781216, -122.421378]
Lazarus Data Recovery 379 Clementina St, San Francisco, CA 94103 [37.780926, -122.403369]
Citus Data 450 Townsend St, San Francisco, CA 94107 [37.774628, -122.398553]
Disk Doctors Hard Drive Data Recovery Services San Francisco 795 Folsom St, San Francisco, CA 94107 [37.782184, -122.400575]
Insight Health Data Science 2148 3rd St, San Francisco, CA 94107 [37.762502, -122.389166]
Periscope Data 1405 Minnesota St (Harrison St), San Francisco, CA 94107 [37.751908, -122.38926]
Insight Data Science 500 3rd St, San Francisco, CA 94107 [37.780787, -122.395761]
Good Data Corporation 989 Market St, San Francisco, CA 94103 [37.789674, -122.402594]
Secure Data Recovery Services 580 California St, San Francisco, CA 94104 [3

DVC Music Building 321 Golf Club Rd (Diablo Valley College), Pleasant Hill, CA 94523 [37.968065, -122.069798]
DVC Music Technology Center 321 Golf Club Rd (Diablo Valley College), Pleasant Hill, CA 94523 [37.968065, -122.069798]
DVC Basement Cafe 321 Golf Club Rd (Diablo Valley College), Pleasant Hill, CA 94523 [37.968955, -122.070136]
DVC Performing Arts Center 321 Golf Club Rd (Diablo Valley College), Pleasant Hill, CA 94523 [37.96942, -122.070018]
Dvc Cafeteria Pleasant Hill , 94523, United States (Diablo Valley College), Pleasant Hill, CA 94523 [37.968988, -122.070569]
DVC Math Building 321 Golf Club Rd (Diablo Valley College), Pleasant Hill, CA 94523 [37.968062, -122.071165]
DVC Art Building 321 Golf Club Rd (Diablo Valley College), Pleasant Hill, CA 94523 [37.969888, -122.070764]
DVC Parking Lot Pleasant Hill, CA 94523 [37.97037, -122.070863]
DVC Engineering & Technology Building 321 Golf Club Rd (Diablo Valley College), Pleasant Hill, CA 94523 [37.967152, -122.071114]
DVC Quad 3

Tat It Up Entertainment 5433 Clayton Rd, Clayton, CA 94517 [37.95194, -121.955971]
Suchita Interactive 2518 Taylor Way, Antioch, CA 94531 [37.956204, -121.780438]
Do It Real Big Entertainment Inc. 4522 Elkhorn Way, Antioch, CA 94531 [37.972515, -121.768402]
Jp Jazz Entertainment Group 4485 Snowberry Ct, Concord, CA 94521 [37.949564, -121.992626]
SGL Entertainment 3266 Tioga Rd, Concord, CA 94518 [37.962404, -122.026014]
Concord Entertainment Catering 1950 Waterworld Pkwy, Concord, CA 94520 [37.973751, -122.051117]
A.J. Entertainment 400 Del Antico Ave, Oakley, CA 94561 [37.995277, -121.707663]
EnVie Interactive 1350 Treat Blvd (oak st), Walnut Creek, CA 94597 [37.925934, -122.056148]
The Magic Entertainment of John Gardenier 592 Flowering Plum Pl, Brentwood, CA 94513 [37.918884, -121.701923]
Amazing Magic Entertainment 592 Flowering Plum Pl, Brentwood, CA 94513 [37.918884, -121.701923]
Moonstone Interactive 2010 Crow Canyon Pl, San Ramon, CA 94583 [37.77914, -121.974038]
Iron Horse Int

McKinley Institute of Technology 400 Duane St (at James Ave), Redwood City, CA 94062 [37.481922, -122.235988]
Arshon Technology 533 Airport Blvd, Burlingame, CA 94010 [37.589325, -122.349523]
Bayshore Technology Park Gym 1300 Island Dr (btwn Marine & Bridge Pkwy), Redwood City, CA 94065 [37.537204, -122.255824]
Integrated Access Security 2671 El Camino Real, Redwood City, CA 94061 [37.472446, -122.215773]
Revitalize Integrated Body Systems 1618 El Camino Real (at Stone Pine), Menlo Park, CA 94025 [37.458306, -122.191645]
Integrated Surface Tech 1455 Adams Dr, Menlo Park, CA 94025 [37.477877, -122.142579]
Integrated Trade Services 212 Shaw Rd, South San Francisco, CA 94080 [37.640612, -122.410935]
Integrated Healthcare - A Chiropractic Wellness Center 480 California Ave, Palo Alto, CA 94306 [37.425726, -122.145805]
Renewal Integrated Massage 1012 Metro Cir, Palo Alto, CA 94303 [37.440946, -122.120611]
Integrated Healing Arts 4161 El Camino Way, Palo Alto, CA 94306 [37.412456, -122.12424

HealthMarkets Insurance - Michael Strauss 696 San Ramon Valley Blvd, Danville, CA 94526 [37.812469, -121.99463]
David Levi Cabinets 8000 Phaeton Dr, Oakland, CA 94605 [37.77309, -122.138768]
Randall E. Strauss 1999 Harrison St, Oakland, CA 94612 [37.807695, -122.265135]
Telsa Motors 6701 Amador Plaza Rd, Dublin, CA 94568 [37.70276, -121.924961]
Luxury Motors of Bay Area 7059 Commerce Cir, Pleasanton, CA 94588 [37.69929, -121.915865]
Kassabian Motors 6080 Dublin Blvd, Dublin, CA 94568 [37.705429, -121.905134]
Elias Motors 28546 Mission Blvd, Hayward, CA 94544 [37.637559, -122.055042]
Mk Motors Corporation 27151 Mission Blvd, Hayward, CA 94544 [37.643599, -122.061977]
Formula 1 Motors 25697 Mission Blvd, Hayward, CA 94544 [37.653563, -122.069848]
California Express Motors 25375 Mission Blvd, Hayward, CA 94544 [37.656344, -122.071653]
Mangale Motors 24588 Mission Blvd, Hayward, CA 94544 [37.663188, -122.075621]
Iconic Motors 2 California Ave (Stanley), Pleasanton, CA 94566 [37.668729, -12

Play It Again Sports 5548 Springdale Ave, Pleasanton, CA 94588 [37.692382, -121.924541]
LPFD Station 2, Engine 92 6300 Stoneridge Mall Rd (Stoneridge Dr), Pleasanton, CA 94588 [37.692365, -121.92296]
Electric Charging Station 28761 Hayward Blvd, Hayward, CA 94542 [37.653612, -122.017545]
West Dublin/Pleasanton BART Station 6501 Golden Gate Dr (at Patrick Way), Dublin, CA 94568 [37.699709, -121.928243]
Connolly Station 7550 St Patrick Way, Dublin, CA 94568 [37.701916, -121.929871]
Luv 2 Play 7871 Amador Valley Blvd, Dublin, CA 94568 [37.707028, -121.935681]
Learn And Play Montessori School 11900 Silvergate Dr, Dublin, CA 94568 [37.709296, -121.93918]
Next Play Consulting 6150 Stoneridge Mall Rd, Pleasanton, CA 94588 [37.69776, -121.924402]
Enchanted Play Family Day Care 3763 Muirwood Dr, Pleasanton, CA 94588 [37.669807, -121.915272]
County of Alameda Fire Station 7494 Donohue Dr, Dublin, CA 94568 [37.708898, -121.93334]
Val Vista Water Play Area Johnson Dr (Stoneridge), Pleasanton, CA 9

Deep Technology 3266 Trabuco Ct, San Jose, CA 95135 [37.30696, -121.760161]
Focused Integrated Training 3300 Quimby Rd, San Jose, CA 95148 [37.324083, -121.778752]
Integrated Communication Systems 550 Parrott St (Senter Rd.), San Jose, CA 95112 [37.308235, -121.850096]
AT&T Device Support Center 700 S Winchester Blvd, San Jose, CA 95128 [37.314554, -121.949275]
Integrated Display Technology 1161 Ringwood Ct, San Jose, CA 95131 [37.392907, -121.893725]
AlcoAlert Ignition Interlock Device 521 Charcot Ave, San Jose, CA 95131 [37.383887, -121.91444]
IntelliVision 6203 San Ignacio Ave, San Jose, CA 95119 [37.236381, -121.777865]
Intellect Square 4197 Rocket Dr, San Jose, CA 95101 [37.264503, -121.813058]
Check Into Cash Almaden Expwy (at Branham Ln), San Jose, CA 95136 [37.264262, -121.848256]
Intelligence Security 1692 Tully Rd, San Jose, CA 95122 [37.320449, -121.82633]
Intelli 1980 Senter Rd, San Jose, CA 95112 [37.313765, -121.852681]
Check Into Cash 4750 Almaden Expy, San Jose, CA 9511

So now that we have run our function, we want to remove any duplicate results from our lists. One way to do this would be to use a hashtable which creates unique keys based on the name and coordinates of each location. If a location already exists matching an existing key, we simply skip to the next location.

Lets perform some basic validation of our data and then implement the aforementioned algorithm.

In [18]:
len(r_county) == len(r_names) == len(r_coordinates) == len(r_formatted_address)

True

In [19]:
for i, county in enumerate(set(r_county)):
    print(f"{i + 1} {county}") 

1 San Mateo
2 Contra Costa
3 Alameda
4 Santa Clara
5 San Francisco


In [20]:
# Declare HashTable
HT = {}

# Declare the filtered_lists

f_names = []
f_coordinates = []
f_county = []
f_formatted_address = []

for i in range(len(r_names)):
#     Denote the key as r_names + r_coodinates[0] + r_coordinates[1]
    key = r_names[i] + str(r_coordinates[i][0]) + str(r_coordinates[i][1])
    
    if key not in HT:
        HT[key] = i
        
        f_names.append(r_names[i])
        f_coordinates.append(r_coordinates[i])
        f_county.append(r_county[i])
        f_formatted_address.append(r_formatted_address[i])
        
    else:
        
        continue

print(f"Removed: {len(r_names) - len(f_names)} Duplicates")
print(f"Filtered lists lengths are {len(f_names)}")


Removed: 523 Duplicates
Filtered lists lengths are 3689


In [21]:
# Verify by hand that coordinates and addresses link to the corresponding name

for i, addr in enumerate(f_formatted_address):
    print(f_names[i], addr, f_coordinates[i])

Rooftop School - Upper Campus 500 Corbett Ave, San Francisco, CA 94114 [37.75751, -122.444699]
Rooftop Elementary School 443 Burnett Ave, San Francisco, CA 94131 [37.754811, -122.44325]
Psi Omega Housing tRust 101 Woodland Ave (Parnassus), San Francisco, CA 94117 [37.762367, -122.453371]
UCSF Blood Draw Lab at Parnassus 400 Parnassus Ave, San Francisco, CA 94143 [37.764114, -122.457416]
UCSF - School Of Nursing 2 Koret Way, San Francisco, CA 94143 [37.762525, -122.458969]
Millberry Fitness & Recreation Center 500 Parnassus Ave, San Francisco, CA 94143 [37.763804, -122.459332]
Millberry Union 500 Parnassus Ave, San Francisco, CA 94143 [37.76374, -122.458468]
Chinese Immersion School at De Avila 1250 Waller St, San Francisco, CA 94117 [37.769761, -122.444279]
Mission Dolores Gym 3371 16th St (Church), San Francisco, CA 94114 [37.764214, -122.4284]
Pacific Primary School 1500 Grove St (Baker), San Francisco, CA 94117 [37.775611, -122.441688]
USF - Kalmanovitz Hall 2130 Fulton St (Cole St)

UCSF - Technology Store 500 Parnassus Ave, San Francisco, CA 94143 [37.766662, -122.449327]
Recursive Technology 646 Ashbury St, San Francisco, CA 94117 [37.769418, -122.446624]
Rare Device 4071 24th St (Castro), San Francisco, CA 94114 [37.751195, -122.433291]
Plant House by Rare Device 505 Divisadero St (Fell & Divisadero), San Francisco, CA 94117 [37.774491, -122.437967]
Rare Device 600 Divisadero St (at Hayes St), San Francisco, CA 94117 [37.775175, -122.437577]
USF - Center for Instructional Technology 2350 Turk Blvd (Tamalpais), San Francisco, CA 94118 [37.77875, -122.448468]
Counsel - Media & Technology Law 767 Valencia St, San Francisco, CA 94110 [37.76065, -122.421166]
Integrated Mortgage Capital 1907 Market St, San Francisco, CA 94103 [37.770428, -122.424897]
Community Technology Network 390 Valencia St, San Francisco, CA 94103 [37.766699, -122.422384]
Howell Technology 910 Steiner St (Macallister), San Francisco, CA 94117 [37.777942, -122.433011]
Icomppower Technology 417 Ba

Gap IT 4450 Rosewood Dr, Pleasanton, CA 94588 [37.696847, -121.883764]
Thermal Press International, Inc. 4555 Las Positas Rd Ste A, Livermore, CA 94551 [37.695823, -121.798406]
Synchronet Marine Inc. 4473 Willow Rd, Pleasanton, CA 94588 [37.688051, -121.896436]
Carney and Company School Photography 3613 Countryside Way, Antioch, CA 94509 [37.984417, -121.857331]
Athens Painting & Commercial Coatings 4291 Suzanne Dr, Pittsburg, CA 94565 [37.992766, -121.879256]
Clayton Mind & Body Connections 6200 Center St, Clayton, CA 94517 [37.940242, -121.933083]
Levity Longevity Fitness Studio 1026 Oak St, Clayton, CA 94517 [37.940719, -121.936421]
East County Boys & Girls Club 1001 Stoneman Ave, Pittsburg, CA 94565 [38.00505, -121.876462]
Contra Costa Country Employment And Human Services 4545 Delta Fair Blvd, Antioch, CA 94509 [38.004879, -121.855264]
D & A Roofing Systems Co. 5542 Sepulveda Ct, Concord, CA 94521 [37.953994, -121.946772]
Lone Tree Golf Course & Event Center 4800 Golf Course Rd, A

Edgewood County Park 10 Old Stage Coach Rd (at Edgewood Rd), Redwood City, CA 94062 [37.473538, -122.278645]
Blue Oak Trail Redwood City, CA 94062 [37.475645, -122.283773]
Pulgas Ridge Open Space Preserve Edmonds Rd (at Crestview Dr.), Redwood City, CA 94062 [37.478609, -122.291883]
Crestview Park 998A Crestview Dr, San Carlos, CA 94070 [37.484457, -122.284637]
Mavericks Beach Moss Beach, CA 94019 [37.498087, -122.496258]
Pillar Point Access & Maverick's Trailhead West Point Ave, Half Moon Bay, CA 94019 [37.501622, -122.496682]
Vista Park 1030 Vista Rd, Hillsborough, CA 94010 [37.551979, -122.348709]
El Corte de Madera Open Space Preserve 15010 Skyline Blvd, Woodside, CA 94062 [37.411171, -122.306515]
Highlands Park 2600 Melendy Dr (at Aberdeen Dr), San Carlos, CA 94070 [37.493526, -122.273713]
Pillar Point Bluff trailhead & Jean Lauer Trail 840 Airport St, Moss Beach, CA 94038 [37.511492, -122.505416]
Bay Meadows Park 380 E 28th Ave (Delaware), San Mateo, CA 94403 [37.544639, -122.297

Applied Underwriters 950 Tower Ln, Foster City, CA 94404 [37.558261, -122.27636]
Peninsula Building Materials 109 Seaport Blvd, Redwood City, CA 94063 [37.493607, -122.214471]
Bay Materials 3700 Haven Ct, Menlo Park, CA 94025 [37.484626, -122.181755]
Classic Materials Tanforan Mall, San Bruno, CA 94066 [37.636683, -122.417862]
Spilker Engineering and Applied Sciences Building 348 Via Pueblo Mall, Stanford, CA 94305 [37.428794, -122.174094]
Materials Science & Engineering William F Durand Building (496 Lomita Mall), Stanford, CA 94305 [37.426888, -122.173271]
Gordon and Betty Moore Materials Research Building 466 Lomita Mall, Stanford, CA 94305 [37.42774, -122.172818]
Applied Research Works 1003 Elwell Ct, Palo Alto, CA 94303 [37.431955, -122.104148]
Peninsula Building Materials 2490 Charleston Rd, Mountain View, CA 94043 [37.422036, -122.097656]
Applied Physics Systems 1245 Space Park Way (Geneva), Sunnyvale, CA 94043 [37.416871, -122.074037]
Applied Silver 26254 Eden Landing Rd, Haywa

Men's Wearhouse 1388 Stoneridge Mall Rd, Pleasanton, CA 94588 [37.692118, -121.927759]
Macy's Men's, Children's & Home 1300 Stoneridge Mall Rd (in Stoneridge Mall), Pleasanton, CA 94588 [37.693494, -121.928193]
Men's Wearhouse 2013 Stoneridge Mall Rd (in Stoneridge Mall), Pleasanton, CA 94588 [37.695354, -121.928725]
Menke & Associates Balasubramaniam 5376 Brookside Ct, Pleasanton, CA 94588 [37.689956, -121.922326]
Tri Valley Acupuncture & Oriental Medicine Center 7990 Amador Valley Blvd, Dublin, CA 94568 [37.705209, -121.935941]
Radience Medical Aesthetics & Laser Center 7667 Amador Valley Blvd, Dublin, CA 94568 [37.707721, -121.933779]
Art of Living Center for Yoga and Meditation 6690 Amador Plaza Rd, Dublin, CA 94568 [37.702118, -121.926147]
Falafel Town House of Mediterranean 7410 Amador Valley Blvd (at Village Pkwy), Dublin, CA 94568 [37.709393, -121.927836]
Amalfi Mediterranean​n Fusion 7111 Village Pkwy, Dublin, CA 94568 [37.709331, -121.925965]
Neuropathy & Diabetic Pain Treatm

Nurse Education Workshops 1777 Hamilton Ave, San Jose, CA 95125 [37.29469, -121.917076]
Electric Charging Station 3095 Yerba Buena Rd, San Jose, CA 95135 [37.300076, -121.762808]
Blossom Hill Caltrain Station 5560 Monterey Hwy, San Jose, CA 95138 [37.253033, -121.797207]
Service Station Maintenance 295 Burning Tree Dr, San Jose, CA 95119 [37.223717, -121.781351]
Electric Charging Station 5560 Lexington Ave, San Jose, CA 95123 [37.252488, -121.799826]
Electric Charging Station San Jose 250 Hospital Pkwy, San Jose, CA 95119 [37.239484, -121.800214]
VTA: Cottle Station Cottle Rd (at California State Route 85), San Jose, CA 95123 [37.248739, -121.803636]
San Jose Fire Department Station #24 2525 Aborn Rd (btwn Pumpherston Wy. & Nieman Blvd.), San Jose, CA 95121 [37.313076, -121.799945]
VTA Snell Light Rail Station Snell Avenue (at Highway 85), San Jose, CA 95123 [37.247727, -121.830705]
China Station Restaurant 80 Senter Rd, San Jose, CA 95111 [37.27781, -121.833088]
Blossom Hill Light Rai

Toshiba Electronic Components 2590 Orchard Pkwy (Orchard), San Jose, CA 95131 [37.384147, -121.930567]
Multitest Electronic Systems 3021 Kenneth St, Santa Clara, CA 95054 [37.377044, -121.95603]
Montalvo Arts Center 15400 Montalvo Rd (Hwy 9 - Saratoga), Saratoga, CA 95070 [37.243722, -122.03101]
Lucasfilm Games 1741 Technology Dr, San Jose, CA 95110 [37.367757, -121.920229]
Watermark Medical Animation & Illustration 1210 E Arques Ave, Sunnyvale, CA 94085 [37.379838, -121.994338]
Pixiense | Animation and Vfx Studio 820 E El Camino Real, Sunnyvale, CA 94087 [37.35515, -122.017819]
Netflix 1732 N 1st St, San Jose, CA 95112 [37.368052, -121.91287]
Netflix Bld B 150 Winchester Cir, Los Gatos, CA 95032 [37.260351, -121.962144]
Netflix Bld C 170 Knowles Dr (at Winchester), Los Gatos, CA 95032 [37.261039, -121.961252]
Netflix-Roku Fountain On Winchester 150 Winchester Cir, Los Gatos, CA 95032 [37.260296, -121.961998]
Netflix Speakeasy 100 Winchester Cir, Los Gatos, CA 95032 [37.259629, -121.96

Next we want to group many results which may point to one campus together

In [22]:
# print("1 - 100\n")

company_locations = {}

for i, address in enumerate(f_formatted_address):
    company_locations[f_names[i]] = address
    

In [23]:
len(company_locations.keys())

3339

In [24]:
for i,key in enumerate(company_locations.keys()): 
    print(i, f_names[i], company_locations[key])


0 Rooftop School - Upper Campus 500 Corbett Ave, San Francisco, CA 94114
1 Rooftop Elementary School 443 Burnett Ave, San Francisco, CA 94131
2 Psi Omega Housing tRust 101 Woodland Ave (Parnassus), San Francisco, CA 94117
3 UCSF Blood Draw Lab at Parnassus 400 Parnassus Ave, San Francisco, CA 94143
4 UCSF - School Of Nursing 2 Koret Way, San Francisco, CA 94143
5 Millberry Fitness & Recreation Center 500 Parnassus Ave, San Francisco, CA 94143
6 Millberry Union 500 Parnassus Ave, San Francisco, CA 94143
7 Chinese Immersion School at De Avila 1250 Waller St, San Francisco, CA 94117
8 Mission Dolores Gym 3371 16th St (Church), San Francisco, CA 94114
9 Pacific Primary School 1500 Grove St (Baker), San Francisco, CA 94117
10 USF - Kalmanovitz Hall 2130 Fulton St (Cole St), San Francisco, CA 94117
11 St Ignatius Church 650 Parker Ave (at Fulton St), San Francisco, CA 94118
12 Holy Family Day Home 299 Dolores St (16th), San Francisco, CA 94103
13 USF - The John Lo Schiavo, S.J. Center for Sc

561 DPW Information Technology 541 Jefferson Ave, Redwood City, CA 94063
562 Integrated Controls 160 Pine St, San Francisco, CA 94111
563 Echo Technology Solutions 150 Chestnut St, San Francisco, CA 94111
564 Integrated Plasmonics 250 Executive Park Blvd, San Francisco, CA 94134
565 Community Technology Network 100 Pine St, San Diego, CA 94111
566 San Francisco Nonprofit Technology Center 3565 17th St, San Francisco, CA 94110
567 Integrated Archive Systems 904 22nd St (btwn Minnesota &Indiana), San Francisco, CA 94107
568 California Technology Agency 1663 Mission St, San Francisco, CA 94103
569 Horizons School of Technology 2145 19th Ave, San Francisco, CA 94116
570 Workshare Technology 275 Middlefield Rd, Menlo Park, CA 94025
571 Sennheiser Technology & Innovation 1390 Market St, San Francisco, CA 94102
572 Parachute Technology 218 9th St (Howard Street), San Francisco, CA 94103
573 Allied Integrated Marketing 1122 Clement St (12th Avenue), San Francisco, CA 94118
574 TransPerfect Med

1122 Morris Tile & Construction Co. 5415 Clayton Rd, Clayton, CA 94517
1123 Pacific Gas & Electric Co. 1501 Loveridge Rd, Pittsburg, CA 94565
1124 Sourdough & Co. 3725 Lone Tree Way, Antioch, CA 94509
1125 Sourdough & Co. 221 Flagstone Dr, Antioch, CA 94509
1126 HealthMarkets Insurance - Michael Strauss 900 A St, Antioch, CA 94509
1127 Diablo Valley Motors 1700 W Leland Rd, Pittsburg, CA 94565
1128 Road Star Motors 2711 Hillcrest Ave (Lakespur Dr), Antioch, CA 94531
1129 CMR 36 Motors 5411 Lone Tree Way, Brentwood, CA 94513
1130 Brown Enterprise Motors 2815 Mitchell Dr (Oak Grove Road), Walnut Creek, CA 94598
1131 Obsibian Motors 1210 Central Blvd, Brentwood, CA 94513
1132 Alban's Motor's 1210 Central Blvd, Brentwood, CA 94513
1133 A & H Motors 1660 Contra Costa Blvd, Pleasant Hill, CA 94523
1134 Calidad Motors Prominent Dr, Brentwood , 94513, United States, Brentwood, CA 94513
1135 RG Motors & Auto Body 1881 Ygnacio Valley Rd (San Carlos), Walnut Creek, CA 94598
1136 Nemesis Motors 13

1673 Japanese Tea Garden at Central Park 429 Main St, Half Moon Bay, CA 94019
1674 Tunitas Creek Beach 421 Main St, Half Moon Bay, CA 94019
1675 Bluff Trail 527 Main St, Half Moon Bay, CA 94019
1676 Cypress Trail 235 Main St (btwn San Mateo Rd & Stone Pine Rd), Half Moon Bay, CA 94019
1677 Fitzgerald Marine Reserve 300 Main St, Half Moon Bay, CA 94019
1678 Stulsaft Park 300 Main St, Half Moon Bay, CA 94019
1679 Stafford Park 448 Main St (at Kelly St), Half Moon Bay, CA 94019
1680 Montara Mountain Middle Peak 604 Main St (at Miramontes St), Half Moon Bay, CA 94019
1681 Wunderlich County Park 645 Kelly St, Half Moon Bay, CA 94019
1682 Washington Park 711 Main St, Half Moon Bay, CA 94019
1683 San Pedro Valley Park 604 Main St, Half Moon Bay, CA 94019
1684 Red Morton Community Park 210 Main St, Half Moon Bay, CA 94019
1685 Ray Park 845 Main St (at Monte Vista Ln), Half Moon Bay, CA 94019
1686 Edgewater Park 101 Main St, Half Moon Bay, CA 94019
1687 Montara State Beach 225 Cabrillo Hwy S, H

2272 Intrexon Corporation 3400 Las Positas Rd (1st Street), Livermore, CA 94551
2273 General Mortgage Capital Corporation 1188 S Livermore Ave, Livermore, CA 94550
2274 Cetacea Corporation 999 Loyola Way, Livermore, CA 94550
2275 Dealix Corporation 7000 East Ave, Livermore, CA 94550
2276 Sound Solutions Veterinary Service Greenville Road, Livermore, CA 94550
2277 Tri Audio Sound Car Audio Specialists 11840 Dublin Blvd, Dublin, CA 94568
2278 Sound Wave Mobile Electronics 7950 Dublin Blvd, Dublin, CA 94568
2279 CAE Sound 7011 Koll Center Pkwy, Pleasanton, CA 94566
2280 Sound Perfection 6700 Koll Center Pkwy, Pleasanton, CA 94566
2281 A Pure Sound 4725 1st St, Pleasanton, CA 94566
2282 Sound Perfection 34009 Alvarado Niles Rd, Union City, CA 94587
2283 Harry McCune Sound Service 25932 Eden Landing Rd, Hayward, CA 94545
2284 Future Sound 11501 Dublin Blvd (Suite 200), Dublin, CA 94568
2285 Light and Sound with ZERO1 Biennial 11501 Dublin Blvd, Dublin, CA 94568
2286 Auto Sound Performance 1

2849 Pallen M2 Martial Arts Saratoga, CA 95070
2850 Mach Martial Arts 2895 Zanker Rd, San Jose, CA 95134
2851 Hayward Martial Arts 675 Sycamore Dr, Milpitas, CA 95035
2852 Animation Dok 235 Midwick Dr, Milpitas, CA 95035
2853 Hexcel Corporation 1001 Schoolhouse Rd, San Jose, CA 95138
2854 Rockin Jump Corporation 6130 Monterey Hwy, San Jose, CA 95138
2855 Thompson Garcia, A Law Corporation 20 Great Oaks Blvd, San Jose, CA 95119
2856 Vie de France Corporation 303 Piercy Rd, San Jose, CA 95138
2857 IOT PAL Corporation 8102 Monterey Rd, San Jose, CA 95013
2858 Kamat Law Corporation 8215 Monterey Hwy, San Jose, CA 95139
2859 Litwin & Smith A Law Corporation 5645 Silver Creek Valley Rd, San Jose, CA 95138
2860 Neo Tech Spring Corporation 800 Embedded Way (at the Plex), San Jose, CA 95138
2861 Zhao Dental Corporation 5705 Cottle Rd, San Jose, CA 95123
2862 Law Offices of James J. Phillips, A Professional Corporation 5855 Cottle Rd, San Jose, CA 95123
2863 Garcia & Gurney, A Law Corporation 58

In [25]:
from collections import defaultdict

# Function to classify buildings
def classify_buildings(company_locations, f_coordinates):
    
    classified_buildings = {}

    for name, address in company_locations.items():
        found = False
        for key, value in classified_buildings.items():
            for location in value:
                if address.startswith(location.split('|')[1].strip()):
                    classified_buildings[key].append(name + ' | ' + address)
                    found = True
                    break
            if found:
                break
        if not found:
            classified_buildings[name] = [name + ' | ' + address]

    return classified_buildings

classified_buildings = classify_buildings(company_locations, f_coordinates)

# Printing the result
num = 0
for key, value in classified_buildings.items():
    print(num)
    print(f"Company/Campus: {key}")
    print("Buildings:")
    for building in value:
        print("\t" + building)
        num += 1
        
print(num)

0
Company/Campus: Rooftop School - Upper Campus
Buildings:
	Rooftop School - Upper Campus | 500 Corbett Ave, San Francisco, CA 94114
1
Company/Campus: Rooftop Elementary School
Buildings:
	Rooftop Elementary School | 443 Burnett Ave, San Francisco, CA 94131
2
Company/Campus: Psi Omega Housing tRust
Buildings:
	Psi Omega Housing tRust | 101 Woodland Ave (Parnassus), San Francisco, CA 94117
3
Company/Campus: UCSF Blood Draw Lab at Parnassus
Buildings:
	UCSF Blood Draw Lab at Parnassus | 400 Parnassus Ave, San Francisco, CA 94143
	UCSF Vascular Laboratory at Parnassus | 400 Parnassus Ave, San Francisco, CA 94143
	Luke Anthony MD-UCSF Orthopaedic Surg | 400 Parnassus Ave, San Francisco, CA 94143
	UCSF Screening & Acute Care | 400 Parnassus Ave, San Francisco, CA 94143
	UCSF Cardiac Electrophysiology and Arrhythmia Service | 400 Parnassus Ave, San Francisco, CA 94143
	UCSF Gastrointestinal Surgery at Parnassus | 400 Parnassus Ave, San Francisco, CA 94143
	UCSF Oculofacial Plastic, Reconstru

Buildings:
	The Apple Cider Press | Hagiwara Tea Garden Dr, San Francisco, CA 94122
463
Company/Campus: Apple Shuttle
Buildings:
	Apple Shuttle | 142 S Van Ness Ave, San Francisco, CA 94103
464
Company/Campus: Ava Apple & Rodolfo Guzman
Buildings:
	Ava Apple & Rodolfo Guzman | 672 S Van Ness Ave, San Francisco, CA 94110
465
Company/Campus: Apple Shuttle - 16th & South Van Ness
Buildings:
	Apple Shuttle - 16th & South Van Ness | 16TH St (South Van Ness), San Francisco, CA 94103
466
Company/Campus: Apple Bus - San Jose & 29th
Buildings:
	Apple Bus - San Jose & 29th | 300-302 30TH St (Church Street), San Francisco, CA 94131
467
Company/Campus: Apple Shuttle - Divisadero/Geary
Buildings:
	Apple Shuttle - Divisadero/Geary | Geary, San Francisco, CA 94115
468
Company/Campus: Green Apple Books
Buildings:
	Green Apple Books | 506 Clement St (at 6th Ave), San Francisco, CA 94118
469
Company/Campus: Apple MacBook Air Support service
Buildings:
	Apple MacBook Air Support service | 3700 California

	DVC Quad | 321 Golf Club Rd (Diablo Valley College), Pleasant Hill, CA 94523
	DVC Music Technology Lab | 321 Golf Club Rd (Diablo Valley College), Pleasant Hill, CA 94523
	DVC Advanced Technology Center | 321 Golf Club Rd (Diablo Valley College), Pleasant Hill, CA 94523
931
Company/Campus: Carrington College
Buildings:
	Carrington College | 5883 Rue Ferrari (Bernal Rd), San Jose, CA 95138
932
Company/Campus: DVC Parking Lot
Buildings:
	DVC Parking Lot | Pleasant Hill, CA 94523
933
Company/Campus: Diablo Valley College
Buildings:
	Diablo Valley College | 321 Golf Club Rd, Pleasant Hill, CA 94523
	Contra Costa Chinese School | 321 Golf Club Rd, Pleasant Hill, CA 94523
	DVC Film Club - Digital Media Lab | 321 Golf Club Rd, Pleasant Hill, CA 94523
936
Company/Campus: Alhambra High School
Buildings:
	Alhambra High School | 150 E St, Martinez, CA 94553
937
Company/Campus: Srvusd
Buildings:
	Srvusd | 699 Old Orchard Dr, Danville, CA 94526
938
Company/Campus: Live Oak Learning
Buildings:
	Liv

	Bloom Retreat | 1444 S Main St, Walnut Creek, CA 94596
1367
Company/Campus: Bloom Salon
Buildings:
	Bloom Salon | 225 Alamo Plz, Alamo, CA 94507
1368
Company/Campus: Trusource Energy
Buildings:
	Trusource Energy | 23 Railroad Ave, Danville, CA 94526
1369
Company/Campus: The Bloom Space
Buildings:
	The Bloom Space | 3391 Mt Diablo Blvd, Danville, CA 94549
1370
Company/Campus: Deborah Bloom Consultants
Buildings:
	Deborah Bloom Consultants | 1 Crow Canyon Ct, San Ramon, CA 94583
1371
Company/Campus: Snail City East Bay
Buildings:
	Snail City East Bay | 1 Marsh Creek, Clayton, CA 94517
1372
Company/Campus: City Of Clayton
Buildings:
	City Of Clayton | 6000 Heritage Trl, Clayton, CA 94517
1373
Company/Campus: Surf City Squeeze
Buildings:
	Surf City Squeeze | 2465 Stoneridge Mall Rd, Pleasanton, CA 94588
1374
Company/Campus: City Juice & Grub
Buildings:
	City Juice & Grub | 2550 Somersville Rd, Antioch, CA 94509
1375
Company/Campus: The City Church
Buildings:
	The City Church | 2400 Sycamo

Company/Campus: Health Hero Network(Bosch Healthcare)
Buildings:
	Health Hero Network(Bosch Healthcare) | 2400 Geng Rd (Embarcadero), Palo Alto, CA 94303
1826
Company/Campus: Palo Alto Golf Course
Buildings:
	Palo Alto Golf Course | 1875 Embarcadero Rd, Palo Alto, CA 94303
1827
Company/Campus: Guitar Hero
Buildings:
	Guitar Hero | 444 Castro St, Mountain View, CA 94041
1828
Company/Campus: Moffett Field Golf Course
Buildings:
	Moffett Field Golf Course | 1080 Lockhead Martin Way, Mountain View, CA 94089
1829
Company/Campus: Les Petit Chefs Academy
Buildings:
	Les Petit Chefs Academy | 70 Crown Ct, San Mateo, CA 94402
1830
Company/Campus: Tzu Chi Academy
Buildings:
	Tzu Chi Academy | 2675 Ralston Ave, Belmont, CA 94002
1831
Company/Campus: Wonderland Academy
Buildings:
	Wonderland Academy | 3470 Glendora Dr (Cherrywood Drive), San Mateo, CA 94403
1832
Company/Campus: Fusion Academy San Mateo
Buildings:
	Fusion Academy San Mateo | 2000 Alameda De Las Pulgas, San Mateo, CA 94403
1833
Comp

Buildings:
	Hayward BART Garage | 699 B St (at Montgomery), Hayward, CA 94541
2267
Company/Campus: Ac Transit Bus #28, Hayward Bart
Buildings:
	Ac Transit Bus #28, Hayward Bart | Hayward, CA 94541
2268
Company/Campus: Hayward BART Station
Buildings:
	Hayward BART Station | 699 B St (at Montgomery Ave), Hayward, CA 94541
2269
Company/Campus: Fremont BART Station
Buildings:
	Fremont BART Station | 2000 Bart Way (at Civic Center Dr), Fremont, CA 94538
2270
Company/Campus: Fremont Bart Station
Buildings:
	Fremont Bart Station | 300, Bart Way, Fremont, CA 94536
2271
Company/Campus: Warm Springs Bart Parking
Buildings:
	Warm Springs Bart Parking | Transit Rd., Fremont, CA 94539
2272
Company/Campus: BART to OAK Midway Station
Buildings:
	BART to OAK Midway Station | Hegenberger Rd (at Airport Access Rd), Oakland, CA 94603
2273
Company/Campus: BART to OAK Coliseum Station
Buildings:
	BART to OAK Coliseum Station | Oakland, CA 94621
2274
Company/Campus: Fruitvale BART Station
Buildings:
	Fruitv

Buildings:
	History San Jose | 1650 Senter Rd (at Phelan Ave), San Jose, CA 95112
2763
Company/Campus: Guadalupe Oak Grove Park
Buildings:
	Guadalupe Oak Grove Park | 5929-5931 Thorntree Dr (at Sterling Oaks Dr), San Jose, CA 95120
2764
Company/Campus: Kelley Park
Buildings:
	Kelley Park | 1300 Senter Rd (btwn Story Rd & Phelan Ave), San Jose, CA 95112
2765
Company/Campus: Jeffrey Fontana Park
Buildings:
	Jeffrey Fontana Park | Golden Oak Way, San Jose, CA 95120
2766
Company/Campus: Japanese Friendship Garden Regional Park
Buildings:
	Japanese Friendship Garden Regional Park | 1300 Senter Rd, San Jose, CA 95112
2767
Company/Campus: Willow Glenish
Buildings:
	Willow Glenish | San Jose, CA 95125
2768
Company/Campus: Emma Prusch Farm Park
Buildings:
	Emma Prusch Farm Park | 647 S King Rd (S King and 680), San Jose, CA 95116
2769
Company/Campus: Happy Hollow Park & Zoo
Buildings:
	Happy Hollow Park & Zoo | 1300 Senter Rd (at Story Rd), San Jose, CA 95112
2770
Company/Campus: Almaden Meadow

	Gateway City Church | 5883 Eden Park Pl, San Jose, CA 95138
3193
Company/Campus: Party City
Buildings:
	Party City | 1986 Tully Rd, San Jose, CA 95122
3194
Company/Campus: City of San Jose Senior Centers Programs
Buildings:
	City of San Jose Senior Centers Programs | 5585 Cottle Rd, San Jose, CA 95123
3195
Company/Campus: Halloween City
Buildings:
	Halloween City | 4095 Evergreen Village Sq, San Jose, CA 95135
3196
Company/Campus: Notary In San Jose City San Jose
Buildings:
	Notary In San Jose City San Jose | 3315 San Felipe Rd, San Jose, CA 95135
3197
Company/Campus: City Of San Jose - Employee Health Services
Buildings:
	City Of San Jose - Employee Health Services | 393 Blossom Hill Rd, San Jose, CA 95123
3198
Company/Campus: Solari Park
Buildings:
	Solari Park | 3590 Cas Dr (at Los Arboles Ave), San Jose, CA 95111
3199
Company/Campus: City101 CCTV and Surveillance Installation
Buildings:
	City101 CCTV and Surveillance Installation | 2690 S White Rd, San Jose, CA 95148
3200
Company/

In [26]:
len(f_formatted_address), len(classified_buildings.keys())

(3689, 3005)

We can see that we managed to simplify ourpoints down significantly. Furthermore, we want to ensure we still have BART Stations in our data set.

In [27]:
subs = 'BART Station'
 
res = set(list(filter(lambda x: subs in x, sorted(list(set(classified_buildings.keys()))))))
 
# Printing the resultant string
print("All strings with given substring are : " + str(res))

All strings with given substring are : {'West Dublin/Pleasanton BART Station', 'Montgomery St BART Station', 'San Bruno BART Station', 'Fremont BART Station', 'Santana Row BART Station', 'Lafayette BART Station', 'Lake Merritt BART Station', 'Oak Grove BART Station', 'South Hayward BART Station', 'Milpitas BART Station', 'Balboa Park BART Station', 'Dublin/Pleasanton BART Station', 'Samtrans Bus Stop Colma BART Station', 'Union City BART Station', 'Fruitvale BART Station', 'San Antonio BART Station', 'Glen Park BART Station', 'Hayward BART Station'}


So we want to use only the points returned after coupling nearby properties together by address

In [28]:
# Declare HashTable
HT = {}

# Declare the decoupled_lists

d_names = []
d_coordinates = []
d_county = []
d_formatted_address = []

for key, value in classified_buildings.items():
    
#     print(value[0].split(" | "))
        
    try:
        i = f_names.index(value[0].split(" | ")[0])

        d_names.append(value[0].split(" | ")[0])
        d_formatted_address.append(value[0].split(" | ")[1])

        d_coordinates.append(f_coordinates[i])
        d_county.append(f_county[i])

    except:
        pass

# All lists have the same length 
print(len(d_names) == len(d_coordinates) == len(d_county) == len(d_formatted_address))

# The new number of points
print(len(d_names))

True
2993


In [29]:
subs = 'BART Station'
 
res = set(list(filter(lambda x: subs in x, d_names)))
 
# Printing the resultant string
print("All strings with given substring are : " + str(res))

All strings with given substring are : {'West Dublin/Pleasanton BART Station', 'Montgomery St BART Station', 'San Antonio BART Station', 'Fremont BART Station', 'Santana Row BART Station', 'Lafayette BART Station', 'Lake Merritt BART Station', 'Oak Grove BART Station', 'South Hayward BART Station', 'Milpitas BART Station', 'Balboa Park BART Station', 'Dublin/Pleasanton BART Station', 'Samtrans Bus Stop Colma BART Station', 'Union City BART Station', 'Fruitvale BART Station', 'San Bruno BART Station', 'Glen Park BART Station', 'Hayward BART Station'}


Next we need to generate population estimates. To keep things simple we can just assign some random values

In [30]:
# Verify by hand that coordinates and addresses link to the corresponding name

for i, addr in enumerate(d_formatted_address):
    print(addr, d_coordinates[i])

500 Corbett Ave, San Francisco, CA 94114 [37.75751, -122.444699]
443 Burnett Ave, San Francisco, CA 94131 [37.754811, -122.44325]
101 Woodland Ave (Parnassus), San Francisco, CA 94117 [37.762367, -122.453371]
400 Parnassus Ave, San Francisco, CA 94143 [37.764114, -122.457416]
2 Koret Way, San Francisco, CA 94143 [37.762525, -122.458969]
500 Parnassus Ave, San Francisco, CA 94143 [37.763804, -122.459332]
1250 Waller St, San Francisco, CA 94117 [37.769761, -122.444279]
3371 16th St (Church), San Francisco, CA 94114 [37.764214, -122.4284]
1500 Grove St (Baker), San Francisco, CA 94117 [37.775611, -122.441688]
2130 Fulton St (Cole St), San Francisco, CA 94117 [37.776554, -122.450272]
650 Parker Ave (at Fulton St), San Francisco, CA 94118 [37.775533, -122.452505]
299 Dolores St (16th), San Francisco, CA 94103 [37.764844, -122.426144]
2130 Fulton St (Stanyan), San Francisco, CA 94117 [37.776217, -122.451256]
2325 Golden Gate Ave, San Francisco, CA 94118 [37.776533, -122.448838]
2130 Fulton S

650 Townsend St (6A-34), San Francisco, CA 94103 [37.770984, -122.403499]
650 Townsend St (Townsend & 8th), San Francisco, CA 94103 [37.771358, -122.403306]
24 Willie Mays Plz, San Francisco, CA 94107 [37.778723, -122.389833]
2700 E Leland Rd, Pittsburg, CA 94565 [38.005483, -121.860486]
4700 Ygnacio Valley Rd, Concord, CA 94521 [37.944416, -121.977226]
180 Cerezo Dr (at Wiget Ln), Walnut Creek, CA 94598 [37.918614, -122.02013]
100 Ellinwood Way, Pleasant Hill, CA 94523 [37.974692, -122.027201]
2100 Mount Diablo Scenic Blvd (Diablo Rd/Blackhawk Rd), Danville, CA 94506 [37.833806, -121.950218]
101 Sand Creek Rd, Brentwood, CA 94513 [37.948262, -121.69784]
929 2nd St, Brentwood, CA 94513 [37.935558, -121.696309]
321 Golf Club Rd (Diablo Valley College), Pleasant Hill, CA 94523 [37.968065, -122.069798]
5883 Rue Ferrari (Bernal Rd), San Jose, CA 95138 [37.962608, -122.072083]
Pleasant Hill, CA 94523 [37.968988, -122.070569]
321 Golf Club Rd, Pleasant Hill, CA 94523 [37.968652, -122.070695]

1040 Grant Rd, Mountain View, CA 94040 [37.379417, -122.075466]
951 Old County Rd, Belmont, CA 94002 [37.521549, -122.275609]
306 6th Ave, San Mateo, CA 94401 [37.563492, -122.31968]
345 Shoreway Rd, San Carlos, CA 94070 [37.516411, -122.257567]
2075 S Norfolk St, San Mateo, CA 94403 [37.553147, -122.290023]
201 Redwood Shores Pkwy, Redwood City, CA 94065 [37.520339, -122.25407]
1445 Rollins Rd, Burlingame, CA 94010 [37.59215, -122.369299]
950 Tower Ln, Foster City, CA 94404 [37.558261, -122.27636]
2490 Charleston Rd, Mountain View, CA 94043 [37.493607, -122.214471]
3700 Haven Ct, Menlo Park, CA 94025 [37.484626, -122.181755]
Tanforan Mall, San Bruno, CA 94066 [37.636683, -122.417862]
348 Via Pueblo Mall, Stanford, CA 94305 [37.428794, -122.174094]
William F Durand Building (496 Lomita Mall), Stanford, CA 94305 [37.426888, -122.173271]
466 Lomita Mall, Stanford, CA 94305 [37.42774, -122.172818]
1003 Elwell Ct, Palo Alto, CA 94303 [37.431955, -122.104148]
1245 Space Park Way (Geneva), S

3095 Yerba Buena Rd (San Felipe Rd), San Jose, CA 95135 [37.30102, -121.769518]
5629 Lean Ave (Blossom Hiil), San Jose, CA 95123 [37.250705, -121.817767]
San Jose, CA 95121 [37.307522, -121.798148]
3703 Silver Creek Rd, San Jose, CA 95121 [37.301966, -121.806662]
Welch Park Community Building Kenesta Way and Clarice Dr (at Kanesta Way), San Jose, CA 95122 [37.325707, -121.823458]
4848 Pearl Ave (Kozera), San Jose, CA 95136 [37.262013, -121.862447]
4849 Pearl Ave (Minas Dr), San Jose, CA 95136 [37.261522, -121.865212]
645 Wool Creek Dr, San Jose, CA 95112 [37.314756, -121.85073]
1879 Senter Rd (Story), San Jose, CA 95112 [37.314633, -121.856665]
1250 N 10th St (at Humboldt St), San Jose, CA 95112 [37.323462, -121.864582]
1257 S 10TH St (at E Alma Ave), San Jose, CA 95112 [37.319751, -121.868321]
1200 S 7TH St (at Humboldt St), San Jose, CA 95112 [37.320206, -121.870708]
1582 Willowgate Dr, San Jose, CA 95118 [37.263679, -121.900169]
360 E Reed St, San Jose, CA 95112 [37.330609, -121.875

In [31]:
rng = np.random.default_rng()

gen_population = list(rng.integers(low=100, high=3000, size=len(d_names)))

len(gen_population)

2993

Finally we need to calculate the distances between points in the same counties

To keep our maps fairly simple, we will only find the 3 closest nodes from each node and store their distances respectively.

In [32]:
# Initialze the various maps we will need ahead of time to save on computing

maps = {}

for county in set(d_county):
    maps[county] = ox.graph_from_place(f"{county}, California, USA", network_type="drive")
    
print("Done!")

Done!


In [33]:
# Separate entries by county

len(d_names) == len(d_coordinates) == len(d_county) == len(d_formatted_address)


True

In [34]:
# We want to exclude indices mapping to BART Stations from the county indices

BART_indicies = []
BART_Names = list(res)

for name in BART_Names:
    ind = d_names.index(name)
    BART_indicies.append(ind)

county_indices = {}

for i, county in enumerate(d_county):
    
    if county not in county_indices:
        if i not in BART_indicies:
            county_indices[county] = [i]
    else:
        if i not in BART_indicies:
            county_indices[county].append(i)

subset_ind = {}
limit = 20

random.seed(1)

for county in set(d_county):
    sequence = county_indices[county]
    subset = random.sample(sequence, limit)
    subset_ind[county] = subset

print(subset_ind)

{'San Mateo': [1476, 1921, 1402, 1600, 1459, 1846, 1799, 1822, 1727, 1553, 1434, 1838, 1367, 1738, 1782, 1340, 1795, 1611, 1573, 1442], 'Contra Costa': [1136, 838, 829, 833, 816, 1201, 1032, 1243, 836, 1038, 1259, 1318, 1049, 1164, 1047, 1035, 1281, 1107, 1237, 910], 'Alameda': [2023, 2256, 2304, 2374, 2085, 1988, 2314, 2104, 2303, 2298, 2190, 2150, 2193, 2358, 2277, 2026, 2089, 2079, 2234, 2385], 'Santa Clara': [2904, 2910, 2795, 2426, 2884, 2641, 2806, 2817, 2570, 2768, 2954, 2776, 2479, 2842, 2913, 2501, 2560, 2926, 2772, 2894], 'San Francisco': [754, 30, 484, 44, 319, 724, 633, 611, 596, 407, 666, 178, 176, 518, 236, 12, 793, 208, 556, 565]}


In [35]:
list(subset_ind.keys())

['San Mateo', 'Contra Costa', 'Alameda', 'Santa Clara', 'San Francisco']

In [36]:
# Declare arrays to keep track of each other node's closest 3 nodes and those distances

# def get_rand_location(county):
#     sequence = county_indices[county]
#     subset = random.sample(sequence, 1)
    
#     return subset[0]

final__names = []
final_coordinates = [] 
final_county = []
final_formatted_address = []
final_estimated_population = []
n_nearest_nodes = []
n_nearest_distances = []
n_nearest_counties = []

for k, county in enumerate(list(subset_ind.keys())):

    for i, origin in enumerate([ d_coordinates[index] for index in subset_ind[county] ]):

        temp_nearest_nodes = [[-360, -360],[-360, -360],[-360, -360]]
        temp_nearest_distances = [1000000, 1000000, 1000000]

        G = maps[county]

        for j, dest in enumerate([ d_coordinates[index] for index in subset_ind[county] ]):

            if i != j and d_county[i] == d_county[j]:
                
                try:
                    origin_coordinates = tuple(origin)
                    destination_coordinates = tuple(dest)
                    route = tc.distance.shortest_path(G, origin_coordinates, destination_coordinates)

                    distance = route[0]
                    
                    if distance != 0 and distance not in temp_nearest_distances:
                        
                        temp_nearest_distances.append(distance)
                        temp_nearest_nodes.append(dest)

                        ind = temp_nearest_distances.index(max(temp_nearest_distances))

                        del temp_nearest_distances[ind]
                        del temp_nearest_nodes[ind]
                
                except:
                    pass

            clear_output(wait=True)
            print(f"{k + 1} out of {len(list(subset_ind.keys()))} counties | node {i + 1} | {j + 1}/{len([ d_coordinates[index] for index in subset_ind[county] ])} of  distances calculated")
        
        final__names.append(d_names[subset_ind[county][i]])
        final_coordinates.append(d_coordinates[subset_ind[county][i]])
        final_county.append(d_county[subset_ind[county][i]])
        final_formatted_address.append(d_formatted_address[subset_ind[county][i]])
        final_estimated_population.append(gen_population[subset_ind[county][i]])
        n_nearest_nodes.append(temp_nearest_nodes)
        n_nearest_distances.append(temp_nearest_distances)
        n_nearest_counties.append(county)


5 out of 5 counties | node 20 | 20/20 of  distances calculated


In [37]:
for i in range(len(n_nearest_distances)):
    print(n_nearest_counties[i], n_nearest_distances[i], n_nearest_nodes[i])

San Mateo [5629.15538222503, 7902.1358544515415, 1246.20950334946] [[37.478609, -122.291883], [37.444031, -122.161216], [37.491431, -122.228803]]
San Mateo [21589.095445015417, 21269.789076903635, 15705.57337174754] [[37.459552, -122.429672], [37.462736, -122.429145], [37.633669, -122.397713]]
San Mateo [5606.49938222503, 8101.156342041771, 6747.760636868368] [[37.485814, -122.238715], [37.513253, -122.33258], [37.491431, -122.228803]]
San Mateo [4426.607702464451, 2830.9154944478664, 5630.481052128238] [[37.513253, -122.33258], [37.535644, -122.334809], [37.545193, -122.306168]]
San Mateo [4426.607702464451, 3428.965860549619, 5643.7344182299885] [[37.534532, -122.331311], [37.535644, -122.334809], [37.545193, -122.306168]]
San Mateo [19349.513763573137, 15095.695129674887, 18113.669921658304] [[37.534532, -122.331311], [37.513253, -122.33258], [37.535644, -122.334809]]
San Mateo [10.495742806963278, 168.18913838896188, 6394.392144658149] [[37.411139, -122.12431], [37.423319, -122.097

In [38]:
len(final__names) == len(final_coordinates) == len(final_county) == len(final_formatted_address) == len(final_estimated_population) == len(n_nearest_nodes) == len(n_nearest_distances) == len(n_nearest_counties)

True

In [39]:
n_nearest_nodes[0]

[[37.478609, -122.291883], [37.444031, -122.161216], [37.491431, -122.228803]]

In [40]:
# Save dataset as a csv file

with open('locations_project.csv', 'w', newline='') as file:
    writer = csv.writer(file)
    
    field = ['name', 'county', 'coordinates', 'esti. pop.', 'nearest node one', 'nearest node 2', 'nearest node 3', 'distance 1', 'distance 2', 'distance 3']

    writer.writerow(field)
    
    for i in range(len(final__names)):
        writer.writerow([ final__names[i], final_county[i], tuple(final_coordinates[i]), final_estimated_population[i], tuple(n_nearest_nodes[i][0]), tuple(n_nearest_nodes[i][1]), tuple(n_nearest_nodes[i][2]), n_nearest_distances[i][0], n_nearest_distances[i][1], n_nearest_distances[i][2] ])
    
    

We also need to create a csv file of all BART stations

In [43]:
# Save dataset as a csv file

with open('BART_Stations_project.csv', 'w', newline='') as file:
    writer = csv.writer(file)
    
    field = ['name', 'county', 'coordinates', 'esti. pop.']

    writer.writerow(field)
    
    for i in range(len(BART_Names)):
        writer.writerow([ BART_Names[i], d_county[BART_indicies[i]], tuple(d_coordinates[BART_indicies[i]]), gen_population[BART_indicies[i]] ])
   