# BART Map

![Bart Map](bart_map.png)

In [18]:
import math
import os

import numpy as np
import pandas as pd

import psycopg2

from geographiclib.geodesic import Geodesic
import gmaps
import gmaps.datasets

In [2]:
def my_calculate_box(point, miles):
    """Return four points lying `miles` away from `point` along fixed azimuths.

    `point` is indexed as (latitude, longitude). Each result is a
    (lat2, lon2) pair taken from Geodesic.WGS84.Direct, and the tuple is
    ordered (left, right, top, bottom) for azimuths 270, 90, 0 and 180.
    """

    origin_lat, origin_lon = point[0], point[1]

    # miles -> kilometers -> meters, multiplied in that order
    distance_m = miles * 1.60934 * 1000

    ellipsoid = Geodesic.WGS84

    def _travel(azimuth):
        # solve the direct geodesic problem and keep only the end point
        solved = ellipsoid.Direct(origin_lat, origin_lon, azimuth, distance_m)
        return (solved['lat2'], solved['lon2'])

    # order matters: left, right, top, bottom
    return tuple(_travel(azimuth) for azimuth in (270, 90, 0, 180))

In [3]:
def my_station_get_zips(station, miles):
    """Print the zip codes in a box around a station and sum their population.

    Looks `station` up in the `stations` table, builds a box reaching `miles`
    from it with my_calculate_box, then prints every `zip_codes` row whose
    latitude/longitude lies inside the box, followed by the population total.

    Uses the notebook-level `connection` and `cursor`.

    Args:
        station: station name, compared for equality with `stations.station`.
        miles: distance passed to my_calculate_box for each side of the box.

    Raises:
        ValueError: if no row in `stations` matches `station`.
    """

    connection.rollback()

    # Bind the name as a parameter: a quote inside it can no longer break or alter the SQL.
    cursor.execute(
        "select latitude, longitude from stations where station = %s",
        (station,),
    )

    # Fetch before rolling back so the result never depends on cursor state after rollback.
    station_rows = cursor.fetchall()
    connection.rollback()

    if not station_rows:
        raise ValueError("station not found in stations table: " + repr(station))

    # If several rows match, the last one is used.
    latitude, longitude = station_rows[-1][0], station_rows[-1][1]

    (left, right, top, bottom) = my_calculate_box((latitude, longitude), miles)

    query = "select zip, population from zip_codes "
    query += " where latitude >= %s"
    query += " and latitude <= %s"
    query += " and longitude >= %s"
    query += " and longitude <= %s"
    query += " order by 1 "

    cursor.execute(query, (bottom[0], top[0], left[1], right[1]))
    zip_rows = cursor.fetchall()
    connection.rollback()

    print("\n-------------------------------------------------------------------------------")
    print("  Zip Codes within " + str(miles) + " mile(s) of " + station + " BART Station")
    print("-------------------------------------------------------------------------------\n")

    total_population = 0

    # `zip_code` rather than `zip`, which would shadow the builtin
    for zip_code, population in ((row[0], row[1]) for row in zip_rows):
        print("     zip:", zip_code, "  population: ", f'{population:10,}')
        total_population += population

    print("\n-------------------------------------------------------------------------------")
    print("  Total Population: ", f'{total_population:10,}')
    print("-------------------------------------------------------------------------------")

In [4]:
# Connection settings are read from the libpq-style PG* environment variables
# when they are set, so credentials need not live in the notebook.
# The fallbacks keep the values this notebook has always used.
connection = psycopg2.connect(
    user=os.environ.get("PGUSER", "postgres"),
    password=os.environ.get("PGPASSWORD", "ucb"),
    host=os.environ.get("PGHOST", "postgres"),
    port=os.environ.get("PGPORT", "5432"),
    database=os.environ.get("PGDATABASE", "postgres"),
)

In [5]:
# Notebook-wide cursor: my_station_get_zips and the drop/create table cells execute through it
cursor = connection.cursor()

In [6]:
#
# function to run a select query and return rows in a pandas dataframe
# pandas puts all numeric values from postgres to float
# if it will fit in an integer, change it to integer
#

def my_select_query_pandas(query, rollback_before_flag, rollback_after_flag):
    """Run a select query on the notebook `connection` and return a DataFrame.

    Any float64 column whose non-null values are all finite whole numbers is
    converted to pandas' nullable 'Int64' dtype; nulls become <NA>. Columns
    holding a fractional or infinite value are left as float64.

    Args:
        query: SQL text handed to pd.read_sql_query.
        rollback_before_flag: when truthy, call connection.rollback() before the query.
        rollback_after_flag: when truthy, call connection.rollback() after the query.

    Returns:
        The query result as a pandas DataFrame.
    """

    if rollback_before_flag:
        connection.rollback()

    df = pd.read_sql_query(query, connection)

    if rollback_after_flag:
        connection.rollback()

    # fix the float columns that really should be integers
    for column in df:
        values = df[column]
        if values.dtype != "float64":
            continue

        present = values.dropna()

        # Vectorised check instead of a per-value Python loop. Testing isfinite
        # first means +/-inf marks the column as non-integral instead of
        # blowing up inside a floor-to-int conversion.
        if np.isfinite(present).all() and (present == np.floor(present)).all():
            df[column] = values.astype('Int64')

    return df
    

# Table customers

In [7]:
# Roll back on both sides of the read (presumably to clear any open or aborted transaction)
rollback_before_flag = True
rollback_after_flag = True

# Full dump of the customers table; the result is displayed as the cell output
query = """

select *
from customers

"""

my_select_query_pandas(query, rollback_before_flag, rollback_after_flag)

Unnamed: 0,customer_id,first_name,last_name,street,city,state,zip,closest_store_id,distance
0,1,Robb,Weaving,5 Ramsey Place,Oakland,CA,94609,1,1
1,2,Robby,Belliard,6 Londonderry Plaza,Oakland,CA,94609,1,1
2,3,Sadella,Caudrelier,548 Mcguire Parkway,Oakland,CA,94609,1,1
3,4,Holmes,Shimmings,99 Kennedy Court,Oakland,CA,94609,1,1
4,5,Beverley,Gubbin,51 Mcbride Drive,Oakland,CA,94609,1,1
...,...,...,...,...,...,...,...,...,...
31077,31078,Hugo,Domeney,529 5th Plaza,Thompsons Station,TN,37179,5,25
31078,31079,Glenn,Putson,1347 Westend Crossing,Thompsons Station,TN,37179,5,25
31079,31080,Minnie,Antham,9 Judy Place,Thompsons Station,TN,37179,5,25
31080,31081,Linet,Djorvic,29 Trailsway Drive,Thompsons Station,TN,37179,5,25


# Count the number of customers in each zip code

In [8]:
# Roll back on both sides of the read (presumably to clear any open or aborted transaction)
rollback_before_flag = True
rollback_after_flag = True

# One row per zip with the number of customer rows in it
query = """

select zip, count(*)
from customers 
group by zip

"""

my_select_query_pandas(query, rollback_before_flag, rollback_after_flag)

Unnamed: 0,zip,count
0,37062,8
1,94066,9
2,75023,7
3,33131,115
4,76180,1
...,...,...
545,33168,61
546,76012,1
547,33128,76
548,94591,5


# join customer table with zip_codes table

In [9]:
# Roll back on both sides of the read (presumably to clear any open or aborted transaction)
rollback_before_flag = True
rollback_after_flag = True

# Customer count per zip with that zip's coordinates from zip_codes.
# "group by 1,3,4" refers to select-list positions: zip, latitude, longitude.
# The inner join drops customers whose zip has no row in zip_codes.
query = """

select a.zip, count(a.customer_id), b.latitude, b.longitude
from customers a
     join zip_codes b
       on a.zip = b.zip
group by 1,3,4
"""

my_select_query_pandas(query, rollback_before_flag, rollback_after_flag)

Unnamed: 0,zip,count,latitude,longitude
0,94065,6,37.5354,-122.2467
1,94590,1,38.1030,-122.2486
2,98105,177,47.6607,-122.2840
3,75042,76,32.9139,-96.6749
4,75203,173,32.7463,-96.8030
...,...,...,...,...
545,94588,1,37.7375,-121.8818
546,75032,7,32.8550,-96.4276
547,33055,47,25.9482,-80.2780
548,94801,58,37.9496,-122.3811


# Find all zip codes, with population, within 1 mile of the Downtown Berkeley station

In [10]:
my_station_get_zips('Downtown Berkeley', 1)


-------------------------------------------------------------------------------
  Zip Codes within 1 mile(s) of Downtown Berkeley BART Station
-------------------------------------------------------------------------------

     zip: 94702   population:      17,092
     zip: 94703   population:      21,937
     zip: 94704   population:      29,190
     zip: 94709   population:      11,740
     zip: 94720   population:       2,971

-------------------------------------------------------------------------------
  Total Population:      82,930
-------------------------------------------------------------------------------


# Double-check that the population for zip 94702 matches the value printed above

In [11]:
# Roll back on both sides of the read (presumably to clear any open or aborted transaction)
rollback_before_flag = True
rollback_after_flag = True

# Sum population over every zip_codes row for 94702; zip is compared as a string literal
query = """

select zip, sum(population)
from zip_codes
where zip = '94702'
group by zip
"""

my_select_query_pandas(query, rollback_before_flag, rollback_after_flag)

Unnamed: 0,zip,sum
0,94702,17092


# drop zip_count_customer table if it exists

In [12]:
# Start from a clean transaction before issuing DDL
connection.rollback()

# "if exists" makes the cell safe to re-run whether or not the table is present
query = """

drop table if exists zip_count_customer;

"""

cursor.execute(query)

# Persist the drop; without the commit a later rollback() would undo it
connection.commit()

# create zip_count_customer table (join customer table with zip_codes table)

In [13]:
# Start from a clean transaction before issuing DDL
connection.rollback()

# Materialise the per-zip customer count (customer_number) with coordinates from zip_codes.
# "group by 1,3,4" refers to select-list positions: zip, latitude, longitude.
# There is no "if not exists": this fails if zip_count_customer is already present.
query = """

create table zip_count_customer 

as select a.zip, count(a.customer_id) as customer_number, b.latitude, b.longitude
from customers a
     join zip_codes b
       on a.zip = b.zip
group by 1,3,4

"""

cursor.execute(query)

# Persist the new table; without the commit a later rollback() would undo it
connection.commit()

# Verify the zip_count_customer table loaded correctly

In [14]:
# Roll back on both sides of the read (presumably to clear any open or aborted transaction)
rollback_before_flag = True
rollback_after_flag = True

# Read back the whole zip_count_customer table to inspect what was created
query = """

select *
from zip_count_customer
"""

my_select_query_pandas(query, rollback_before_flag, rollback_after_flag)

Unnamed: 0,zip,customer_number,latitude,longitude
0,94065,6,37.5354,-122.2467
1,94590,1,38.1030,-122.2486
2,98105,177,47.6607,-122.2840
3,75042,76,32.9139,-96.6749
4,75203,173,32.7463,-96.8030
...,...,...,...,...
545,94588,1,37.7375,-121.8818
546,75032,7,32.8550,-96.4276
547,33055,47,25.9482,-80.2780
548,94801,58,37.9496,-122.3811


# create function to get zip codes around a station within miles and total customer number

In [15]:
def my_station_get_zips(station, miles):
    """Print the zip codes in a box around a station and sum their customer_number.

    Looks `station` up in the `stations` table, builds a box reaching `miles`
    from it with my_calculate_box, then prints every `zip_count_customer` row
    whose latitude/longitude lies inside the box, followed by the total.

    Uses the notebook-level `connection` and `cursor`.

    Args:
        station: station name, compared for equality with `stations.station`.
        miles: distance passed to my_calculate_box for each side of the box.

    Raises:
        ValueError: if no row in `stations` matches `station`.
    """

    connection.rollback()

    # Bind the name as a parameter: a quote inside it can no longer break or alter the SQL.
    cursor.execute(
        "select latitude, longitude from stations where station = %s",
        (station,),
    )

    # Fetch before rolling back so the result never depends on cursor state after rollback.
    station_rows = cursor.fetchall()
    connection.rollback()

    if not station_rows:
        raise ValueError("station not found in stations table: " + repr(station))

    # If several rows match, the last one is used.
    latitude, longitude = station_rows[-1][0], station_rows[-1][1]

    (left, right, top, bottom) = my_calculate_box((latitude, longitude), miles)

    query = "select zip, customer_number from zip_count_customer "
    query += " where latitude >= %s"
    query += " and latitude <= %s"
    query += " and longitude >= %s"
    query += " and longitude <= %s"
    query += " order by 1 "

    cursor.execute(query, (bottom[0], top[0], left[1], right[1]))
    zip_rows = cursor.fetchall()
    connection.rollback()

    print("\n-------------------------------------------------------------------------------")
    print("  Zip Codes within " + str(miles) + " mile(s) of " + station + " BART Station")
    print("-------------------------------------------------------------------------------\n")

    total_customer_number = 0

    # `zip_code` rather than `zip`, which would shadow the builtin
    for zip_code, customer_number in ((row[0], row[1]) for row in zip_rows):
        print("     zip:", zip_code, "  customer_number: ", f'{customer_number:10,}')
        total_customer_number += customer_number

    print("\n-------------------------------------------------------------------------------")
    print("  Total customer_number: ", f'{total_customer_number:10,}')
    print("-------------------------------------------------------------------------------")

# Read the stations table into a DataFrame

In [16]:
# Load stations.csv (path relative to the notebook's working directory);
# the loop over df['station'] further down reads station names from this frame
df = pd.read_csv ('stations.csv')
df

Unnamed: 0,station,latitude,longitude,transfer_time
0,12th Street,37.803608,-122.272006,282
1,16th Street Mission,37.764847,-122.420042,287
2,19th Street,37.807869,-122.26898,67
3,24th Street Mission,37.752,-122.4187,277
4,Antioch,37.996281,-121.783404,0
5,Ashby,37.853068,-122.269957,299
6,Balboa Park,37.721667,-122.4475,48
7,Bay Fair,37.697,-122.1265,63
8,Berryessa,37.368361,-121.874655,288
9,Castro Valley,37.690748,-122.075679,0


# get zip codes around all stations within 1 mile and total customer number

In [17]:
for station_name in df['station']:
    # one printed report per station, using a 1-mile box
    my_station_get_zips(station_name, 1)
    


-------------------------------------------------------------------------------
  Zip Codes within 1 mile(s) of 12th Street BART Station
-------------------------------------------------------------------------------

     zip: 94612   customer_number:         161

-------------------------------------------------------------------------------
  Total customer_number:         161
-------------------------------------------------------------------------------

-------------------------------------------------------------------------------
  Zip Codes within 1 mile(s) of 16th Street Mission BART Station
-------------------------------------------------------------------------------

     zip: 94103   customer_number:          77
     zip: 94114   customer_number:          58

-------------------------------------------------------------------------------
  Total customer_number:         135
-------------------------------------------------------------------------------

----------------

  Total customer_number:         103
-------------------------------------------------------------------------------

-------------------------------------------------------------------------------
  Zip Codes within 1 mile(s) of Hayward BART Station
-------------------------------------------------------------------------------

     zip: 94541   customer_number:          10

-------------------------------------------------------------------------------
  Total customer_number:          10
-------------------------------------------------------------------------------

-------------------------------------------------------------------------------
  Zip Codes within 1 mile(s) of Lafayette BART Station
-------------------------------------------------------------------------------

     zip: 94549   customer_number:         129

-------------------------------------------------------------------------------
  Total customer_number:         129
-----------------------------------------

In [19]:
# FIXME(security): this API key is committed in the notebook. Rotate it and
# remove the fallback; GOOGLE_MAPS_API_KEY in the environment takes precedence.
gmaps.configure(
    api_key=os.environ.get('GOOGLE_MAPS_API_KEY', 'AIzaSyDN7R9rXpUPFX1aakqfuMqMf5RYviNmSj0')
)
