# BART Map

![Bart Map](bart_map.png)

In [1]:
import csv

import math
import numpy as np
import pandas as pd

import psycopg2

In [2]:
#
# function to run a select query and return rows in a pandas dataframe
# pandas puts all numeric values from postgres to float
# if it will fit in an integer, change it to integer
#

def my_select_query_pandas(query, rollback_before_flag, rollback_after_flag):
    "function to run a select query and return rows in a pandas dataframe"
    
    if rollback_before_flag:
        connection.rollback()
    
    df = pd.read_sql_query(query, connection)
    
    if rollback_after_flag:
        connection.rollback()
    
    # fix the float columns that really should be integers
    
    for column in df:
    
        if df[column].dtype == "float64":

            fraction_flag = False

            for value in df[column].values:
                
                if not np.isnan(value):
                    if value - math.floor(value) != 0:
                        fraction_flag = True

            if not fraction_flag:
                df[column] = df[column].astype('Int64')
    
    return(df)
    

In [3]:
connection = psycopg2.connect(
    user = "postgres",
    password = "ucb",
    host = "postgres",
    port = "5432",
    database = "postgres"
)

In [4]:
cursor = connection.cursor()

In [5]:
def my_read_csv_file(file_name, limit):
    "read the csv file and print only the first limit rows"
    
    csv_file = open(file_name, "r")
    
    csv_data = csv.reader(csv_file)
    
    i = 0
    
    for row in csv_data:
        i += 1
        if i <= limit:
            print(row)
            
    print("\nPrinted ", min(limit, i), "lines of ", i, "total lines.")

# 1. Drop the stations table if it exists

The stations table should be named stations

In [6]:
connection.rollback()

query = """

drop table if exists stations;

"""

cursor.execute(query)

connection.commit()

# 2. Create the stations table

In [7]:
connection.rollback()

query = """

create table stations (
  station varchar(32),
  latitude numeric(9,6),
  longitude numeric(9,6),
  transfer_time numeric(3),
  primary key (station)
);

"""

cursor.execute(query)

connection.commit()

# 3. Display the file stations.csv

In [8]:
my_read_csv_file("stations.csv", limit=10)

['station', 'latitude', 'longitude', 'transfer_time']
['12th Street', '37.803608', '-122.272006', '282']
['16th Street Mission', '37.764847', '-122.420042', '287']
['19th Street', '37.807869', '-122.26898', '67']
['24th Street Mission', '37.752', '-122.4187', '277']
['Antioch', '37.996281', '-121.783404', '0']
['Ashby', '37.853068', '-122.269957', '299']
['Balboa Park', '37.721667', '-122.4475', '48']
['Bay Fair', '37.697', '-122.1265', '63']
['Berryessa', '37.368361', '-121.874655', '288']

Printed  10 lines of  51 total lines.


# 4. Load stations data into database table

In [9]:
connection.rollback()

query = """

copy stations
from '/user/projects/project-3-Shuo-Wang-UCBerkeley/exercise/solutions/stations.csv' delimiter ',' NULL '' csv header;

"""

cursor.execute(query)

connection.commit()

# 5. Verify the lines loaded correctly

In [10]:
rollback_before_flag = True
rollback_after_flag = True

query = """

select *
from stations
order by station

"""

my_select_query_pandas(query, rollback_before_flag, rollback_after_flag)

Unnamed: 0,station,latitude,longitude,transfer_time
0,12th Street,37.803608,-122.272006,282
1,16th Street Mission,37.764847,-122.420042,287
2,19th Street,37.807869,-122.26898,67
3,24th Street Mission,37.752,-122.4187,277
4,Antioch,37.996281,-121.783404,0
5,Ashby,37.853068,-122.269957,299
6,Balboa Park,37.721667,-122.4475,48
7,Bay Fair,37.697,-122.1265,63
8,Berryessa,37.368361,-121.874655,288
9,Castro Valley,37.690748,-122.075679,0


# 6. Display Zip_codes Table

In [18]:
rollback_before_flag = True
rollback_after_flag = True

query = """

select *
from zip_codes
group by zip
"""

my_select_query_pandas(query, rollback_before_flag, rollback_after_flag)

Unnamed: 0,zip,latitude,longitude,city,state,population,area,density,time_zone
0,45319,39.9161,-83.9434,Donnelsville,OH,378,0.4626,817.14,America/New_York
1,66067,38.6206,-95.2757,Ottawa,KS,15253,156.6280,97.38,America/Chicago
2,22209,38.8947,-77.0754,Arlington,VA,12705,0.6191,20520.48,America/New_York
3,72143,35.2289,-91.7356,Searcy,AR,35997,311.6259,115.51,America/Chicago
4,78407,27.8108,-97.4390,Corpus Christi,TX,3078,4.2128,730.64,America/Chicago
...,...,...,...,...,...,...,...,...,...
32718,97022,45.3460,-122.3304,Eagle Creek,OR,3770,25.0104,150.74,America/Los_Angeles
32719,25520,38.5610,-82.1774,Glenwood,WV,1883,47.5183,39.63,America/New_York
32720,71661,33.1490,-91.5471,Parkdale,AR,442,94.8095,4.66,America/Chicago
32721,01718,42.5197,-71.4294,Acton,MA,813,0.0805,10100.70,America/New_York


# 7. Add the zipcode for BART (combining with table zip_codes by latitude and longtitude)

In [12]:
rollback_before_flag = True
rollback_after_flag = True

query = """

select distinct b.zip
from stations a
     join zip_codes b
       on round(a.latitude,0) = round(b.latitude,0) and round(a.longitude,0) = round(b.longitude,0)
"""

my_select_query_pandas(query, rollback_before_flag, rollback_after_flag)

Unnamed: 0,zip
0,93901
1,93905
2,93906
3,93907
4,93908
...,...
264,95680
265,95687
266,95688
267,95690


In [13]:
rollback_before_flag = True
rollback_after_flag = True

query = """

select b.zip
from stations a
     join zip_codes b
       on round(a.latitude,1) = round(b.latitude,1) and round(a.longitude,1) = round(b.longitude,1)
"""

my_select_query_pandas(query, rollback_before_flag, rollback_after_flag)

Unnamed: 0,zip
0,95131
1,95035
2,95134
3,95133
4,95116
...,...
306,94519
307,94565
308,94565
309,94509


In [14]:
rollback_before_flag = True
rollback_after_flag = True

query = """

select  a.station, b.zip
from stations a
     join zip_codes b
       on abs(a.latitude-b.latitude)<0.014 and abs(a.longitude-b.longitude)<0.014
order by 2
"""

my_select_query_pandas(query, rollback_before_flag, rollback_after_flag)

Unnamed: 0,station,zip
0,Civic Center,94102
1,Powell Street,94102
2,16th Street Mission,94103
3,Civic Center,94103
4,Powell Street,94103
5,Civic Center,94104
6,Embarcadero,94104
7,Montgomery Street,94104
8,Powell Street,94104
9,Embarcadero,94105
