In [1]:
# import dependencies
import sqlalchemy
from sqlalchemy import Column, Integer, String, Numeric, Float, func
from sqlalchemy import create_engine, inspect
from sqlalchemy.ext.declarative import declarative_base
Base = declarative_base()
from sqlalchemy.ext.automap import automap_base
from sqlalchemy.orm import Session
import pandas as pd
import numpy as np
import requests, time, json
import googlemaps, gmaps
from API_KEY import API_KEY
from pprint import pprint


In [2]:
# READ in scrubbed New York brewery (from Kaggle.com) 
# csv file after correcting city, state and zip code for 5 locations in Excel.
# created api_address column manually in the event is would be needed to get geocodes for cross reference
brewfile = "nybrews_4api_file2.csv"
newbrew_df = pd.read_csv(brewfile)
newbrew_df.head()

Unnamed: 0,index2,brewery_name,address,city,state,zip_code,api_address
0,0,Typhoon Brewery,22 E. 54th St. (@ Madison),New York,NY,10022,"22+E.+54th+St.+(@+Madison),New+York,NY,10022"
1,1,The Riverosa Company,101 W. 75th St. Suite 5B,New York,NY,10023,"101+W.+75th+St.+Suite+5B,New+York,NY,10023"
2,2,Spring Street Brewing Company,113 University Place - 11th Floor,New York,NY,10003,"113+University+Place+-+11th+Floor,New+York,NY,..."
3,3,Rheingold Brewing Company,130 West 42nd St.,New York,NY,10036,"130+West+42nd+St.,New+York,NY,10036"
4,4,Premier Beverages LLC,590 Madison Ave. 21st Fl.,New York,NY,10022,"590+Madison+Ave.+21st+Fl.,New+York,NY,10022"


In [3]:
# dropping index2 and api_address columns given it was determined they are no longer needed for this exercise
newbrew_df = newbrew_df.drop(['index2', 'api_address'], axis=1)
newbrew_df.head()

Unnamed: 0,brewery_name,address,city,state,zip_code
0,Typhoon Brewery,22 E. 54th St. (@ Madison),New York,NY,10022
1,The Riverosa Company,101 W. 75th St. Suite 5B,New York,NY,10023
2,Spring Street Brewing Company,113 University Place - 11th Floor,New York,NY,10003
3,Rheingold Brewing Company,130 West 42nd St.,New York,NY,10036
4,Premier Beverages LLC,590 Madison Ave. 21st Fl.,New York,NY,10022


In [4]:
################################################################################

In [5]:
# API_KEY = 'INSERT_MAPS_API_KEY_HERE'
gmaps = googlemaps.Client(key=API_KEY)

In [6]:
# use googlemaps to get geo coordinates for the 16 brewery addresses on our BREWERY TOUR itinerary 
def get_coordinates(address):
    city = 'New York, US'
    geocode_result = gmaps.geocode(str(address) +' '+ city)
    if len(geocode_result) > 0:
        return list(geocode_result[0]['geometry']['location'].values())
    else:
        return [np.NaN, np.NaN]

coordinates = newbrew_df['address'].apply(lambda x: pd.Series(get_coordinates(x), index=['LATITUDE', 'LONGITUDE']))
newbrew_df5 = pd.concat([newbrew_df[:], coordinates[:]], axis="columns")

In [7]:
# Assign better column headers
newbrew_df5.columns = ['brewery','address','city','state','zip_code','brew_lat', 'brew_lon']
newbrew_df5

Unnamed: 0,brewery,address,city,state,zip_code,brew_lat,brew_lon
0,Typhoon Brewery,22 E. 54th St. (@ Madison),New York,NY,10022,40.760342,-73.974326
1,The Riverosa Company,101 W. 75th St. Suite 5B,New York,NY,10023,40.779579,-73.977562
2,Spring Street Brewing Company,113 University Place - 11th Floor,New York,NY,10003,40.734271,-73.992162
3,Rheingold Brewing Company,130 West 42nd St.,New York,NY,10036,40.755123,-73.985463
4,Premier Beverages LLC,590 Madison Ave. 21st Fl.,New York,NY,10022,40.76211,-73.973069
5,New Amsterdam Brewing Co.,257 Park Ave. S.,New York,NY,10010,40.738623,-73.987135
6,Neptune Brewery,448 W. 16th St.,New York,NY,10011,40.742893,-74.006416
7,Nacho Mama's Brewery,42-42 Thompson St.,New York,NY,10013,40.723304,-74.003707
8,Manhattan Brewing Co. Restaurant,40-42 Thompson St.,New York,NY,10013,40.723278,-74.003745
9,John Street Bar & Grill,17 John St.,New York,NY,10038,40.70997,-74.008741


In [8]:
# Establish connection with Postgres etlproj_db and engine for query purposes 
from postgreslogin import postgres_key

MyPostLogin = postgres_key
rds_connection_string = MyPostLogin
engine = create_engine(f'postgresql://{rds_connection_string}')

In [9]:
# Transfer brewery dataframe to Postgres etlproj_db to create brewery table in etlproj_db
newbrew_df5.to_sql(name='ny_brews2', con=engine, if_exists='replace', index=False)

# read back from SQL Postgres for test purposes
pd.read_sql_query('select * from ny_brews2', con=engine).head()


Unnamed: 0,brewery,address,city,state,zip_code,brew_lat,brew_lon
0,Typhoon Brewery,22 E. 54th St. (@ Madison),New York,NY,10022,40.760342,-73.974326
1,The Riverosa Company,101 W. 75th St. Suite 5B,New York,NY,10023,40.779579,-73.977562
2,Spring Street Brewing Company,113 University Place - 11th Floor,New York,NY,10003,40.734271,-73.992162
3,Rheingold Brewing Company,130 West 42nd St.,New York,NY,10036,40.755123,-73.985463
4,Premier Beverages LLC,590 Madison Ave. 21st Fl.,New York,NY,10022,40.76211,-73.973069


In [10]:
##############################################################################################################

In [11]:
##### Air BnB Information #####

In [12]:
# Read in New York AirBnb CSV file from Kaggle.com
csv_file = "Resources/AB_NYC_2019.csv"
AB_NYC_2019_data_df = pd.read_csv(csv_file)
AB_NYC_2019_data_df.head()

Unnamed: 0,id,name,host_id,host_name,neighbourhood_group,neighbourhood,latitude,longitude,room_type,price,minimum_nights,number_of_reviews,last_review,reviews_per_month,calculated_host_listings_count,availability_365
0,2539,Clean & quiet apt home by the park,2787,John,Brooklyn,Kensington,40.64749,-73.97237,Private room,149,1,9,2018-10-19,0.21,6,365
1,2595,Skylit Midtown Castle,2845,Jennifer,Manhattan,Midtown,40.75362,-73.98377,Entire home/apt,225,1,45,2019-05-21,0.38,2,355
2,3647,THE VILLAGE OF HARLEM....NEW YORK !,4632,Elisabeth,Manhattan,Harlem,40.80902,-73.9419,Private room,150,3,0,,,1,365
3,3831,Cozy Entire Floor of Brownstone,4869,LisaRoxanne,Brooklyn,Clinton Hill,40.68514,-73.95976,Entire home/apt,89,1,270,2019-07-05,4.64,1,194
4,5022,Entire Apt: Spacious Studio/Loft by central park,7192,Laura,Manhattan,East Harlem,40.79851,-73.94399,Entire home/apt,80,10,9,2018-11-19,0.1,1,0


In [13]:
# drop some unnecessary columns from AirBnB dataframe
AB_NYC_2019_data_df = AB_NYC_2019_data_df.drop(['last_review', 'reviews_per_month', 'number_of_reviews',\
'calculated_host_listings_count', 'neighbourhood', 'neighbourhood_group'], axis=1)
AB_NYC_2019_data_df.head()

Unnamed: 0,id,name,host_id,host_name,latitude,longitude,room_type,price,minimum_nights,availability_365
0,2539,Clean & quiet apt home by the park,2787,John,40.64749,-73.97237,Private room,149,1,365
1,2595,Skylit Midtown Castle,2845,Jennifer,40.75362,-73.98377,Entire home/apt,225,1,355
2,3647,THE VILLAGE OF HARLEM....NEW YORK !,4632,Elisabeth,40.80902,-73.9419,Private room,150,3,365
3,3831,Cozy Entire Floor of Brownstone,4869,LisaRoxanne,40.68514,-73.95976,Entire home/apt,89,1,194
4,5022,Entire Apt: Spacious Studio/Loft by central park,7192,Laura,40.79851,-73.94399,Entire home/apt,80,10,0


In [14]:
# Assign better column headers
AB_NYC_2019_data_df.columns = ['airbnb_id','airbnb_name','host_id','host_name','air_lat', 'air_lon',\
                               'room_type', 'price', 'minimum_nights', 'availability_365']
AB_NYC_2019_data_df

Unnamed: 0,airbnb_id,airbnb_name,host_id,host_name,air_lat,air_lon,room_type,price,minimum_nights,availability_365
0,2539,Clean & quiet apt home by the park,2787,John,40.64749,-73.97237,Private room,149,1,365
1,2595,Skylit Midtown Castle,2845,Jennifer,40.75362,-73.98377,Entire home/apt,225,1,355
2,3647,THE VILLAGE OF HARLEM....NEW YORK !,4632,Elisabeth,40.80902,-73.94190,Private room,150,3,365
3,3831,Cozy Entire Floor of Brownstone,4869,LisaRoxanne,40.68514,-73.95976,Entire home/apt,89,1,194
4,5022,Entire Apt: Spacious Studio/Loft by central park,7192,Laura,40.79851,-73.94399,Entire home/apt,80,10,0
...,...,...,...,...,...,...,...,...,...,...
48890,36484665,Charming one bedroom - newly renovated rowhouse,8232441,Sabrina,40.67853,-73.94995,Private room,70,2,9
48891,36485057,Affordable room in Bushwick/East Williamsburg,6570630,Marisol,40.70184,-73.93317,Private room,40,4,36
48892,36485431,Sunny Studio at Historical Neighborhood,23492952,Ilgar & Aysel,40.81475,-73.94867,Entire home/apt,115,10,27
48893,36485609,43rd St. Time Square-cozy single bed,30985759,Taz,40.75751,-73.99112,Shared room,55,1,2


In [15]:
# Create CSV file for AirBnB data
AB_NYC_2019_data_df.to_csv('nyabnb.csv')

In [16]:
# Establish connection with Postgres etlproj_db and engine for query purposes
from postgreslogin import postgres_key

MyPostLogin = postgres_key
rds_connection_string = MyPostLogin
engine = create_engine(f'postgresql://{rds_connection_string}')

In [17]:
#Transfer AirBnB data to Postgres etlproj_db to populate the AirBnB table
AB_NYC_2019_data_df.to_sql(name='ny_abnb', con=engine, if_exists='replace', index=False)
pd.read_sql_query('select * from ny_abnb', con=engine).head()

Unnamed: 0,airbnb_id,airbnb_name,host_id,host_name,air_lat,air_lon,room_type,price,minimum_nights,availability_365
0,2539,Clean & quiet apt home by the park,2787,John,40.64749,-73.97237,Private room,149,1,365
1,2595,Skylit Midtown Castle,2845,Jennifer,40.75362,-73.98377,Entire home/apt,225,1,355
2,3647,THE VILLAGE OF HARLEM....NEW YORK !,4632,Elisabeth,40.80902,-73.9419,Private room,150,3,365
3,3831,Cozy Entire Floor of Brownstone,4869,LisaRoxanne,40.68514,-73.95976,Entire home/apt,89,1,194
4,5022,Entire Apt: Spacious Studio/Loft by central park,7192,Laura,40.79851,-73.94399,Entire home/apt,80,10,0


In [18]:

##############################################################################################################

In [19]:
# Establish connection with Postgres etlproj_db and engine for query purposes
MyPostLogin = postgres_key
rds_connection_string = MyPostLogin
engine = create_engine(f'postgresql://{rds_connection_string}')
session = Session(engine)

In [20]:
# check for defined classes in Postgres etlproj_db
Base = automap_base()
Base.prepare(engine, reflect=True)
Base.classes.keys()

[]

In [21]:
# check for table names in Postgres etlproj_db
inspector = inspect(engine)
inspector.get_table_names()


['ny_breweries', 'distance', 'ny_brews2', 'ny_abnb']

In [22]:
# get the column names for Brewery table in Postgres etlproj_db
columns = inspector.get_columns('ny_brews2')
for c in columns:
    print(c['name'], c["type"])


brewery TEXT
address TEXT
city TEXT
state TEXT
zip_code BIGINT
brew_lat DOUBLE PRECISION
brew_lon DOUBLE PRECISION


In [23]:
# # get the column names for AirBnB table in Postgres etlproj_db
columns = inspector.get_columns('ny_abnb')
for c in columns:
    print(c['name'], c["type"])

airbnb_id BIGINT
airbnb_name TEXT
host_id BIGINT
host_name TEXT
air_lat DOUBLE PRECISION
air_lon DOUBLE PRECISION
room_type TEXT
price BIGINT
minimum_nights BIGINT
availability_365 BIGINT


In [24]:
##############################################################################################################

In [25]:
##########  perform a "cross join" of the brewery and airbnb tables to create new DataFrame ############

In [26]:
crossjoin_df = pd.read_sql_query('SELECT * FROM ny_brews2 CROSS JOIN ny_abnb', con=engine)

In [27]:
crossjoin_df

Unnamed: 0,brewery,address,city,state,zip_code,brew_lat,brew_lon,airbnb_id,airbnb_name,host_id,host_name,air_lat,air_lon,room_type,price,minimum_nights,availability_365
0,Typhoon Brewery,22 E. 54th St. (@ Madison),New York,NY,10022,40.760342,-73.974326,2539,Clean & quiet apt home by the park,2787,John,40.64749,-73.97237,Private room,149,1,365
1,The Riverosa Company,101 W. 75th St. Suite 5B,New York,NY,10023,40.779579,-73.977562,2539,Clean & quiet apt home by the park,2787,John,40.64749,-73.97237,Private room,149,1,365
2,Spring Street Brewing Company,113 University Place - 11th Floor,New York,NY,10003,40.734271,-73.992162,2539,Clean & quiet apt home by the park,2787,John,40.64749,-73.97237,Private room,149,1,365
3,Rheingold Brewing Company,130 West 42nd St.,New York,NY,10036,40.755123,-73.985463,2539,Clean & quiet apt home by the park,2787,John,40.64749,-73.97237,Private room,149,1,365
4,Premier Beverages LLC,590 Madison Ave. 21st Fl.,New York,NY,10022,40.762110,-73.973069,2539,Clean & quiet apt home by the park,2787,John,40.64749,-73.97237,Private room,149,1,365
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
831210,Hansens Times Square Brewery,160 W. 42nd St.,New York,NY,10036,40.755584,-73.986483,36487245,Trendy duplex in the very heart of Hell's Kitchen,68119814,Christophe,40.76404,-73.98933,Private room,90,7,23
831211,Commonwealth Brewing Co.,10 Rockefeller Plaza (@48th St.),New York,NY,10020,40.758046,-73.979444,36487245,Trendy duplex in the very heart of Hell's Kitchen,68119814,Christophe,40.76404,-73.98933,Private room,90,7,23
831212,Chelsea Brewing Co.,59 Chelsea Piers,New York,NY,10011,40.745682,-74.008722,36487245,Trendy duplex in the very heart of Hell's Kitchen,68119814,Christophe,40.76404,-73.98933,Private room,90,7,23
831213,Carnegie Hill Brewing Co.,1600 3rd Ave. (@ 90th St.),New York,NY,10028,40.781370,-73.952445,36487245,Trendy duplex in the very heart of Hell's Kitchen,68119814,Christophe,40.76404,-73.98933,Private room,90,7,23


In [None]:
##############################################################################################################

In [32]:
# Create an empty list and perform iterrows over the crossjoin_df and append to new_list the output of the f-string
new_list = []
for index, row in crossjoin_df.iterrows():
    biglist = f"({row['brewery']}, {row['airbnb_id']}, {row['brew_lat']}, {row['brew_lon']}, {row['air_lat']}, {row['air_lon']})"
    new_list.append(biglist)
print(type(new_list))
new_list

<class 'list'>


['(Typhoon Brewery, 2539, 40.760342, -73.974326, 40.647490000000005, -73.97237)',
 '(The Riverosa Company, 2539, 40.7795789, -73.9775621, 40.647490000000005, -73.97237)',
 '(Spring Street Brewing Company, 2539, 40.7342711, -73.9921624, 40.647490000000005, -73.97237)',
 '(Rheingold Brewing Company, 2539, 40.7551234, -73.98546329999999, 40.647490000000005, -73.97237)',
 '(Premier Beverages LLC, 2539, 40.76211, -73.973069, 40.647490000000005, -73.97237)',
 '(New Amsterdam Brewing Co., 2539, 40.7386232, -73.9871351, 40.647490000000005, -73.97237)',
 '(Neptune Brewery, 2539, 40.74289340000001, -74.0064163, 40.647490000000005, -73.97237)',
 "(Nacho Mama's Brewery, 2539, 40.7233037, -74.00370749999999, 40.647490000000005, -73.97237)",
 '(Manhattan Brewing Co. Restaurant, 2539, 40.7232777, -74.0037451, 40.647490000000005, -73.97237)',
 '(John Street Bar & Grill, 2539, 40.7099702, -74.0087412, 40.647490000000005, -73.97237)',
 '(Heartland Brewery, 2539, 40.73688990000001, -73.990792, 40.6474900

In [33]:
# Create an empty list and perform iterrows over the crossjoin_df and append to Row_list1 variable my_list1
Row_list1 =[] 

for index, row in crossjoin_df.iterrows():
    my_list1 = ({row['brewery']}, {row['airbnb_id']}, {row['brew_lat']}, {row['brew_lon']}, {row['air_lat']}, {row['air_lon']})
          
    # append the list to the final list 
    Row_list1.append(my_list1)
    
# Print the list 
# print(Row_list1)
print(type(Row_list1))
Row_list1

<class 'list'>


[({'Typhoon Brewery'},
  {2539},
  {40.760342},
  {-73.974326},
  {40.647490000000005},
  {-73.97237}),
 ({'The Riverosa Company'},
  {2539},
  {40.7795789},
  {-73.9775621},
  {40.647490000000005},
  {-73.97237}),
 ({'Spring Street Brewing Company'},
  {2539},
  {40.7342711},
  {-73.9921624},
  {40.647490000000005},
  {-73.97237}),
 ({'Rheingold Brewing Company'},
  {2539},
  {40.7551234},
  {-73.98546329999999},
  {40.647490000000005},
  {-73.97237}),
 ({'Premier Beverages LLC'},
  {2539},
  {40.76211},
  {-73.973069},
  {40.647490000000005},
  {-73.97237}),
 ({'New Amsterdam Brewing Co.'},
  {2539},
  {40.7386232},
  {-73.9871351},
  {40.647490000000005},
  {-73.97237}),
 ({'Neptune Brewery'},
  {2539},
  {40.74289340000001},
  {-74.0064163},
  {40.647490000000005},
  {-73.97237}),
 ({"Nacho Mama's Brewery"},
  {2539},
  {40.7233037},
  {-74.00370749999999},
  {40.647490000000005},
  {-73.97237}),
 ({'Manhattan Brewing Co. Restaurant'},
  {2539},
  {40.7232777},
  {-74.0037451},
  {

In [34]:
# Create an empty list and perform iterrows over the crossjoin_df and append to Row_list2 variable my_list2
Row_list2 =[] 
  
# Iterate over each row 
for index, rows in crossjoin_df.iterrows(): 
    # Create list for the current row 
    my_list2 =[rows.brewery, rows.airbnb_id, rows.brew_lat, rows.brew_lon, rows.air_lat, rows.air_lon] 
      
    # append the list to the final list 
    Row_list2.append(my_list2) 

# Print Row_list
print(type(Row_list2))
Row_list2

<class 'list'>


[['Typhoon Brewery',
  2539,
  40.760342,
  -73.974326,
  40.647490000000005,
  -73.97237],
 ['The Riverosa Company',
  2539,
  40.7795789,
  -73.9775621,
  40.647490000000005,
  -73.97237],
 ['Spring Street Brewing Company',
  2539,
  40.7342711,
  -73.9921624,
  40.647490000000005,
  -73.97237],
 ['Rheingold Brewing Company',
  2539,
  40.7551234,
  -73.98546329999999,
  40.647490000000005,
  -73.97237],
 ['Premier Beverages LLC',
  2539,
  40.76211,
  -73.973069,
  40.647490000000005,
  -73.97237],
 ['New Amsterdam Brewing Co.',
  2539,
  40.7386232,
  -73.9871351,
  40.647490000000005,
  -73.97237],
 ['Neptune Brewery',
  2539,
  40.74289340000001,
  -74.0064163,
  40.647490000000005,
  -73.97237],
 ["Nacho Mama's Brewery",
  2539,
  40.7233037,
  -74.00370749999999,
  40.647490000000005,
  -73.97237],
 ['Manhattan Brewing Co. Restaurant',
  2539,
  40.7232777,
  -74.0037451,
  40.647490000000005,
  -73.97237],
 ['John Street Bar & Grill',
  2539,
  40.7099702,
  -74.0087412,
  40.

In [35]:
crossjoin_df2 = pd.DataFrame(Row_list2)

In [36]:
crossjoin_df2

Unnamed: 0,0,1,2,3,4,5
0,Typhoon Brewery,2539,40.760342,-73.974326,40.64749,-73.97237
1,The Riverosa Company,2539,40.779579,-73.977562,40.64749,-73.97237
2,Spring Street Brewing Company,2539,40.734271,-73.992162,40.64749,-73.97237
3,Rheingold Brewing Company,2539,40.755123,-73.985463,40.64749,-73.97237
4,Premier Beverages LLC,2539,40.762110,-73.973069,40.64749,-73.97237
...,...,...,...,...,...,...
831210,Hansens Times Square Brewery,36487245,40.755584,-73.986483,40.76404,-73.98933
831211,Commonwealth Brewing Co.,36487245,40.758046,-73.979444,40.76404,-73.98933
831212,Chelsea Brewing Co.,36487245,40.745682,-74.008722,40.76404,-73.98933
831213,Carnegie Hill Brewing Co.,36487245,40.781370,-73.952445,40.76404,-73.98933


In [37]:
crossjoin_df2.columns = ['brewery','airbnb_id','brew_lat','brew_lon','air_lat','air_lon']

In [38]:
crossjoin_df2.head()

Unnamed: 0,brewery,airbnb_id,brew_lat,brew_lon,air_lat,air_lon
0,Typhoon Brewery,2539,40.760342,-73.974326,40.64749,-73.97237
1,The Riverosa Company,2539,40.779579,-73.977562,40.64749,-73.97237
2,Spring Street Brewing Company,2539,40.734271,-73.992162,40.64749,-73.97237
3,Rheingold Brewing Company,2539,40.755123,-73.985463,40.64749,-73.97237
4,Premier Beverages LLC,2539,40.76211,-73.973069,40.64749,-73.97237


In [39]:
#This is code to limit rows to 17 for testing purposes only
# crossjoin_df3 = crossjoin_df2
# crossjoin_df4 = crossjoin_df3.iloc[0:17]

In [40]:
crossjoin_df4 = crossjoin_df2

In [41]:
crossjoin_df4

Unnamed: 0,brewery,airbnb_id,brew_lat,brew_lon,air_lat,air_lon
0,Typhoon Brewery,2539,40.760342,-73.974326,40.64749,-73.97237
1,The Riverosa Company,2539,40.779579,-73.977562,40.64749,-73.97237
2,Spring Street Brewing Company,2539,40.734271,-73.992162,40.64749,-73.97237
3,Rheingold Brewing Company,2539,40.755123,-73.985463,40.64749,-73.97237
4,Premier Beverages LLC,2539,40.762110,-73.973069,40.64749,-73.97237
...,...,...,...,...,...,...
831210,Hansens Times Square Brewery,36487245,40.755584,-73.986483,40.76404,-73.98933
831211,Commonwealth Brewing Co.,36487245,40.758046,-73.979444,40.76404,-73.98933
831212,Chelsea Brewing Co.,36487245,40.745682,-74.008722,40.76404,-73.98933
831213,Carnegie Hill Brewing Co.,36487245,40.781370,-73.952445,40.76404,-73.98933


In [None]:
################ code to use to get distance between the 16 breweries and the AirBnB's ###############

In [None]:
########  This is code for verifying a single calculaton of the large file below #############
from math import radians, cos, sin, asin, sqrt 
from haversine import haversine, Unit

def distance(lat1, lat2, lon1, lon2): 
      
    # The math module contains a function named 
    # radians which converts from degrees to radians. 
    lon1 = radians(lon1) 
    lon2 = radians(lon2) 
    lat1 = radians(lat1) 
    lat2 = radians(lat2) 
       
    # Haversine formula  
    dlon = lon2 - lon1  
    dlat = lat2 - lat1 
    a = sin(dlat / 2)**2 + cos(lat1) * cos(lat2) * sin(dlon / 2)**2
  
    c = 2 * asin(sqrt(a))  
     
    # Radius of earth in kilometers is 6371. Use 3956 for miles. Use 20908800 for feet. Use 6969600 for yards.
    r = 3956
       
    # calculate the result 
    return(c * r) 
      
      
# driver code  
lat1 = 40.760342
lat2 = 40.647490000000005
lon1 = -73.974326
lon2 =  -73.97237
print(distance(lat1, lat2, lon1, lon2), "Miles")



In [42]:
# Use the Haversine Function to calculate the distance between the coordinates of the Brewery and each AirBnB

from math import radians, cos, sin, asin, sqrt
def haversine(lon1, lat1, lon2, lat2):
    """
    Calculate the great circle distance between two points 
    on the earth (specified in decimal degrees)
    """
    # convert decimal degrees to radians 
    lon1, lat1, lon2, lat2 = map(radians, [lon1, lat1, lon2, lat2])
    # haversine formula 
    dlon = lon2 - lon1 
    dlat = lat2 - lat1 
    a = sin(dlat/2)**2 + cos(lat1) * cos(lat2) * sin(dlon/2)**2
    c = 2 * asin(sqrt(a)) 
    Miles = 3956 * c
    return Miles


for index, row in crossjoin_df4.iterrows():
        crossjoin_df4.loc[index, 'distance'] = haversine(row[3], row[2], row[5], row[4])
        
crossjoin_df4.head(17)



Unnamed: 0,brewery,airbnb_id,brew_lat,brew_lon,air_lat,air_lon,distance
0,Typhoon Brewery,2539,40.760342,-73.974326,40.64749,-73.97237,7.792564
1,The Riverosa Company,2539,40.779579,-73.977562,40.64749,-73.97237,9.124155
2,Spring Street Brewing Company,2539,40.734271,-73.992162,40.64749,-73.97237,6.080756
3,Rheingold Brewing Company,2539,40.755123,-73.985463,40.64749,-73.97237,7.463109
4,Premier Beverages LLC,2539,40.76211,-73.973069,40.64749,-73.97237,7.914048
5,New Amsterdam Brewing Co.,2539,40.738623,-73.987135,40.64749,-73.97237,6.339611
6,Neptune Brewery,2539,40.742893,-74.006416,40.64749,-73.97237,6.824011
7,Nacho Mama's Brewery,2539,40.723304,-74.003707,40.64749,-73.97237,5.485689
8,Manhattan Brewing Co. Restaurant,2539,40.723278,-74.003745,40.64749,-73.97237,5.484566
9,John Street Bar & Grill,2539,40.70997,-74.008741,40.64749,-73.97237,4.715642


In [43]:
# Confirm  Column Headers

crossjoin_df4.columns

Index(['brewery', 'airbnb_id', 'brew_lat', 'brew_lon', 'air_lat', 'air_lon',
       'distance'],
      dtype='object')

In [44]:
# Assign New Column Headers if necessary

crossjoin_df4.columns = ['brewery','airbnb_id','brew_lat','brew_lon','air_lat','air_lon', 'dist']

In [45]:
crossjoin_df4

Unnamed: 0,brewery,airbnb_id,brew_lat,brew_lon,air_lat,air_lon,dist
0,Typhoon Brewery,2539,40.760342,-73.974326,40.64749,-73.97237,7.792564
1,The Riverosa Company,2539,40.779579,-73.977562,40.64749,-73.97237,9.124155
2,Spring Street Brewing Company,2539,40.734271,-73.992162,40.64749,-73.97237,6.080756
3,Rheingold Brewing Company,2539,40.755123,-73.985463,40.64749,-73.97237,7.463109
4,Premier Beverages LLC,2539,40.762110,-73.973069,40.64749,-73.97237,7.914048
...,...,...,...,...,...,...,...
831210,Hansens Times Square Brewery,36487245,40.755584,-73.986483,40.76404,-73.98933,0.602536
831211,Commonwealth Brewing Co.,36487245,40.758046,-73.979444,40.76404,-73.98933,0.662229
831212,Chelsea Brewing Co.,36487245,40.745682,-74.008722,40.76404,-73.98933,1.623381
831213,Carnegie Hill Brewing Co.,36487245,40.781370,-73.952445,40.76404,-73.98933,2.269684


In [46]:
# Establish connection with Postgres etlproj_db and engine for query purposes
from postgreslogin import postgres_key

MyPostLogin = postgres_key
rds_connection_string = MyPostLogin
engine = create_engine(f'postgresql://{rds_connection_string}')

In [47]:
# Create new table in Postgres etlproject_db and transfer data which will contain the 
# distances needed for the next calculation to be completed in Postgres using a SQL query

crossjoin_df4.to_sql(name='distance', con=engine, if_exists='replace', index=False)
pd.read_sql_query('select * from distance', con=engine).head()

Unnamed: 0,brewery,airbnb_id,brew_lat,brew_lon,air_lat,air_lon,dist
0,Typhoon Brewery,2539,40.760342,-73.974326,40.64749,-73.97237,7.792564
1,The Riverosa Company,2539,40.779579,-73.977562,40.64749,-73.97237,9.124155
2,Spring Street Brewing Company,2539,40.734271,-73.992162,40.64749,-73.97237,6.080756
3,Rheingold Brewing Company,2539,40.755123,-73.985463,40.64749,-73.97237,7.463109
4,Premier Beverages LLC,2539,40.76211,-73.973069,40.64749,-73.97237,7.914048


In [50]:
session.close()