In [9]:
import os
import json
import time
import math
import copy
import pandas as pd
import numpy as np


class Haversine:
    '''
    from: https://nathanrooy.github.io/posts/2016-09-07/haversine-with-python/

    use the haversine class to calculate the distance between
    two lon/lat coordnate pairs.
    output distance available in kilometers, meters, miles, and feet.
    example usage: Haversine([lon1,lat1],[lon2,lat2]).feet
    
    '''
    def __init__(self,coord1,coord2):
        lon1,lat1=coord1
        lon2,lat2=coord2
        
        R=6371000                               # radius of Earth in meters
        phi_1=math.radians(lat1)
        phi_2=math.radians(lat2)

        delta_phi=math.radians(lat2-lat1)
        delta_lambda=math.radians(lon2-lon1)

        a=math.sin(delta_phi/2.0)**2+\
           math.cos(phi_1)*math.cos(phi_2)*\
           math.sin(delta_lambda/2.0)**2
        c=2*math.atan2(math.sqrt(a),math.sqrt(1-a))
        
        self.meters=R*c                         # output distance in meters
        self.km=self.meters/1000.0              # output distance in kilometers
        self.miles=self.meters*0.000621371      # output distance in miles
        self.feet=self.miles*5280               # output distance in feet
    
    
    def withinTolerance(self, tolerance):
        if self.meters <= tolerance:
            return True
        else:
            return False
        
np_loc = "national_parks.csv"
trails_loc = "nationalparktrails.csv"
hotels_loc = "7282_1.csv"



In [3]:
np_df = pd.read_csv(np_loc)
trails_df = pd.read_csv(trails_loc)
hotels_df = pd.read_csv(hotels_loc)

In [29]:
import time

# Area of zion national park: 229.1 mi²
# This translates to the average length of 24,359 in meters

TOLERANCE = 100000


all_np_coords = []
hotels_within_coords = []
hotels_outside_coords = []

nearby = []
hotels_record = {}
for i, hotel_row in hotels_df.iterrows():
    if i % 1000 == 0:
        print(i)
#     row_nearby = []
    hotel_coords = [hotel_row['longitude'], hotel_row['latitude']]
    hotel_key = hash(hotel_row['longitude']) + 11 * hotel_row['latitude']
    if hotel_key in hotels_record:     # To reference old computation bc multiple reviews exist for same hotel
        nearby.append(hotels_record[hotel_key])
        continue
        
#     print(hotel_coords)
    for j, np_row in np_df.iterrows():
        np_coords = [np_row['coordinates/longitude'], np_row['coordinates/latitude']] 
        within_one = False  # Boolean to check if more than one hotel is within tolerance 
#         print(np_coords)
        if Haversine(hotel_coords, np_coords).withinTolerance(TOLERANCE):
            print(hotel_row['name'], "is close to", np_row['title'])
            if within_one:
                raise Exception("MULTIPLE NATIONAL PARKS FOUND WITHIN TOLERANCE FOR", hotel_row['name'])
            within_one = True
            nearby.append(np_row['id'])
            hotels_record[hotel_key] = np_row['id']
    if not within_one:
        nearby.append(np.nan)
        hotels_record[hotel_key] = np.nan


print(len(nearby))
print(len(hotels_df))
test = pd.Series(nearby).notna()
num_within = len(test[test == True])
num_outside = len(test[test == False])
print(num_within, num_outside)
print(f"{num_within / (num_within + num_outside)}% within tolerance")

hotels_df['nearby'] = nearby
        

  
            
        
            
            
        
        
    
    
    

0
Intermezzo is close to Channel Islands
Little Paradise Hotel is close to Joshua Tree
Super 8 Columbia Dwntwn Sc is close to Congaree
Ocean Breeze Motel Apts is close to Biscayne
1000
The National Conference Center is close to Shenandoah
Holiday Inn Express & Suites Lenoir City (knoxville Area) is close to Great Smoky Mountains
The Golden Hotel, An Ascend Hotel Collection Member is close to Rocky Mountain
Hyatt Regency Reston is close to Shenandoah
Mt Gardner Inn is close to North Cascades
La Quinta Inn & Suites Auburn is close to Mount Rainier
Big Mountain Lodge is close to Glacier
2000
Sportsmen Motel is close to Olympic
Enchanted Castle Hotel is close to Theodore Roosevelt
Econo Lodge is close to North Cascades
Spirit Tree Inn B & B is close to Saguaro
Durango Travelodge is close to Mesa Verde
La Quinta Inn and Suites Tucson - Reid Park is close to Saguaro
3000
Inn At Queen Anne is close to Mount Rainier
Inn At Queen Anne is close to Olympic
Budget Inn South is close to Biscayne
Da

ValueError: Length of values (36069) does not match length of index (35912)

35912
35912


0        False
1        False
2        False
3        False
4        False
         ...  
35907    False
35908    False
35909    False
35910    False
35911    False
Length: 35912, dtype: bool