In [1]:
import os
import json
import time
import math
import copy
import pandas as pd
import numpy as np


class Haversine:
    '''
    from: https://nathanrooy.github.io/posts/2016-09-07/haversine-with-python/

    use the haversine class to calculate the distance between
    two lon/lat coordnate pairs.
    output distance available in kilometers, meters, miles, and feet.
    example usage: Haversine([lon1,lat1],[lon2,lat2]).feet
    
    '''
    def __init__(self,coord1,coord2):
        lon1,lat1=coord1
        lon2,lat2=coord2
        
        R=6371000                               # radius of Earth in meters
        phi_1=math.radians(lat1)
        phi_2=math.radians(lat2)

        delta_phi=math.radians(lat2-lat1)
        delta_lambda=math.radians(lon2-lon1)

        a=math.sin(delta_phi/2.0)**2+\
           math.cos(phi_1)*math.cos(phi_2)*\
           math.sin(delta_lambda/2.0)**2
        c=2*math.atan2(math.sqrt(a),math.sqrt(1-a))
        
        self.meters=R*c                         # output distance in meters
        self.km=self.meters/1000.0              # output distance in kilometers
        self.miles=self.meters*0.000621371      # output distance in miles
        self.feet=self.miles*5280               # output distance in feet
    
    
    def withinTolerance(self, tolerance):
        if self.meters <= tolerance:
            return True
        else:
            return False
        
np_loc = "national_parks.csv"
trails_loc = "nationalparktrails.csv"
hotels_loc = "7282_1.csv"



In [2]:
np_df = pd.read_csv(np_loc)
trails_df = pd.read_csv(trails_loc)
hotels_df = pd.read_csv(hotels_loc)

In [3]:
# ref:
# Area of zion national park: 229.1 mi²
# This translates to the average length of 24,359 in meters

TOLERANCE = 80467.2  # 50 miles


def assignParksToHotels(hotels_df, np_df):
    '''
    hotels_df = dataframe of initial hotels data
    np_df = dataframe of initial national park
    
    Assigned park ids to a hotel 
    '''
    nearby = []
    additional = []
    hotels_record = {}
    for i, hotel_row in hotels_df.iterrows():
        hotel_coords = [hotel_row['longitude'], hotel_row['latitude']]
        hotel_key = hash(hotel_row['longitude']) + 11 * hotel_row['latitude']
        if hotel_key in hotels_record:     # To reference old computation bc multiple reviews exist for same hotel
            nearby.append(hotels_record[hotel_key])
            continue
        nearby_np = []
        for j, np_row in np_df.iterrows():
            np_coords = [np_row['coordinates/longitude'], np_row['coordinates/latitude']]
            np_radius = math.sqrt(float(np_row['area/square_km'].replace(",",""))) * 1000  # Radius of park in meters
            H = Haversine(hotel_coords, np_coords)
            if H.withinTolerance(TOLERANCE + np_radius):
                temp_nearby = {
                    'id': np_row['id'],
                    'dist': H.meters - np_radius
                }
                nearby_np.append(temp_nearby)
        if len(nearby_np) == 0:
            nearby.append(np.nan)
            hotels_record[hotel_key] = np.nan
        elif len(nearby_np) == 1:
            nearby.append(nearby_np[0]['id'])
            hotels_record[hotel_key] = nearby_np[0]['id']
        else: # more than 1 nearby national parks
            nearby_np.sort(key=lambda x: x['dist'])
            nearby.append(nearby_np[0]['id'])
            hotels_record[hotel_key] = nearby_np[0]['id']
    
    hotels_df['nearby'] = nearby    
    assignments = list(hotels_record.values())    
    total_num = len(assignments)
    total_unassigned = assignments.count(np.nan)
    print(f"{(total_num - total_unassigned) / total_num}% of hotels ({total_num - total_unassigned}) are near national parks")
    
        

        
assignParksToHotels(hotels_df, np_df)

hotels_df
hotels_df['nearby'].notna()


            
        
            
            
        
        
    
    
    

0.1627689429373246% of hotels (174) are near national parks


0        False
1        False
2        False
3        False
4        False
         ...  
35907     True
35908     True
35909     True
35910    False
35911    False
Name: nearby, Length: 35912, dtype: bool

0        False
1        False
2        False
3        False
4        False
         ...  
35907     True
35908     True
35909     True
35910    False
35911    False
Name: nearby, Length: 35912, dtype: bool