In [1]:
# Import dependencies
import os

import numpy as np
import pandas as pd
import requests
# Widen text columns so long values (e.g. addresses) are not truncated when displayed
pd.set_option('display.max_colwidth', 400)

In [2]:
# API key and URL
# The key is read from the YELP_API_KEY environment variable so that it does not
# have to be stored in the notebook; the placeholder is kept as a fallback for
# anyone who prefers to paste the key in manually.
api_key = os.environ.get('YELP_API_KEY', 'Your_API_Key')
api_url = 'https://api.yelp.com/v3/businesses/search'

# Page size: used both as the request `limit` and to advance `offset`
results_per_request = 50

# Params
params = {
    'term': 'park',
    'location': 'Austin',
    'categories': 'parks',
    'limit': results_per_request,
    'offset': 0
}

# Set up headers
# Yelp Fusion documents the header as "Authorization: Bearer <API key>".
# Add the scheme unless the supplied key string already carries it.
if api_key.lower().startswith('bearer '):
    authorization = api_key
else:
    authorization = f'Bearer {api_key}'
headers = {
    'Authorization': authorization
}

# Empty list for results
all_parks = []

while True:
    # API request (the timeout keeps a stalled connection from hanging the cell)
    response = requests.get(api_url, params=params, headers=headers, timeout=30)

    # Any non-200 answer ends pagination; results fetched so far are kept
    if response.status_code != 200:
        print(f"Error: {response.status_code}")
        print(response.text)
        break

    data = response.json()
    businesses = data.get('businesses', [])
    all_parks.extend(businesses)

    # A short page means there are no more results to fetch
    if len(businesses) < results_per_request:
        break

    # Update offset for the next request
    params['offset'] += results_per_request

# Create list of dictionaries (one flat record per business)
park_data = []
for business in all_parks:
    # `or {}` / `or []` guard against keys that are present but null in the
    # response, which `.get(key, default)` alone would not catch.
    coordinates = business.get('coordinates') or {}
    location = business.get('location') or {}
    park_data.append({
        'Name': business.get('name', ''),
        'Address': ', '.join(location.get('display_address') or []),
        'Ratings': business.get('rating', 0),
        'Latitude': coordinates.get('latitude', 0),
        'Longitude': coordinates.get('longitude', 0)
    })

# Create a Pandas DataFrame
park_df = pd.DataFrame(park_data)
park_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 470 entries, 0 to 469
Data columns (total 5 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   Name       470 non-null    object 
 1   Address    470 non-null    object 
 2   Ratings    470 non-null    float64
 3   Latitude   470 non-null    float64
 4   Longitude  470 non-null    float64
dtypes: float64(3), object(2)
memory usage: 18.5+ KB


In [3]:
# Map the raw Yelp-derived column names to the names used in the parks table
column_renames = {
    'Name': 'park_name',
    'Address': 'park_address',
    'Ratings': 'park_ratings',
    'Latitude': 'latitude',
    'Longitude': 'longitude',
}

# park_id comes first, followed by the renamed columns in their original order
final_column_order = ['park_id', *column_renames.values()]

# Select, rename, number the rows from 1, and reorder in a single chain
parks_df = (
    park_df[list(column_renames)]
    .rename(columns=column_renames)
    .assign(park_id=lambda frame: range(1, len(frame) + 1))
    [final_column_order]
)
print(parks_df.head())

   park_id                     park_name  \
0        1                    Pease Park   
1        2                   Zilker Park   
2        3  Town Lake Metropolitan Parks   
3        4             Mueller Lake Park   
4        5        Barton Creek Greenbelt   

                                    park_address  park_ratings   latitude  \
0           1100 Kingsburry St, Austin, TX 78799           4.5  30.281418   
1       2100 Barton Springs Rd, Austin, TX 78704           4.5  30.266852   
2                               Austin, TX 78701           5.0  30.270895   
3            4550 Mueller Blvd, Austin, TX 78723           4.5  30.305104   
4  3755-B Capital Of Texas Hwy, Austin, TX 78704           4.5  30.247184   

   longitude  
0 -97.751934  
1 -97.775849  
2 -97.764860  
3 -97.703492  
4 -97.812688  


In [4]:
# Write the parks table to disk as CSV, leaving out the DataFrame index
parks_df.to_csv(
    path_or_buf="Resources/parks.csv",
    index=False,
)

In [5]:
# Read the property table from its CSV file
property_df = pd.read_csv(
    filepath_or_buffer="Resources/property.csv",
)

In [6]:
from geopy.distance import great_circle

# Junction Table for property_park
# A (property_id, park_id) pair is recorded whenever the great-circle distance
# between the property and the park is within the threshold.

# Distance threshold in kilometers (adjust as needed); defined once, outside the loops
distance_threshold = 1.0

# Park ids and coordinates are the same for every property, so collect them once
park_locations = [
    (park_row.park_id, (park_row.latitude, park_row.longitude))
    for park_row in parks_df.itertuples()
]

# Accumulate matches in a plain list and build the DataFrame once at the end;
# concatenating a one-row frame per match re-copies the whole table every time.
property_park_records = []

# Find nearby locations and create relationships
for property_row in property_df.itertuples():
    property_location = (property_row.latitude, property_row.longitude)

    for park_id, park_location in park_locations:
        # Calculate distance between property and park locations
        distance_km = great_circle(property_location, park_location).kilometers

        # If the distance is within the threshold, create a relationship
        if distance_km <= distance_threshold:
            property_park_records.append({
                'property_id': property_row.property_id,
                'park_id': park_id
            })

# Passing `columns` keeps both headers even when no pair matched
property_park_df = pd.DataFrame(property_park_records, columns=['property_id', 'park_id'])

# Display the resulting DataFrame
print(property_park_df)

     property_id park_id
0              1     192
1              7     192
2              7     364
3              7     397
4              7     413
...          ...     ...
9371        8867     120
9372        8868     228
9373        8870      69
9374        8871      69
9375        8871     120

[9376 rows x 2 columns]


In [7]:
# Write the property-park junction table to disk as CSV, leaving out the index
property_park_df.to_csv(
    path_or_buf="Resources/property_park.csv",
    index=False,
)