In [9]:
# Import dependencies
import os

import numpy as np
import pandas as pd
import requests
from geopy.distance import great_circle
# Widen column display so long addresses are not truncated in printed frames
pd.set_option('display.max_colwidth', 400)

In [10]:
# API key and URL
# The key is read from the YELP_API_KEY environment variable so the secret is not
# stored in the notebook; the original placeholder remains the fallback value.
api_key = os.environ.get('YELP_API_KEY', 'Your_API_Key')
api_url = 'https://api.yelp.com/v3/businesses/search'

# Params
params = {
    'term': 'gym',
    'location': 'Austin',
    'categories': 'gyms',
    'limit': 50,
    'offset': 0
}

# Set up headers
# Yelp Fusion authenticates with "Authorization: Bearer <API key>". The scheme is
# added here unless the configured value already carries it.
headers = {
    'Authorization': api_key if api_key.startswith('Bearer ') else f'Bearer {api_key}'
}

# Empty list for results
all_gyms = []
# Page size comes from the request params so the two values cannot drift apart
results_per_request = params['limit']
# Seconds to wait for a response before giving up instead of hanging forever
request_timeout = 30

while True:
    # API request
    try:
        response = requests.get(
            api_url, params=params, headers=headers, timeout=request_timeout
        )
    except requests.RequestException as exc:
        # Network failure or timeout: report it and keep whatever was collected
        print(f"Error: {exc}")
        break

    # Any non-200 answer ends pagination; whatever was collected so far is kept.
    # NOTE(review): Yelp is understood to cap how deep `offset` may go — confirm
    # the limit for this API plan if more results are expected.
    if response.status_code != 200:
        print(f"Error: {response.status_code}")
        print(response.text)
        break

    data = response.json()
    businesses = data.get('businesses', [])
    all_gyms.extend(businesses)

    # A short (or empty) page means there are no more results
    if len(businesses) < results_per_request:
        break

    # Update offset for the next request
    params['offset'] += results_per_request

# Create list of dictionaries
# `x.get(key) or default` is used instead of `x.get(key, default)` for nested
# fields so that an explicit JSON null is treated the same as a missing key.
gym_data = []
for business in all_gyms:
    coordinates = business.get('coordinates') or {}
    location = business.get('location') or {}
    gym_data.append({
        'Name': business.get('name', ''),
        'Address': ', '.join(location.get('display_address') or []),
        'Ratings': business.get('rating', 0),
        'Latitude': coordinates.get('latitude') or 0,
        'Longitude': coordinates.get('longitude') or 0
    })

# Create a Pandas DataFrame
gym_df = pd.DataFrame(gym_data)
gym_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 351 entries, 0 to 350
Data columns (total 5 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   Name       351 non-null    object 
 1   Address    351 non-null    object 
 2   Ratings    351 non-null    float64
 3   Latitude   351 non-null    float64
 4   Longitude  351 non-null    float64
dtypes: float64(3), object(2)
memory usage: 13.8+ KB


In [11]:
# Build the gyms table: pick the raw columns and rename them to the snake_case
# schema in one pass (selection + rename both return new frames, so gym_df is
# left untouched)
gyms_df = (
    gym_df
    .loc[:, ['Name', 'Address', 'Ratings', 'Latitude', 'Longitude']]
    .rename(columns={
        'Name': 'gym_name',
        'Address': 'gym_address',
        'Ratings': 'gym_ratings',
        'Latitude': 'latitude',
        'Longitude': 'longitude',
    })
)

# Sequential 1-based gym_id, placed directly as the leading column
gyms_df.insert(0, 'gym_id', range(1, len(gyms_df) + 1))

print(gyms_df.head())

   gym_id              gym_name  \
0       1   Castle Hill Fitness   
1       2              Lift ATX   
2       3        Atomic Athlete   
3       4  FeV - Iron Vault Gym   
4       5           Big Tex Gym   

                                         gym_address  gym_ratings   latitude  \
0         1112 N Lamar Blvd, Ste B, Austin, TX 78703          4.5  30.276770   
1        1005 Springdale Rd, Ste 3, Austin, TX 78721          4.0  30.265355   
2               3907 Warehouse Row, Austin, TX 78704          5.0  30.222190   
3  2101 W Ben White Blvd, Unit 105, Austin, TX 78704          5.0  30.229705   
4     1921 Cedar Bend Dr, Ste A130, Austin, TX 78758          4.5  30.407541   

   longitude  
0 -97.752040  
1 -97.694444  
2 -97.756855  
3 -97.791346  
4 -97.697903  


In [12]:
# Persist the gyms table to CSV (row index omitted)
gyms_df.to_csv(path_or_buf="Resources/gyms.csv", index=False)

In [13]:
# Read the property table from its CSV export
property_df = pd.read_csv(filepath_or_buffer="Resources/property.csv")

In [14]:
# Junction table for property_gym: one row per (property, gym) pair whose
# great-circle distance is within the threshold.

# Distance threshold in kilometres (adjust as needed)
distance_threshold = 1.0

# Gym ids and coordinates are extracted once instead of re-reading the frame
# for every property.
gym_points = [
    (gym_row.gym_id, (gym_row.latitude, gym_row.longitude))
    for gym_row in gyms_df.itertuples(index=False)
]

# Matches are collected as plain records and the DataFrame is built once at the
# end; growing a DataFrame with pd.concat inside the loop copies all existing
# rows on every match and leaves the id columns with object dtype.
property_gym_records = []
for property_row in property_df.itertuples(index=False):
    property_location = (property_row.latitude, property_row.longitude)

    for gym_id, gym_location in gym_points:
        # Calculate distance between property and gym locations
        distance_km = great_circle(property_location, gym_location).kilometers

        # If the distance is within the threshold, create a relationship
        if distance_km <= distance_threshold:
            property_gym_records.append({
                'property_id': property_row.property_id,
                'gym_id': gym_id
            })

# Explicit columns keep the schema stable even when no pair matched
property_gym_df = pd.DataFrame(property_gym_records, columns=['property_id', 'gym_id'])

# Display resulting DF
print(property_gym_df)


     property_id gym_id
0              7    115
1              7    197
2              7    334
3              9    235
4              9    302
...          ...    ...
6252        8876    175
6253        8877    147
6254        8877    175
6255        8878    147
6256        8878    175

[6257 rows x 2 columns]


In [15]:
# Persist the property-gym junction table to CSV (row index omitted)
property_gym_df.to_csv(path_or_buf="Resources/property_gym.csv", index=False)