In [1]:
# Import dependencies
import pandas as pd
import numpy as np
import requests
pd.set_option('max_colwidth', 400)

In [2]:
# API key and URL
api_key = 'Your_API_Key'
api_url = 'https://api.yelp.com/v3/businesses/search'

# Params
params = {
    'term': 'restaurant',
    'location': 'Austin',
    'categories': 'restaurants',
    'limit': 50,
    'offset': 0
}

# Set up headers
headers = {
    'Authorization': api_key
}

# Empty list for results
all_restaurants = []
results_per_request = 50

while True:
    # API request
    response = requests.get(api_url, params=params, headers=headers)

    # If request successful
    if response.status_code == 200:
        data = response.json()
        businesses = data.get('businesses', [])
        all_restaurants.extend(businesses)

        # Check if more results
        if len(businesses) < results_per_request:
            break

        # Update offset for the next request
        params['offset'] += results_per_request

    else:
        print(f"Error: {response.status_code}")
        print(response.text)
        break

# Create list of dictionaries
restaurant_data = []
for business in all_restaurants:
    coordinates = business.get('coordinates', {})
    restaurant_data.append({
        'Name': business.get('name', ''),
        'Address': ', '.join(business.get('location', {}).get('display_address', [])),
        'Ratings': business.get('rating', 0),
        'Latitude': coordinates.get('latitude', 0),
        'Longitude': coordinates.get('longitude', 0)
    })

# Create a Pandas DataFrame
restaurant_df = pd.DataFrame(restaurant_data)
restaurant_df.info()


Error: 400
{"error": {"code": "VALIDATION_ERROR", "description": "Too many results requested, limit+offset must be <= 1000."}}
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 5 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   Name       1000 non-null   object 
 1   Address    1000 non-null   object 
 2   Ratings    1000 non-null   float64
 3   Latitude   1000 non-null   float64
 4   Longitude  1000 non-null   float64
dtypes: float64(3), object(2)
memory usage: 39.2+ KB


In [3]:
# Create DF
restaurants_df = restaurant_df[['Name', 'Address', 'Ratings', 'Latitude', 'Longitude']].copy()

# Rename columns
restaurants_df.columns = ['rest_name', 'rest_address', 'rest_ratings', 'latitude', 'longitude']

# Add new column for rest_id
restaurants_df['rest_id'] = range(1, len(restaurants_df) + 1)

# Reorder columns
restaurants_df = restaurants_df[['rest_id', 'rest_name', 'rest_address', 'rest_ratings', 'latitude', 'longitude']]
print(restaurants_df.head())

   rest_id                    rest_name  \
0        1                    Qi Austin   
1        2            1618 Asian Fusion   
2        3                 Aba - Austin   
3        4                       Anthem   
4        5  Moonshine Patio Bar & Grill   

                                                   rest_address  rest_ratings  \
0                   835 West 6th St, Unit 114, Austin, TX 78703           4.5   
1  1618 E Riverside Dr, AMLI South Shore Bldg, Austin, TX 78741           4.5   
2        1011 S Congress Ave, Bldg 2, Ste 180, Austin, TX 78704           4.5   
3                       91 Rainey St, Ste 120, Austin, TX 78701           4.5   
4                            303 Red River St, Austin, TX 78701           4.5   

    latitude  longitude  
0  30.270586 -97.752515  
1  30.245474 -97.730411  
2  30.253570 -97.748030  
3  30.260500 -97.737947  
4  30.263754 -97.738077  


In [4]:
# Export DF as CSV file
restaurants_df.to_csv("Resources/restaurants.csv", index=False)

In [5]:
# Load property data from CSV
property_df = pd.read_csv("Resources/property.csv")

In [7]:
from geopy.distance import great_circle

# Junction Table for property_restaurant
property_restaurant_df = pd.DataFrame(columns=['property_id', 'rest_id'])

# Find nearby locations and create relationships
for property_row in property_df.itertuples():
    property_location = (property_row.latitude, property_row.longitude)
    
    for restaurant_row in restaurants_df.itertuples():
        restaurant_location = (restaurant_row.latitude, restaurant_row.longitude)
        
        # Calculate distance between property and restaurant locations
        distance_km = great_circle(property_location, restaurant_location).kilometers
        
        distance_threshold = 1.0  
        
        # If the distance is within the threshold, create a relationship
        if distance_km <= distance_threshold:
            new_row = pd.DataFrame({
                'property_id': [property_row.property_id],
                'rest_id': [restaurant_row.rest_id]
            })
            property_restaurant_df = pd.concat([property_restaurant_df, new_row], ignore_index=True)

# Display the resulting DataFrame
print(property_restaurant_df)

      property_id rest_id
0             964       1
1             964       9
2             964      16
3             964      18
4             964      19
...           ...     ...
40163        8876     931
40164        8877     931
40165        8878     931
40166        8879     931
40167        8880     931

[40168 rows x 2 columns]


In [None]:
# Export junction table as CSV file
property_restaurant_df.to_csv("Resources/property_restaurant.csv", index=False)