In [1]:
# Import dependencies
import os

import numpy as np
import pandas as pd
import requests

In [2]:
# Fetch every grocery business Yelp returns for Austin, one page at a time,
# then flatten the JSON records into a DataFrame (grocery_df).

# API key and URL
# The key is read from the YELP_API_KEY environment variable so the real
# credential never has to be saved in the notebook; the placeholder is kept
# as a fallback for readers who prefer to paste a key in by hand.
api_key = os.environ.get('YELP_API_KEY', 'Your_API_Key')
api_url = 'https://api.yelp.com/v3/businesses/search'

# Page size, shared by the request params and the "last page" check below
results_per_request = 50

# Params
params = {
    'term': 'grocery',
    'location': 'Austin',
    'categories': 'grocery',
    'limit': results_per_request,
    'offset': 0
}

# Set up headers
# Yelp Fusion authenticates with "Authorization: Bearer <key>". The scheme is
# only prepended when the key does not already carry it, so a key stored as
# "Bearer ..." keeps working.
headers = {
    'Authorization': api_key if api_key.startswith('Bearer ') else f'Bearer {api_key}'
}

# Empty list for results
all_groceries = []

while True:
    # API request; the timeout keeps a stalled connection from hanging the cell
    response = requests.get(api_url, params=params, headers=headers, timeout=30)

    # Any non-200 answer (bad key, rate limit, offset past the API's cap)
    # ends the paging loop; whatever was collected so far is kept.
    if response.status_code != 200:
        print(f"Error: {response.status_code}")
        print(response.text)
        break

    data = response.json()
    businesses = data.get('businesses', [])
    all_groceries.extend(businesses)

    # A short page means there is nothing left to fetch
    if len(businesses) < results_per_request:
        break

    # Update offset for the next request
    params['offset'] += results_per_request

# Create list of dictionaries
# `x.get(key) or default` is used instead of `x.get(key, default)` for the
# nested fields so that an explicit JSON null falls back to the same default
# as a missing key.
grocery_data = []
for business in all_groceries:
    coordinates = business.get('coordinates') or {}
    location = business.get('location') or {}
    grocery_data.append({
        'Name': business.get('name', ''),
        'Address': ', '.join(location.get('display_address') or []),
        'Ratings': business.get('rating', 0),
        'Latitude': coordinates.get('latitude') or 0,
        'Longitude': coordinates.get('longitude') or 0
    })

# Create a Pandas DataFrame
grocery_df = pd.DataFrame(grocery_data)
grocery_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 346 entries, 0 to 345
Data columns (total 5 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   Name       346 non-null    object 
 1   Address    346 non-null    object 
 2   Ratings    346 non-null    float64
 3   Latitude   346 non-null    float64
 4   Longitude  346 non-null    float64
dtypes: float64(3), object(2)
memory usage: 13.6+ KB


In [3]:
# Build the export-ready grocery table from grocery_df:
# pick the five source columns and give them their snake_case names.
column_map = {
    'Name': 'grocery_name',
    'Address': 'grocery_address',
    'Ratings': 'grocery_ratings',
    'Latitude': 'latitude',
    'Longitude': 'longitude',
}
groceries_df = grocery_df.loc[:, list(column_map)].rename(columns=column_map)

# Sequential 1-based identifier, placed as the first column
groceries_df.insert(0, 'grocery_id', range(1, len(groceries_df) + 1))

print(groceries_df.head())

   grocery_id                  grocery_name  \
0           1                         H-E-B   
1           2                    TinyGrocer   
2           3  Central Market - North Lamar   
3           4                 ATX Food Mart   
4           5                  Dia's Market   

                         grocery_address  grocery_ratings   latitude  \
0       1000 E 41st St, Austin, TX 78751              3.5  30.300643   
1  1718 S Congress Ave, Austin, TX 78704              5.0  30.246388   
2    4001 N Lamar Blvd, Austin, TX 78756              4.0  30.307620   
3  3002 S Congress Ave, Austin, TX 78704              5.0  30.232479   
4        812 Justin Ln, Austin, TX 78757              4.5  30.337602   

   longitude  
0 -97.719963  
1 -97.751148  
2 -97.739877  
3 -97.758972  
4 -97.720560  


In [4]:
# Write the grocery table to disk as CSV, leaving out the DataFrame index
groceries_df.to_csv(path_or_buf="Resources/groceries.csv", index=False)


In [5]:
# Read the property table from its CSV file into property_df
property_df = pd.read_csv(filepath_or_buffer="Resources/property.csv")

In [8]:
from geopy.distance import great_circle

# Junction Table for property_grocery
# Pairs every property with every grocery whose great-circle distance from it
# is at most `distance_threshold` kilometres.

# Define a distance threshold in kilometres (adjust as needed)
distance_threshold = 1.0

# The grocery ids and coordinates do not change between properties, so they
# are extracted once here instead of on every pass of the inner loop.
grocery_locations = [
    (grocery_row.grocery_id, (grocery_row.latitude, grocery_row.longitude))
    for grocery_row in groceries_df.itertuples()
]

# Matches are gathered in a plain list and turned into a DataFrame once at the
# end; concatenating a one-row frame per match re-copies the whole table each
# time and gets slower as the table grows.
property_grocery_pairs = []

# Find nearby locations and create relationships
for property_row in property_df.itertuples():
    property_location = (property_row.latitude, property_row.longitude)

    for grocery_id, grocery_location in grocery_locations:
        # Calculate distance between property and grocery locations
        distance_km = great_circle(property_location, grocery_location).kilometers

        # If the distance is within the threshold, create a relationship
        if distance_km <= distance_threshold:
            property_grocery_pairs.append((property_row.property_id, grocery_id))

# `columns=` keeps both column labels even when no pair matched
property_grocery_df = pd.DataFrame(
    property_grocery_pairs, columns=['property_id', 'grocery_id']
)

# Display the resulting DataFrame
print(property_grocery_df)


     property_id grocery_id
0              9        141
1              9        239
2             10        141
3             10        239
4             33        101
...          ...        ...
7043        8877        269
7044        8878        241
7045        8878        269
7046        8879        269
7047        8880        269

[7048 rows x 2 columns]


In [10]:
# Write the property/grocery junction table to disk as CSV, leaving out the index
property_grocery_df.to_csv(path_or_buf="Resources/property_grocery.csv", index=False)