In [1]:
# Import dependencies
import os

import numpy as np
import pandas as pd
import requests
# Show up to 400 characters per cell before pandas truncates the displayed text
pd.set_option('display.max_colwidth', 400)

In [2]:
# API key and URL
# The key is read from the YELP_API_KEY environment variable so the real
# credential never has to be saved in the notebook; the placeholder is only a
# fallback and will be rejected by the API.
api_key = os.environ.get('YELP_API_KEY', 'Your_API_Key')
api_url = 'https://api.yelp.com/v3/businesses/search'

# Params
# 'limit' is the page size and 'offset' the index of the first result of the
# page; the request loop advances 'offset' to walk through the result pages.
params = {
    'term': 'public transport',
    'location': 'Austin',
    'categories': 'publictransport',
    'limit': 50,
    'offset': 0
}

# Set up headers
# Yelp Fusion expects "Authorization: Bearer <key>". The scheme is added only
# when it is missing, so a key that already carries the prefix still works.
headers = {
    'Authorization': api_key if api_key.startswith('Bearer ') else f'Bearer {api_key}'
}

# Empty list for results
all_publictransport = []

# The page size is taken from the 'limit' request parameter so the
# "short page means last page" check below can never drift from what is
# actually requested.
results_per_request = params['limit']

# Seconds to wait for the API to answer. Without a timeout requests waits
# indefinitely on a stalled connection and the cell would never finish.
request_timeout = 30

while True:
    # API request
    response = requests.get(api_url, params=params, headers=headers, timeout=request_timeout)

    # Any non-200 answer ends the download; show what the API reported
    if response.status_code != 200:
        print(f"Error: {response.status_code}")
        print(response.text)
        break

    # If request successful, keep this page of businesses
    businesses = response.json().get('businesses', [])
    all_publictransport.extend(businesses)

    # A page shorter than the page size (including an empty one) is the last
    if len(businesses) < results_per_request:
        break

    # Update offset for the next request
    params['offset'] += results_per_request

# Columns of the resulting table. They are passed to the DataFrame constructor
# so the columns exist even when no businesses were collected; otherwise an
# empty result would produce a column-less frame and later column selection
# would raise KeyError.
publictransport_columns = ['Name', 'Address', 'Ratings', 'Latitude', 'Longitude']

# Create list of dictionaries
publictransport_data = []
for business in all_publictransport:
    # `.get(key, {})` only falls back when the key is absent. `or {}` also
    # covers a key that is present with a null value, so the nested lookups
    # below cannot fail with AttributeError on None.
    coordinates = business.get('coordinates') or {}
    location = business.get('location') or {}
    publictransport_data.append({
        'Name': business.get('name', ''),
        'Address': ', '.join(location.get('display_address') or []),
        'Ratings': business.get('rating', 0),
        'Latitude': coordinates.get('latitude', 0),
        'Longitude': coordinates.get('longitude', 0)
    })

# Create a Pandas DataFrame
publictransport_df = pd.DataFrame(publictransport_data, columns=publictransport_columns)
publictransport_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 18 entries, 0 to 17
Data columns (total 5 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   Name       18 non-null     object 
 1   Address    18 non-null     object 
 2   Ratings    18 non-null     float64
 3   Latitude   18 non-null     float64
 4   Longitude  18 non-null     float64
dtypes: float64(3), object(2)
memory usage: 848.0+ bytes


In [3]:
# Source column -> name used in the exported public transport table
column_renames = {
    'Name': 'publictransport_name',
    'Address': 'publictransport_address',
    'Ratings': 'publictransport_ratings',
    'Latitude': 'latitude',
    'Longitude': 'longitude',
}

# Select the source columns and rename them; rename returns a new frame, so
# publictransport_df itself is left untouched
publictransports_df = publictransport_df[list(column_renames)].rename(columns=column_renames)

# Place a 1-based sequential publictransport_id in the first column
publictransports_df.insert(0, 'publictransport_id', range(1, len(publictransports_df) + 1))

print(publictransports_df.head())

   publictransport_id         publictransport_name  \
0                   1                Capital Metro   
1                   2   North Lamar Transit Center   
2                   3  MetroRail - Leander Station   
3                   4             Metrobike Austin   
4                   5   Highland Metrorail Station   

                            publictransport_address  publictransport_ratings  \
0           209 W 9th St, Ste 100, Austin, TX 78701                      2.0   
1  N Lamar Blvd And Research Blvd, Austin, TX 78758                      2.0   
2               800 N US Hwy 183, Leander, TX 78641                      5.0   
3         1000 Brazos St, Ste 100, Austin, TX 78701                      2.0   
4               6400 Airport Blvd, Austin, TX 78752                      3.0   

    latitude  longitude  
0  30.271392 -97.744063  
1  30.348380 -97.712510  
2  30.587690 -97.855900  
3  30.271542 -97.740194  
4  30.328699 -97.716183  


In [4]:
# Write the public transport table to CSV; index=False leaves the positional
# row index out of the file
publictransports_csv_path = "Resources/publictransports.csv"
publictransports_df.to_csv(publictransports_csv_path, index=False)


In [5]:
# Read the property table from its CSV file
property_csv_path = "Resources/property.csv"
property_df = pd.read_csv(property_csv_path)

In [6]:
from geopy.distance import great_circle

# Junction Table for property_public_transport: one row per
# (property, public transport location) pair that lies within the threshold.

# Maximum great-circle distance, in kilometres, for a public transport
# location to be linked to a property (adjust as needed)
distance_threshold = 1.0

# The public transport coordinates do not change between properties, so they
# are extracted once instead of on every pass of the outer loop.
pt_locations = [
    (pt_row.publictransport_id, (pt_row.latitude, pt_row.longitude))
    for pt_row in publictransports_df.itertuples(index=False)
]

# Matches are collected as plain records and turned into a DataFrame once.
# Growing a DataFrame with pd.concat inside the loop copies every existing row
# on each match and relies on concatenating onto an empty frame, which pandas
# has deprecated.
junction_records = []

# Find nearby locations and create relationships
for property_row in property_df.itertuples(index=False):
    property_location = (property_row.latitude, property_row.longitude)

    for publictransport_id, pt_location in pt_locations:
        # Calculate distance between property and public transport locations
        distance_km = great_circle(property_location, pt_location).kilometers

        # If the distance is within the threshold, create a relationship
        if distance_km <= distance_threshold:
            junction_records.append({
                'property_id': property_row.property_id,
                'publictransport_id': publictransport_id
            })

# Passing the columns explicitly keeps both of them present when nothing matched
property_public_transport_df = pd.DataFrame(
    junction_records, columns=['property_id', 'publictransport_id']
)

# Display the resulting DataFrame
print(property_public_transport_df)


    property_id publictransport_id
0           964                  1
1           964                  4
2           964                 14
3           964                 15
4           964                 16
..          ...                ...
390        8738                  2
391        8740                  2
392        8800                  2
393        8807                  2
394        8844                  2

[395 rows x 2 columns]


In [7]:
# Write the property / public transport junction table to CSV; index=False
# leaves the positional row index out of the file
junction_csv_path = "Resources/property_publictransport.csv"
property_public_transport_df.to_csv(junction_csv_path, index=False)