In [9]:
from pyproj import Proj, Geod

# Initialize the SVY21 projection
svy21_proj = Proj(init='EPSG:3414')  # EPSG code for SVY21

# Coordinates in SVY21
# x1, y1 = 28001.642, 38744.572  # Point 1
# x2, y2 = 28001.993, 38758.303  # Point 2

# Create a geodesic object to calculate distance
geod = Geod(ellps='WGS84')

# Convert SVY21 coordinates to latitude and longitude
def calculate_distance(x1, y1, lon2, lat2):
    lon1, lat1 = svy21_proj(x1, y1, inverse=True)
    # lon2, lat2 = svy21_proj(x2, y2, inverse=True)

    # Calculate distance
    angle1, angle2, distance = geod.inv(lon1, lat1, lon2, lat2)

    # print(f"Distance between the two points is: {distance:.2f} meters")
    return distance

  in_crs_string = _prepare_from_proj_string(in_crs_string)


In [16]:
from google.cloud import bigquery
from google.oauth2 import service_account
import pandas as pd
import db_dtypes

# Provide the path to the service account key file
credentials = service_account.Credentials.from_service_account_file(
    'key.json')

# Create a BigQuery client using the credentials
client = bigquery.Client(credentials=credentials, project=credentials.project_id)
# Define the query
rental_query = """
SELECT *
FROM `rental-price-prediction.ura_data.rental_contracts`
"""

bus_query = """
SELECT *
FROM `rental-price-prediction.ura_data.bus_stops`
"""

# Run the query and get a DataFrame
query_job = client.query(rental_query)  # API request
results = query_job.result()  # Waits for the query to finish

# print(results)
# Convert to a DataFrame
rental_df = results.to_dataframe()
print('finished rental query')

# Run the query and get a DataFrame
query_job2 = client.query(bus_query)  # API request
results2 = query_job2.result()  # Waits for the query to finish


# Convert to a DataFrame
bus_df = results2.to_dataframe()
print('finished bus query')

# Print the DataFrame
print(rental_df[:5])
print(bus_df[:5])




finished rental query
finished bus query
                  leaseDate           propertyType district  noOfBedRoom  \
0 2022-04-01 00:00:00+00:00  Non-landed Properties       01           -1   
1 2021-01-01 00:00:00+00:00  Non-landed Properties       01            2   
2 2021-01-01 00:00:00+00:00  Non-landed Properties       01            2   
3 2021-01-01 00:00:00+00:00  Non-landed Properties       01            2   
4 2021-01-01 00:00:00+00:00  Non-landed Properties       01            1   

     rent           street            x            y    project refPeriod  \
0  4358.0     GEMMILL LANE  29541.24939  29443.98402    THE BOX    2022Q2   
1  4000.0  MCCALLUM STREET  29565.49115  29089.90010  THE CLIFT    2021Q1   
2  3700.0  MCCALLUM STREET  29565.49115  29089.90010  THE CLIFT    2021Q1   
3  4350.0  MCCALLUM STREET  29565.49115  29089.90010  THE CLIFT    2021Q1   
4  2800.0  MCCALLUM STREET  29565.49115  29089.90010  THE CLIFT    2021Q1   

   areaSqft_formatted  leaseYear  lease

In [28]:
# caluclate distance between bus stops and rental contracts
distance_dict = {}
for index, row in rental_df.iterrows():
    # print(f"Processing rental contract {index} x: {row['x']} y: {row['y']}")
    num_bus_stops = 0
    # bus_stop_ids = []
    x1, y1 = row['x'], row['y']
    # check if x,y has been calculated before
    if (x1, y1) in distance_dict:
        num_bus_stops = distance_dict[(x1, y1)]
        # print(distance_dict[(x1, y1)])
    else:
        for index2, row2 in bus_df.iterrows():
            lon2, lat2 = row2['Longitude'], row2['Latitude']
            distance = calculate_distance(x1, y1, lon2, lat2)
            if distance < 1000: # if the distance is less than 1km
                num_bus_stops += 1
                # bus_stop_ids.append(row2['BusStopID'])
        
        # save calculated result in table for faster access
        distance_dict[(x1, y1)] = num_bus_stops
    # add a new column to the rental_df with the distance between the bus stop and the rental contract
    rental_df.at[index, 'num_bus_stops'] = num_bus_stops
    # print(f"Number of bus stops within 1km of rental contract {index}: {num_bus_stops}")
print(rental_df{:5})


Number of bus stops within 1km of rental contract 0: 102
Number of bus stops within 1km of rental contract 1: 95
Number of bus stops within 1km of rental contract 2: 95
Number of bus stops within 1km of rental contract 3: 95
Number of bus stops within 1km of rental contract 4: 95
Number of bus stops within 1km of rental contract 5: 95
Number of bus stops within 1km of rental contract 6: 95
Number of bus stops within 1km of rental contract 7: 95
Number of bus stops within 1km of rental contract 8: 95
Number of bus stops within 1km of rental contract 9: 95
Number of bus stops within 1km of rental contract 10: 95
Number of bus stops within 1km of rental contract 11: 95
Number of bus stops within 1km of rental contract 12: 95
Number of bus stops within 1km of rental contract 13: 95
Number of bus stops within 1km of rental contract 14: 95
Number of bus stops within 1km of rental contract 15: 95
Number of bus stops within 1km of rental contract 16: 95
Number of bus stops within 1km of rental

In [None]:
# create a new table with the bus stop
rental_df.to_gbq('ura_data.rental_contracts_with_num_bus_stops', project_id='rental-price-prediction', if_exists='replace', credentials=credentials)
print('finished saving to bigquery')
