This notebook calculates the proximity of each address to key entertainment areas and public transport stops. The distances of interest are the distances to:
- tram stops
- bus stops 
- train stops 
- the CBD 
- Shopping Centres

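These distances are calculated as planar (Euclidean) distances between point geometries using GeoPandas, so they are expressed in the units of the coordinates (degrees for longitude/latitude) rather than in metres.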

In [20]:
import pandas as pd
import geopandas as gpd 
import shapely

In [21]:
# Every layer is brought into one CRS so that the address points and the
# stops are expressed in the same longitude/latitude coordinates.
TARGET_CRS = "EPSG:4326"


def _load_stops(path, target_crs=TARGET_CRS):
    """Read a stop layer from ``path`` and return it in ``target_crs``.

    ``crs`` is not a documented ``geopandas.read_file`` argument (extra
    keywords are forwarded to the I/O engine), so the CRS is handled
    explicitly here instead of being passed to the reader.

    Args:
        path: location of the shapefile to read.
        target_crs: CRS the returned layer should be in.

    Returns:
        The layer read from ``path``, carrying ``target_crs``.
    """
    stops = gpd.read_file(path)
    if stops.crs is None:
        # No CRS metadata on disk: label the layer rather than reproject it.
        # NOTE(review): this assumes the coordinates are already in
        # target_crs, as the original read call asserted — confirm.
        return stops.set_crs(target_crs)
    return stops.to_crs(target_crs)


addresses = pd.read_parquet('data/landing/preprocessed_rent_data.parquet')
bus_stops = _load_stops('data/raw/PTV_METRO_BUS_STOP.shp')
tram_stops = _load_stops('data/raw/PTV_METRO_TRAM_STOP.shp')
train_stops = _load_stops('data/raw/PTV_METRO_TRAIN_STATION.shp')

# Work on an explicit copy: columns are added to this frame later, and
# `addresses` should stay untouched.
coords = addresses[['latitude', 'longitude']].copy()

# Point geometries take x = longitude, y = latitude; the CRS is declared so
# the address points are not left as CRS-less ("naive") geometries.
gdf_coords = gpd.GeoDataFrame(
    coords,
    geometry=gpd.points_from_xy(coords['longitude'], coords['latitude']),
    crs=TARGET_CRS,
)

In [22]:
# Inspect the bus-stop layer: printing the frame shows a truncated preview
# of its rows followed by its overall shape.
print(bus_stops)

                          geometry
0      POINT (145.28815 -37.78092)
1      POINT (145.29284 -37.77463)
2      POINT (145.29546 -37.77045)
3      POINT (145.30178 -37.76635)
4      POINT (145.30441 -37.76528)
...                            ...
18554  POINT (144.82012 -37.79465)
18555  POINT (144.81745 -37.79491)
18556  POINT (144.81713 -37.79471)
18557  POINT (144.81189 -37.79470)
18558  POINT (144.93327 -37.76341)

[18559 rows x 1 columns]


In [23]:
# Preview the first ten address rows together with the point geometry built
# from their longitude/latitude.
gdf_coords.head(10)

Unnamed: 0,latitude,longitude,geometry
0,-37.847967,144.97764,POINT (144.97764 -37.84797)
1,-37.812575,144.985854,POINT (144.98585 -37.81258)
2,-38.373284,144.817655,POINT (144.81766 -38.37328)
3,-37.839405,144.976224,POINT (144.97622 -37.83940)
4,-37.816637,144.977522,POINT (144.97752 -37.81664)
5,-37.815858,144.972028,POINT (144.97203 -37.81586)
6,-37.76077,144.961728,POINT (144.96173 -37.76077)
7,-37.76077,144.961728,POINT (144.96173 -37.76077)
8,-37.760855,144.961691,POINT (144.96169 -37.76085)
9,-37.812679,144.962732,POINT (144.96273 -37.81268)


In [24]:
# Distance from each address point to the closest stop of a given transport layer
def calculate_distances(gdf_coords, transport):
    """Return, for every address point, the distance to its nearest stop.

    Args:
        gdf_coords: frame whose 'geometry' column holds one point per address.
        transport: layer of stops (bus, train, tram, ...); its ``geometry``
            must provide ``distance(point)``.

    Returns:
        A list with one value per entry of ``gdf_coords['geometry']``, in the
        same order: the minimum of the distances from that point to all stops.

    Note:
        The values are whatever ``transport.geometry.distance`` yields, i.e.
        they are in the units of the geometries' coordinates.
        NOTE(review): for longitude/latitude geometries that means degrees,
        not metres — confirm this is what downstream code expects.
    """
    stop_geometries = transport.geometry
    return [
        stop_geometries.distance(address_point).min()  # closest stop only
        for address_point in gdf_coords['geometry']
    ]

In [25]:
# Add one column per transport mode holding the distance from each address
# to its nearest bus stop, train station and tram stop.
# Each entry: (output column, stop layer, message printed once that layer is
# finished — it announces the stage that follows).
distance_steps = [
    ('distance_to_bus_stop', bus_stops, 'doing train stations now'),
    ('distance_to_train_stop', train_stops, 'doing tram stops now'),
    ('distance_to_tram_stop', tram_stops, 'done!'),
]
for column_name, stop_layer, progress_message in distance_steps:
    gdf_coords[column_name] = calculate_distances(gdf_coords, stop_layer)
    print(progress_message)

# Keep only the non-geometry columns, as a plain pandas DataFrame
df_with_distances = pd.DataFrame(gdf_coords.drop(columns=['geometry']))

# Persist the coordinates and their distances as a parquet file
df_with_distances.to_parquet('data/landing/preprocessed_rent_distances_data.parquet')

doing train stations now
doing tram stops now
done!
