In [1]:
import osmnx as ox
import geopandas as gpd
import matplotlib.pyplot as plt
from folium import Map, CircleMarker, LayerControl
import pyspark.sql.functions as F
from shapely.geometry import Point, Polygon
from shapely import wkt
from pyspark.sql.types import FloatType
import pandas as pd

# Step 1: Define the region (Victoria, Australia)
place_name = "Victoria, Australia"

# Step 2: Download amenities (hospitals) and shops (supermarkets)
# Hospital-like amenities
hospitals = ox.geometries_from_place(place_name, tags={"amenity": ["hospital", "clinic", "doctors"]})

# Supermarkets and related shops
supermarkets = ox.geometries_from_place(place_name, tags={"shop": ["supermarket", "grocery"]})

#malls
malls = ox.geometries_from_place(place_name, tags={"shop": ["mall"]})








  hospitals = ox.geometries_from_place(place_name, tags={"amenity": ["hospital", "clinic", "doctors"]})
  multi_poly_proj = utils_geo._consolidate_subdivide_geometry(poly_proj)
  supermarkets = ox.geometries_from_place(place_name, tags={"shop": ["supermarket", "grocery"]})
  multi_poly_proj = utils_geo._consolidate_subdivide_geometry(poly_proj)
  malls = ox.geometries_from_place(place_name, tags={"shop": ["mall"]})
  multi_poly_proj = utils_geo._consolidate_subdivide_geometry(poly_proj)


In [2]:
# Parks
parks_and_gardens = ox.geometries_from_place(place_name, tags={"leisure": ["park", "garden", "nature_reserve"]})

  parks_and_gardens = ox.geometries_from_place(place_name, tags={"leisure": ["park", "garden", "nature_reserve"]})
  multi_poly_proj = utils_geo._consolidate_subdivide_geometry(poly_proj)


Make sure to filter out Nan as that is the name of some locations

In [3]:
# getting everything to be a geodataframe to compare with listings

hospitals_gdf = gpd.GeoDataFrame(hospitals,geometry="geometry")

supermarkets_gdf = gpd.GeoDataFrame(supermarkets, geometry="geometry")

parks_and_gardens_gdf = gpd.GeoDataFrame(parks_and_gardens,geometry="geometry") 

mall_gdf = gpd.GeoDataFrame(malls,geometry="geometry")


In [4]:
# Data Validation and Data Cleaning 

# drop NA 
hospitals_gdf.dropna(subset=['geometry'])
supermarkets_gdf.dropna(subset=['geometry'])
parks_and_gardens_gdf.dropna(subset=['geometry'])
mall_gdf.dropna(subset=["geometry"])


# Getting Listing Data 
listings_df =pd.read_parquet('/Users/rchrdha/Documents/GitHub/project-2-group-real-estate-industry-project-34/data/curated/preprocessed_rent_data.parquet', engine='pyarrow')

listings_gdf = gpd.GeoDataFrame(
    listings_df,
    geometry = gpd.points_from_xy(listings_df.longitude, listings_df.latitude),
    crs='EPSG:3857'
)

listings_gdf_drop = listings_gdf.drop(columns=['propertyTypes'])


In [6]:
listings_gdf_drop = listings_gdf_drop.to_crs("EPSG:3857")


# nearest hospital to each listing and cleaning the resulting data set 
nearest_hospital = gpd.sjoin_nearest(listings_gdf_drop, hospitals_gdf, how="left", distance_col="distance")

nearest_hospital['hospital_geometry'] = nearest_hospital.apply(
    lambda row: hospitals_gdf.loc[(row['index_right0'], row['index_right1']), 'geometry'], axis=1
)

col_to_keep_hospitals = ['address_left','state','suburb','bedrooms','bathrooms','carspaces','date_listed','latitude','longitude','is_new_development','price',
'propertyId','is_furnished','year','month','day','SA2_CODE21','geometry','distance','hospital_geometry']

nearest_hospital_cleaned = nearest_hospital[col_to_keep_hospitals]
nearest_hospital_cleaned.rename(columns={'distance': 'distance_to_hospital'}, inplace=True)
nearest_hospital_cleaned.to_csv('nearest_hospitals_cleaned.csv', index=False)



Use `to_crs()` to reproject one of the input geometries to match the CRS of the other.

Left CRS: EPSG:3857
Right CRS: EPSG:4326

  nearest_hospital = gpd.sjoin_nearest(listings_gdf_drop, hospitals_gdf, how="left", distance_col="distance")

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  nearest_hospital_cleaned.rename(columns={'distance': 'distance_to_hospital'}, inplace=True)


In [7]:
# Nearest Supermarket to each listing and cleaning the data up 

nearest_market = gpd.sjoin_nearest(listings_gdf_drop, supermarkets_gdf,how="left", distance_col="distance")

nearest_market['supermarket_geometry'] = nearest_market.apply(
    lambda row: supermarkets_gdf.loc[(row['index_right0'], row['index_right1']), 'geometry'], axis=1
)

col_to_keep_market = ['address','state','suburb','bedrooms','bathrooms','carspaces','date_listed','latitude','longitude','is_new_development','price',
'propertyId','is_furnished','year','month','day','SA2_CODE21','geometry','distance','supermarket_geometry']

nearest_market_cleaned = nearest_market[col_to_keep_market]
nearest_market_cleaned.rename(columns={'distance': 'distance_to_supermarket'}, inplace=True)
nearest_market_cleaned.to_csv("nearest_market.csv",index=False)


Use `to_crs()` to reproject one of the input geometries to match the CRS of the other.

Left CRS: EPSG:3857
Right CRS: EPSG:4326

  nearest_market = gpd.sjoin_nearest(listings_gdf_drop, supermarkets_gdf,how="left", distance_col="distance")

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  nearest_market_cleaned.rename(columns={'distance': 'distance_to_supermarket'}, inplace=True)


In [8]:
# Nearest Parks + Gardens + cleaning the data up 
nearest_parks = gpd.sjoin_nearest(listings_gdf_drop, parks_and_gardens_gdf,how="left",distance_col="distance")

nearest_parks['parks_geometry'] = nearest_parks.apply(
    lambda row: parks_and_gardens_gdf.loc[(row['index_right0'], row['index_right1']), 'geometry'], axis=1
)

col_to_keep_parks = ['address','state','suburb','bedrooms','bathrooms','carspaces','date_listed','latitude','longitude','is_new_development','price',
'propertyId','is_furnished','year','month','day','SA2_CODE21','geometry','distance','parks_geometry']

nearest_parks_cleaned = nearest_parks[col_to_keep_parks]
nearest_parks_cleaned.rename(columns={'distance': 'distance_to_park'}, inplace=True)

nearest_parks_cleaned.to_csv("nearest_parks.csv",index=False)

Use `to_crs()` to reproject one of the input geometries to match the CRS of the other.

Left CRS: EPSG:3857
Right CRS: EPSG:4326

  nearest_parks = gpd.sjoin_nearest(listings_gdf_drop, parks_and_gardens_gdf,how="left",distance_col="distance")

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  nearest_parks_cleaned.rename(columns={'distance': 'distance_to_park'}, inplace=True)


In [9]:
# nearest mall + cleaning the data up

nearest_mall = gpd.sjoin_nearest(listings_gdf_drop, mall_gdf,how="left", distance_col="distance")

nearest_mall['mall_geometry'] = nearest_mall.apply(
    lambda row: mall_gdf.loc[(row['index_right0'], row['index_right1']), 'geometry'], axis=1
)

col_to_keep_mall = ['address','state','suburb','bedrooms','bathrooms','carspaces','date_listed','latitude','longitude','is_new_development','price',
'propertyId','is_furnished','year','month','day','SA2_CODE21','geometry','distance','mall_geometry']

nearest_mall_cleaned = nearest_mall[col_to_keep_mall]
nearest_mall_cleaned.rename(columns={'distance': 'distance_to_mall'}, inplace=True)
nearest_mall_cleaned.to_csv("nearest_mall.csv",index=False)

Use `to_crs()` to reproject one of the input geometries to match the CRS of the other.

Left CRS: EPSG:3857
Right CRS: EPSG:4326

  nearest_mall = gpd.sjoin_nearest(listings_gdf_drop, mall_gdf,how="left", distance_col="distance")

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  nearest_mall_cleaned.rename(columns={'distance': 'distance_to_mall'}, inplace=True)


In [10]:
# combining all the nearest amenity into one dataframe

listings_gdf_with_nearest_amenity = listings_gdf_drop.copy()

hospital_to_add = nearest_hospital_cleaned[['distance_to_hospital','hospital_geometry']]
supermarket_to_add = nearest_market_cleaned[['distance_to_supermarket','supermarket_geometry']]
parks_to_add = nearest_parks_cleaned[['distance_to_park','parks_geometry']]
malls_to_add = nearest_mall_cleaned[['distance_to_mall','mall_geometry']]

listings_gdf_with_nearest_amenity = listings_gdf_with_nearest_amenity.join(hospital_to_add)
listings_gdf_with_nearest_amenity = listings_gdf_with_nearest_amenity.join(supermarket_to_add)
listings_gdf_with_nearest_amenity = listings_gdf_with_nearest_amenity.join(parks_to_add)
listings_gdf_with_nearest_amenity = listings_gdf_with_nearest_amenity.join(malls_to_add)

listings_gdf_with_nearest_amenity.to_csv("/Users/rchrdha/Documents/GitHub/project-2-group-real-estate-industry-project-34/data/curated/listing_with_nearest_amenity.csv",index=False)

In [11]:
# Step 3: Subsample 1000 of each
hospitals_sample = hospitals.sample(1000)
supermarkets_sample = supermarkets_gdf.sample(1000)

# Step 4: Convert to GeoDataFrame if needed (sometimes the data is already a GeoDataFrame)
hospitals_gdf = gpd.GeoDataFrame(hospitals_sample, geometry="geometry")
supermarkets_gdf = gpd.GeoDataFrame(supermarkets_sample, geometry="geometry")

In [12]:
### distance to CBD calculation 
### choose Flinders Street Station 

shp_path = "/Users/rchrdha/Documents/GitHub/project-2-group-real-estate-industry-project-34/data/raw/PTV_METRO_TRAIN_STATION.shx"
train_gdf = gpd.read_file(shp_path)

FLINDERS_GEOM = train_gdf.loc[train_gdf['STOP_NAME'] == 'Flinders Street Railway Station (Melbourne City)']


# calc distance from listing geometry to flinders street station geometry EUCLIDEAN DISTANCE 

def dist_CBD(listing, FLINDERS_GEOM):
    return listing.distance(FLINDERS_GEOM['geometry'])

dist_to_CBD = listings_gdf_with_nearest_amenity['geometry'].apply(lambda listing: dist_CBD(listing, FLINDERS_GEOM))

listings_gdf_with_nearest_amenity['distance_to_CBD'] = dist_to_CBD





In [13]:
# adding nearest schools into the dataset 

nearest_schools = pd.read_csv("/Users/rchrdha/Documents/GitHub/project-2-group-real-estate-industry-project-34/data/curated/nearest_school_to_listing.csv")
nearest_schools.head()

Unnamed: 0,address,state,suburb,bedrooms,bathrooms,propertyTypes,carspaces,date_listed,latitude,longitude,...,SA2_CODE21,geometry,Education_Sector,School_Name,Address_Line_1,Address_Town,Address_Postcode,X,Y,geometry_nearest
0,"1201/55 Queens Road, Melbourne VIC 3000",vic,Melbourne,2.0,2.0,['apartmentUnitFlat'],2.0,2009-05-08,-37.847967,144.97764,...,206051128.0,POINT (144.97764 -37.847967),Independent,Wesley College,577 St Kilda Road,MELBOURNE,3004,144.98214,-37.84883,POINT (144.98214 -37.84883)
1,"211/G04K Powlett Street, East Melbourne VIC 3002",vic,East Melbourne,3.0,2.0,['apartmentUnitFlat'],1.0,2009-05-08,-37.812575,144.985854,...,206041119.0,POINT (144.985854 -37.812575),Government,Collingwood College,Cnr Cromwell St/McCutcheon Way,Collingwood,3066,144.9905,-37.80298,POINT (144.9905 -37.80298)
2,Rye VIC 3941,vic,Rye,3.0,2.0,['house'],4.0,2009-05-08,-38.373284,144.817655,...,214021383.0,POINT (144.8176554 -38.3732839),Government,Rosebud Secondary College,245 Eastbourne Road,Rosebud,3939,144.88745,-38.36762,POINT (144.88745 -38.36762)
3,"1204/454 St Kilda Road, Melbourne St Kilda Roa...",vic,Melbourne St Kilda Road,3.0,3.0,['apartmentUnitFlat'],2.0,2009-05-11,-37.839405,144.976224,...,206051128.0,POINT (144.976224 -37.839405),Government,MacRobertson Girls High School,350-370 Kings Way,Melbourne,3004,144.97186,-37.83589,POINT (144.97186 -37.83589)
4,"211 Wellington Pde Sth, East Melbourne VIC 3002",vic,East Melbourne,3.0,3.0,['apartmentUnitFlat'],2.0,2009-05-11,-37.816637,144.977522,...,206041119.0,POINT (144.977522 -37.816637),Independent,Holmes Grammar School,185 Spring Street,MELBOURNE,3000,144.97227,-37.8103,POINT (144.97227 -37.8103)


In [1]:
# combining schools data with other amenity data into one dataframe 


school_data = pd.read_csv("/Users/rchrdha/Documents/GitHub/project-2-group-real-estate-industry-project-34/data/curated/listing_with_school.csv")

school_dataX = school_data[['geometry_nearest','dist_to_near_school']]

listings_gdf_with_nearest_amenity= listings_gdf_with_nearest_amenity.join(school_dataX)

listings_gdf_with_nearest_amenity.rename(columns={'geometry_nearest': 'school_geometry','dist_to_near_school':'distance_to_school'}, inplace=True)

listings_gdf_with_nearest_amenity.to_csv("/Users/rchrdha/Documents/GitHub/project-2-group-real-estate-industry-project-34/data/curated/listings_amenity_final.csv",index=False)

NameError: name 'pd' is not defined

In [25]:
### comparison of euclidean, manhattan and ORS distances 
### checking if the distribution of "closest" and "furthest" remain the same 


# Manhattan Distances 
def manhattan_dist(geom1, geom2):
    if isinstance(geom1, str):  # If geometry is in WKT format
        geom1 = wkt.loads(geom1)
    if isinstance(geom2, str):  # If geometry is in WKT format
        geom2 = wkt.loads(geom2)
    
    # Make sure both geometries are Points
    if isinstance(geom1, Point) and isinstance(geom2, Point):
        return abs(geom1.x - geom2.x) + abs(geom1.y - geom2.y)
    else:
        raise ValueError("Both geometries must be Points")
listings_gdf_with_nearest_amenity_copy = listings_gdf_with_nearest_amenity.copy().head(10)

listings_gdf_with_nearest_amenity_copy['manhattan_distance_school'] = listings_gdf_with_nearest_amenity_copy.apply(lambda row: manhattan_dist(row['geometry'], row['school_geometry']), axis=1)


# ORS Computed Distances

import openrouteservice
from io import StringIO 

client = openrouteservice.Client(key="5b3ce3597851110001cf6248afd1cab1431b4151b537945f4e0db634")


def calc_route_dist(listing_point, school_point):
    if isinstance(listing_point, str):  # If geometry is in WKT format
        listing_point = wkt.loads(listing_point)
    if isinstance(school_point, str):  # If geometry is in WKT format
        school_point = wkt.loads(school_point)

    listing_coord = (listing_point.x, listing_point.y)
    school_coord = (school_point.x, school_point.y)

    try:
        routes = client.directions(
            coordinates=[listing_coord, school_coord],
            profile='driving-car',
            format='geojson'
        )

        distance = routes['features'][0]['properties']['segments'][0]['distance']
        return distance 
    except Exception as e:
        print(f"Error calculating distance: {e}")
        return None



listings_gdf_with_nearest_amenity_copy['distance_to_nearest_school_ORS'] = listings_gdf_with_nearest_amenity_copy.apply(
    lambda row: calc_route_dist(row['geometry'], row['school_geometry']),
    axis=1
)

listings_gdf_with_nearest_amenity_copy


Unnamed: 0,address,state,suburb,bedrooms,bathrooms,carspaces,date_listed,latitude,longitude,is_new_development,...,supermarket_geometry,distance_to_park,parks_geometry,distance_to_mall,mall_geometry,distance_to_CBD,school_geometry,distance_to_school,manhattan_distance_school,distance_to_nearest_school_ORS
0,"1201/55 Queens Road, Melbourne VIC 3000",vic,Melbourne,2.0,2.0,2.0,2009-05-08,-37.847967,144.97764,False,...,POINT (144.97693 -37.83855),0.000332,"POLYGON ((144.96799 -37.83755, 144.96845 -37.8...",0.015313,"POLYGON ((144.98986 -37.83828, 144.98978 -37.8...",0.031525,POINT (144.98214 -37.84883),0.004582,0.005363,566.9
1,"211/G04K Powlett Street, East Melbourne VIC 3002",vic,East Melbourne,3.0,2.0,1.0,2009-05-08,-37.812575,144.985854,False,...,POINT (144.98563 -37.80974),0.000866,"POLYGON ((144.98615 -37.81176, 144.98616 -37.8...",0.009826,"POLYGON ((144.99569 -37.80995, 144.99545 -37.8...",0.01974,POINT (144.9905 -37.80298),0.010661,0.014241,1660.7
2,Rye VIC 3941,vic,Rye,3.0,2.0,4.0,2009-05-08,-38.373284,144.817655,False,...,"POLYGON ((144.83053 -38.37193, 144.83027 -38.3...",0.003733,"MULTIPOLYGON (((144.74055 -38.33308, 144.74047...",0.073176,"POLYGON ((144.88993 -38.36185, 144.88994 -38.3...",0.574713,POINT (144.88745 -38.36762),0.070024,0.075458,6666.8
3,"1204/454 St Kilda Road, Melbourne St Kilda Roa...",vic,Melbourne St Kilda Road,3.0,3.0,2.0,2009-05-11,-37.839405,144.976224,False,...,POINT (144.97693 -37.83855),0.001916,"POLYGON ((144.96799 -37.83755, 144.96845 -37.8...",0.013582,"POLYGON ((144.98986 -37.83828, 144.98978 -37.8...",0.023043,POINT (144.97186 -37.83589),0.005604,0.007879,706.5
4,"211 Wellington Pde Sth, East Melbourne VIC 3002",vic,East Melbourne,3.0,3.0,2.0,2009-05-11,-37.816637,144.977522,False,...,POINT (144.97115 -37.80989),0.000429,"POLYGON ((144.97523 -37.81571, 144.97577 -37.8...",0.011072,"POLYGON ((144.96535 -37.82038, 144.96535 -37.8...",0.010689,POINT (144.97227 -37.8103),0.00823,0.011589,1335.4
5,"12/1 Exhibition Street, Melbourne St Kilda Roa...",vic,Melbourne St Kilda Road,1.0,1.0,0.0,2009-05-25,-37.815858,144.972028,False,...,POINT (144.97004 -37.81049),0.001903,"POLYGON ((144.97225 -37.81776, 144.97274 -37.8...",0.006539,"POLYGON ((144.96535 -37.82038, 144.96535 -37.8...",0.005624,POINT (144.97056 -37.81154),0.004561,0.005786,509.5
6,"2/208 Albion Street, Brunswick VIC 3056",vic,Brunswick,2.0,1.0,1.0,2009-06-08,-37.76077,144.961728,False,...,POINT (144.96283 -37.76209),0.001823,"POLYGON ((144.96146 -37.76273, 144.96112 -37.7...",0.014137,"POLYGON ((144.96235 -37.77610, 144.96233 -37.7...",0.057773,POINT (144.96234 -37.77182),0.011067,0.011662,1402.5
7,"4/208 Albion Street, Brunswick VIC 3056",vic,Brunswick,2.0,1.0,1.0,2009-06-08,-37.76077,144.961728,False,...,POINT (144.96283 -37.76209),0.001823,"POLYGON ((144.96146 -37.76273, 144.96112 -37.7...",0.014137,"POLYGON ((144.96235 -37.77610, 144.96233 -37.7...",0.057773,POINT (144.96234 -37.77182),0.011067,0.011662,1402.5
8,"1/208 Albion Street, Brunswick VIC 3056",vic,Brunswick,1.0,1.0,0.0,2009-06-12,-37.760855,144.961691,False,...,POINT (144.96283 -37.76209),0.001734,"POLYGON ((144.96146 -37.76273, 144.96112 -37.7...",0.014051,"POLYGON ((144.96235 -37.77610, 144.96233 -37.7...",0.057691,POINT (144.96234 -37.77182),0.010984,0.011614,1392.7
9,"2513/250 Elizabeth Street, Melbourne VIC 3000",vic,Melbourne,1.0,1.0,0.0,2009-06-12,-37.812679,144.962732,False,...,POINT (144.96217 -37.81050),0.002576,"POLYGON ((144.96039 -37.81382, 144.96040 -37.8...",0.000372,"POLYGON ((144.96468 -37.81260, 144.96429 -37.8...",0.00704,POINT (144.97056 -37.81154),0.00791,0.008967,1113.7


In [35]:
import osmnx as ox 

# distance to nearest universities (in world top 500 in Victoria)

universities = [
    "Monash University, Clayton, Australia",
    "University of Melbourne Parkville, Melbourne, Australia",
    "RMIT University, La Trobe Street, Melbourne",
    "Swinburne University, Glenferrie, Australia",
    "La Trobe University, Melbourne, Australia",
    "Deakin University, Melbourne, Australia"
]

univ_list = [] 

for uni in universities:
    try:
        university_gdf = ox.geocode_to_gdf(uni)
        univ_list.append(university_gdf)

    except Exception as e:
        print(f"Error fetching data for {uni}: {e}")

universities_gdf = gpd.GeoDataFrame(pd.concat(univ_list, ignore_index=True))
print(universities_gdf)



                                            geometry  bbox_north  bbox_south  \
0  POLYGON ((145.13127 -37.91439, 145.13127 -37.9...  -37.913359  -37.914724   
1  MULTIPOLYGON (((144.95298 -37.79764, 144.95381...  -37.792025  -37.804255   
2  POLYGON ((144.96307 -37.80751, 144.96398 -37.8...  -37.807001  -37.809449   
3  POLYGON ((145.03312 -37.82336, 145.03422 -37.8...  -37.822933  -37.823487   
4  POLYGON ((145.03730 -37.72343, 145.03817 -37.7...  -37.712965  -37.728743   
5  POLYGON ((145.11283 -37.84629, 145.11350 -37.8...  -37.845397  -37.849949   

    bbox_east   bbox_west  place_id  osm_type     osm_id        lat  \
0  145.132093  145.131267  17587791       way  901369852 -37.914132   
1  144.965688  144.952976  50132756  relation   15629064 -37.796095   
2  144.966114  144.963071  17927931       way  616082731 -37.808161   
3  145.034295  145.033125  17725589       way  860292292 -37.823188   
4  145.057840  145.037305  17935561       way   27877532 -37.721356   
5  145.116892

In [47]:
universities_gdf = universities_gdf[['lat','lon','osm_id','name']]

universities_gdf['geometry'] = universities_gdf.apply(lambda row: Point(row['lon'], row['lat']), axis=1)
univ_gdf = gpd.GeoDataFrame(universities_gdf, geometry='geometry')

# finding distances between each listing and university

nearest_univ = gpd.sjoin_nearest(listings_gdf_drop, univ_gdf,how="left", distance_col="distance")

nearest_univ['uni_geometry'] = nearest_univ.apply(lambda row: Point(row['lon'], row['lat']), axis=1)

col_to_keep_uni = ['address','state','suburb','bedrooms','bathrooms','carspaces','date_listed','latitude','longitude','is_new_development','price',
'propertyId','is_furnished','year','month','day','SA2_CODE21','geometry','distance','uni_geometry']

nearest_uni_cleaned = nearest_univ[col_to_keep_uni]
nearest_uni_cleaned.rename(columns={'distance': 'distance_to_uni'}, inplace=True)


uni_dataX = nearest_uni_cleaned[['uni_geometry','distance_to_uni']]

listings_gdf_with_nearest_amenity= listings_gdf_with_nearest_amenity.join(uni_dataX)

Use `to_crs()` to reproject one of the input geometries to match the CRS of the other.

Left CRS: EPSG:3857
Right CRS: None

  nearest_univ = gpd.sjoin_nearest(listings_gdf_drop, univ_gdf,how="left", distance_col="distance")
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  nearest_uni_cleaned.rename(columns={'distance': 'distance_to_uni'}, inplace=True)


ValueError: columns overlap but no suffix specified: Index(['uni_geometry', 'distance_to_uni'], dtype='object')

In [50]:
# converting to PARQUET and CSV and exporting the file 

listings_gdf_with_nearest_amenity.to_parquet('/Users/rchrdha/Documents/GitHub/project-2-group-real-estate-industry-project-34/data/curated/final_amenity_data.parquet', engine='pyarrow')
listings_gdf_with_nearest_amenity.to_csv('/Users/rchrdha/Documents/GitHub/project-2-group-real-estate-industry-project-34/data/curated/final_amenity_data.csv')
