In [2]:
pip install geopy

Collecting geopy
  Downloading geopy-2.3.0-py3-none-any.whl (119 kB)
     -------------------------------------- 119.8/119.8 kB 2.3 MB/s eta 0:00:00
Collecting geographiclib<3,>=1.52
  Downloading geographiclib-2.0-py3-none-any.whl (40 kB)
     ---------------------------------------- 40.3/40.3 kB ? eta 0:00:00
Installing collected packages: geographiclib, geopy
Successfully installed geographiclib-2.0 geopy-2.3.0
Note: you may need to restart the kernel to use updated packages.


In [3]:
# Import packages
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import geopandas as gpd
from scipy.spatial.distance import cdist
from geopy.distance import geodesic

# Import data
df_rent = pd.read_csv('BP_Final_Data_for_processing.csv')
df_geo = pd.read_csv('GS_Final_Data_for_processing.csv')


In [4]:
# Calculate the distance from each apartment to the nearest object of each type:

# Create a list of unique types from df_geo
unique_types = df_geo['Type'].unique()

# Initialize dictionaries to store distances and coordinates for each type
distances_dict = {}
coordinates_dict = {}

# Loop through each unique type and calculate distances
for obj_type in unique_types:
    # Filter df_geo to get only the specific type
    df_type = df_geo[df_geo['Type'] == obj_type]
    
    # Create a matrix of distances between apartments and objects of the specific type
    distances = cdist(df_rent[['Latitude', 'Longitude']], df_type[['Latitude', 'Longitude']])
    
    # Find the minimum distance for each apartment
    min_distances = np.min(distances, axis=1)
    
    # Find the index of the closest object for each apartment
    min_indices = np.argmin(distances, axis=1)
    
    # Get the corresponding coordinates for the closest object
    closest_coordinates = [(df_type.iloc[idx]['Latitude'], df_type.iloc[idx]['Longitude']) for idx in min_indices]
    
    # Store the closest coordinates in the coordinates dictionary with descriptive column names
    coordinates_dict[f"closest_{obj_type}_latitude"] = [coord[0] for coord in closest_coordinates]
    coordinates_dict[f"closest_{obj_type}_longitude"] = [coord[1] for coord in closest_coordinates]

    # Calculate the distances using geodesic and store in distances_dict for all object types
    distances_dict[f"distance_to_{obj_type}"] = [geodesic((apt_lat, apt_lon), (obj_lat, obj_lon)).kilometers
                                                 for apt_lat, apt_lon, obj_lat, obj_lon
                                                 in zip(df_rent['Latitude'], df_rent['Longitude'],
                                                        df_coordinates[f"closest_{obj_type}_latitude"],
                                                        df_coordinates[f"closest_{obj_type}_longitude"])]

# Create a new dataframe to store the coordinates
df_coordinates = pd.DataFrame(coordinates_dict)

# Create a new dataframe to store the distances
df_distances = pd.DataFrame(distances_dict)

# Concatenate df_rent, df_coordinates, and df_distances
df_result = pd.concat([df_rent, df_coordinates, df_distances], axis=1)

df_result


NameError: name 'df_coordinates' is not defined

In [None]:
# Clean up the table:
# Drop the columns with coordinates for the objects
columns_to_drop = [col for col in df_result.columns if "_latitude" in col or "_longitude" in col]
df_result_cleaned = df_result.drop(columns=columns_to_drop)

# Drop the "Property type" column
df_result_cleaned = df_result_cleaned.drop(columns=["Property type"])
# Drop the "City" column
df_result_cleaned = df_result_cleaned.drop(columns=["City"])
# Drop the "Postal code" column
df_result_cleaned = df_result_cleaned.drop(columns=["Postal code"])

# Rename the distance columns
column_rename_mapping = {
    "distance_to_Public Transport Stations": "Distance to Transport Station (km)",
    "distance_to_Beaches": "Distance to Beach (km)",
    "distance_to_Schools": "Distance to School (km)",
    "distance_to_Restaurants": "Distance to Restaurant (km)",
    "distance_to_Hospitals": "Distance to Hospital (km)",
    "distance_to_Malls": "Distance to Mall (km)",
}

df_result_cleaned = df_result_cleaned.rename(columns=column_rename_mapping)

# Save the cleaned table
df_result_cleaned.to_csv("Final_Data.csv", index=False)

df_result_cleaned.head(50)