# All Imports 

In [1]:
import folium
import pandas as pd
import numpy as np
from pyproj import CRS
import geopandas as gpd
from shapely.geometry import Point
from sklearn.neighbors import KernelDensity
from statsmodels.nonparametric.bandwidths import bw_silverman

# Data Loading

In [2]:
# Read the three point layers from CSV files in the working directory.
bus_stops, hospitals, restaurants = map(
    pd.read_csv,
    ('bus_stops.csv', 'hospitals.csv', 'restaurants.csv'),
)

# Data Preparation

In [3]:
# Convert coordinates from degrees to radians and tag each row with its layer.
# NOTE: the conversion mutates the loaded frames in place, so re-running this
# cell without re-running the loading cell would scale the coordinates twice.
deg_to_rad = np.pi/180
coord_cols = ['latitude', 'longitude']
for frame, label in ((bus_stops, 'bus_stops'),
                     (hospitals, 'hospitals'),
                     (restaurants, 'restaurants')):
    frame[coord_cols] *= deg_to_rad
    frame['type'] = label

# Stack the three layers into a single table with a fresh 0..n-1 index.
total_points = pd.concat(
    [layer[coord_cols + ['type']]
     for layer in (bus_stops, hospitals, restaurants)],
    ignore_index=True)

# Kernel Density Estimation

In [4]:
def kernel_density_estimator(data):
    """Fit a Gaussian kernel density estimator to the points in ``data``.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain 'latitude' and 'longitude' columns. The notebook passes
        them in radians, in that column order, for the haversine metric.

    Returns
    -------
    sklearn.neighbors.KernelDensity
        The estimator fitted on the (latitude, longitude) pairs.
    """
    coords = data[['latitude', 'longitude']].to_numpy()
    # A single bandwidth is needed: take the largest of the values returned by
    # Silverman's rule (presumably one per coordinate column).
    bandwidth = max(bw_silverman(coords))
    model = KernelDensity(kernel='gaussian', bandwidth=bandwidth,
                          metric='haversine', algorithm='ball_tree')
    return model.fit(coords)

# Estimating densities for each type of location

In [5]:
# Fit one density model per layer, in the order bus stops, hospitals, restaurants.
bus_stops_estimator, hospitals_estimator, restaurants_estimator = (
    kernel_density_estimator(layer)
    for layer in (bus_stops, hospitals, restaurants)
)

# Finding best locations

In [6]:
def find_best_coordinates(samples,w_bus,w_hos,w_rest,nbpoints = 5):
    """Return the ``nbpoints`` candidate points with the highest weighted score.

    Each candidate is scored by the module-level estimators
    ``bus_stops_estimator``, ``hospitals_estimator`` and
    ``restaurants_estimator`` (via ``score_samples``); the three scores are
    combined as a weighted sum.

    Parameters
    ----------
    samples : pandas.DataFrame
        Candidate points; must contain 'latitude' and 'longitude' columns in
        the same units the estimators were fitted on.
    w_bus, w_hos, w_rest : float
        Weights applied to the bus-stop, hospital and restaurant scores.
    nbpoints : int, default 5
        Maximum number of rows to return. Fewer rows are returned when
        ``samples`` has fewer than ``nbpoints`` rows.

    Returns
    -------
    pandas.DataFrame
        An independent copy of the selected rows, best first, indexed by
        rank 1..k where k is the number of rows returned.

    Raises
    ------
    ValueError
        If ``nbpoints`` is negative.
    """
    if nbpoints < 0:
        raise ValueError("nbpoints must be non-negative")

    loc_vec = samples[['latitude','longitude']].to_numpy()
    score_bus = bus_stops_estimator.score_samples(loc_vec)
    score_hospitals = hospitals_estimator.score_samples(loc_vec)
    score_restaurants = restaurants_estimator.score_samples(loc_vec)
    total_score = (w_bus*score_bus
                   + w_hos*score_hospitals
                   + w_rest*score_restaurants)

    # Reverse to descending order first, then truncate. Slicing with
    # [-nbpoints:] would select *every* row when nbpoints == 0.
    ind_bp = np.argsort(total_score)[::-1][:nbpoints]

    # Explicit copy so callers can modify the result without touching
    # `samples` and without triggering pandas' SettingWithCopyWarning.
    b_loc = samples.iloc[ind_bp].copy()
    # Rank labels sized to the rows actually selected (was hard-coded to 5).
    b_loc.index = range(1, len(b_loc) + 1)
    return b_loc

In [7]:
# Relative importance of each layer when ranking the candidate points.
layer_weights = {'w_bus': 0.5, 'w_hos': 0.2, 'w_rest': 0.3}
top_locations = find_best_coordinates(total_points, nbpoints=5, **layer_weights)

In [8]:
# Convert the selected coordinates back from radians to degrees.
# assign() builds a new frame instead of writing into a frame that pandas
# tracks as a slice of another one, which avoids SettingWithCopyWarning.
# NOTE: re-running this cell on its own converts the values a second time.
rad_to_deg = 180.0/np.pi
top_locations = top_locations.assign(
    latitude=top_locations['latitude'] * rad_to_deg,
    longitude=top_locations['longitude'] * rad_to_deg)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self[k1] = value[k2]


In [9]:
# Show the selected locations; the index is the rank assigned by
# find_best_coordinates, starting at 1 for the highest-scoring point.
display(top_locations)

Unnamed: 0,latitude,longitude,type
1,12.944839,77.571819,restaurants
2,12.945072,77.571343,restaurants
3,12.948282,77.573674,bus_stops
4,12.948411,77.573578,bus_stops
5,12.948411,77.572026,hospitals


# Visualizations

Adapted from Rukmangadh's code.

In [10]:
# Build shapely points; shapely uses (x, y) order, i.e. (longitude, latitude).
top_points = [Point(lon, lat) for lon, lat in zip(
    top_locations['longitude'], top_locations['latitude'])]

# Geo dataframe in WGS84 (EPSG:4326): plain longitude/latitude in degrees.
# This is the coordinate system GeoJSON layers on a folium/Leaflet map use,
# so the map layer below is built from this frame directly.
gdf_wgs84 = gpd.GeoDataFrame(
    {
        'location_type': top_locations['type'].to_numpy(),
        'geometry': top_points,
    },
    crs=CRS.from_epsg(4326),
)

# World Mercator (EPSG:3395) version, kept under the original name `gdf` in
# case other cells rely on it. It is NOT needed for plotting: folium takes
# lon/lat input, so projecting first only adds a lossy round trip.
gdf = gdf_wgs84.to_crs(CRS.from_epsg(3395))

# Centre the map on the mean of the selected locations ([lat, lon] order).
map_center = [
    np.mean(top_locations['latitude'].to_numpy()),
    np.mean(top_locations['longitude'].to_numpy()),
]
map_plot = folium.Map(location=map_center, zoom_start=14)
points_gjson = folium.features.GeoJson(gdf_wgs84['geometry'],
                                       name='best_locations')
points_gjson.add_to(map_plot)
map_plot

  return _prepare_from_string(" ".join(pjargs))
