# Coffeeshop Location Recommender for Philadelphia

## Imports

In [9]:
import json
import folium
import numpy as np
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point

## Load Data

In [10]:
projected_crs = 'EPSG:6564'

# Load park data
park_gdf = gpd.read_file('/kaggle/input/philadelphia-data/PPR_Properties.geojson')
park_gdf = park_gdf.to_crs(projected_crs) # Project in plain coordinate system

# Load university data
university_gdf = gpd.read_file('/kaggle/input/philadelphia-data/Universities_Colleges.geojson')
university_gdf = university_gdf.to_crs(projected_crs) # Project in plain coordinate system

# Load university data
streets_gdf = gpd.read_file('/kaggle/input/philadelphia-data/CompleteStreets.geojson')
streets_gdf = streets_gdf.to_crs(projected_crs) # Project in plain coordinate system

## Data Analysis Functions

### Check Coordinate Validity

In [11]:
def is_in_philadelphia(lat, lon):
    """Check if coordinates are in philadelphia"""
    # TODO
    pass

def is_in_park(lat, lon):
    """Check if coordinates are in a park"""
    # TODO
    pass

def is_in_water(lat, lon):
    """Check if coordinates are in the water, i.e. river or lake"""
    # TODO
    pass

### Calculate Distances


In [12]:
def distance_to(coordinates_df, gdf, column_name):
    """Calculate the distance from given point to the closest polygon in gdf.
    
    Returns: Pandas DataFrame with two columns.
    """
    # Create geopandas DataFrame from given points
    point_gdf = gpd.points_from_xy(coordinates_df["Longitude"], coordinates_df["Latitude"], crs="WGS84")
    # Transform coordinates to planar
    point_gdf = point_gdf.to_crs(projected_crs)

    min_distance_list = []

    for point in point_gdf:
        # Calculate distance of point to all polygons
        distances = gdf['geometry'].distance(point)
        # Find index of closest park
        idxmin = distances.idxmin()

        min_distance_list.append([distances[idxmin], gdf[column_name].loc[idxmin]])
    
    min_distances_df = pd.DataFrame(min_distance_list, columns=["Distance [m]", column_name])
    return min_distances_df

def distance_to_park(coordinates_df):
    """Calculate the distance to closest parks.
    
    Returns: Pandas DataFrame with two columns (Distance [m], PUBLIC_NAME)
    """
    return distance_to(coordinates_df, park_gdf, 'PUBLIC_NAME')

def distance_to_university(coordinates_df):
    """Calculate the distance to closest university.
    
    Returns: Pandas DataFrame with two columns (Distance [m], NAME)
    """
    return distance_to(coordinates_df, university_gdf, 'NAME')


def distance_to_public_transport(lat, lon, n=1):
    """Calculate the distance to n closest bus/train stations
    Returns: List of n Tuples with distance and the object that is close by."""
    # TODO
    pass

## Recommender

### Retrieve Data

In [13]:
def retrieve_data(coordinates):
    """Calculate the data categories for the given coordinates.
    
    Returns: DataFrame with data categories as columns.
    """
    data_df = coordinates

    # Distance to university
    data_df['University Distance'] = distance_to_university(coordinates)["Distance [m]"]

    # Distance to park
    data_df['Park Distance'] = distance_to_park(coordinates)["Distance [m]"]
    return data_df

In [73]:
def linear_score(data_series, min_val=0, max_val=500, lower_is_better=True):
    """Project values between min_val and max_val linearly between 1 and 0."""

    data_series[data_series < min_val] = 0
    data_series[data_series > max_val] = max_val

    if lower_is_better:
        return 1 - data_series/max_val

    return data_series/max_val

def conv_data_to_scores(data_df, weights):
    """Convert the data values into scores between 0 and 1, where 1 is a good score.
    
    Returns: DataFrame with the same column names as data_df but scores instead of raw data values.
    """

    score_df = data_df[['Latitude', 'Longitude']].copy(deep=True)
        
    if "Park Distance" in weights.keys():
        score_df["Park Distance"] = linear_score(data_df["Park Distance"], min_val = 10, max_val=500, lower_is_better=True)
        
    if "University Distance" in weights.keys():
        score_df["University Distance"] = linear_score(data_df["University Distance"], min_val = 10, max_val=500, lower_is_better=True)
    
    return score_df
        

In [74]:
def combine_scores(score_df, weights):
    return score_df[weights.keys()].mul(weights).sum(1)

In [75]:
def calculate_score(coordinates, weights={}):
    
    data_df = retrieve_data(coordinates)

    if len(weights) == 0:
        # Create a weight dictionary with equal weighting for each data column
        data_columns = data_df.drop(["Latitude", "Longitude"], axis=1).columns
        weights = {col_name: 1/len(data_columns) for col_name in data_columns}

    score_df_ = conv_data_to_scores(data_df, weights)
    score_df_['Score'] = combine_scores(score_df_, weights)
    return score_df_

Unnamed: 0,Latitude,Longitude,Park Distance,University Distance,Score
0,40.051854,-75.047369,0.221857,0.0,0.110929


## Recommend

In [None]:
def create_coordinate_grip(top_left_coords, bottom_right_coords, )

In [None]:
# Coordinates for which the score is to be calculated
coordinates = pd.DataFrame([[40.051854, -75.047369]], columns=["Latitude", "Longitude"])
score_df = calculate_score(coordinates)
score_df

## Visualizations

### Score Heatmap

In [79]:
import folium

from folium.plugins import HeatMap

heat_map = folium.Map([40, -75], zoom_start=10)

heat_coords = score_df[['Latitude', 'Longitude', 'Score']]
lats_longs = [
                [38.27312, -98.5821872, 0.5], # Kansas
                [34.395342, -111.763275,0.2], # Arizona
                [37.5726028, -85.1551411, 0.7], # Kentucky
                [32.3293809, -83.1137366,0.9], # Georgia
                [40.0796606, -89.4337288,0.1], # Illinois
            ]


folium.plugins.HeatMap(heat_coords).add_to(heat_map)

heat_map

----------------------------------------------------------------------------------------------------
## Playground

In [None]:
# Create a Shapely Point object
lat = 40.051854
lon = -75.047369

point_gdf = gpd.GeoSeries([Point(lon, lat)])
point_gdf.crs = "WGS84"
point_gdf = point_gdf.to_crs(projected_crs)

dist = park_gdf.distance(point_gdf[0])
dist

# Calculate distances to all polygons
park_gdf['distance_to_point'] = park_gdf['geometry'].distance(point_gdf[0])

# Sort by distance
park_gdf = park_gdf.sort_values(by='distance_to_point', ascending=True)

# Select the n closest polygons
n = 3
closest_polygons = park_gdf.head(n)
closest_polygons

In [None]:

pd.set_option('display.max_columns', 500)
streets_gdf

In [None]:
university_gdf.plot()

In [None]:
# Opening JSON file
f = open('/kaggle/input/philadelphia-data/CompleteStreets.geojson')
geo_json_data = json.load(f)
m = folium.Map([40, -75], zoom_start=10)

folium.GeoJson(geo_json_data).add_to(m)

m