# NYRentalGeoPy
---



In [41]:
# Dependencies and Setup
import hvplot.pandas
import pandas as pd
import requests
from pathlib import Path
import matplotlib.pyplot as plt

# Import API key
from api_keys import geoapify_key

In [42]:
# Load the CSV file NY real estate price listing into a Pandas DataFrame
csv_file = Path("NY Realstate Pricing.csv")
realestate_data_df = pd.read_csv(csv_file)

# Display sample data
realestate_data_df.head()

Unnamed: 0,F1,id,neighbourhood,latitude,longitude,room_type,price,days_occupied_in_2019,minimum_nights,number_of_reviews,reviews_per_month,availability_2020
0,0,2595,Midtown,40.75362,-73.98377,Entire home/apt,225,15,10,48,0.39,1
1,1,3831,Brooklyn,40.68514,-73.95976,Entire home/apt,89,188,1,295,4.67,1
2,2,5099,Manhattan,40.74767,-73.975,Entire home/apt,200,362,3,78,0.6,19
3,3,5121,Bedford-Stuyvesant,40.68688,-73.95596,Private room,60,0,29,49,0.38,365
4,4,5178,Manhattan,40.76489,-73.98493,Private room,79,141,2,454,3.52,242


In [43]:
# Import the Geoapigy API key
from api_keys import geoapify_key

# Import citipy to determine the  based on latitude and longitude
from citipy import citipy

---

### Step 1: Create a map that displays a point for every rental in the real estate DataFrame. The size of the point is the price of each rental.

In [47]:
%%capture --no-display

# Configure the map plot
map_plot_1 = realestate_data_df.hvplot.points(
    "longitude",
    "latitude",
    geo = True,
    tiles = "OSM",
    frame_width = 800,
    frame_height = 600, 
    size = "price",
    scale = 1.5,
    color = "neighbourhood"
    )
# Display the map
map_plot_1
# Save the figure
#plt.savefig("output/rentalmap.png")

### Step 2: Narrow down the `realestate_data_df` DataFrame to find public transport and eateries

In [69]:
#rank rentals by days occupied
realestate_data_df['rank'] = realestate_data_df["days_occupied_in_2019"].value_counts()
realestate_data_df.head(100)

Unnamed: 0,F1,id,neighbourhood,latitude,longitude,room_type,price,days_occupied_in_2019,minimum_nights,number_of_reviews,reviews_per_month,availability_2020,rank
0,0,2595,Midtown,40.75362,-73.98377,Entire home/apt,225,15,10,48,0.39,1,533.0
1,1,3831,Brooklyn,40.68514,-73.95976,Entire home/apt,89,188,1,295,4.67,1,188.0
2,2,5099,Manhattan,40.74767,-73.97500,Entire home/apt,200,362,3,78,0.60,19,114.0
3,3,5121,Bedford-Stuyvesant,40.68688,-73.95596,Private room,60,0,29,49,0.38,365,106.0
4,4,5178,Manhattan,40.76489,-73.98493,Private room,79,141,2,454,3.52,242,91.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,108,27644,Hamilton Heights,40.82754,-73.94919,Entire home/apt,170,13,2,207,2.05,348,37.0
96,109,27659,Brooklyn,40.66499,-73.97925,Entire home/apt,400,45,2,16,0.22,331,30.0
97,110,27759,Manhattan,40.77842,-73.97556,Entire home/apt,170,275,7,18,0.16,97,31.0
98,112,28321,Lefferts Garden,40.65593,-73.96053,Private room,75,4,2,9,0.08,355,18.0


In [71]:
realestate_data_df['rank'] = realestate_data_df.groupby(['neighbourhood'])['days_occupied_in_2019'].rank(method="first",ascending=False)
print(realestate_data_df['rank'])

0         342.0
1        2147.0
2          64.0
3         781.0
4        1908.0
          ...  
17609    3613.0
17610    3650.0
17611     187.0
17612    3229.0
17613    1967.0
Name: rank, Length: 17614, dtype: float64


In [66]:
realestate_data_df['rank'] = realestate_data_df.groupby(['neighbourhood'])['days_occupied_in_2019'].rank(ascending=False)
print(realestate_data_df['rank'])

0         342.5
1        2150.0
2          79.5
3         790.0
4        1909.0
          ...  
17609    3590.0
17610    3644.0
17611     186.0
17612    3174.0
17613    1965.0
Name: rank, Length: 17614, dtype: float64


In [67]:
realestate_data_df["rank"] = realestate_data_df[["days_occupied_in_2019", "neighbourhood"]].apply(tuple, axis = 1)\
					.rank(method='dense', ascending = False).astype(int)


In [73]:
realestate_data_df.to_csv("realestate.csv")


In [74]:
rank_df = realestate_data_df.loc[realestate_data_df['rank'] <= 10]
rank_df

Unnamed: 0,F1,id,neighbourhood,latitude,longitude,room_type,price,days_occupied_in_2019,minimum_nights,number_of_reviews,reviews_per_month,availability_2020,rank
61,72,18728,Meatpacking District,40.74138,-74.00197,Private room,150,74,4,180,1.69,296,1.0
65,76,19282,Flatbush,40.65401,-73.96323,Entire home/apt,130,362,6,17,0.15,94,6.0
66,77,19601,Brooklyn Heights,40.69723,-73.99268,Entire home/apt,2000,211,1,25,0.23,93,8.0
87,99,26012,Gowanus,40.68157,-73.98989,Entire home/apt,200,323,30,20,0.20,64,10.0
99,113,28907,Manhattan,40.77800,-73.94822,Entire home/apt,150,364,5,21,0.18,1,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
17517,15012,12933540,Park Versailles,40.83049,-73.87354,Shared room,51,276,3,9,0.23,89,6.0
17532,31062,26754694,Woodhaven,40.69883,-73.85142,Shared room,45,345,3,23,1.54,102,1.0
17538,21703,18908252,Kensington,40.64249,-73.97105,Shared room,60,275,4,1,0.06,0,5.0
17557,3929,2772111,Bedford Park,40.86705,-73.88545,Shared room,55,0,7,12,0.17,365,9.0


In [78]:
#average price for the top 10 ranked rentals in each neighbourhood
rank_agg_df = rank_df.groupby(["neighbourhood"]).mean()[["price", "latitude", "longitude"]].reset_index()
rank_agg_df

  rank_agg_df = rank_df.groupby(["neighbourhood"]).mean()[["price", "latitude", "longitude"]].reset_index()


Unnamed: 0,neighbourhood,price,latitude,longitude
0,Allerton,54.8,40.859940,-73.867380
1,Alphabet City,162.9,40.724899,-73.980843
2,Annadale,180.0,40.538710,-74.169660
3,Astoria,105.7,40.761168,-73.923323
4,Bath Beach,116.0,40.604433,-74.005410
...,...,...,...,...
181,Williamsburg,216.7,40.712844,-73.949814
182,Windsor Terrace,154.2,40.656817,-73.979618
183,Woodhaven,69.8,40.690781,-73.858672
184,Woodlawn,85.0,40.899840,-73.869020


In [79]:
%%capture --no-display

# Configure the map plot, average price top 10 rentals in each neighbourhood
map_plot_2 = rank_agg_df.hvplot.points(
    "longitude",
    "latitude",
    geo = True,
    tiles = "OSM",
    frame_width = 800,
    frame_height = 600, 
    size = "price",
    scale = 1.5,
    color = "neighbourhood"
    )
# Display the map
map_plot_2
# Save the figure
#plt.savefig("output/rentalmap.png")

In [85]:
# Use the Pandas copy function to create DataFrame called hotel_df to store the city, country, coordinates, and humidity
rank_agg_tour_df = rank_agg_df[["neighbourhood", "price", "latitude", "longitude"]].copy()

# Add an empty column, "Hotel Name," to the DataFrame so you can store the hotel found using the Geoapify API
rank_agg_tour_df["tourist attraction"] = ""

# Display sample data
rank_agg_tour_df.head()

Unnamed: 0,neighbourhood,price,latitude,longitude,tourist attraction
0,Allerton,54.8,40.85994,-73.86738,
1,Alphabet City,162.9,40.724899,-73.980843,
2,Annadale,180.0,40.53871,-74.16966,
3,Astoria,105.7,40.761168,-73.923323,
4,Bath Beach,116.0,40.604433,-74.00541,


In [87]:
# tourist attractions that are in proximity to the top 10 ranked rental neighbourhoods
# Set parameters to search for a hotel (do i use 'type' or 'categories' for hotel)
radius = 10000
params = {
    "categories": "tourism.attraction",
    "apiKey":geoapify_key,
}

# Print a message to follow up the hotel search
print("Starting tourist attraction search")

# Iterate through the hotel_df DataFrame
for index, row in rank_agg_tour_df.iterrows():
    # get latitude, longitude from the DataFrame
    latitude = row["latitude"]
    longitude = row["longitude"]
    
    # Add filter and bias parameters with the current city's latitude and longitude to the params dictionary
    params["filter"] = f"circle:{longitude},{latitude},{radius}"
    params["bias"] = f"proximity:{longitude},{latitude}"
    
    # Set base URL
    base_url = "https://api.geoapify.com/v2/places"


    # Make an API request using the params dictionaty
    response = requests.get(base_url, params=params)
    
    # Convert the API response to JSON format
    name_address = response.json()
    
    # Grab the first hotel from the results and store the name in the hotel_df DataFrame
    try:
        rank_agg_tour_df.loc[index, "tourist attraction"] = name_address["features"][0]["properties"]["name"]
    except (KeyError, IndexError):
        # If no restaurant is found, set the restaurant name as "No restaurant found".
        rank_agg_tour_df.loc[index, "tourist attraction"] = "No tourist attraction found"
        
    # Log the search results
    print(f"{rank_agg_tour_df.loc[index, 'neighbourhood']} - nearest tourist attraction: {rank_agg_tour_df.loc[index, 'tourist attraction']}")

# Display sample data
rank_agg_tour_df.head(50)

Starting tourist attraction search
Allerton - nearest tourist attraction: Bruce Lee Mural
Alphabet City - nearest tourist attraction: No tourist attraction found
Annadale - nearest tourist attraction: Hail Holy Queen
Astoria - nearest tourist attraction: No tourist attraction found
Bath Beach - nearest tourist attraction: No tourist attraction found
Battery Park City - nearest tourist attraction: Sitting Stance
Bay Ridge - nearest tourist attraction: The Gingerbread House
Baychester - nearest tourist attraction: No tourist attraction found
Bayside - nearest tourist attraction: No tourist attraction found
Bedford Park - nearest tourist attraction: Parking
Bedford-Stuyvesant - nearest tourist attraction: Shai Asia Semple
Bensonhurst - nearest tourist attraction: Mother Teresa Statue
Bergen Beach - nearest tourist attraction: Belt Parkway Lookout
Boerum Hill - nearest tourist attraction: Mosaic House
Borough Park - nearest tourist attraction: No tourist attraction found
Brighton Beach - n

Unnamed: 0,neighbourhood,price,latitude,longitude,tourist attraction
0,Allerton,54.8,40.85994,-73.86738,Bruce Lee Mural
1,Alphabet City,162.9,40.724899,-73.980843,No tourist attraction found
2,Annadale,180.0,40.53871,-74.16966,Hail Holy Queen
3,Astoria,105.7,40.761168,-73.923323,No tourist attraction found
4,Bath Beach,116.0,40.604433,-74.00541,No tourist attraction found
5,Battery Park City,197.0,40.708755,-74.016465,Sitting Stance
6,Bay Ridge,155.4,40.626961,-74.028233,The Gingerbread House
7,Baychester,64.5,40.867334,-73.843064,No tourist attraction found
8,Bayside,138.1,40.761222,-73.768533,No tourist attraction found
9,Bedford Park,60.111111,40.87112,-73.88714,Parking


In [89]:
%%capture --no-display

# Configure the map plot
tourist_map = rank_agg_tour_df.hvplot.points(
    "longitude",
    "latitude",
    geo = True,
    tiles = "OSM",
    frame_width = 800,
    frame_height = 600,
    size = "price",
    scale = 1,
    color = "tourist attraction",
    hover_cols = ["tourist attraction", "neighbourhood"]
)

# Display the map
tourist_map

In [26]:
# Narrow down cities that fit criteria and drop any results with null values

perferred_rentals = realestate_data_df[(realestate_data_df['price'] < 1000) &
                             (realestate_data_df['price'] > 30) &
                             (realestate_data_df['number_of_reviews'] < 5)
                             ]
perferred_rentals = perferred_rentals.dropna() 


# Display sample data
perferred_rentals.head(10)

Unnamed: 0,F1,id,neighbourhood,latitude,longitude,room_type,price,days_occupied_in_2019,minimum_nights,number_of_reviews,reviews_per_month,availability_2020,rank
77,89,21644,Manhattan,40.82803,-73.94731,Private room,89,0,1,3,0.21,365,3174.0
104,119,31902,Flatlands,40.63188,-73.93248,Private room,77,187,2,4,0.03,178,4.0
108,123,32100,Greenpoint,40.73409,-73.95348,Entire home/apt,250,302,29,3,0.03,156,46.5
110,125,32329,Manhattan,40.72895,-73.98377,Entire home/apt,160,13,14,3,0.03,349,2883.0
111,127,32363,Kew Garden Hills,40.74028,-73.83168,Private room,140,242,2,1,0.01,63,2.0
118,135,36442,Brooklyn,40.6741,-73.96595,Entire home/apt,115,47,15,4,0.05,307,2831.5
180,211,59121,Queens,40.70411,-73.89934,Entire home/apt,145,0,4,2,0.02,358,1081.0
185,217,60673,Manhattan,40.81583,-73.94707,Private room,175,0,2,3,0.21,365,3174.0
255,306,81739,Brooklyn,40.73842,-73.95312,Private room,249,9,3,2,0.02,22,3590.0
308,374,127387,Gowanus,40.66862,-73.9926,Entire home/apt,260,207,30,3,0.03,314,21.0


### Step 3: Create a new DataFrame called `rentals_df`.

In [28]:
# Use the Pandas copy function to create DataFrame called hotel_df to store the city, country, coordinates, and humidity
rentals_df = perferred_rentals[["neighbourhood", "room_type", "days_occupied_in_2019","price", "latitude", "longitude"]].copy()

# Add an empty column, "Hotel Name," to the DataFrame so you can store the hotel found using the Geoapify API
rentals_df["restuarant Name"] = ""

# Display sample data
rentals_df.head()

Unnamed: 0,neighbourhood,room_type,days_occupied_in_2019,price,latitude,longitude,restuarant Name
77,Manhattan,Private room,0,89,40.82803,-73.94731,
104,Flatlands,Private room,187,77,40.63188,-73.93248,
108,Greenpoint,Entire home/apt,302,250,40.73409,-73.95348,
110,Manhattan,Entire home/apt,13,160,40.72895,-73.98377,
111,Kew Garden Hills,Private room,242,140,40.74028,-73.83168,


### Step 4: For each rental, use the Geoapify API to find the first restuarant located within 10,000 metres of your coordinates.

In [32]:
# Set parameters to search for a hotel (do i use 'type' or 'categories' for hotel)
radius = 10000
params = {
    "categories": "catering.restaurant",
    "apiKey":geoapify_key,
}

# Print a message to follow up the hotel search
print("Starting restaurant search")

# Iterate through the hotel_df DataFrame
for index, row in rentals_df.iterrows():
    # get latitude, longitude from the DataFrame
    latitude = row["latitude"]
    longitude = row["longitude"]
    
    # Add filter and bias parameters with the current city's latitude and longitude to the params dictionary
    params["filter"] = f"circle:{longitude},{latitude},{radius}"
    params["bias"] = f"proximity:{longitude},{latitude}"
    
    # Set base URL
    base_url = "https://api.geoapify.com/v2/places"


    # Make an API request using the params dictionaty
    response = requests.get(base_url, params=params)
    
    # Convert the API response to JSON format
    name_address = response.json()
    
    # Grab the first hotel from the results and store the name in the hotel_df DataFrame
    try:
        rentals_df.loc[index, "restaurant Name"] = name_address["features"][0]["properties"]["name"]
    except (KeyError, IndexError):
        # If no restaurant is found, set the restaurant name as "No restaurant found".
        rentals_df.loc[index, "restaurant Name"] = "No restaurant found"
        
    # Log the search results
    print(f"{rentals_df.loc[index, 'neighbourhood']} - nearest restaurant: {rentals_df.loc[index, 'restaurant Name']}")

# Display sample data
rentals_df.head(50)

Starting restaurant search
Manhattan - nearest restaurant: Camila's Tasting Room
Flatlands - nearest restaurant: New Hot Wok
Greenpoint - nearest restaurant: Casanova
Manhattan - nearest restaurant: Joe & Pat's
Kew Garden Hills - nearest restaurant: Happy Garden Restaurant
Brooklyn - nearest restaurant: Cheryl's Global Soul
Queens - nearest restaurant: Forest Steakhouse Bar
Manhattan - nearest restaurant: A Taste of Seafood
Brooklyn - nearest restaurant: Ashbox Cafe
Gowanus - nearest restaurant: Surfish
Manhattan - nearest restaurant: Omen Azen
Brooklyn - nearest restaurant: Hana Makgeoli
Manhattan - nearest restaurant: Gracie Mews Diner
Brooklyn - nearest restaurant: Giuseppina’s
Queens - nearest restaurant: No restaurant found
Greenpoint - nearest restaurant: Glasserie
Brooklyn - nearest restaurant: Padishah
Lower East Side - nearest restaurant: Kings Co Imperial LES
Brooklyn - nearest restaurant: Mexico Authentico
Manhattan - nearest restaurant: Mexico Lindo
Lower East Side - neares

Unnamed: 0,neighbourhood,room_type,days_occupied_in_2019,price,latitude,longitude,restuarant Name,restaurant Name
77,Manhattan,Private room,0,89,40.82803,-73.94731,,Camila's Tasting Room
104,Flatlands,Private room,187,77,40.63188,-73.93248,,New Hot Wok
108,Greenpoint,Entire home/apt,302,250,40.73409,-73.95348,,Casanova
110,Manhattan,Entire home/apt,13,160,40.72895,-73.98377,,Joe & Pat's
111,Kew Garden Hills,Private room,242,140,40.74028,-73.83168,,Happy Garden Restaurant
118,Brooklyn,Entire home/apt,47,115,40.6741,-73.96595,,Cheryl's Global Soul
180,Queens,Entire home/apt,0,145,40.70411,-73.89934,,Forest Steakhouse Bar
185,Manhattan,Private room,0,175,40.81583,-73.94707,,A Taste of Seafood
255,Brooklyn,Private room,9,249,40.73842,-73.95312,,Ashbox Cafe
308,Gowanus,Entire home/apt,207,260,40.66862,-73.9926,,Surfish


### Step 5: Add the restaurant name and the neightbourhood as additional information in the hover message for each rental in the map.

In [35]:
%%capture --no-display

# Configure the map plot
restaurant_map = rentals_df.hvplot.points(
    "longitude",
    "latitude",
    geo = True,
    tiles = "OSM",
    frame_width = 800,
    frame_height = 600,
    size = "days_occupied_in_2019",
    scale = 1,
    color = "price",
    hover_cols = ["restaurant Name", "neighbourhood"]
)

# Display the map
restaurant_map

In [None]:
# generate a map with a radius of public transport - subway from the most popular rentals

In [95]:
# Use the Pandas copy function to create DataFrame called public trans_df to store the neighbourhood, country, coordinates
public_trans_df = rank_agg_df[["neighbourhood", "price", "latitude", "longitude"]].copy()

# Add an empty column, "subway entrance," to the DataFrame so you can store the hotel found using the Geoapify API
public_trans_df["subway entrance"] = ""

# Display sample data
public_trans_df.head()

Unnamed: 0,neighbourhood,price,latitude,longitude,subway entrance
0,Allerton,54.8,40.85994,-73.86738,
1,Alphabet City,162.9,40.724899,-73.980843,
2,Annadale,180.0,40.53871,-74.16966,
3,Astoria,105.7,40.761168,-73.923323,
4,Bath Beach,116.0,40.604433,-74.00541,


In [101]:
# Set parameters to search for a subway entrance 
radius = 10000
params = {
    "categories": "public_transport.subway.entrance",
    "apiKey":geoapify_key,
}

# Print a message to follow up the attraction search
print("Starting subway entrance search")

# Iterate through the trans_df DataFrame
for index, row in public_trans_df.iterrows():
    # get latitude, longitude from the DataFrame
    latitude = row["latitude"]
    longitude = row["longitude"]
    
    # Add filter and bias parameters with the current neighbourhood's latitude and longitude to the params dictionary
    params["filter"] = f"circle:{longitude},{latitude},{radius}"
    params["bias"] = f"proximity:{longitude},{latitude}"
    
    # Set base URL
    base_url = "https://api.geoapify.com/v2/places"


    # Make an API request using the params dictionaty
    response = requests.get(base_url, params=params)
    
    # Convert the API response to JSON format
    name_address = response.json()
    
    # Grab the first subway entrance from the results and store the name in the public trans df DataFrame
    try:
        public_trans_df.loc[index, "subway entrance"] = name_address["features"][0]["properties"]["name"]
    except (KeyError, IndexError):
        # If no subway entrance is found, set the subway entrance name as "No subway entrance found".
        public_trans_df.loc[index, "subway entrance"] = "No subway found"
        
    # Log the search results
    #print(f"{public_trans_df.loc[index, 'neighbourhood']} - nearest subway entrance: {public_trans_df[index, 'subway entrance']}")

# Display sample data
public_trans_df.head(50)

Starting subway entrance search


Unnamed: 0,neighbourhood,price,latitude,longitude,subway entrance
0,Allerton,54.8,40.85994,-73.86738,No subway found
1,Alphabet City,162.9,40.724899,-73.980843,No subway found
2,Annadale,180.0,40.53871,-74.16966,No subway found
3,Astoria,105.7,40.761168,-73.923323,No subway found
4,Bath Beach,116.0,40.604433,-74.00541,No subway found
5,Battery Park City,197.0,40.708755,-74.016465,No subway found
6,Bay Ridge,155.4,40.626961,-74.028233,No subway found
7,Baychester,64.5,40.867334,-73.843064,No subway found
8,Bayside,138.1,40.761222,-73.768533,7 Train Stair Entrance
9,Bedford Park,60.111111,40.87112,-73.88714,Bedford Park Blvd.


In [103]:
%%capture --no-display

# Configure the map plot
subway_map = public_trans_df.hvplot.points(
    "longitude",
    "latitude",
    geo = True,
    tiles = "OSM",
    frame_width = 800,
    frame_height = 600,
    size = "price",
    scale = 1,
    color = "neighbourhood",
    hover_cols = ["subway entrance", "neighbourhood"]
)

# Display the map
subway_map