# AirBnb pricing tool for Manhattan NY

## Modules used in this project

In [1]:
import pandas as pd
import numpy as np

import geopandas as gpd
import seaborn as sns
import matplotlib.pyplot as plt

import folium as fl
import folium.plugins
from folium.plugins import HeatMap
import requests
import urllib.parse

from sklearn.model_selection import train_test_split
from sklearn.neighbors import NearestNeighbors , KNeighborsClassifier

import warnings
warnings.filterwarnings("ignore")

%matplotlib inline

In [2]:
# -----------------------------------------------------------------------------------------------------------------------------
def AirBnb_borough(BOROUGH):
    """Load the listings, calendar and amenities tables of one borough.

    The three CSV files are read from the current working directory as
    ``<prefix>_listings.csv``, ``<prefix>_calendar.csv`` and
    ``<prefix>_amenities.csv``.

    Parameters
    ----------
    BOROUGH : str
        "Bronx", "Brooklyn", "Manhattan", "Queens" or "Staten Island".

    Returns
    -------
    tuple of pandas.DataFrame
        ``(listings, calendar, amenities)``.

    Raises
    ------
    ValueError
        If ``BOROUGH`` is not one of the supported names.  (Previously the
        function returned ``None`` in that case and the failure only showed
        up later as an unpacking error in the caller.)
    """
    # File-name prefix per borough; only "Staten Island" differs from its name.
    prefixes = {
        "Bronx": "Bronx",
        "Brooklyn": "Brooklyn",
        "Manhattan": "Manhattan",
        "Queens": "Queens",
        "Staten Island": "StatenIsland",
    }

    if BOROUGH not in prefixes:
        raise ValueError(
            f"Unknown borough {BOROUGH!r}; expected one of {sorted(prefixes)}"
        )

    prefix = prefixes[BOROUGH]
    listings = pd.read_csv(f"{prefix}_listings.csv")
    calendar = pd.read_csv(f"{prefix}_calendar.csv")
    amenities = pd.read_csv(f"{prefix}_amenities.csv")

    return listings, calendar, amenities

# -----------------------------------------------------------------------------------------------------------------------------
def AirBnb_types(listings, calendar, amenities, ROOM, PROPERTY):
    """Keep only the listings of one room type and one property type.

    The calendar and amenities tables are reduced to the rows whose ``id``
    belongs to a listing that survived the filter.

    Returns the filtered ``(listings, calendar, amenities)`` triple.
    """
    # Both conditions must hold for a listing to be kept.
    wanted = (listings["room_type"] == ROOM) & (listings["property_type"] == PROPERTY)
    selected = listings[wanted]
    kept_ids = selected.id

    matching_calendar = calendar[calendar["id"].isin(kept_ids)]
    matching_amenities = amenities[amenities["id"].isin(kept_ids)]

    return selected, matching_calendar, matching_amenities

#------------------------------------------------------------------------------------------------------------------------------
def AirBnb_cluster(listings, calendar, amenities, ADDRESS):
    """Assign an address to a listing cluster and keep only that cluster.

    The address is geocoded with the public Nominatim service, a k-nearest
    neighbours classifier trained on the listings' coordinates predicts the
    ``cluster`` label of the resulting point, and the three tables are then
    reduced to that cluster.

    Parameters
    ----------
    listings : pandas.DataFrame
        Needs ``id``, ``cluster``, ``latitude``, ``longitude`` and
        ``neighbourhood_cleansed`` columns.
    calendar : pandas.DataFrame
        Needs an ``id`` column; it is dropped from the returned calendar.
    amenities : pandas.DataFrame
        Needs a ``cluster`` column.
    ADDRESS : str
        Free-text address to geocode.

    Returns
    -------
    tuple
        ``(listings, calendar, amenities, neighbour, query, cluster_number)``
        where ``neighbour`` holds the percentage share of the three most
        frequent neighbourhoods in the cluster and ``query`` is a one-row
        frame with the geocoded ``latitude`` / ``longitude``.

    Raises
    ------
    ValueError
        If the geocoder returns no match for ``ADDRESS``.
    requests.HTTPError
        If the geocoding request fails.
    """
    # Finding the lat and lon of the address.  The query goes in the `q`
    # parameter (requests URL-encodes it) and a timeout keeps a stalled
    # service from hanging the notebook.
    response = requests.get(
        "https://nominatim.openstreetmap.org/search",
        params={"q": ADDRESS, "format": "json"},
        headers={"User-Agent": "airbnb-ny-pricing-tool"},
        timeout=30,
    )
    response.raise_for_status()
    matches = response.json()
    if not matches:
        raise ValueError(f"Address not found by the geocoder: {ADDRESS!r}")

    # Cast to float so the query row is numeric whatever type the JSON carried.
    query = pd.DataFrame({
        "latitude": [float(matches[0]["lat"])],
        "longitude": [float(matches[0]["lon"])],
    })

    # Subsetting the df for the model
    borough_model = listings[["cluster", "latitude", "longitude"]].fillna(0)

    X = borough_model[["latitude", "longitude"]]
    y = borough_model["cluster"]

    # Only the 80 % training split is used; the split (and its seed) is kept
    # so predictions stay the same as before.
    X_train, _, y_train, _ = train_test_split(X, y, test_size=0.2, random_state=42)

    # Creating the model (it used to be built and fitted twice; the first
    # instance was discarded, so a single fit is equivalent).
    knc = KNeighborsClassifier(n_neighbors=10, metric="euclidean")
    knc.fit(X_train, y_train)
    cluster_number = knc.predict(query)[0]

    listings = listings[listings["cluster"] == cluster_number]
    amenities = amenities[amenities["cluster"] == cluster_number]
    calendar = calendar[calendar["id"].isin(listings.id)].drop(["id"], axis=1)

    # Percentage of the cluster's listings per neighbourhood, top three.
    counts = listings.neighbourhood_cleansed.value_counts()
    neighbour = round(counts / counts.sum() * 100, 2).head(3)

    return listings, calendar, amenities, neighbour, query, cluster_number

 # ------------------------------------------------------------------------------------------------------------------------------
def AirBnb_map(listings, query):
    """Build a folium map with a heat map of the listings and a query marker.

    Parameters
    ----------
    listings : pandas.DataFrame
        Needs ``latitude`` and ``longitude`` columns; the map is centred on
        their means and every row feeds the heat map.
    query : pandas.DataFrame
        One-row frame with ``latitude`` and ``longitude`` columns (as built
        by ``AirBnb_cluster``); its first row is drawn as a black marker.

    Returns
    -------
    folium.Map
    """
    coordinates = listings[["latitude", "longitude"]]

    centre = [coordinates.latitude.mean(), coordinates.longitude.mean()]
    borough_map = fl.Map(location=centre, zoom_start=13)
    HeatMap(data=coordinates, radius=9, max_zoom=10).add_to(borough_map)

    # Pass plain floats to the marker: handing over one-element Series makes
    # folium call float() on a Series, which pandas has deprecated.
    marker_location = [
        float(query["latitude"].iloc[0]),
        float(query["longitude"].iloc[0]),
    ]
    fl.Marker(marker_location, radius=1, icon=fl.Icon(color="black")).add_to(borough_map)

    return borough_map

#-------------------------------------------------------------------------------------------------------------------------------
def AirBnb_offerts(cluster_listings):
    """Summarise the capacity offered by the listings of a cluster.

    For each of the ``accommodates``, ``beds`` and ``bedrooms`` columns the
    summary holds the minimum, the maximum, the most frequent value and the
    percentage of listings that have that most frequent value.

    Parameters
    ----------
    cluster_listings : pandas.DataFrame
        Needs ``accommodates``, ``beds`` and ``bedrooms`` columns with at
        least one non-missing value each (an empty column raises IndexError,
        as before).

    Returns
    -------
    pandas.DataFrame
        Rows ``min``, ``max``, ``mode``, ``percent``; columns
        ``accommodates``, ``beds``, ``bedrooms`` (in that order).
    """
    summary = {}
    for column in ("accommodates", "beds", "bedrooms"):
        values = cluster_listings[column]
        # value_counts sorts by frequency, so the first entry is the mode.
        frequencies = values.value_counts()
        mode_value = frequencies.index[0]
        mode_count = frequencies.iloc[0]
        # Share of listings having the modal value.  The denominator is the
        # number of (non-missing) listings, not the sum of the column values,
        # which is what the old code divided by.
        listing_count = frequencies.sum()
        percent = round(float(mode_count) / float(listing_count) * 100, 2)
        summary[column] = [values.min(), values.max(), mode_value, percent]

    return pd.DataFrame(summary, index=["min", "max", "mode", "percent"])

# ------------------------------------------------------------------------------------------------------------------------------    

def AirBnb_calendar(cluster_calendar, PRICES):
    """Select the calendar rows used for pricing.

    Parameters
    ----------
    cluster_calendar : pandas.DataFrame
        Needs a ``year`` column.
    PRICES : str
        "Availability" keeps the rows of year 2021; "Historical" keeps every
        other year.

    Returns
    -------
    pandas.DataFrame
        The selected rows.

    Raises
    ------
    ValueError
        If ``PRICES`` is neither option.  (Previously the function returned
        ``None`` and the caller failed later with an unrelated error.)
    """
    if PRICES == "Availability":
        return cluster_calendar[cluster_calendar["year"] == 2021]

    if PRICES == "Historical":
        return cluster_calendar[cluster_calendar["year"] != 2021]

    raise ValueError(
        f"Unknown price system {PRICES!r}; expected 'Availability' or 'Historical'"
    )
    
# ------------------------------------------------------------------------------------------------------------------------------    

# One-row result table shared at module level; AirBnb_prices overwrites both
# columns on every call.
normal_prices = pd.DataFrame(columns=["basic","fixed"],index=[1])


def AirBnb_prices(calendar):
    """Compute the static prices of a calendar.

    Parameters
    ----------
    calendar : pandas.DataFrame
        Needs a ``price`` column.

    Returns
    -------
    pandas.DataFrame
        One row (index ``1``) with two columns rounded to 2 decimals:
        ``basic`` is the mean of all prices, ``fixed`` is the mean of the
        prices lying between the 10th and 90th percentiles (both included).

    Notes
    -----
    The module-level ``normal_prices`` frame is updated as a side effect; the
    returned frame is a rounded copy of it.
    """
    prices = calendar["price"]
    basic_values = prices.mean()

    low = prices.quantile(.10)
    high = prices.quantile(.90)
    # `between` includes both bounds by default.  The former `inclusive=True`
    # argument is rejected by pandas >= 2.0, so it is simply omitted.
    fixed_values = prices[prices.between(low, high)].mean()

    normal_prices["basic"] = basic_values
    normal_prices["fixed"] = fixed_values

    return normal_prices.round(2)

# ------------------------------------------------------------------------------------------------------------------------------    

# Module-level result table (one row per month label) and the lists holding
# the latest per-month values; AirBnb_monthly refreshes all three.
montly_prices = pd.DataFrame(columns=["Basic","Fixed","Available"],
                             index = ["JAN","FEB","MAR","APR","MAY","JUN","JUL","AGO","SEP","OCT","NOV","DIC"])
month_basic = []
month_fixed = []


def AirBnb_monthly(calendar):
    """Compute basic, fixed and availability-adjusted prices per month.

    Parameters
    ----------
    calendar : pandas.DataFrame
        Needs ``month`` and ``price`` columns.  The sorted ``month`` groups
        are mapped positionally onto the twelve row labels of
        ``montly_prices``, so exactly twelve distinct months are required.

    Returns
    -------
    pandas.DataFrame
        Index ``Month`` (twelve labels) and columns:

        * ``Basic``: mean price of the month, rounded to 2 decimals.
        * ``Fixed``: mean of the month's prices lying between its 10th and
          90th percentiles (both included), rounded to 2 decimals.
        * ``Available``: ``Basic`` raised when the month has fewer priced
          rows than the monthly average and lowered otherwise; the change is
          5 % when the month's row count lies within the interquartile range
          of the monthly counts and 10 % when it does not.

    Raises
    ------
    ValueError
        If the calendar does not contain exactly twelve distinct months.

    Notes
    -----
    The module-level ``montly_prices``, ``month_basic`` and ``month_fixed``
    are refreshed with the latest values.  The lists are replaced in place
    rather than appended to, so they no longer grow on every call, and a
    copy of the table is returned so later calls cannot alter a result the
    caller already holds.
    """
    by_month = calendar.groupby("month")["price"]

    expected_months = len(montly_prices.index)
    if by_month.ngroups != expected_months:
        raise ValueError(
            f"calendar must contain exactly {expected_months} distinct months, "
            f"found {by_month.ngroups}"
        )

    basic = []
    fixed = []
    for _, prices in by_month:
        basic.append(round(prices.mean(), 2))
        low = prices.quantile(0.10)
        high = prices.quantile(0.90)
        # Trimmed mean of the PRICE column only.  `between` includes both
        # bounds by default (`inclusive=True` is rejected by pandas >= 2.0).
        fixed.append(round(prices[prices.between(low, high)].mean(), 2))

    # Number of priced rows per month, used as the supply indicator.
    supply = by_month.count().reset_index(drop=True)
    near_typical = supply.between(supply.quantile(.25), supply.quantile(.75))
    increment = np.where(near_typical, 0.05, 0.10)
    # Below-average supply pushes the price up, otherwise down.
    sign = np.where(supply - supply.mean() < 0, 1, -1)

    month_basic[:] = basic
    month_fixed[:] = fixed

    montly_prices["Basic"] = basic
    montly_prices["Fixed"] = fixed
    montly_prices["Available"] = round(montly_prices["Basic"] + (montly_prices["Basic"] * increment * sign),2)
    montly_prices.index.name = "Month"

    return montly_prices.copy()

# ------------------------------------------------------------------------------------------------------------------------------    

def AirBnb_amenities(cluster_amenities):
    """Rank the amenities of a cluster by how many listings offer them.

    ``cluster_amenities`` holds one row per listing with ``id`` and
    ``cluster`` columns plus one column per amenity, where a value of 1
    marks the amenity as present.  The result has the columns ``amenitie``
    and ``count`` and at most ten rows, most frequent first.
    """
    # Long format: one row per (listing, amenity) pair.
    long_form = cluster_amenities.melt(
        id_vars=["id", "cluster"],
        var_name="amenitie",
        value_name="count",
    )

    # Keep only the pairs where the amenity is present.
    is_present = long_form["count"] == 1
    present = long_form[is_present].drop(["id", "cluster"], axis=1)

    ranking = (
        present.groupby("amenitie")
        .count()
        .sort_values("count", ascending=False)
        .reset_index()
    )

    return ranking.head(10)

def AirBnb_pricing(BOROUGH, ADDRESS, ROOM, PROPERTY, PRICES):
    """Run the whole pricing pipeline for one address and print the report.

    Steps, in order: load the borough tables (AirBnb_borough), keep the
    requested room and property type (AirBnb_types), assign the address to a
    cluster (AirBnb_cluster), build the map (AirBnb_map), select the calendar
    rows (AirBnb_calendar), compute static and monthly prices (AirBnb_prices,
    AirBnb_monthly), rank the amenities (AirBnb_amenities) and summarise the
    offer (AirBnb_offerts).  The results are printed as one text report and
    the map is passed to ``display``.

    Parameters
    ----------
    BOROUGH : forwarded to AirBnb_borough.
    ADDRESS : forwarded to AirBnb_cluster.
    ROOM, PROPERTY : forwarded to AirBnb_types.
    PRICES : forwarded to AirBnb_calendar.

    Returns
    -------
    tuple
        ``(print(...), display(cluster_map))``, i.e. the return values of the
        two calls, not the computed data.
    """
    
    listings, calendar, amenities = AirBnb_borough(BOROUGH)
    listings, calendar, amenities = AirBnb_types(listings, calendar, amenities, ROOM, PROPERTY)
    # NOTE(review): cluster_number is unpacked here but not used below.
    cluster_listings, cluster_calendar, cluster_amenities, cluster_neighbour, query, cluster_number = AirBnb_cluster(listings, calendar, amenities, ADDRESS)
    cluster_map = AirBnb_map(cluster_listings, query)
    calendar = AirBnb_calendar(cluster_calendar, PRICES)
    normaly_prices = AirBnb_prices(calendar)
    monthly_prices = AirBnb_monthly(calendar)
    topamenities = AirBnb_amenities(cluster_amenities)
    offerts = AirBnb_offerts(cluster_listings)

    # Adjacent string literals below are concatenated into one argument; a
    # comma starts a new print argument, which print joins with a space.
    return print(
"This application gives you two types of prices: static and monthly prices.\n\n"
      
"The static prices are based on the average of the listings present in this area, here we give you two values:\n\n"

"- The basic price is the average value of the prices for this area without distinguishing dates or outlier values\n\n"

"- The fixed price is the average value of the prices for this area excluding the 10% of the lowest and highest values,\n"
"  therefore this price does not take into account excessively minimum or maximum values.\n\n"
      
"\t Basic price:",normaly_prices["basic"].values[0], "USD.\n"
"\t Fixed price:",normaly_prices["fixed"].values[0], "USD.\n\n"
   
      
"The monthly prices are the prices that take into account the values registered in this area segregating by month\n"
"of these we present three values: basic price, fixed price and availability price.\n\n",
    
    monthly_prices.to_markdown(),"\n\n",   

"- The basic and fixed prices meet the same description as their counterparts in the static prices,\n"
"  only differing in that they are calculated for each of the months.\n\n"

"- The availability price is based on the number of listings present during the month, if a month has a high number of AirBnb\n"
"  available the price will be lower to compete; on the contrary, if a month has a low supply, the price will be higher.\n"
"  The percentage of change is reflected in the proximity of the monthly value with the annual average where if the month \n"
"  presents values close to the annual average, the change will be +/- 5%, if not the change will be +/- 10 %.\n\n"
      
"--------------------------------------------------------------------------------------------------------------------------\n\n"
          
# Column 0 of the offer summary is "accommodates"; rows are min, max, mode,
# percent (see AirBnb_offerts).
" Accommodates in this area:\n",
"The range of offer for this area is from", offerts.iloc[:,0].values[0],"to",offerts.iloc[:,0].values[1],"people",
"and most of it is offered for",offerts.iloc[:,0].values[2],"(the", offerts.iloc[:,0].values[3], "% of the total).\n\n"
      
# Column 2 is "bedrooms".
# NOTE(review): the sentence still says "people" although it reports
# bedrooms - looks copied from the block above; confirm the intended wording.
" Bedrooms in this area:\n",
"The range of offer for this area is from", offerts.iloc[:,2].values[0],"to",offerts.iloc[:,2].values[1],"people",
      "and most of it is offered for",offerts.iloc[:,2].values[2],"(the", offerts.iloc[:,2].values[3], "% of the total).\n\n"
      
# Column 1 is "beds" (same "people" wording caveat as above).
" Beds in this area:\n",
"The range of offer for this area is from", offerts.iloc[:,1].values[0],"to",offerts.iloc[:,1].values[1],"people",
"and most of it is offered for",offerts.iloc[:,1].values[2],"(the", offerts.iloc[:,1].values[3], "% of the total).\n\n"
      
"--------------------------------------------------------------------------------------------------------------------------\n\n"
      
# NOTE(review): positions 0-9 are hard-coded; AirBnb_amenities returns at
# most ten rows, so a cluster with fewer amenities would raise IndexError here.
" The top 10 amenities present in this area are:\n\n"
"\t 1", topamenities.iloc[:,0].values[0],"\n"
"\t 2", topamenities.iloc[:,0].values[1],"\n"
"\t 3", topamenities.iloc[:,0].values[2],"\n"
"\t 4", topamenities.iloc[:,0].values[3],"\n"
"\t 5", topamenities.iloc[:,0].values[4],"\n"
"\t 6", topamenities.iloc[:,0].values[5],"\n"
"\t 7", topamenities.iloc[:,0].values[6],"\n"
"\t 8", topamenities.iloc[:,0].values[7],"\n"
"\t 9", topamenities.iloc[:,0].values[8],"\n"
"\t 10", topamenities.iloc[:,0].values[9],"\n\n"
      
"--------------------------------------------------------------------------------------------------------------------------\n\n"
"This is your map, the black marker is where your AirBnb is located and the colored area indicates all the AirBnb in your group.\n"
"(The red color indicates a higher density of AirBnb while the purple indicates the opposite).\n\n"

# NOTE(review): likewise assumes the cluster spans at least three
# neighbourhoods (cluster_neighbour holds at most three entries).
"The top 3 neighbours with more AirBnb in this area are:\n\n"
"\t 1","(", (cluster_neighbour.values[0]),"%)",cluster_neighbour.index[0],"\n"
"\t 2","(", cluster_neighbour.values[1],"%)",cluster_neighbour.index[1],"\n"
"\t 3","(", cluster_neighbour.values[2],"%)",cluster_neighbour.index[2],"\n"      
        
      
# NOTE(review): `display` is not imported in the visible code - presumably
# the IPython/Jupyter builtin; confirm before running outside a notebook.
),display(cluster_map)

In [3]:
# Usage instructions shown before the tool is run.  The text is passed to
# print as ONE concatenated string: a comma after the first line used to split
# it into two arguments, so print inserted a stray space before the second
# paragraph.  The room option is spelled "Entire home/apt" because that is
# the exact value the room filter compares against.
print(
    "Welcome to the AirBnb NY pricing tool!\n\n"
    "This app use open data from Inside AirBnb to generate prices using a classification system where we associate\n"
    "your address with a pre-established group of existing listings in our data.\n\n"

    "Borough: the borough where your AirBnb is located.\n\n"
    "Address: the address of your AirBnb.\n\n"
    "Room: the type of rental in which you are interested:\n"
    "\tEntire home/apt: rent the entire property.\n"
    "\tPrivate room: rent one of the rooms of the property for a single guest.\n"
    "\tShared room: rent one of the rooms of the property for more than one guest\n\n"
    "Property: the type of your property (Apartment, House, Loft, Condominium, Townhouse)\n\n"
    "Price: price system according to historical or availability data:\n"
    "\tHistorical: price data is used between 2018 and 2020.\n"
    "\tAvailability: the data already registered for 2021 is used (last update: DEC-2020)"
)

Welcome to the AirBnb NY pricing tool!

 This app use open data from Inside AirBnb to generate prices using a classification system where we associate
your address with a pre-established group of existing listings in our data.

Borough: the borough where your AirBnb is located.

Address: the address of your AirBnb.

Room: the type of rental in which you are interested:
	Entiere home / app: rent the entire property.
	Private room: rent one of the rooms of the property for a single guest.
	Shared room: rent one of the rooms of the property for more than one guest

Property: the type of your property (Apartment, House, Loft, Condominium, Townhouse)

Price: price system according to historical or availability data:
	Historical: price data is used between 2018 and 2020.
	Availability: the data already registered for 2021 is used (last update: DEC-2020)


In [5]:
# Example run: an entire apartment in Manhattan priced from historical data.
ADDRESS = "228 W 10th St, New York, NY 10014, United States"
BOROUGH, ROOM, PROPERTY, PRICES = (
    "Manhattan",
    "Entire home/apt",
    "Apartment",
    "Historical",
)

AirBnb_pricing(
    BOROUGH=BOROUGH,
    ADDRESS=ADDRESS,
    ROOM=ROOM,
    PROPERTY=PROPERTY,
    PRICES=PRICES,
)

This application gives you two types of prices: static and monthly prices.

The static prices are based on the average of the listings present in this area, here we give you two values:

- The basic price is the average value of the prices for this area without distinguishing dates or outlier values

- The fixed price is the average value of the prices for this area excluding the 10% of the lowest and highest values,
  therefore this price does not take into account excessively minimum or maximum values.

	 Basic price: 289.74 USD.
	 Fixed price: 240.6 USD.

The monthly prices are the prices that take into account the values registered in this area segregating by month
of these we present three values: basic price, fixed price and availability price.

 | Month   |   Basic |   Fixed |   Available |
|:--------|--------:|--------:|------------:|
| JAN     |  289.57 |  224.36 |      260.61 |
| FEB     |  294.51 |  226.6  |      265.06 |
| MAR     |  307.78 |  234.44 |      292.39 |
| APR  

(None, None)