# Airbnb Pricing Analysis
By Location<br>
Updated: 2023-01-07

In [72]:
# Import required libraries
from mysql import mysql_cnxn
import plotly.express as px
import pandas as pd

In [2]:
# Load the `sql` extension, which provides the %sql (line) and %%sql (cell)
# magics used throughout this notebook to query the MySQL database.
%load_ext sql

In [3]:
# Establish the connection to the MySQL database. `mysql_cnxn` is the
# connection string imported in the imports cell; the `$` prefix substitutes
# the variable's value into the magic line.
# NOTE(review): the connection echo in later outputs shows the `root` account;
# consider a read-only user for analysis work.
%sql $mysql_cnxn

In [4]:
# Configure the SQL magic so every query result is returned as a pandas
# DataFrame, which can be handed straight to the plotting calls below.
%config SqlMagic.autopandas = True


| Column name | Description |
| --- | --- |
| id | Listing id |
| host_id | Host id |
| neighborhood_group | Neighbourhood group the listing is in |
| neighborhood | Neighbourhood the listing is in |
| latitude | Latitude coordinate of listing location |
| longitude | Longitude coordinate of listing location |
| room_type | Room type of the listing |
| price | Price of the listing |
| minimum_nights | Minimum number of nights stay for listing |
| number_of_reviews | Number of reviews for listing |
| calculated_host_listings_count | Number of listings the host has |
| availability_365 | The availability of the listing in the next 365 days |
| number_of_reviews_ltm | Number of reviews of listing in last 12 months |

In [103]:
# Check connection and configuration
%sql SELECT * FROM airbnb.listings;

 * mysql+pymysql://root:***@localhost/airbnb
39851 rows affected.


Unnamed: 0,id,name,host_id,host_name,neighborhood_group,neighborhood,latitude,longitude,room_type,price,minimum_nights,number_of_reviews,last_review,reviews_per_month,calculated_host_listings_count,availability_365,number_of_reviews_ltm,license
0,77765,Superior @ Box House,417504,The Box House Hotel,Brooklyn,Greenpoint,40.737770,-73.953660,Hotel room,308,2,42,2022-07-18,0.30,30,217,4,
1,2539,Clean & quiet apt home by the park,2787,John,Brooklyn,Kensington,40.645290,-73.972380,Private room,299,30,9,2018-10-19,0.11,9,356,0,
2,45910,Beautiful Queens Brownstone! - 5BR,204539,Mark,Queens,Ridgewood,40.703090,-73.899630,Entire home/apt,425,30,13,2019-11-12,0.10,6,365,0,
3,45935,Room in Beautiful Townhouse.,204586,L,Bronx,Mott Haven,40.806350,-73.922010,Private room,60,30,0,NaT,,1,83,0,
4,45936,Couldn't Be Closer To Columbia Uni,867225,Rahul,Manhattan,Morningside Heights,40.806300,-73.959850,Private room,75,31,135,2022-07-11,0.95,1,219,4,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
39846,27577588,Luxury Studio ON Grove Street E0C - B1CA,37412692,Kim,Manhattan,Ellis Island,40.718220,-74.037940,Entire home/apt,135,365,2,2019-09-16,0.04,7,365,0,
39847,654151117629853651,Lovely 3- bedroom apartment,117540494,Miriam,Queens,Rosedale,40.647244,-73.720088,Entire home/apt,180,1,5,2022-08-24,1.92,1,0,5,
39848,553754115911961053,Trendy 3-bedroom apartment near Manhattan,15048320,India,Manhattan,Upper West Side,40.787320,-74.004470,Entire home/apt,240,5,18,2022-08-22,2.87,1,152,18,
39849,698195550745703156,"Luxurious private waterfront terrace, 2BR 2BA Apt",151487807,Asser,Brooklyn,Williamsburg,40.709192,-73.970121,Entire home/apt,400,30,0,NaT,,1,311,0,


### Aggregations for the entire dataset

In [6]:
%%sql
SELECT
      COUNT(id)                          AS listings
    , COUNT(DISTINCT host_id)            AS hosts
    , COUNT(DISTINCT neighborhood_group) AS neighborhood_groups
    , COUNT(DISTINCT neighborhood)       AS neighborhoods
    , COUNT(DISTINCT room_type)          AS room_types
    , ROUND(AVG(price), 2)               AS avg_price
    , MIN(price)                         AS min_price
    , MAX(price)                         AS max_price
FROM airbnb.listings;

 * mysql+pymysql://root:***@localhost/airbnb
1 rows affected.


Unnamed: 0,listings,hosts,neighborhood_groups,neighborhoods,room_types,avg_price,min_price,max_price
0,39851,26263,5,244,4,197.7,10,16500


In [7]:
# Pull only the price column into a DataFrame (price_df) for the pandas-side
# statistics and the histogram that follow.
%sql price_df << SELECT price FROM airbnb.listings;

 * mysql+pymysql://root:***@localhost/airbnb
39851 rows affected.
Returning data to local variable price_df


In [8]:
# Median listing price, for comparison with the mean computed in SQL.
price_df.median()

price    130.0
dtype: float64

In [9]:
# Check the skew: a positive value indicates a long right (high-price) tail.
price_df.skew()

price    18.615593
dtype: float64

<b>There are outliers in price on the high (right) side of the distribution.</b><br>
Dataset is right skewed.

In [50]:
# Plot the overall price distribution as a 100-bin histogram.
fig = px.histogram(
    data_frame=price_df,
    x="price",
    nbins=100,
    labels={"price": "Price (USD)"},
    title="Price Distribution",
)
fig.show()

In [14]:
# Most common price point: value_counts() sorts by frequency (descending by
# default), so the first row is the mode and its count.
price_df.value_counts().to_frame().head(1)

Unnamed: 0_level_0,0
price,Unnamed: 1_level_1
150,1164


In [47]:
%%sql
SELECT
    total_listings,
    listings_below_200,
    ROUND(((listings_below_200 / total_listings) * 100), 2) AS percent_below_200
FROM (
    SELECT
        COUNT(id) AS total_listings,
        -- conditional count replaces the scalar subquery over the same table
        COUNT(CASE WHEN price <= 199 THEN id END) AS listings_below_200
    FROM airbnb.listings
) AS listings_dist;

 * mysql+pymysql://root:***@localhost/airbnb
1 rows affected.


Unnamed: 0,total_listings,listings_below_200,percent_below_200
0,39851,28225,70.83


<b> 70.83% of all listings are priced between 0 and 199.00 dollars</b>
<h3>Quick Feature Summary</h3>
<pre>
           <b>Listings:</b> 39,851
              <b>Hosts:</b> 26,263
<b>Neighborhood Groups:</b> 5
      <b>Neighborhoods:</b> 244
         <b>Room Types:</b> 4
<b>Average Price (USD):</b> 197.70
<b>Minimum Price (USD):</b> 10.00
 <b>Median Price (USD):</b> 130.00
<b>Maximum Price (USD):</b> 16,500.00

<i>The price of 150.00 is the most common price point, appearing 1164 times.</i>
</pre>

#### What factors affect the price of a listing?
- Listings in Manhattan will have higher prices.
- Entire home/apt listings will have higher prices.
- Does the neighborhood impact price?
- Do the number and/or quality of reviews reflect the price?

## Location

#### Top 10 Neighborhoods by average price

In [51]:
%%sql top_10_neighborhoods <<
SELECT neighborhood,
       COUNT(id)            AS total_listings,
       ROUND(AVG(price), 2) AS avg_price
FROM airbnb.listings
GROUP BY neighborhood
ORDER BY avg_price DESC
LIMIT 10;

 * mysql+pymysql://root:***@localhost/airbnb
10 rows affected.
Returning data to local variable top_10_neighborhoods


In [52]:
# Bar chart of the ten highest average prices; the listing count is exposed on
# hover because an average can rest on very few listings.
fig = px.bar(
    top_10_neighborhoods,
    x="neighborhood",
    y="avg_price",
    text="avg_price",
    color="neighborhood",
    hover_data=["total_listings"],
    title="Top 10 Neighborhoods by Average Prices",
    labels={
        "neighborhood": "Neighborhood",
        "avg_price": "Avg. Price (USD)",
        "total_listings": "Total Listings",
    },
)
fig.show()

#### Prospect Park and Fort Wadsworth have the highest average price for listings in their neighborhood.  The values are skewed due to the low number of listings for each.
Prospect Park has 7 listings and Fort Wadsworth only has 1.

#### Are there any outliers?

In [53]:
%%sql
SELECT
    id,
    host_id,
    room_type,
    price
FROM
    airbnb.listings
WHERE
    -- single-quoted string literal, standard SQL and safe under ANSI_QUOTES
    neighborhood = 'Prospect Park'
ORDER BY
    price DESC;

 * mysql+pymysql://root:***@localhost/airbnb
7 rows affected.


Unnamed: 0,id,host_id,room_type,price
0,675440153877558942,124769036,Entire home/apt,3000
1,587755922527982462,26368066,Entire home/apt,320
2,16490732,107933494,Entire home/apt,301
3,610109288288353127,4042988,Entire home/apt,280
4,599181487210203150,453024622,Entire home/apt,279
5,2867463,13601944,Entire home/apt,250
6,645453326858592715,53636618,Entire home/apt,142


Listing ID 675440153877558942 has a price of 3,000 while no other listing is more than 320.

#### What are the bottom 10 Neighborhoods by average listing price?

In [54]:
%%sql bottom_10_neighborhoods <<
SELECT neighborhood,
       COUNT(id)            AS total_listings,
       ROUND(AVG(price), 2) AS avg_price
FROM airbnb.listings
GROUP BY neighborhood
ORDER BY avg_price
LIMIT 10;

 * mysql+pymysql://root:***@localhost/airbnb
10 rows affected.
Returning data to local variable bottom_10_neighborhoods


In [55]:
# Bar chart of the ten lowest average prices; the listing count is exposed on
# hover because an average can rest on very few listings.
fig = px.bar(
    bottom_10_neighborhoods,
    x="neighborhood",
    y="avg_price",
    text="avg_price",
    color="neighborhood",
    hover_data=["total_listings"],
    title="Bottom 10 Neighborhoods by Average Prices",
    labels={
        "neighborhood": "Neighborhood",
        "avg_price": "Avg. Price (USD)",
        "total_listings": "Total Listings",
    },
)
fig.show()

<b>With some neighborhoods having 1 listing, comparing average prices is highly susceptible to outliers.</b>

#### Questions
Where do the neighborhoods lie geographically?<br>
Does proximity to downtown affect the price? Waterfront?<br>
Could there be other factors? Traffic? Crime? Shopping?<br>
Linear Regression problem?

In [56]:
# Release both ranking frames now that their charts have been drawn.
del top_10_neighborhoods, bottom_10_neighborhoods

### Average price by 'neighborhood_group'

In [57]:
%%sql neighborhood_groups <<
SELECT neighborhood_group,
       COUNT(DISTINCT neighborhood) AS neighborhoods,
       COUNT(id)                    AS listings,
       ROUND(AVG(price), 2)         AS avg_price,
       MAX(price)                   AS max_price,
       MIN(price)                   AS min_price
FROM airbnb.listings
GROUP BY neighborhood_group
ORDER BY avg_price DESC;

 * mysql+pymysql://root:***@localhost/airbnb
5 rows affected.
Returning data to local variable neighborhood_groups


In [60]:
# Display the per-borough summary, ordered by average price (highest first).
neighborhood_groups

Unnamed: 0,neighborhood_group,neighborhoods,listings,avg_price,max_price,min_price
0,Manhattan,34,16823,265.31,16500,10
1,Brooklyn,51,14841,157.97,10000,10
2,Staten Island,46,446,143.16,2500,33
3,Queens,58,6174,131.39,10000,10
4,Bronx,55,1567,124.82,9994,10


In [58]:
# Average price per borough; counts and price extremes are available on hover.
fig = px.bar(
    neighborhood_groups,
    x="neighborhood_group",
    y="avg_price",
    text="avg_price",
    color="neighborhood_group",
    hover_data=["listings", "neighborhoods", "max_price", "min_price"],
    title="Average Price by Neighborhood Group",
    labels={
        "neighborhood_group": "Neighborhood Group",
        "avg_price": "Avg. Price (USD)",
        "listings": "Total Listings",
        "neighborhoods": "Neighborhoods",
        "max_price": "Max Price (USD)",
        "min_price": "Min Price (USD)",
    },
)
fig.show()

### As expected, Manhattan listings have the highest average price.

### Unexpectedly, Staten Island has by far the fewest listings and the lowest max price, yet it has the third-highest average price.

In [61]:
# Same per-borough summary, reordered by listing count (largest first).
neighborhood_groups.sort_values(by="listings", ascending=False)

Unnamed: 0,neighborhood_group,neighborhoods,listings,avg_price,max_price,min_price
0,Manhattan,34,16823,265.31,16500,10
1,Brooklyn,51,14841,157.97,10000,10
3,Queens,58,6174,131.39,10000,10
4,Bronx,55,1567,124.82,9994,10
2,Staten Island,46,446,143.16,2500,33


In [63]:
# Share of all listings that falls in each borough.
fig = px.pie(
    neighborhood_groups,
    names="neighborhood_group",
    values="listings",
    title="Percent of Listings per Neighborhood Group",
    labels={
        "neighborhood_group": "Neighborhood Group",
        "listings": "Listings",
    },
)
fig.show()

<b>About 79.5% of listings (31,664 of 39,851) are located in either Manhattan or Brooklyn.</b>

In [26]:
# Release the per-borough summary frame now that its charts have been drawn.
del neighborhood_groups

### Break down the average price by neighborhood and neighborhood_group

In [64]:
%%sql
SELECT neighborhood_group,
       neighborhood,
       COUNT(id)            AS listings,
       ROUND(AVG(price), 2) AS avg_price
FROM airbnb.listings
GROUP BY neighborhood_group, neighborhood
ORDER BY neighborhood_group, avg_price DESC;

 * mysql+pymysql://root:***@localhost/airbnb
244 rows affected.


Unnamed: 0,neighborhood_group,neighborhood,listings,avg_price
0,Bronx,Riverdale,9,317.11
1,Bronx,Longwood,67,265.27
2,Bronx,Pelham Islands,1,250.00
3,Bronx,Ferry Point Park,1,225.00
4,Bronx,Spuyten Duyvil,5,198.80
...,...,...,...,...
239,Staten Island,Mariners Harbor,22,76.82
240,Staten Island,West Brighton,27,76.19
241,Staten Island,Grant City,14,60.14
242,Staten Island,Bull's Head,1,60.00


### Rank the average price of each neighborhood in the neighborhood groups
*Highest average price receives a rank of 1*

In [65]:
%%sql
WITH neighborhoods AS (
    -- one row per neighborhood with its listing count and average price
    SELECT
        neighborhood_group,
        neighborhood,
        COUNT(id)            AS listings,
        ROUND(AVG(price), 2) AS avg_price
    FROM airbnb.listings
    GROUP BY neighborhood_group, neighborhood
)
SELECT
    *,
    RANK() OVER (PARTITION BY neighborhood_group ORDER BY avg_price DESC) AS price_rank
FROM neighborhoods;

 * mysql+pymysql://root:***@localhost/airbnb
244 rows affected.


Unnamed: 0,neighborhood_group,neighborhood,listings,avg_price,price_rank
0,Bronx,Riverdale,9,317.11,1
1,Bronx,Longwood,67,265.27,2
2,Bronx,Pelham Islands,1,250.00,3
3,Bronx,Ferry Point Park,1,225.00,4
4,Bronx,Spuyten Duyvil,5,198.80,5
...,...,...,...,...,...
239,Staten Island,Mariners Harbor,22,76.82,42
240,Staten Island,West Brighton,27,76.19,43
241,Staten Island,Grant City,14,60.14,44
242,Staten Island,Bull's Head,1,60.00,45


### Compare the average price of neighborhoods with the same price rank

In [66]:
%%sql
WITH neighborhoods AS (
    -- one row per neighborhood with its listing count and average price
    SELECT
        neighborhood_group,
        neighborhood,
        COUNT(id)            AS listings,
        ROUND(AVG(price), 2) AS avg_price
    FROM airbnb.listings
    GROUP BY neighborhood_group, neighborhood
),
neighborhood_rank AS (
    -- rank neighborhoods inside each group, most expensive first
    SELECT
        *,
        RANK() OVER (PARTITION BY neighborhood_group ORDER BY avg_price DESC) AS price_rank
    FROM neighborhoods
)
SELECT *
FROM neighborhood_rank
WHERE price_rank = 1
ORDER BY avg_price DESC;

 * mysql+pymysql://root:***@localhost/airbnb
5 rows affected.


Unnamed: 0,neighborhood_group,neighborhood,listings,avg_price,price_rank
0,Brooklyn,Prospect Park,7,653.14,1
1,Staten Island,Fort Wadsworth,1,650.0,1
2,Manhattan,Tribeca,246,533.98,1
3,Queens,Hollis Hills,1,497.0,1
4,Bronx,Riverdale,9,317.11,1


### Compare listings of the same neighborhood group

In [67]:
%%sql
WITH neighborhoods AS (
    -- one row per neighborhood with its listing count and average price
    SELECT
        neighborhood_group,
        neighborhood,
        COUNT(id)            AS listings,
        ROUND(AVG(price), 2) AS avg_price
    FROM airbnb.listings
    GROUP BY neighborhood_group, neighborhood
),
neighborhood_rank AS (
    -- rank neighborhoods inside each group, most expensive first
    SELECT
        *,
        RANK() OVER (PARTITION BY neighborhood_group ORDER BY avg_price DESC) AS price_rank
    FROM neighborhoods
)
SELECT *
FROM neighborhood_rank
WHERE
    -- single-quoted string literal, standard SQL and safe under ANSI_QUOTES
    neighborhood_group = 'Manhattan'
ORDER BY avg_price DESC;

 * mysql+pymysql://root:***@localhost/airbnb
34 rows affected.


Unnamed: 0,neighborhood_group,neighborhood,listings,avg_price,price_rank
0,Manhattan,Tribeca,246,533.98,1
1,Manhattan,Theater District,321,473.19,2
2,Manhattan,SoHo,242,450.57,3
3,Manhattan,Flatiron District,98,407.53,4
4,Manhattan,Midtown,1694,383.28,5
5,Manhattan,Financial District,480,349.05,6
6,Manhattan,Greenwich Village,206,338.06,7
7,Manhattan,West Village,572,319.79,8
8,Manhattan,Murray Hill,371,306.65,9
9,Manhattan,Lower East Side,794,305.73,10


## CREATE STORED PROCEDURES FOR LAST TWO QUERIES

Check the stored procedures

In [68]:
# Call the stored procedure that returns, for every neighborhood group, the
# neighborhood holding the requested 'price_rank' (rank 3 here).
# NOTE(review): the rank is passed as the string '3'; the procedure definition
# is not shown in this notebook, so confirm the expected parameter type.
%sql call airbnb.neighborhood_group_rank('3');

 * mysql+pymysql://root:***@localhost/airbnb
5 rows affected.


Unnamed: 0,neighborhood_group,neighborhood,listings,avg_price,price_rank
0,Manhattan,SoHo,242,450.57,3
1,Queens,Belle Harbor,7,354.86,3
2,Staten Island,Willowbrook,1,329.0,3
3,Brooklyn,Bergen Beach,18,281.17,3
4,Bronx,Pelham Islands,1,250.0,3


In [69]:
# Call the stored procedure that lists the neighborhoods of the specified
# 'neighborhood_group' together with their 'avg_price' and 'price_rank'.
%sql call airbnb.neighborhood_rank('Staten Island');

 * mysql+pymysql://root:***@localhost/airbnb
46 rows affected.


Unnamed: 0,neighborhood_group,neighborhood,listings,avg_price,price_rank
0,Staten Island,Fort Wadsworth,1,650.0,1
1,Staten Island,Port Richmond,20,342.3,2
2,Staten Island,Willowbrook,1,329.0,3
3,Staten Island,Tottenville,8,286.0,4
4,Staten Island,Eltingville,5,210.8,5
5,Staten Island,Prince's Bay,6,206.17,6
6,Staten Island,New Brighton,10,198.3,7
7,Staten Island,Bloomfield,1,188.0,8
8,Staten Island,Tompkinsville,37,179.68,9
9,Staten Island,Todt Hill,4,179.0,10


In [95]:
def neighborhood_group_hist(city: str, column: str = "price", bins: int = 100):
    """
        Helper function that takes a 'neighborhood_group', "city", as a parameter and returns a histogram
        of the distribution for the given 'city' and 'column'.
        
        Parameter
        ============
        city: str
              The name of a 'neighborhood_group' whose data is queried from the database.
              
        column: str
                Name of a column from dataframe returned by the SQL query.  This corresponds to the category
                you want to get the distribution for.
                (Default: "price")
        bins: int
              The number of bins that will be present in the histogram.
              (Default: 100)
    """
    
    %sql city_df << SELECT * FROM airbnb.listings WHERE neighborhood_group = :city;
    
    fig = px.histogram(data_frame = city_df,
                       x = column,
                       nbins = bins,
                       title = f"{column.capitalize()} Distribution for {city}",
                       labels = {"price": "Price"})
    fig.show()

In [96]:
# Try the helper on a single borough first (default column "price", 100 bins).
neighborhood_group_hist(city="Manhattan")

 * mysql+pymysql://root:***@localhost/airbnb
16823 rows affected.
Returning data to local variable city_df


# Get a list of the Borough Names

In [93]:
%sql borough_df << SELECT DISTINCT neighborhood_group FROM airbnb.listings;
boroughs = borough_df.neighborhood_group.to_list()

 * mysql+pymysql://root:***@localhost/airbnb
5 rows affected.
Returning data to local variable borough_df


In [97]:
# Draw one price histogram per borough using the helper's default settings.
for borough in boroughs:
    neighborhood_group_hist(city=borough)

 * mysql+pymysql://root:***@localhost/airbnb
14841 rows affected.
Returning data to local variable city_df


 * mysql+pymysql://root:***@localhost/airbnb
6174 rows affected.
Returning data to local variable city_df


 * mysql+pymysql://root:***@localhost/airbnb
1567 rows affected.
Returning data to local variable city_df


 * mysql+pymysql://root:***@localhost/airbnb
16823 rows affected.
Returning data to local variable city_df


 * mysql+pymysql://root:***@localhost/airbnb
446 rows affected.
Returning data to local variable city_df


## Which neighborhoods have the most availabilities?

In [115]:
%%sql
SELECT
    neighborhood_group,
    neighborhood,
    ROUND(AVG(price), 2)    AS avg_price,
    COUNT(availability_365) AS cnt_avail
FROM
    airbnb.listings
WHERE
    availability_365 > 0
GROUP BY
    neighborhood_group, neighborhood
ORDER BY
    cnt_avail DESC
LIMIT 10;

 * mysql+pymysql://root:***@localhost/airbnb
10 rows affected.


Unnamed: 0,neighborhood_group,neighborhood,avg_price,cnt_avail
0,Brooklyn,Bedford-Stuyvesant,158.44,1833
1,Manhattan,Harlem,189.37,1176
2,Manhattan,Midtown,431.11,1157
3,Brooklyn,Williamsburg,234.79,1136
4,Manhattan,Hell's Kitchen,308.73,1093
5,Manhattan,Upper West Side,252.1,1002
6,Brooklyn,Bushwick,138.77,943
7,Manhattan,Upper East Side,279.87,898
8,Brooklyn,Crown Heights,167.01,717
9,Manhattan,Chelsea,313.4,550


In [117]:
%%sql avail_df <<
SELECT
    neighborhood_group,
    neighborhood,
    ROUND(AVG(price), 2)    AS avg_price,
    COUNT(availability_365) AS cnt_avail
FROM
    airbnb.listings
WHERE
    availability_365 > 0
GROUP BY
    neighborhood_group, neighborhood
ORDER BY
    cnt_avail DESC;

 * mysql+pymysql://root:***@localhost/airbnb
242 rows affected.
Returning data to local variable avail_df


In [127]:
# Histogram of how many available listings each neighborhood has.
# The title is a plain string: the original f-string had no placeholders.
fig = px.histogram(data_frame=avail_df,
                   x="cnt_avail",
                   nbins=20,
                   title="Distribution of Available Rentals",
                   labels={"cnt_avail": "Number of Availabilities"})
fig.show()

<b>The majority of the neighborhoods have under 100 available listings.</b>

## Which neighborhood groups have the most availabilities?

In [124]:
%%sql borough_avail_df <<
SELECT
    neighborhood_group,
    COUNT(availability_365) AS cnt_avail,
    ROUND(AVG(price), 2)    AS avg_price
FROM
    airbnb.listings
WHERE
    availability_365 > 0
GROUP BY
    neighborhood_group
ORDER BY
    cnt_avail DESC;

 * mysql+pymysql://root:***@localhost/airbnb
5 rows affected.
Returning data to local variable borough_avail_df


In [125]:
# Display the per-borough availability summary (most available listings first).
borough_avail_df

Unnamed: 0,neighborhood_group,cnt_avail,avg_price
0,Manhattan,10372,305.28
1,Brooklyn,8958,178.21
2,Queens,4509,140.85
3,Bronx,1283,131.15
4,Staten Island,398,145.25


In [134]:
# Number of listings with availability in each borough; average price on hover.
fig = px.bar(
    borough_avail_df,
    x="neighborhood_group",
    y="cnt_avail",
    text="cnt_avail",
    color="neighborhood_group",
    hover_data=["avg_price"],
    title="Listings Available by Neighborhood Group",
    labels={
        "cnt_avail": "No. of Availabilities",
        "neighborhood_group": "Neighborhood Group",
        "avg_price": "Avg. Price",
    },
)
fig.show()

In [133]:
# Share of the available listings that falls in each borough.
fig = px.pie(
    borough_avail_df,
    names="neighborhood_group",
    values="cnt_avail",
    hover_data=["avg_price"],
    title="Percent of Available Listings per Neighborhood Group",
    labels={
        "neighborhood_group": "Neighborhood Group",
        "cnt_avail": "No. of Availabilities",
        "avg_price": "Avg. Price",
    },
)
fig.show()

### How many listings have no availabilities for next year by neighborhood group?

In [146]:
%%sql
SELECT neighborhood_group,
       COUNT(availability_365) AS rentals_unavailable,
       ROUND(AVG(price), 2)    AS avg_price
FROM airbnb.listings
WHERE availability_365 <= 0
GROUP BY neighborhood_group
ORDER BY rentals_unavailable DESC;

 * mysql+pymysql://root:***@localhost/airbnb
5 rows affected.


Unnamed: 0,neighborhood_group,rentals_unavailable,avg_price
0,Manhattan,6451,201.04
1,Brooklyn,5883,127.15
2,Queens,1665,105.77
3,Bronx,284,96.21
4,Staten Island,48,125.88


In [149]:
%%sql
SELECT
    neighborhood_group,
    -- listings with at least one available day in the next 365
    COUNT(CASE WHEN availability_365 > 0 THEN 1 END)               AS rentals_available,
    ROUND(AVG(CASE WHEN availability_365 > 0 THEN price END), 2)   AS avg_price_avail,
    -- listings with no available days
    COUNT(CASE WHEN availability_365 <= 0 THEN 1 END)              AS rentals_unavailable,
    ROUND(AVG(CASE WHEN availability_365 <= 0 THEN price END), 2)  AS avg_price_unavail
FROM
    airbnb.listings
GROUP BY
    neighborhood_group
-- the ordering lives on the outer query, where it actually applies to the
-- result, and a group lacking one of the two sides is still reported
ORDER BY
    rentals_unavailable DESC;

 * mysql+pymysql://root:***@localhost/airbnb
5 rows affected.


Unnamed: 0,neighborhood_group,rentals_available,avg_price_avail,rentals_unavailable,avg_price_unavail
0,Manhattan,10372,305.28,6451,201.04
1,Brooklyn,8958,178.21,5883,127.15
2,Queens,4509,140.85,1665,105.77
3,Bronx,1283,131.15,284,96.21
4,Staten Island,398,145.25,48,125.88


## Does a lack of availability mean that the listing is booked? Or no longer going to rent?