In [1]:
import requests
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
plt.style.use("ggplot")

## Pulling down the Listing Data

In [2]:
r = requests.get('https://www.renthop.com/nyc/apartments-for-rent')
r.content

b'<!doctype html>\n<html lang="en">\n<head>\n<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />\n<meta http-equiv="Content-Language" content="en" />\n<title>New York Apartment Finder, NYC Apartments Search</title>\n<meta name="description" content="Newly listed NYC apartments for rent. Smooth work commute, popular bars and nightlife, nearby restaurants and grocery stores, and safety. Find your perfect home in New York, NY." />\n<meta name="author" content="RentHop" />\n<meta name="Copyright" content="Copyright (c) 2009 - 2020 RentHop.com" />\n<meta property="fb:page_id" content="124300320712" />\n<meta property="fb:app_id" content="294321126236" />\n<meta name="og:image" content="https://www.renthop.com/images/renthop_icon_small.png" />\n<link rel="image_src" href="https://www.renthop.com/images/renthop_icon_small.png" />\n<meta name="og:title" content="New York Apartment Finder, NYC Apartments Search" />\n<meta name="og:description" content="Newly listed NYC apartme

In [3]:
from bs4 import BeautifulSoup

soup = BeautifulSoup(r.content, "html5lib")

In [4]:
listing_divs = soup.select('div[class*=search-info]')
listing_divs

[<div class="search-info pl-3 pl-md-4 pr-3 pr-md-0 pt-0 pb-4 py-md-0">
 <div>
 <div class="float-right font-size-9" style="padding-top: 2px;">
 <span class="font-gray-2 d-none d-sm-inline-block"></span>
 <span class="d-none d-sm-inline-block" style="color: #dddddd;"> | </span>
 <span class="d-none d-sm-inline-block font-gray-2">Score:</span>
 <span class="d-none d-sm-inline-block b font-blue" id="listing-59483253-hopscore" style="">94.9</span>
 </div>
 <a class="font-size-11 listing-title-link b" href="https://www.renthop.com/listings/698-tenth-avenue/phb/59483253" id="listing-59483253-title">698 Amsterdam Avenue, Apt PHB</a>
 <div class="font-size-9 overflow-ellipsis" id="listing-59483253-neighborhoods" style="line-height: 130%;">
 Upper West Side, Upper Manhattan, Manhattan
 </div>
 </div>
 <div style="margin-top: 8px;">
 <div id="listing-59483253-info">
 <div class="d-inline-block align-bottom" id="listing-59483253-price">
 <span class="font-size-13 b">$2,995</span>
 </div>
 <div cl

In [5]:
len(listing_divs)

22

## Pulling out the Individual Data Points

These are the points we are targeting;
- URL of the listing
- Address of the apartment
- Neighborhood
- Number of bedrooms
- Number of bathrooms

In [6]:
listing_divs[0]

<div class="search-info pl-3 pl-md-4 pr-3 pr-md-0 pt-0 pb-4 py-md-0">
<div>
<div class="float-right font-size-9" style="padding-top: 2px;">
<span class="font-gray-2 d-none d-sm-inline-block"></span>
<span class="d-none d-sm-inline-block" style="color: #dddddd;"> | </span>
<span class="d-none d-sm-inline-block font-gray-2">Score:</span>
<span class="d-none d-sm-inline-block b font-blue" id="listing-59483253-hopscore" style="">94.9</span>
</div>
<a class="font-size-11 listing-title-link b" href="https://www.renthop.com/listings/698-tenth-avenue/phb/59483253" id="listing-59483253-title">698 Amsterdam Avenue, Apt PHB</a>
<div class="font-size-9 overflow-ellipsis" id="listing-59483253-neighborhoods" style="line-height: 130%;">
Upper West Side, Upper Manhattan, Manhattan
</div>
</div>
<div style="margin-top: 8px;">
<div id="listing-59483253-info">
<div class="d-inline-block align-bottom" id="listing-59483253-price">
<span class="font-size-13 b">$2,995</span>
</div>
<div class="d-inline-block

In [7]:
listing_divs[0].select('a[class*=title]')[0]

<a class="font-size-11 listing-title-link b" href="https://www.renthop.com/listings/698-tenth-avenue/phb/59483253" id="listing-59483253-title">698 Amsterdam Avenue, Apt PHB</a>

In [8]:
href = listing_divs[0].select('a[class*=title]')[0]['href']
addy = listing_divs[0].select('a[class*=title]')[0].string
hood = listing_divs[0].select('div[id*=hood]')[0].string.replace('\n', '')

print(href)
print(addy)
print(hood)

https://www.renthop.com/listings/698-tenth-avenue/phb/59483253
698 Amsterdam Avenue, Apt PHB
Upper West Side, Upper Manhattan, Manhattan


### To get the last 3 items; bedroom, bathroom, and price.

In [9]:
listing_specs = listing_divs[5].select('div[id*=info]')
for spec in listing_specs:
    spec_data = spec.text.strip().replace('|', '').replace(' ', '_').split()
    print(spec_data)

['$2,815', 'Studio', '1_Bath']


Putting all together;

In [10]:
def parse_data(listing_divs):
    listing_list = []

    for idx in range(len(listing_divs)):
        indv_listing = []
        current_listing = listing_divs[idx]
        href = current_listing.select('a[id*=title]')[0]['href']
        addy = current_listing.select('a[id*=title]')[0].string
        hood = current_listing.select('div[id*=hood]')[0].string.replace('\n', '')

        indv_listing.extend([href, addy, hood])

        listing_specs = current_listing.select('div[id*=info]')
        for spec in listing_specs:
            try:
                values = spec.text.strip().replace(' ', '_').replace('|', '').split()
                clean_values = [x for x in values if x != '_']
                indv_listing.extend(clean_values)
            except:
                indv_listig.extend(np.nan)
        listing_list.append(indv_listing)
    return listing_list

In [11]:
parse_data(listing_divs)

[['https://www.renthop.com/listings/698-tenth-avenue/phb/59483253',
  '698 Amsterdam Avenue, Apt PHB',
  'Upper West Side, Upper Manhattan, Manhattan',
  '$2,995',
  '2_Bed',
  '2_Bath'],
 ['https://www.renthop.com/listings/225-east-10th-street/5bb/59274815',
  '225 East 10th Street, Apt 5BB',
  'East Village, Downtown Manhattan, Manhattan',
  '$1,800',
  'Studio',
  '1_Bath'],
 ['https://www.renthop.com/listings/225-east-10th-street/3ee/59279450',
  '225 East 10th Street, Apt 3EE',
  'East Village, Downtown Manhattan, Manhattan',
  '$1,800',
  'Studio',
  '1_Bath'],
 ['https://www.renthop.com/listings/25-hillside-avenue-2b/2b/59215713',
  '25 Hillside Avenue, Apt 2B',
  'Fort George, Washington Heights, Upper Manhattan, Manhattan',
  '$1,776',
  '1_Bed',
  '1_Bath'],
 ['https://www.renthop.com/listings/525-west-52nd-street/2nn/59054932',
  '525 West 52nd Street, Apt 2NN',
  "Hell's Kitchen, Midtown Manhattan, Manhattan",
  '$5,600',
  '2_Bed',
  '2_Bath'],
 ['https://www.renthop.com/l

To scrape all the pages;

In [12]:
url_prefix = "https://www.renthop.com/search/nyc?max_price=50000&min_price=0&page="
page_no = 1
url_suffix = "&sort=hopscore&q=&search=0"

all_pages_parsed = []

In [61]:
for i in range(200):
    target_page = url_prefix + str(page_no) + url_suffix
    print(target_page)
    r = requests.get(target_page)
    
    soup = BeautifulSoup(r.content, 'html5lib')
    
    listing_divs = soup.select('div[class*=search-info]')
    
    one_page_parsed = parse_data(listing_divs)
    
    all_pages_parsed.extend(one_page_parsed)
    
    page_no += 1

https://www.renthop.com/search/nyc?max_price=50000&min_price=0&page=401&sort=hopscore&q=&search=0
https://www.renthop.com/search/nyc?max_price=50000&min_price=0&page=402&sort=hopscore&q=&search=0
https://www.renthop.com/search/nyc?max_price=50000&min_price=0&page=403&sort=hopscore&q=&search=0
https://www.renthop.com/search/nyc?max_price=50000&min_price=0&page=404&sort=hopscore&q=&search=0
https://www.renthop.com/search/nyc?max_price=50000&min_price=0&page=405&sort=hopscore&q=&search=0
https://www.renthop.com/search/nyc?max_price=50000&min_price=0&page=406&sort=hopscore&q=&search=0
https://www.renthop.com/search/nyc?max_price=50000&min_price=0&page=407&sort=hopscore&q=&search=0
https://www.renthop.com/search/nyc?max_price=50000&min_price=0&page=408&sort=hopscore&q=&search=0
https://www.renthop.com/search/nyc?max_price=50000&min_price=0&page=409&sort=hopscore&q=&search=0
https://www.renthop.com/search/nyc?max_price=50000&min_price=0&page=410&sort=hopscore&q=&search=0
https://www.renthop.

ConnectionError: ('Connection aborted.', OSError(0, 'Error'))

In [62]:
all_pages_parsed

[['https://www.renthop.com/listings/10-montieth-street/341/59518457',
  '10 Montieth Street, Apt 341',
  'Bushwick, Northern Brooklyn, Brooklyn',
  '$1,908',
  'Studio',
  '1_Bath'],
 ['https://www.renthop.com/listings/225-east-10th-street/5bb/59274815',
  '225 East 10th Street, Apt 5BB',
  'East Village, Downtown Manhattan, Manhattan',
  '$1,800',
  'Studio',
  '1_Bath'],
 ['https://www.renthop.com/listings/225-east-10th-street/3ee/59279450',
  '225 East 10th Street, Apt 3EE',
  'East Village, Downtown Manhattan, Manhattan',
  '$1,800',
  'Studio',
  '1_Bath'],
 ['https://www.renthop.com/listings/25-hillside-avenue-2b/2b/59215713',
  '25 Hillside Avenue, Apt 2B',
  'Fort George, Washington Heights, Upper Manhattan, Manhattan',
  '$1,776',
  '1_Bed',
  '1_Bath'],
 ['https://www.renthop.com/listings/525-west-52nd-street/2nn/59054932',
  '525 West 52nd Street, Apt 2NN',
  "Hell's Kitchen, Midtown Manhattan, Manhattan",
  '$5,600',
  '2_Bed',
  '2_Bath'],
 ['https://www.renthop.com/listin

In [63]:
df = pd.DataFrame(all_pages_parsed, columns=['url', 'address', 'neighborhood', 'rent', 'beds', 'baths', 'bad_col'])
print(df.shape)
df

(9229, 7)


Unnamed: 0,url,address,neighborhood,rent,beds,baths,bad_col
0,https://www.renthop.com/listings/10-montieth-s...,"10 Montieth Street, Apt 341","Bushwick, Northern Brooklyn, Brooklyn","$1,908",Studio,1_Bath,
1,https://www.renthop.com/listings/225-east-10th...,"225 East 10th Street, Apt 5BB","East Village, Downtown Manhattan, Manhattan","$1,800",Studio,1_Bath,
2,https://www.renthop.com/listings/225-east-10th...,"225 East 10th Street, Apt 3EE","East Village, Downtown Manhattan, Manhattan","$1,800",Studio,1_Bath,
3,https://www.renthop.com/listings/25-hillside-a...,"25 Hillside Avenue, Apt 2B","Fort George, Washington Heights, Upper Manhatt...","$1,776",1_Bed,1_Bath,
4,https://www.renthop.com/listings/525-west-52nd...,"525 West 52nd Street, Apt 2NN","Hell's Kitchen, Midtown Manhattan, Manhattan","$5,600",2_Bed,2_Bath,
...,...,...,...,...,...,...,...
9224,https://www.renthop.com/listings/east-48th-str...,East 48th Street,"Turtle Bay, Midtown East, Midtown Manhattan, M...","$4,958",3_Bed,2_Bath,
9225,https://www.renthop.com/listings/west-31st-str...,West 31st Street,"Chelsea, Midtown Manhattan, Manhattan","$3,735",1_Bed,1_Bath,
9226,https://www.renthop.com/listings/1093-dean-st/...,"1093 Dean Street, Apt 7","Crown Heights, Central Brooklyn, Brooklyn","$1,760",Studio,1_Bath,
9227,https://www.renthop.com/listings/wall-st/309/1...,Wall St,"Financial District, Downtown Manhattan, Manhattan","$3,450",1_Bed,/_Flex_2_,1_Bath


# Inspecting and Preparing the Data

### Treating the apartments with extra flexible rooms;

In [64]:
df[~df.bad_col.isnull()]

Unnamed: 0,url,address,neighborhood,rent,beds,baths,bad_col
7,https://www.renthop.com/listings/949-willoughb...,"949 Willoughby Avenue, Apt 111...","Bushwick, Northern Brooklyn, Brooklyn","$2,000",Studio,/_Flex_1_,1_Bath
13,https://www.renthop.com/listings/hudson-yards/...,Hudson Yards,"Hell's Kitchen, Midtown Manhattan, Manhattan","$2,746",1_Bed,/_Flex_2_,1_Bath
18,https://www.renthop.com/listings/298-troutman/...,"298 Troutman Street, Apt 3F","Bushwick, Northern Brooklyn, Brooklyn","$1,675",Studio,/_Flex_1_,1_Bath
20,https://www.renthop.com/listings/e-72nd-st/9i/...,E 72nd St,"Upper East Side, Upper Manhattan, Manhattan","$3,000",1_Bed,/_Flex_2_,1_Bath
22,https://www.renthop.com/listings/washington-st...,Washington st,"Financial District, Downtown Manhattan, Manhattan","$1,800",Studio,/_Flex_1_,1_Bath
...,...,...,...,...,...,...,...
9151,https://www.renthop.com/listings/121-madison-a...,"121 Madison Avenue, Apt 7K","NoMad, Midtown Manhattan, Manhattan","$2,350",1_Bed,/_Flex_3_,1_Bath
9161,https://www.renthop.com/listings/246-johnson-a...,"246 Johnson Avenue, Apt 1B","East Williamsburg, Williamsburg, Northern Broo...","$3,300",2_Bed,/_Flex_3_,2_Bath
9208,https://www.renthop.com/listings/159-west-53rd...,"159 West 53rd Street, Apt 26D","Theater District, Midtown Manhattan, Manhattan","$2,995",1_Bed,/_Flex_2_,1_Bath
9227,https://www.renthop.com/listings/wall-st/309/1...,Wall St,"Financial District, Downtown Manhattan, Manhattan","$3,450",1_Bed,/_Flex_2_,1_Bath


In [65]:
df['flexible_rooms'] = df.bad_col.apply(lambda row: 1 if str(row)[1:] == '_Bath' else 0)
df.head()

Unnamed: 0,url,address,neighborhood,rent,beds,baths,bad_col,flexible_rooms
0,https://www.renthop.com/listings/10-montieth-s...,"10 Montieth Street, Apt 341","Bushwick, Northern Brooklyn, Brooklyn","$1,908",Studio,1_Bath,,0
1,https://www.renthop.com/listings/225-east-10th...,"225 East 10th Street, Apt 5BB","East Village, Downtown Manhattan, Manhattan","$1,800",Studio,1_Bath,,0
2,https://www.renthop.com/listings/225-east-10th...,"225 East 10th Street, Apt 3EE","East Village, Downtown Manhattan, Manhattan","$1,800",Studio,1_Bath,,0
3,https://www.renthop.com/listings/25-hillside-a...,"25 Hillside Avenue, Apt 2B","Fort George, Washington Heights, Upper Manhatt...","$1,776",1_Bed,1_Bath,,0
4,https://www.renthop.com/listings/525-west-52nd...,"525 West 52nd Street, Apt 2NN","Hell's Kitchen, Midtown Manhattan, Manhattan","$5,600",2_Bed,2_Bath,,0


In [66]:
flexible_rooms = df[~df.bad_col.isnull()]
normal_rooms = df.drop(flexible_rooms.index)
print("Number of removed rows is:", flexible_rooms.shape[0])
print("Number of remaining rows are: ", normal_rooms.shape[0])
flexible_rooms

Number of removed rows is: 790
Number of remaining rows are:  8439


Unnamed: 0,url,address,neighborhood,rent,beds,baths,bad_col,flexible_rooms
7,https://www.renthop.com/listings/949-willoughb...,"949 Willoughby Avenue, Apt 111...","Bushwick, Northern Brooklyn, Brooklyn","$2,000",Studio,/_Flex_1_,1_Bath,1
13,https://www.renthop.com/listings/hudson-yards/...,Hudson Yards,"Hell's Kitchen, Midtown Manhattan, Manhattan","$2,746",1_Bed,/_Flex_2_,1_Bath,1
18,https://www.renthop.com/listings/298-troutman/...,"298 Troutman Street, Apt 3F","Bushwick, Northern Brooklyn, Brooklyn","$1,675",Studio,/_Flex_1_,1_Bath,1
20,https://www.renthop.com/listings/e-72nd-st/9i/...,E 72nd St,"Upper East Side, Upper Manhattan, Manhattan","$3,000",1_Bed,/_Flex_2_,1_Bath,1
22,https://www.renthop.com/listings/washington-st...,Washington st,"Financial District, Downtown Manhattan, Manhattan","$1,800",Studio,/_Flex_1_,1_Bath,1
...,...,...,...,...,...,...,...,...
9151,https://www.renthop.com/listings/121-madison-a...,"121 Madison Avenue, Apt 7K","NoMad, Midtown Manhattan, Manhattan","$2,350",1_Bed,/_Flex_3_,1_Bath,1
9161,https://www.renthop.com/listings/246-johnson-a...,"246 Johnson Avenue, Apt 1B","East Williamsburg, Williamsburg, Northern Broo...","$3,300",2_Bed,/_Flex_3_,2_Bath,1
9208,https://www.renthop.com/listings/159-west-53rd...,"159 West 53rd Street, Apt 26D","Theater District, Midtown Manhattan, Manhattan","$2,995",1_Bed,/_Flex_2_,1_Bath,1
9227,https://www.renthop.com/listings/wall-st/309/1...,Wall St,"Financial District, Downtown Manhattan, Manhattan","$3,450",1_Bed,/_Flex_2_,1_Bath,1


In [67]:
flexible_rooms = flexible_rooms.drop('baths', 1)
flexible_rooms.rename(columns={'bad_col': 'baths'}, inplace=True)
flexible_rooms.head(2)

Unnamed: 0,url,address,neighborhood,rent,beds,baths,flexible_rooms
7,https://www.renthop.com/listings/949-willoughb...,"949 Willoughby Avenue, Apt 111...","Bushwick, Northern Brooklyn, Brooklyn","$2,000",Studio,1_Bath,1
13,https://www.renthop.com/listings/hudson-yards/...,Hudson Yards,"Hell's Kitchen, Midtown Manhattan, Manhattan","$2,746",1_Bed,1_Bath,1


In [68]:
normal_rooms = normal_rooms.drop('bad_col', 1)
normal_rooms.head()

Unnamed: 0,url,address,neighborhood,rent,beds,baths,flexible_rooms
0,https://www.renthop.com/listings/10-montieth-s...,"10 Montieth Street, Apt 341","Bushwick, Northern Brooklyn, Brooklyn","$1,908",Studio,1_Bath,0
1,https://www.renthop.com/listings/225-east-10th...,"225 East 10th Street, Apt 5BB","East Village, Downtown Manhattan, Manhattan","$1,800",Studio,1_Bath,0
2,https://www.renthop.com/listings/225-east-10th...,"225 East 10th Street, Apt 3EE","East Village, Downtown Manhattan, Manhattan","$1,800",Studio,1_Bath,0
3,https://www.renthop.com/listings/25-hillside-a...,"25 Hillside Avenue, Apt 2B","Fort George, Washington Heights, Upper Manhatt...","$1,776",1_Bed,1_Bath,0
4,https://www.renthop.com/listings/525-west-52nd...,"525 West 52nd Street, Apt 2NN","Hell's Kitchen, Midtown Manhattan, Manhattan","$5,600",2_Bed,2_Bath,0


**Merging the two separate data together;**

In [69]:
df = pd.concat([normal_rooms, flexible_rooms])
df.sort_index(inplace=True)
df.tail(3)

Unnamed: 0,url,address,neighborhood,rent,beds,baths,flexible_rooms
9226,https://www.renthop.com/listings/1093-dean-st/...,"1093 Dean Street, Apt 7","Crown Heights, Central Brooklyn, Brooklyn","$1,760",Studio,1_Bath,0
9227,https://www.renthop.com/listings/wall-st/309/1...,Wall St,"Financial District, Downtown Manhattan, Manhattan","$3,450",1_Bed,1_Bath,1
9228,https://www.renthop.com/listings/water-street/...,Water Street,"Financial District, Downtown Manhattan, Manhattan","$4,250",2_Bed,2_Bath,1


### Inspecting the Data Points for each column;

In [70]:
df.beds.unique()

array(['Studio', '1_Bed', '2_Bed', '3_Bed', '4_Bed', '5_Bed', '_3_Bed',
       '_Studio', 'Room', '6_Bed', 'Loft', '_2_Bed', '_5_Bed', '_1_Bed',
       '7_Bed', '_4_Bed'], dtype=object)

In [71]:
df.baths.unique()

array(['1_Bath', '2_Bath', '1.5_Bath', '_2_Bath', '3_Bath', '5_Bath',
       '2.5_Bath', '_1_Bath', '4_Bath', '3.5_Bath', '8_Bath', '4.5_Bath',
       '_1.5_Bath', '_2.5_Bath'], dtype=object)

In [72]:
df['beds'] = df['beds'].map(lambda x: x[1:] if x.startswith('_') else x)
df['baths'] = df['baths'].map(lambda x: x[1:] if x.startswith('_') else x)

In [73]:
df.beds.unique()

array(['Studio', '1_Bed', '2_Bed', '3_Bed', '4_Bed', '5_Bed', 'Room',
       '6_Bed', 'Loft', '7_Bed'], dtype=object)

In [74]:
df.baths.unique()

array(['1_Bath', '2_Bath', '1.5_Bath', '3_Bath', '5_Bath', '2.5_Bath',
       '4_Bath', '3.5_Bath', '8_Bath', '4.5_Bath'], dtype=object)

In [75]:
df.describe(include='O')

Unnamed: 0,url,address,neighborhood,rent,beds,baths
count,9229,9225,9229,9229,9229,9229
unique,8088,5560,190,1618,10,10
top,https://www.renthop.com/listings/e-81-street/3...,Wall Street,"Yorkville, Upper East Side, Upper Manhattan, M...","$2,200",1_Bed,1_Bath
freq,17,83,687,234,3403,7359


In [76]:
df['rent'] = df.rent.map(lambda x: str(x).replace('$', '').replace(',', '')).astype('int')
df['beds'] = df.beds.map(lambda x: x.replace('Loft', '0'))
df['beds'] = df.beds.map(lambda x: x.replace('Studio', '0'))
df['beds'] = df.beds.map(lambda x: x.replace('Room', '0'))
df['beds'] = df.beds.map(lambda x: x.replace('_Bed', '')).astype('int')
df['baths'] = df.baths.map(lambda x: x.replace('_Bath', '')).astype('float')

In [77]:
df.describe()

Unnamed: 0,rent,beds,baths,flexible_rooms
count,9229.0,9229.0,9229.0,9229.0
mean,2993.032073,1.401669,1.2192,0.082132
std,2083.78927,1.062626,0.497834,0.274581
min,650.0,0.0,1.0,0.0
25%,2000.0,1.0,1.0,0.0
50%,2495.0,1.0,1.0,0.0
75%,3299.0,2.0,1.0,0.0
max,50000.0,7.0,8.0,1.0


### Sneak-peek at the Data types

In [78]:
df.dtypes

url                object
address            object
neighborhood       object
rent                int64
beds                int64
baths             float64
flexible_rooms      int64
dtype: object

In [79]:
df.groupby('neighborhood')['rent'].count().to_frame('count')\
.sort_values(by='count', ascending=False)

Unnamed: 0_level_0,count
neighborhood,Unnamed: 1_level_1
"Yorkville, Upper East Side, Upper Manhattan, Manhattan",687
"Hell's Kitchen, Midtown Manhattan, Manhattan",623
"Financial District, Downtown Manhattan, Manhattan",582
"Upper East Side, Upper Manhattan, Manhattan",528
"Upper West Side, Upper Manhattan, Manhattan",400
...,...
"Exchange Place North, The Waterfront, Hudson",1
"East Elmhurst, Northwestern Queens, Queens",1
"Sunnyside, Long Island City, Northwestern Queens, Queens",1
"New Lots, East New York, Eastern Brooklyn, Brooklyn",1


In [80]:
df[df['neighborhood'].str.contains('Upper East Side')]['neighborhood'].value_counts()

Yorkville, Upper East Side, Upper Manhattan, Manhattan        687
Upper East Side, Upper Manhattan, Manhattan                   528
Carnegie Hill, Upper East Side, Upper Manhattan, Manhattan     65
Lenox Hill, Upper East Side, Upper Manhattan, Manhattan        17
 Upper East Side, Upper Manhattan, Manhattan                    3
 Yorkville, Upper East Side, Upper Manhattan, Manhattan         2
Upper East Side, Upper Manhattan, Manhattan                     2
Yorkville, Upper East Side, Upper Manhattan, Manhattan          1
Lenox Hill, Upper East Side, Upper Manhattan, Manhattan         1
Name: neighborhood, dtype: int64

In [81]:
df['neighborhood'] = df['neighborhood'].map(lambda x: x.strip())

In [82]:
df[df['neighborhood'].str.contains('Upper East Side')]['neighborhood'].value_counts()

Yorkville, Upper East Side, Upper Manhattan, Manhattan        690
Upper East Side, Upper Manhattan, Manhattan                   533
Carnegie Hill, Upper East Side, Upper Manhattan, Manhattan     65
Lenox Hill, Upper East Side, Upper Manhattan, Manhattan        18
Name: neighborhood, dtype: int64

In [83]:
df.groupby('neighborhood')['rent'].mean().to_frame('mean').sort_values(by='mean', ascending=False)

Unnamed: 0_level_0,mean
neighborhood,Unnamed: 1_level_1
"Battery Park City, Downtown Manhattan, Manhattan",5474.292308
"Flatiron District, Midtown Manhattan, Manhattan",5181.673469
"SoHo, Downtown Manhattan, Manhattan",5084.933333
"Tribeca, Downtown Manhattan, Manhattan",4846.456376
"Midtown East, Midtown Manhattan, Manhattan",4485.224490
...,...
"Arverne, Far Rockaway, Rockaway Peninsula, Queens",1500.000000
"Bergen - Lafayette, Hudson",1499.000000
"Little Senegal, Central Harlem, Upper Manhattan, Manhattan",1466.666667
"Newport, The Waterfront, Hudson",1400.000000


In [84]:
df.to_csv("housing_data.csv", index=False)

### Visualizing 