### Imports

In [1]:
from bs4 import BeautifulSoup
import requests
import pandas as pd 

### Putting it all together

In [2]:
# Scrape the first three Trulia result pages for New York and collect one
# record per listing. Rows are accumulated in a plain list and the DataFrame
# is built once at the end: DataFrame.append was removed in pandas 2.0 and,
# where it still exists, copies the whole frame on every call.

# output columns, in display order
COLUMNS = ['Street', 'Regions', 'Beds', 'Baths', 'Price']

# output column -> value of the `data-testid` attribute on the <div> holding that field
FIELD_TEST_IDS = {
    'Street': 'property-street',
    'Regions': 'property-region',
    'Beds': 'property-beds',
    'Baths': 'property-baths',
    'Price': 'property-price',
}


def _listing_text(card, test_id):
    """Return the text of the <div> in `card` whose data-testid equals `test_id`.

    Returns an empty string when the card has no such <div>, so one incomplete
    listing does not abort the whole scrape and later string clean-up keeps working.
    """
    node = card.find('div', {'data-testid': test_id})
    return node.get_text() if node is not None else ''


records = []

# pages 1 through 3 (range() excludes the upper bound, 4)
for i in range(1, 4):
    # website request; the timeout keeps a stalled connection from hanging the kernel
    website = requests.get('https://www.trulia.com/NY/New_York/' + str(i) + '_p/', timeout=30)
    # fail loudly on HTTP errors instead of silently parsing an error page into zero rows
    website.raise_for_status()

    # create soup object
    soup = BeautifulSoup(website.content, 'html.parser')

    # result items
    result = soup.find_all('li', {'class': 'SearchResultsList__WideCell-b7y9ki-2'})

    # keep only the items that carry a data-testid attribute
    results_update = [r for r in result if r.has_attr('data-testid')]

    # one dict per listing, keyed by output column
    for card in results_update:
        records.append({column: _listing_text(card, test_id)
                        for column, test_id in FIELD_TEST_IDS.items()})

# build the dataframe once; `columns` fixes the column order even when nothing was scraped
real_estate = pd.DataFrame(records, columns=COLUMNS)

In [3]:
# show the listings collected from all scraped pages
real_estate

Unnamed: 0,Street,Regions,Beds,Baths,Price
0,111 W 57th St #72,"Midtown, New York, NY",4bd,6ba,"$66,000,000"
1,555 W End Ave #PENTHOUSE,"Upper West Side, New York, NY",6bd,8ba,"$42,000,000"
2,35 Hudson Yards #90,"Hudson Yards, New York, NY",5bd,7ba,"$59,000,000"
3,12 E 63rd St,"Upper East Side, New York, NY",7bd,11ba,"$63,000,000"
4,303 E 57th St #6D,"Sutton Place, New York, NY",2bd,2ba,"$375,000"
...,...,...,...,...,...
85,18317 Dalny Rd,"Jamaica Estates, Jamaica, NY",5bd,4ba,"$2,295,000"
86,105-14 134th St,"South Ozone Park, South Richmond Hill, NY",3bd,3ba,"$389,000"
87,813 8th Ave #4R,"Park Slope, Brooklyn, NY",2bd,1ba,"$775,000"
88,333 E 91st St #15A,"Upper East Side, New York, NY",3bd,3ba,"$1,895,000"


### Information about Dataframe

In [4]:
# dataframe info: index range, per-column non-null counts and dtypes, memory usage
real_estate.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 90 entries, 0 to 89
Data columns (total 5 columns):
 #   Column   Non-Null Count  Dtype 
---  ------   --------------  ----- 
 0   Street   90 non-null     object
 1   Regions  90 non-null     object
 2   Beds     90 non-null     object
 3   Baths    90 non-null     object
 4   Price    90 non-null     object
dtypes: object(5)
memory usage: 3.6+ KB


In [5]:
# first 5 results (head() returns 5 rows when called without an argument)
real_estate.head()

Unnamed: 0,Street,Regions,Beds,Baths,Price
0,111 W 57th St #72,"Midtown, New York, NY",4bd,6ba,"$66,000,000"
1,555 W End Ave #PENTHOUSE,"Upper West Side, New York, NY",6bd,8ba,"$42,000,000"
2,35 Hudson Yards #90,"Hudson Yards, New York, NY",5bd,7ba,"$59,000,000"
3,12 E 63rd St,"Upper East Side, New York, NY",7bd,11ba,"$63,000,000"
4,303 E 57th St #6D,"Sutton Place, New York, NY",2bd,2ba,"$375,000"


In [6]:
# last 5 results (tail() returns 5 rows when called without an argument)
real_estate.tail()

Unnamed: 0,Street,Regions,Beds,Baths,Price
85,18317 Dalny Rd,"Jamaica Estates, Jamaica, NY",5bd,4ba,"$2,295,000"
86,105-14 134th St,"South Ozone Park, South Richmond Hill, NY",3bd,3ba,"$389,000"
87,813 8th Ave #4R,"Park Slope, Brooklyn, NY",2bd,1ba,"$775,000"
88,333 E 91st St #15A,"Upper East Side, New York, NY",3bd,3ba,"$1,895,000"
89,990 5th Ave,"Upper East Side, New York, NY",6bd,5ba,"$35,000,000"


### Data Cleaning

In [7]:
# Drop the trailing unit label from the bed/bath counts ('4bd' -> '4', '6ba' -> '6').
# The vectorised .str accessor passes missing values through instead of raising,
# and the `$` anchor removes the label only as a suffix (str.strip('bd') would
# instead strip any leading or trailing 'b'/'d' characters).
# The values remain strings after this step.
real_estate['Beds'] = real_estate['Beds'].str.replace(r'bd$', '', regex=True)
real_estate['Baths'] = real_estate['Baths'].str.replace(r'ba$', '', regex=True)

#### Updated Dataframe

In [8]:
# show the dataframe again after the unit labels were removed from Beds and Baths
real_estate

Unnamed: 0,Street,Regions,Beds,Baths,Price
0,111 W 57th St #72,"Midtown, New York, NY",4,6,"$66,000,000"
1,555 W End Ave #PENTHOUSE,"Upper West Side, New York, NY",6,8,"$42,000,000"
2,35 Hudson Yards #90,"Hudson Yards, New York, NY",5,7,"$59,000,000"
3,12 E 63rd St,"Upper East Side, New York, NY",7,11,"$63,000,000"
4,303 E 57th St #6D,"Sutton Place, New York, NY",2,2,"$375,000"
...,...,...,...,...,...
85,18317 Dalny Rd,"Jamaica Estates, Jamaica, NY",5,4,"$2,295,000"
86,105-14 134th St,"South Ozone Park, South Richmond Hill, NY",3,3,"$389,000"
87,813 8th Ave #4R,"Park Slope, Brooklyn, NY",2,1,"$775,000"
88,333 E 91st St #15A,"Upper East Side, New York, NY",3,3,"$1,895,000"


### Save to Excel

In [9]:
# write the table to an .xlsx file in the working directory; index=False leaves the row index out of the sheet
real_estate.to_excel('realestate_multiple_pages.xlsx', index=False)