### Imports

In [2]:
from bs4 import BeautifulSoup
import requests
import pandas as pd 

### Put all together

In [15]:
# create empty dataframe
real_estate = pd.DataFrame(columns=['Address', 'Beds', 'Baths', 'Price'])

# scrape 3 pages

#sequence of numbers (1 up 14, 15 is not included)
for i in range(1,15):
    # website request
    website = requests.get('https://www.trulia.com/NY/New_York/' +str(i) + '_p/')

    # create soup object
    soup = BeautifulSoup(website.content, 'html.parser')

    # result items
    result = soup.find_all('li', {'class': 'SearchResultsList__WideCell-b7y9ki-2'})

    #update results
    results_update = []

    for r in result:
        if r.has_attr('data-testid'):
            results_update.append(r)


In [17]:
 # Lists in loop
Address = [result.find('div', {'data-testid': 'property-address'}).get_text() for result in results_update]
beds = [result.find('div', {'data-testid': 'property-beds'}).get_text() for result in results_update]
prices = [result.find('div', {'data-testid': 'property-price'}).get_text() for result in results_update]
baths = []

for result in results_update:
    try:
        baths.append(result.find('div', {'data-testid':'property-baths'}).get_text())
    except:
            baths.append('n/a')

In [22]:
 for k in range(len(Address)):
        real_estate = real_estate.append({'Address': Address[k], 'Beds': beds[k],
                                         'Baths': baths[k], 'Price': prices[k]}, ignore_index=True)

In [20]:
real_estate

Unnamed: 0,Address,Beds,Baths,Price
0,"2 Grace Court Alley, Brooklyn, NY 11201",4bd,3ba,"$4,950,000"
1,"15 William St #22H, New York, NY 10005",2bd,2ba,"$1,895,000"
2,"1124 Forest Ave, Staten Island, NY 10310",9bd,6ba,"$1,639,000"
3,"11143 130th St, Jamaica, NY 11420",3bd,1ba,"$400,000"
4,"1813 Gerritsen Ave, Brooklyn, NY 11229",3bd,2ba,"$788,888"
...,...,...,...,...
75,"10651 Ruscoe St, Jamaica, NY 11433",4bd,2ba,"$400,000"
76,"9001 193rd St, Jamaica, NY 11423",4bd,4ba,"$1,600,000"
77,"1626 Saint Peters Ave, Bronx, NY 10461",7bd,2ba,"$1,075,000"
78,"85-24 165th St, Jamaica, NY 11432",4bd,4ba,"$1,175,000"


### Information about Dataframe

In [23]:
# dataframe info
real_estate.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 120 entries, 0 to 119
Data columns (total 4 columns):
 #   Column   Non-Null Count  Dtype 
---  ------   --------------  ----- 
 0   Address  120 non-null    object
 1   Beds     120 non-null    object
 2   Baths    120 non-null    object
 3   Price    120 non-null    object
dtypes: object(4)
memory usage: 3.9+ KB


In [24]:
# first 5 results
real_estate.head()

Unnamed: 0,Address,Beds,Baths,Price
0,"2 Grace Court Alley, Brooklyn, NY 11201",4bd,3ba,"$4,950,000"
1,"15 William St #22H, New York, NY 10005",2bd,2ba,"$1,895,000"
2,"1124 Forest Ave, Staten Island, NY 10310",9bd,6ba,"$1,639,000"
3,"11143 130th St, Jamaica, NY 11420",3bd,1ba,"$400,000"
4,"1813 Gerritsen Ave, Brooklyn, NY 11229",3bd,2ba,"$788,888"


In [26]:
# last 5 results
real_estate.tail()

Unnamed: 0,Address,Beds,Baths,Price
115,"10651 Ruscoe St, Jamaica, NY 11433",4bd,2ba,"$400,000"
116,"9001 193rd St, Jamaica, NY 11423",4bd,4ba,"$1,600,000"
117,"1626 Saint Peters Ave, Bronx, NY 10461",7bd,2ba,"$1,075,000"
118,"85-24 165th St, Jamaica, NY 11432",4bd,4ba,"$1,175,000"
119,"13840 229th St, Laurelton, NY 11413",4bd,2ba,"$599,000"


### Data Cleaning

In [27]:
real_estate['Beds'] = real_estate['Beds'].apply(lambda x: x.strip('bd'))
real_estate['Baths'] = real_estate['Baths'].apply(lambda x: x.strip('ba'))

#### Updated Dataframe

In [28]:
real_estate

Unnamed: 0,Address,Beds,Baths,Price
0,"2 Grace Court Alley, Brooklyn, NY 11201",4,3,"$4,950,000"
1,"15 William St #22H, New York, NY 10005",2,2,"$1,895,000"
2,"1124 Forest Ave, Staten Island, NY 10310",9,6,"$1,639,000"
3,"11143 130th St, Jamaica, NY 11420",3,1,"$400,000"
4,"1813 Gerritsen Ave, Brooklyn, NY 11229",3,2,"$788,888"
...,...,...,...,...
115,"10651 Ruscoe St, Jamaica, NY 11433",4,2,"$400,000"
116,"9001 193rd St, Jamaica, NY 11423",4,4,"$1,600,000"
117,"1626 Saint Peters Ave, Bronx, NY 10461",7,2,"$1,075,000"
118,"85-24 165th St, Jamaica, NY 11432",4,4,"$1,175,000"


### Save in Excel

In [29]:
real_estate.to_excel('realestate_multiple_pages.xlsx', index=False)