### Imports

In [1]:
from bs4 import BeautifulSoup
import requests
import pandas as pd

### Putting it all together

In [4]:
# Scrape Trulia's New York search-result pages and gather every listing into a
# single DataFrame with the columns Street, Regions, Beds, Baths and Price.

def _field(card, testid, use_title):
    """Return one field of a listing card, or None when the card lacks it.

    card      -- the <li> element of a single search result
    testid    -- value of the data-testid attribute on the wanted <div>
    use_title -- True to read the div's title attribute, False to read its text
    """
    node = card.find('div', {'data-testid': testid})
    if node is None:
        # e.g. a listing without a bed/bath count; keep the row, mark the gap
        return None
    return node.get('title') if use_title else node.get_text()


# One dict per listing. The DataFrame is built once after the loop:
# DataFrame.append was removed in pandas 2.0 and copied the whole frame per row.
records = []

# scrape 3 pages: range(1, 4) yields 1, 2, 3 (the stop value 4 is excluded)
for i in range(1, 4):
    # website request
    website = "https://www.trulia.com/NY/New_York/" + str(i) + "_p/"
    # requests has no default timeout; without one a stalled server hangs the cell
    response = requests.get(website, timeout=30)
    # fail loudly on an HTTP error instead of silently parsing an error page
    response.raise_for_status()

    # create soup object
    soup = BeautifulSoup(response.content, 'html.parser')

    # result items
    results = soup.find_all('li', {'class': 'SearchResultsList__WideCell-b7y9ki-2'})

    # keep only the result cells that carry a data-testid attribute
    results_update = [r for r in results if r.has_attr('data-testid')]

    # street, region and price are read from the title attribute; beds and baths from the text
    for result in results_update:
        records.append({'Street': _field(result, 'property-street', True),
                        'Regions': _field(result, 'property-region', True),
                        'Beds': _field(result, 'property-beds', False),
                        'Baths': _field(result, 'property-baths', False),
                        'Price': _field(result, 'property-price', True),
                        })

# columns= fixes the column order and keeps the five columns even if nothing was scraped
real_estate = pd.DataFrame(records, columns=['Street', 'Regions', 'Beds', 'Baths', 'Price'])

In [5]:
# Show the combined listings gathered from all scraped pages.
real_estate

Unnamed: 0,Street,Regions,Beds,Baths,Price
0,432 Park Ave #PENTHOUSE,"Midtown, New York, NY",6bd,9ba,"$169,000,000"
1,303 E 57th St #32B,"Sutton Place, New York, NY",2bd,3ba,"$379,000"
2,27 Prospect Park W #1A,"Park Slope, Brooklyn, NY",2bd,2ba,"$589,000"
3,303 E 57th St #6D,"Sutton Place, New York, NY",2bd,2ba,"$375,000"
4,35 Poland Pl,"Emerson Hill, Staten Island, NY",3bd,3ba,"$699,000"
...,...,...,...,...,...
85,2505 Tenbroeck Ave,"Pelham Gardens, Bronx, NY",3bd,3ba,"$715,000"
86,223 E 28th St,"Flatbush, Brooklyn, NY",3bd,2ba,"$599,000"
87,89-23 202nd St,"Hollis, Hollis, NY",4bd,2ba,"$649,000"
88,37 Groton St,"Eltingville, Staten Island, NY",4bd,4ba,"$849,999"


### Information about Dataframe

In [6]:
# Summarize the frame: number of rows, column names, non-null counts and dtypes.
real_estate.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 90 entries, 0 to 89
Data columns (total 5 columns):
 #   Column   Non-Null Count  Dtype 
---  ------   --------------  ----- 
 0   Street   90 non-null     object
 1   Regions  90 non-null     object
 2   Beds     90 non-null     object
 3   Baths    90 non-null     object
 4   Price    90 non-null     object
dtypes: object(5)
memory usage: 3.6+ KB


In [7]:
# Preview the first five rows.
real_estate.head()

Unnamed: 0,Street,Regions,Beds,Baths,Price
0,432 Park Ave #PENTHOUSE,"Midtown, New York, NY",6bd,9ba,"$169,000,000"
1,303 E 57th St #32B,"Sutton Place, New York, NY",2bd,3ba,"$379,000"
2,27 Prospect Park W #1A,"Park Slope, Brooklyn, NY",2bd,2ba,"$589,000"
3,303 E 57th St #6D,"Sutton Place, New York, NY",2bd,2ba,"$375,000"
4,35 Poland Pl,"Emerson Hill, Staten Island, NY",3bd,3ba,"$699,000"


In [8]:
# Preview the last five rows.
real_estate.tail()

Unnamed: 0,Street,Regions,Beds,Baths,Price
85,2505 Tenbroeck Ave,"Pelham Gardens, Bronx, NY",3bd,3ba,"$715,000"
86,223 E 28th St,"Flatbush, Brooklyn, NY",3bd,2ba,"$599,000"
87,89-23 202nd St,"Hollis, Hollis, NY",4bd,2ba,"$649,000"
88,37 Groton St,"Eltingville, Staten Island, NY",4bd,4ba,"$849,999"
89,21-57 33rd St #4B,"Astoria, Astoria, NY",2bd,1ba,"$279,000"


### Data Cleaning

In [10]:
# Remove the unit suffixes so Beds/Baths hold just the count text ("6bd" -> "6", "9ba" -> "9").
# The vectorized .str accessor performs the same strip as a per-row lambda, but it
# passes missing values (None/NaN) through instead of raising AttributeError.
# Note: strip() trims any leading/trailing characters from the given set, not the
# literal suffix, and the resulting values are still strings, not numbers.
real_estate['Beds'] = real_estate['Beds'].str.strip('bd')
real_estate['Baths'] = real_estate['Baths'].str.strip('ba')

#### Updated DataFrame

In [11]:
# Re-display the frame to confirm the 'bd'/'ba' suffixes are gone from Beds and Baths.
real_estate

Unnamed: 0,Street,Regions,Beds,Baths,Price
0,432 Park Ave #PENTHOUSE,"Midtown, New York, NY",6,9,"$169,000,000"
1,303 E 57th St #32B,"Sutton Place, New York, NY",2,3,"$379,000"
2,27 Prospect Park W #1A,"Park Slope, Brooklyn, NY",2,2,"$589,000"
3,303 E 57th St #6D,"Sutton Place, New York, NY",2,2,"$375,000"
4,35 Poland Pl,"Emerson Hill, Staten Island, NY",3,3,"$699,000"
...,...,...,...,...,...
85,2505 Tenbroeck Ave,"Pelham Gardens, Bronx, NY",3,3,"$715,000"
86,223 E 28th St,"Flatbush, Brooklyn, NY",3,2,"$599,000"
87,89-23 202nd St,"Hollis, Hollis, NY",4,2,"$649,000"
88,37 Groton St,"Eltingville, Staten Island, NY",4,4,"$849,999"


### Save to Excel

In [12]:
# Write the table to an Excel workbook (relative path, so it lands in the current
# working directory); index=False leaves the DataFrame's row index out of the sheet.
real_estate.to_excel('realestate_multiple_pages.xlsx', index=False)