### Imports

In [12]:
from bs4 import BeautifulSoup
import requests
import pandas as pd

### Putting it all together

In [28]:
# Scrape the first three Trulia result pages for San Francisco and build one
# DataFrame with a row per listing card.
COLUMNS = ['Street', 'Regions', 'Beds', 'Baths', 'Price']


def _listing_field(card, testid, from_title):
    """Return one field of a listing card, or None when the card lacks it.

    card       -- BeautifulSoup tag for a single search-result <li>
    testid     -- value of the data-testid attribute on the wanted <div>
    from_title -- True to read the div's title attribute, False to read its text
    """
    node = card.find('div', {'data-testid': testid})
    if node is None:
        # find() returns None when nothing matches; report the field as missing
        # instead of failing the whole scrape with an AttributeError.
        return None
    return node.get('title') if from_title else node.get_text()


# Rows are accumulated in a plain list and turned into a DataFrame once at the
# end: DataFrame.append was removed in pandas 2.0 and copied the whole frame on
# every call.
records = []

# range(1, 4) yields pages 1, 2 and 3 (the stop value 4 is excluded)
for i in range(1, 4):
    # website request
    website = "https://www.trulia.com/CA/San_Francisco/" + str(i) + "_p/"
    response = requests.get(website, timeout=30)
    # Fail loudly on an HTTP error status rather than silently parsing an
    # error page and ending up with no rows.
    response.raise_for_status()

    # create soup object
    soup = BeautifulSoup(response.content, 'html.parser')

    # result items
    results = soup.find_all('li', {'class': 'SearchResultsList__WideCell-b7y9ki-2'})

    for result in results:
        # keep only the <li> elements that carry a data-testid attribute
        if not result.has_attr('data-testid'):
            continue
        records.append({
            'Street': _listing_field(result, 'property-street', True),
            'Regions': _listing_field(result, 'property-region', True),
            'Beds': _listing_field(result, 'property-beds', False),
            'Baths': _listing_field(result, 'property-baths', False),
            'Price': _listing_field(result, 'property-price', True),
        })

# Passing columns= keeps the expected schema even when no listing was found.
real_estate = pd.DataFrame(records, columns=COLUMNS)

### Information about the DataFrame

In [22]:
# Summarise the scraped frame: row count, column names, non-null counts and dtypes.
real_estate.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 120 entries, 0 to 119
Data columns (total 5 columns):
 #   Column   Non-Null Count  Dtype 
---  ------   --------------  ----- 
 0   Street   120 non-null    object
 1   Regions  120 non-null    object
 2   Beds     120 non-null    object
 3   Baths    120 non-null    object
 4   Price    120 non-null    object
dtypes: object(5)
memory usage: 4.8+ KB


In [23]:
# Preview the first rows of the scraped listings.
real_estate.head()

Unnamed: 0,Street,Regions,Beds,Baths,Price
0,2018 42nd Ave,"Outer Sunset, San Francisco, CA",2bd,1ba,"$899,000"
1,4366 25th St,"Noe Valley, San Francisco, CA",3bd,2ba,"$1,795,000"
2,224 Sea Cliff Ave,"Seacliff, San Francisco, CA",6bd,7ba,"$15,375,000"
3,45 Capra Way,"Marina, San Francisco, CA",4bd,6ba,"$10,300,000"
4,122 Chicago Way,"Crocker Amazon, San Francisco, CA",4bd,3ba,"$1,095,000"


In [24]:
# Preview the last rows of the scraped listings.
real_estate.tail()

Unnamed: 0,Street,Regions,Beds,Baths,Price
115,1506 25th St,"Potrero Hill, San Francisco, CA",4bd,3ba,"$1,995,000"
116,262 Downey St,"Ashbury Heights, San Francisco, CA",2bd,2ba,"$1,199,000"
117,148 Corwin St,"Eureka Valley, San Francisco, CA",2bd,3ba,"$1,699,000"
118,898 Francisco St,"Russian Hill, San Francisco, CA",6bd,7ba,"$9,950,000"
119,1828 Page St,"Haight, San Francisco, CA",2bd,2ba,"$1,395,000"


### Data Cleaning

In [25]:
# Drop the trailing unit suffix ("bd" / "ba") so only the count remains.
# An anchored regex removes exactly that suffix, whereas str.strip('bd') treats
# its argument as a set of characters to trim from both ends; the .str accessor
# also passes missing values through instead of raising.
real_estate['Beds'] = real_estate['Beds'].str.replace(r'bd$', '', regex=True)
real_estate['Baths'] = real_estate['Baths'].str.replace(r'ba$', '', regex=True)

#### Updated DataFrame

In [26]:
# Display the frame after the unit suffixes were stripped from Beds and Baths.
real_estate

Unnamed: 0,Street,Regions,Beds,Baths,Price
0,2018 42nd Ave,"Outer Sunset, San Francisco, CA",2,1,"$899,000"
1,4366 25th St,"Noe Valley, San Francisco, CA",3,2,"$1,795,000"
2,224 Sea Cliff Ave,"Seacliff, San Francisco, CA",6,7,"$15,375,000"
3,45 Capra Way,"Marina, San Francisco, CA",4,6,"$10,300,000"
4,122 Chicago Way,"Crocker Amazon, San Francisco, CA",4,3,"$1,095,000"
...,...,...,...,...,...
115,1506 25th St,"Potrero Hill, San Francisco, CA",4,3,"$1,995,000"
116,262 Downey St,"Ashbury Heights, San Francisco, CA",2,2,"$1,199,000"
117,148 Corwin St,"Eureka Valley, San Francisco, CA",2,3,"$1,699,000"
118,898 Francisco St,"Russian Hill, San Francisco, CA",6,7,"$9,950,000"


### Save to Excel

In [27]:
# Write the listings to an Excel workbook, leaving out the row index.
output_file = 'realestate_multiple_pages.xlsx'
real_estate.to_excel(output_file, index=False)