### Imports

In [1]:
from bs4 import BeautifulSoup 
import requests
import pandas as pd

### HTTP Request

#### Store the website URL in a variable

In [2]:
# Trulia search-results URL for San Francisco, CA (first page of listings)
website = "https://www.trulia.com/CA/San_Francisco/"

#### Get Request

In [3]:
# Fetch the listings page. requests applies no timeout by default, so an
# explicit one keeps the notebook from hanging forever on a stalled connection.
response = requests.get(website, timeout=30)

#### Status Code

In [4]:
response.status_code

200

### Soup Object

In [5]:
# Parse the raw response bytes with Python's built-in HTML parser
soup = BeautifulSoup(markup=response.content, features='html.parser')

In [6]:
# soup  (left disabled; uncomment to display the whole parsed document)

### Results

In [7]:
# Every <li> element carrying the search-results cell class
result = soup.find_all(
    name='li',
    attrs={'class': 'SearchResultsList__WideCell-b7y9ki-2'},
)

In [8]:
len(result)

32

In [9]:
# The page shows 30 listings, but the selector above matched 32 elements,
# so the selection criteria need to be tightened.

#### Update Result

In [10]:
# Container for the <li> elements that pass the data-testid filter
results_update = list()

In [11]:
# Keep only the <li> elements that carry a data-testid attribute.
# The list is rebuilt from scratch (rather than appended to) so that
# re-running this cell never duplicates entries.
results_update = [item for item in result if item.has_attr('data-testid')]

In [12]:
len(results_update)

30

#### Target Necessary Data

In [13]:
# Street address of the first listing
results_update[0].find('div', attrs={'data-testid': 'property-street'}).get_text()

'4366 25th St'

In [14]:
# Region (neighbourhood, city, state) of the first listing
results_update[0].find('div', attrs={'data-testid': 'property-region'}).get_text()

'Noe Valley, San Francisco, CA'

In [15]:
# Bedroom count text of the first listing
results_update[0].find('div', attrs={'data-testid': 'property-beds'}).get_text()

'3bd'

In [16]:
# Bathroom count text of the first listing
results_update[0].find('div', attrs={'data-testid': 'property-baths'}).get_text()

'2ba'

In [17]:
# Asking price text of the first listing
results_update[0].find('div', attrs={'data-testid': 'property-price'}).get_text()

'$1,795,000'

#### Append Results in List

In [18]:
# Street address text for every filtered listing card
streets = [
    card.find('div', attrs={'data-testid': 'property-street'}).get_text()
    for card in results_update
]

In [19]:
len(streets)

30

In [20]:
streets

['4366 25th St',
 '990 Hollister Ave',
 '2018 42nd Ave',
 '70 Crestlake Dr',
 '2695 23rd Ave',
 '45 Capra Way',
 '2582 Filbert St',
 '42 Mars St',
 '520 Miramar Ave',
 '720 Laguna Honda Blvd',
 '2698 Pacific Ave',
 '1836 Mason St',
 '1879 23rd Ave',
 '461 2nd St #C303',
 '870 Harrison St #405',
 '1468 Van Dyke Ave',
 '348 Eureka St',
 '1921 Washington St',
 '882 Moultrie St',
 '224 Sea Cliff Ave',
 '4440 20th St',
 '1700 Cayuga Ave',
 '607-607A Arkansas St',
 '662 Hearst Ave',
 '2219 Pacific Ave',
 '600 Gates St',
 '4366 26th St',
 '126 Clayton St',
 '2828 Vallejo St',
 '333 Haight St #3']

In [21]:
def _collect_text(cards, test_id):
    """Return the text of the <div> tagged with `test_id` inside each card.

    Parameters
    ----------
    cards : iterable of listing elements supporting ``.find``
    test_id : str, value of the ``data-testid`` attribute to look up

    Returns
    -------
    list of str, one entry per card, in the same order as `cards`.
    """
    return [card.find('div', {'data-testid': test_id}).get_text() for card in cards]


regions = _collect_text(results_update, 'property-region')
beds = _collect_text(results_update, 'property-beds')
baths = _collect_text(results_update, 'property-baths')
# Only the price text is trimmed of surrounding whitespace
prices = [price_text.strip() for price_text in _collect_text(results_update, 'property-price')]

In [22]:
# Sanity check: print the length of each column list, one per line
print(*map(len, (regions, beds, baths, prices)), sep='\n')

30
30
30
30


### Create DataFrame

In [23]:
# Assemble the per-field lists into one table with one row per listing
real_estate = pd.DataFrame(
    data={
        'Street': streets,
        'Region': regions,
        'Bedrooms': beds,
        'Bathrooms': baths,
        'Price': prices,
    }
)

In [24]:
real_estate.head()

Unnamed: 0,Street,Region,Bedrooms,Bathrooms,Price
0,4366 25th St,"Noe Valley, San Francisco, CA",3bd,2ba,"$1,795,000"
1,990 Hollister Ave,"Bret Harte, San Francisco, CA",3bd,2ba,"$859,000"
2,2018 42nd Ave,"Outer Sunset, San Francisco, CA",2bd,1ba,"$899,000"
3,70 Crestlake Dr,"Parkside, San Francisco, CA",3bd,3ba,"$1,495,000"
4,2695 23rd Ave,"Parkside, San Francisco, CA",4bd,3ba,"$1,399,000"


#### Output to Excel

In [25]:
# Write the page-1 table to an Excel workbook, omitting the index column
real_estate.to_excel(excel_writer='real_estate_page_1.xlsx', index=False)

#### Scraping Multiple Pages

In [27]:
# Scrape result pages 1-3 and gather every listing into a single DataFrame.
#
# Rows are accumulated in a plain list and converted to a DataFrame once at
# the end: DataFrame.append was removed in pandas 2.0, and growing a frame
# row by row copies the whole frame on every iteration.
listing_columns = ['Street', 'Region', 'Bedrooms', 'Bathrooms', 'Price']

# Output column -> value of the data-testid attribute holding that field
testid_by_column = {
    'Street': 'property-street',
    'Region': 'property-region',
    'Bedrooms': 'property-beds',
    'Bathrooms': 'property-baths',
    'Price': 'property-price',
}


def _field_text(card, test_id):
    """Return the text of the <div data-testid=test_id> inside `card`.

    Returns None when the card has no such element, so a listing that lacks
    a field produces a missing value instead of an AttributeError.
    """
    node = card.find('div', {'data-testid': test_id})
    return None if node is None else node.get_text()


listing_records = []
for page_number in range(1, 4):
    page_response = requests.get(
        'https://www.trulia.com/CA/San_Francisco/' + str(page_number) + '_p/',
        timeout=30,  # requests never times out unless told to
    )
    # Fail loudly on an HTTP error rather than silently parsing an error page
    # and ending up with zero rows for this page.
    page_response.raise_for_status()

    page_soup = BeautifulSoup(page_response.content, 'html.parser')
    page_items = page_soup.find_all('li', {'class': 'SearchResultsList__WideCell-b7y9ki-2'})

    for card in page_items:
        # Only <li> elements carrying a data-testid attribute are kept
        if not card.has_attr('data-testid'):
            continue
        record = {
            column: _field_text(card, test_id)
            for column, test_id in testid_by_column.items()
        }
        # Only the price text is trimmed of surrounding whitespace
        if record['Price'] is not None:
            record['Price'] = record['Price'].strip()
        listing_records.append(record)

# `columns=` fixes the column order and keeps the columns even with no rows
real_estate_new = pd.DataFrame(listing_records, columns=listing_columns)

In [28]:
real_estate_new.head()

Unnamed: 0,Street,Region,Bedrooms,Bathrooms,Price
0,4366 25th St,"Noe Valley, San Francisco, CA",3bd,5ba,"$1,795,000"
1,990 Hollister Ave,"Bret Harte, San Francisco, CA",3bd,2ba,"$859,000"
2,2018 42nd Ave,"Outer Sunset, San Francisco, CA",2bd,6ba,"$899,000"
3,70 Crestlake Dr,"Parkside, San Francisco, CA",3bd,2ba,"$1,495,000"
4,2695 23rd Ave,"Parkside, San Francisco, CA",4bd,3ba,"$1,399,000"


In [29]:
real_estate_new.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 90 entries, 0 to 89
Data columns (total 5 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   Street     90 non-null     object
 1   Region     90 non-null     object
 2   Bedrooms   90 non-null     object
 3   Bathrooms  90 non-null     object
 4   Price      90 non-null     object
dtypes: object(5)
memory usage: 3.6+ KB


In [30]:
# Drop the unit suffix so only the count remains (e.g. '3bd' -> '3', '2ba' -> '2').
# An end-anchored pattern removes exactly the suffix; str.strip('bd') treats its
# argument as a set of characters and would strip any leading/trailing 'b' or 'd'.
# The vectorised .str accessor also passes missing values through unchanged.
real_estate_new['Bedrooms'] = real_estate_new['Bedrooms'].str.replace(r'bd$', '', regex=True)
real_estate_new['Bathrooms'] = real_estate_new['Bathrooms'].str.replace(r'ba$', '', regex=True)

real_estate_new.head()

Unnamed: 0,Street,Region,Bedrooms,Bathrooms,Price
0,4366 25th St,"Noe Valley, San Francisco, CA",3,5,"$1,795,000"
1,990 Hollister Ave,"Bret Harte, San Francisco, CA",3,2,"$859,000"
2,2018 42nd Ave,"Outer Sunset, San Francisco, CA",2,6,"$899,000"
3,70 Crestlake Dr,"Parkside, San Francisco, CA",3,2,"$1,495,000"
4,2695 23rd Ave,"Parkside, San Francisco, CA",4,3,"$1,399,000"
