Use the requests library to download web pages

- Inspect the website's HTML source and identify the right URLs to download.
- Download and save web pages locally using the requests library.
- Create a function to automate downloading for different topics/search queries.

In [1]:
import requests

In [2]:
# Search-results page to scrape: RE/MAX homes-for-sale listings for Reno, NV.
topics_url = 'https://www.remax.com/homes-for-sale/nv/reno/city/3260600'

In [3]:
# Download the listings page. requests applies no timeout by default, so pass
# one explicitly; otherwise a stalled connection would block this cell forever.
response = requests.get(topics_url, timeout=30)

In [4]:
# HTTP status code of the download; 200 means the request succeeded.
response.status_code

200

In [5]:
# Length of the decoded response body, in characters.
len(response.text)

271541

In [6]:
# Keep the decoded HTML as a plain string; this is what gets parsed below.
page_contents = response.text

In [7]:
# Preview the first 1000 characters to confirm the expected HTML page came back.
page_contents[:1000]

'<!doctype html>\n<html data-n-head-ssr lang="en" data-n-head="%7B%22lang%22:%7B%22ssr%22:%22en%22%7D%7D">\n  <head >\n    <title>Reno, NV Real Estate &amp; Homes for Sale | RE/MAX</title><meta data-n-head="ssr" charset="utf-8"><meta data-n-head="ssr" name="google-site-verification" content="DNfEMQTm-Fcep3V_LUmztTa8sT4xHv1CjZ3tqNsUJAQ"><meta data-n-head="ssr" name="viewport" content="width=device-width, initial-scale=1, viewport-fit=cover"><meta data-n-head="ssr" name="facebook-domain-verification" content="l09r0fpqnm56wrh69okdhr85b5c5kc"><meta data-n-head="ssr" data-hid="ogImage" property="og:image" content="https://peak-static-prod.remax.booj.io/web/images/nice-house.jpg"><meta data-n-head="ssr" data-hid="description" name="description" content="Search the most complete Reno, NV real estate listings for sale. Find Reno, NV homes for sale, real estate, apartments, condos, townhomes, mobile homes, multi-family units, farm and land lots with RE/MAX&#x27;s powerful search tools."><meta d

USE BEAUTIFUL SOUP TO PARSE AND EXTRACT INFO

In [8]:
from bs4 import BeautifulSoup

In [9]:
# Parse the downloaded HTML into a searchable tree with Python's built-in parser.
doc = BeautifulSoup(markup=page_contents, features='html.parser')

Extract home address info

In [10]:
# Gather every <h5> element in the parsed page.
h5_tags = doc.find_all(name='h5')

In [11]:
# Number of <h5> elements found on the page.
len(h5_tags)

24

In [12]:
# Inspect the first few matches to see how the address is laid out in the markup.
h5_tags[:5]

[<h5 data-v-10bd6ae4="">1950 REED ST<span data-v-10bd6ae4="">, Reno, NV 89512</span></h5>,
 <h5 data-v-10bd6ae4="">345 S HOWARD<span data-v-10bd6ae4="">, Virginia City, NV 89440</span></h5>,
 <h5 data-v-10bd6ae4="">12935 VALLEY SPRINGS RD<span data-v-10bd6ae4="">, Reno, NV 89511</span></h5>,
 <h5 data-v-10bd6ae4="">865 PENNSYLVANIA DR<span data-v-10bd6ae4="">, Reno, NV 89503</span></h5>,
 <h5 data-v-10bd6ae4="">000 MOUNTAIN ASPEN LN<span data-v-10bd6ae4="">, Reno, NV 89510</span></h5>]

In [13]:
# .text joins the tag's own text with the text of its nested <span>.
h5_tags[0].text

'1950 REED ST, Reno, NV 89512'

In [14]:
# Full address string (street plus the nested city/state/zip span) for each <h5>.
home_address = [h5.text for h5 in h5_tags]

print(home_address)

['1950 REED ST, Reno, NV 89512', '345 S HOWARD, Virginia City, NV 89440', '12935 VALLEY SPRINGS RD, Reno, NV 89511', '865 PENNSYLVANIA DR, Reno, NV 89503', '000 MOUNTAIN ASPEN LN, Reno, NV 89510', '3525 YOSEMITE PL, Reno, NV 89503', '2050 BRISBANE AVE, Reno, NV 89503', '487 AUTUMN BREEZE CIR, Reno, NV 89511', '15033 CUPRITE ST, Reno, NV 89506', '18168 CEDAR VIEW DR, Reno, NV 89508', '9280 FLEETWOOD DR, Reno, NV 89506', '10270 MOTT DR, Reno, NV 89521', '0 C ST, Virginia City, NV 89440', '225 BRENHAM AVE, Reno, NV 89509', '13475 ARROWSPRINGS DR, Reno, NV 89511', '6308 WALNUT CREEK RD, Reno, NV 89523', '12519 BRASS RIDGE ST # HOMESITE, Reno, NV 89521', '8605 RED BARON BLVD, Reno, NV 89506', '1500 DEL WEBB PKWY W, Reno, NV 89523', '4201 W HIDDEN VALLEY DR, Reno, NV 89502', '15130 BAILEY CANYON DR, Reno, NV 89521', '7855 CRYSTAL SHORES CT, Reno, NV 89506', '17185 SUNBIRD LN, Reno, NV 89508', '16160 RHYOLITE CIR, Reno, NV 89521']


Extract price info

In [15]:
# Gather every <h4> element in the parsed page.
h4_tags = doc.find_all(name='h4')

In [19]:
# Number of <h4> elements found on the page (before filtering by class).
len(h4_tags)

29

In [22]:
# Restrict the <h4> search to elements carrying the "price" CSS class.
price_selector = 'price'
price_tags = doc.find_all('h4', class_=price_selector)

In [23]:
# Number of <h4 class="price"> elements found on the page.
len(price_tags)

24

In [25]:
# Inspect the first few price tags; the text is padded with newlines and spaces.
price_tags[:5]

[<h4 class="price" data-v-10bd6ae4="">
         $325,000
       </h4>,
 <h4 class="price" data-v-10bd6ae4="">
         $295,000
       </h4>,
 <h4 class="price" data-v-10bd6ae4="">
         $550,000
       </h4>,
 <h4 class="price" data-v-10bd6ae4="">
         $575,000
       </h4>,
 <h4 class="price" data-v-10bd6ae4="">
         $299,000
       </h4>]

In [29]:
# Price text of each listing; strip() removes the whitespace padding found in the markup.
home_price = [price_tag.text.strip() for price_tag in price_tags]

home_price[:5]

['$325,000', '$295,000', '$550,000', '$575,000', '$299,000']

Extract # of beds per property

In [30]:
# Select every <strong class="mr-1"> element.
# NOTE(review): the matches include values such as '1,039' and '3.76', so this
# class is evidently not specific to bed counts - a narrower selector is needed
# to isolate beds; confirm against the page markup.
beds_selector = 'mr-1'
bed_tags = doc.find_all('strong', class_=beds_selector)

In [31]:
# Inspect the first few matches for the 'mr-1' class.
bed_tags[:5]

[<strong class="mr-1" data-v-10bd6ae4="">2</strong>,
 <strong class="mr-1" data-v-10bd6ae4="">1</strong>,
 <strong class="mr-1" data-v-10bd6ae4="">1,039</strong>,
 <strong class="mr-1" data-v-10bd6ae4="">3.76</strong>,
 <strong class="mr-1" data-v-10bd6ae4="">3</strong>]

In [32]:
# Text of every tag matched by the 'mr-1' selector.
# NOTE(review): the printed list holds several figures per listing (72 values
# for 24 listings, some with thousands separators or decimals), so it is not a
# clean one-per-property bed count and cannot be used as a DataFrame column as-is.
home_beds = [stat_tag.text for stat_tag in bed_tags]

print(home_beds)

['2', '1', '1,039', '3.76', '3', '2', '1,642', '4', '3', '1,828', '48.79', '3', '2', '1,344', '3', '2', '1,672', '3', '3', '1,409', '4', '3', '1,587', '3', '2', '1,040', '3', '2', '1,056', '3', '3', '2,826', '0.62', '3', '3', '1,836', '4', '4', '3,342', '3', '2', '1,141', '2', '3', '2,005', '4', '2', '1,400', '3', '2', '2,496', '4', '3', '2,844', '3', '2', '1,838', '3', '2', '1,318', '3', '2', '1,292', '3', '3', '1,320']


Create dataframe using pandas

In [33]:
import pandas as pd

In [39]:
# Map each column name to its list of scraped values (one entry per listing).
home_dict = dict(address=home_address, price=home_price)

In [40]:
# Build a table with one row per listing from the column dictionary.
home_df = pd.DataFrame(data=home_dict)

In [41]:
# Display the assembled table of addresses and prices.
home_df

Unnamed: 0,address,price
0,"1950 REED ST, Reno, NV 89512","$325,000"
1,"345 S HOWARD, Virginia City, NV 89440","$295,000"
2,"12935 VALLEY SPRINGS RD, Reno, NV 89511","$550,000"
3,"865 PENNSYLVANIA DR, Reno, NV 89503","$575,000"
4,"000 MOUNTAIN ASPEN LN, Reno, NV 89510","$299,000"
5,"3525 YOSEMITE PL, Reno, NV 89503","$455,000"
6,"2050 BRISBANE AVE, Reno, NV 89503","$524,900"
7,"487 AUTUMN BREEZE CIR, Reno, NV 89511","$459,000"
8,"15033 CUPRITE ST, Reno, NV 89506","$325,000"
9,"18168 CEDAR VIEW DR, Reno, NV 89508","$379,000"


CREATING A CSV FILE WITH THE EXTRACTED INFO

In [42]:
# Write the table to home.csv in the working directory. `index` is documented
# as a bool, so pass False explicitly (rather than relying on None being falsy)
# to leave the row index out of the file.
home_df.to_csv('home.csv', index=False)