<h1><a>Data Scraping from Real Estate Listings</a></h1>

<h2><a id="index">Index</a></h2>

[1. Grab Property Price](#propertyprice)

[2. Grab Property Address](#propertyaddress)

[3. Scraping Special Elements](#specialelements)

[4. Store Findings in Pandas DataFrame](#storedata)

[5. Loop Over Pages](#loopoverpages)

<h2><a id="propertyprice">1. Grab Property Price</a></h2>

[Index](#index)

In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

In [2]:
# Load the first page of listings.
# The timeout stops the cell from hanging forever on a stalled connection, and
# raise_for_status() surfaces HTTP errors here instead of letting an error page
# be parsed as if it contained listings.
r = requests.get(
    "https://pythonizing.github.io/data/real-estate/rock-springs-wy/LCWYROCKSPRINGS/",
    timeout=30,
)
r.raise_for_status()
c = r.content  # raw response bytes, handed to BeautifulSoup in the next cell

In [3]:
# Parse the downloaded bytes with the "html.parser" backend.
soup = BeautifulSoup(markup=c, features="html.parser")

In [4]:
# Collect every <div class="propertyRow"> element found on the page.
# NOTE(review): the name `all` shadows the Python builtin; it is kept because
# the following cells iterate over it.
property_row_filter = {"class": "propertyRow"}
all = soup.find_all("div", attrs=property_row_filter)
len(all)

10

In [5]:
# Raw text of each listing's <h4 class="propPrice"> heading; it is converted
# to integers in the next cell.
price = [
    listing.find("h4", attrs={"class": "propPrice"}).text
    for listing in all
]

In [6]:
# Turn the scraped price text into integers by deleting newlines, spaces,
# "$" and "," before converting.
# str(value) keeps the cell safe to re-run: after the first run `price` already
# holds ints, and calling string methods on them raised AttributeError.
_price_noise = str.maketrans("", "", "\n $,")
price = [int(str(value).translate(_price_noise)) for value in price]
price

[725000,
 452900,
 396900,
 389900,
 254000,
 252900,
 210000,
 209000,
 199900,
 196900]

In [7]:
# Read the text of the last <a class="Page"> link; a later cell converts it to
# an int and uses it as the number of result pages.
page_links = soup.find_all("a", attrs={"class": "Page"})
page_nbr = page_links[-1].text
print(page_nbr)

3


<h2><a id="propertyaddress">2. Grab Property Address</a></h2>

[Index](#index)

In [8]:
# Print price, both address lines and the bed / sqft / bath figures for every
# listing, followed by a separator line.
for item in all:
    price_text = item.find("h4", {"class": "propPrice"}).text
    print(price_text.replace("\n", "").replace(" ", ""))

    # Look the address spans up once; index 0 and 1 are the two address lines.
    address_lines = item.find_all("span", {"class": "propAddressCollapse"})
    print(address_lines[0].text)
    print(address_lines[1].text)

    # Each figure sits in a <b> inside a <span> with the given class. A listing
    # may lack any of them, in which case find() returns None and the chained
    # attribute access raises AttributeError. Only that error is treated as
    # "missing", so unrelated failures are no longer silently swallowed.
    for info_class in ("infoBed", "infoSqft", "infoValueFullBath", "infoValueHalfBath"):
        try:
            print(item.find("span", {"class": info_class}).find("b").text)
        except AttributeError:
            print(None)
    print(" ")


$725,000
0 Gateway
Rock Springs, WY 82901
None
None
None
None
 
$452,900
1003 Winchester Blvd.
Rock Springs, WY 82901
4
None
4
None
 
$396,900
600 Talladega
Rock Springs, WY 82901
5
None
3
None
 
$389,900
3239 Spearhead Way
Rock Springs, WY 82901
4
None
3
1
 
$254,000
522 Emerald Street
Rock Springs, WY 82901
3
None
3
None
 
$252,900
1302 Veteran's Drive
Rock Springs, WY 82901
4
None
2
None
 
$210,000
1021 Cypress Cir
Rock Springs, WY 82901
4
None
3
None
 
$209,000
913 Madison Dr
Rock Springs, WY 82901
3
None
2
None
 
$199,900
1344 Teton Street
Rock Springs, WY 82901
3
None
2
None
 
$196,900
4 Minnies Lane
Rock Springs, WY 82901
3
None
2
None
 


<h2><a id="specialelements">3. Scraping Special Elements</a></h2>

[Index](#index)

In [9]:
# Print price, address, bed / sqft / bath figures and, when present, the
# "Lot Size" feature for every listing, followed by a separator line.
for item in all:
    price_text = item.find("h4", {"class": "propPrice"}).text
    print(price_text.replace("\n", "").replace(" ", ""))

    # Look the address spans up once; index 0 and 1 are the two address lines.
    address_lines = item.find_all("span", {"class": "propAddressCollapse"})
    print(address_lines[0].text)
    print(address_lines[1].text)

    # A missing span or <b> makes find() return None, so the chained attribute
    # access raises AttributeError. Only that error is treated as "missing".
    for info_class in ("infoBed", "infoSqft", "infoValueFullBath", "infoValueHalfBath"):
        try:
            print(item.find("span", {"class": info_class}).find("b").text)
        except AttributeError:
            print(None)

    # Features are stored as parallel featureGroup (label) / featureName
    # (value) spans inside each columnGroup; pair them up and print the value
    # whose label mentions "Lot Size".
    for column_group in item.find_all("div", {"class": "columnGroup"}):
        feature_labels = column_group.find_all("span", {"class": "featureGroup"})
        feature_values = column_group.find_all("span", {"class": "featureName"})
        for feature_group, feature_name in zip(feature_labels, feature_values):
            if "Lot Size" in feature_group.text:
                print(feature_name.text)
    print(" ")

$725,000
0 Gateway
Rock Springs, WY 82901
None
None
None
None
 
$452,900
1003 Winchester Blvd.
Rock Springs, WY 82901
4
None
4
None
0.21 Acres
 
$396,900
600 Talladega
Rock Springs, WY 82901
5
None
3
None
 
$389,900
3239 Spearhead Way
Rock Springs, WY 82901
4
None
3
1
Under 1/2 Acre, 
 
$254,000
522 Emerald Street
Rock Springs, WY 82901
3
None
3
None
Under 1/2 Acre, 
 
$252,900
1302 Veteran's Drive
Rock Springs, WY 82901
4
None
2
None
0.27 Acres
 
$210,000
1021 Cypress Cir
Rock Springs, WY 82901
4
None
3
None
Under 1/2 Acre, 
 
$209,000
913 Madison Dr
Rock Springs, WY 82901
3
None
2
None
Under 1/2 Acre, 
 
$199,900
1344 Teton Street
Rock Springs, WY 82901
3
None
2
None
Under 1/2 Acre, 
 
$196,900
4 Minnies Lane
Rock Springs, WY 82901
3
None
2
None
2.02 Acres
 


<h2><a id="storedata">4. Store Findings in Pandas DataFrame</a></h2>

[Index](#index)

In [10]:
# Column-oriented accumulator: one list per output column, one entry appended
# per listing, so every list must end up with the same length.
results = {
    "Price": [],
    "Address 1": [],
    "Address 2": [],
    "Beds": [],
    "Squarefoot": [],
    "Full Baths": [],
    "Half Baths": [],
    "Lot Size": []
}

# Output column -> class of the <span> whose <b> child holds the value.
info_columns = {
    "Beds": "infoBed",
    "Squarefoot": "infoSqft",
    "Full Baths": "infoValueFullBath",
    "Half Baths": "infoValueHalfBath",
}

for item in all:
    price_text = item.find("h4", {"class": "propPrice"}).text
    results["Price"].append(price_text.replace("\n", "").replace(" ", ""))

    # Look the address spans up once; index 0 and 1 are the two address lines.
    address_lines = item.find_all("span", {"class": "propAddressCollapse"})
    results["Address 1"].append(address_lines[0].text)
    results["Address 2"].append(address_lines[1].text)

    # A missing span or <b> makes find() return None, so the chained attribute
    # access raises AttributeError. Only that error means "value not listed".
    for column, info_class in info_columns.items():
        try:
            value = item.find("span", {"class": info_class}).find("b").text
        except AttributeError:
            value = None
        results[column].append(value)

    # Record exactly one "Lot Size" per listing (the first match, else None).
    # Appending on every match would make this column longer than the others
    # and pd.DataFrame(results) would then fail.
    lot_size = None
    for column_group in item.find_all("div", {"class": "columnGroup"}):
        feature_labels = column_group.find_all("span", {"class": "featureGroup"})
        feature_values = column_group.find_all("span", {"class": "featureName"})
        for feature_group, feature_name in zip(feature_labels, feature_values):
            if lot_size is None and "Lot Size" in feature_group.text:
                lot_size = feature_name.text
    results["Lot Size"].append(lot_size)

In [11]:
# Convert the dict of column lists into a DataFrame and preview the first rows.
# NOTE(review): `results` is rebound from a dict to a DataFrame here.
results = pd.DataFrame(data=results)
results.head(n=10)

Unnamed: 0,Price,Address 1,Address 2,Beds,Squarefoot,Full Baths,Half Baths,Lot Size
0,"$725,000",0 Gateway,"Rock Springs, WY 82901",,,,,
1,"$452,900",1003 Winchester Blvd.,"Rock Springs, WY 82901",4.0,,4.0,,0.21 Acres
2,"$396,900",600 Talladega,"Rock Springs, WY 82901",5.0,,3.0,,
3,"$389,900",3239 Spearhead Way,"Rock Springs, WY 82901",4.0,,3.0,1.0,"Under 1/2 Acre,"
4,"$254,000",522 Emerald Street,"Rock Springs, WY 82901",3.0,,3.0,,"Under 1/2 Acre,"
5,"$252,900",1302 Veteran's Drive,"Rock Springs, WY 82901",4.0,,2.0,,0.27 Acres
6,"$210,000",1021 Cypress Cir,"Rock Springs, WY 82901",4.0,,3.0,,"Under 1/2 Acre,"
7,"$209,000",913 Madison Dr,"Rock Springs, WY 82901",3.0,,2.0,,"Under 1/2 Acre,"
8,"$199,900",1344 Teton Street,"Rock Springs, WY 82901",3.0,,2.0,,"Under 1/2 Acre,"
9,"$196,900",4 Minnies Lane,"Rock Springs, WY 82901",3.0,,2.0,,2.02 Acres


In [12]:
# Save the first-page results to CSV without the index column.
# NOTE(review): the last cell of the notebook writes to this same file name,
# so this output is replaced once the multi-page scrape has run.
results.to_csv(path_or_buf="House Price Info.csv", index=False)

<h2><a id="loopoverpages">5. Loop Over Pages</a></h2>

[Index](#index)

In [13]:
# Load other pages
base_url = "https://pythonizing.github.io/data/real-estate/rock-springs-wy/LCWYROCKSPRINGS/t=0&s="
urls = [base_url+str(i*10) for i in range(int(page_nbr))]

In [14]:
# Fresh column-oriented accumulator for the multi-page scrape: every column
# starts as its own empty list and receives one entry per listing.
result_columns = (
    "Price",
    "Address 1",
    "Address 2",
    "Beds",
    "Squarefoot",
    "Full Baths",
    "Half Baths",
    "Lot Size",
)
results = {column: [] for column in result_columns}

In [15]:
# LOOP OVER Pages
for page in urls:
    r = requests.get(page)
    c = r.content
    soup = BeautifulSoup(c, "html.parser")
    # Grab PropertyRow class
    all = soup.find_all("div", {"class": "propertyRow"})
    for item in all:
        flag = 0
        results["Price"].append((item.find("h4", {"class": "propPrice"}).text.replace("\n","").replace(" ", "")))
        results["Address 1"].append(item.find_all("span", {"class": "propAddressCollapse"})[0].text)
        results["Address 2"].append(item.find_all("span", {"class": "propAddressCollapse"})[1].text)
        try:
            results["Beds"].append(item.find("span", {"class": "infoBed"}).find("b").text)
        except:
            results["Beds"].append(None)
        try:
            results["Squarefoot"].append(item.find("span", {"class": "infoSqft"}).find("b").text)
        except:
            results["Squarefoot"].append(None)
        try:
            results["Full Baths"].append(item.find("span", {"class": "infoValueFullBath"}).find("b").text)
        except:
            results["Full Baths"].append(None)
        try:
            results["Half Baths"].append(item.find("span", {"class": "infoValueHalfBath"}).find("b").text)
        except:
            results["Half Baths"].append(None)
        for column_group in item.find_all("div", {"class": "columnGroup"}):
            for feature_group, feature_name in zip(column_group.find_all("span", {"class": "featureGroup"}), column_group.find_all("span", {"class": "featureName"})):
                #print(feature_group.text, feature_name.text)
                if "Lot Size" in feature_group.text:
                    results["Lot Size"].append(feature_name.text)
                    flag = 1
        if flag == 0:
            results["Lot Size"].append(None)

In [16]:
# Convert the accumulated column lists into a DataFrame and preview up to 40 rows.
# NOTE(review): `results` is rebound from a dict to a DataFrame here.
results = pd.DataFrame(data=results)
results.head(n=40)


Unnamed: 0,Price,Address 1,Address 2,Beds,Squarefoot,Full Baths,Half Baths,Lot Size
0,"$725,000",0 Gateway,"Rock Springs, WY 82901",,,,,
1,"$452,900",1003 Winchester Blvd.,"Rock Springs, WY 82901",4.0,,4.0,,0.21 Acres
2,"$396,900",600 Talladega,"Rock Springs, WY 82901",5.0,,3.0,,
3,"$389,900",3239 Spearhead Way,"Rock Springs, WY 82901",4.0,,3.0,1.0,"Under 1/2 Acre,"
4,"$254,000",522 Emerald Street,"Rock Springs, WY 82901",3.0,,3.0,,"Under 1/2 Acre,"
5,"$252,900",1302 Veteran's Drive,"Rock Springs, WY 82901",4.0,,2.0,,0.27 Acres
6,"$210,000",1021 Cypress Cir,"Rock Springs, WY 82901",4.0,,3.0,,"Under 1/2 Acre,"
7,"$209,000",913 Madison Dr,"Rock Springs, WY 82901",3.0,,2.0,,"Under 1/2 Acre,"
8,"$199,900",1344 Teton Street,"Rock Springs, WY 82901",3.0,,2.0,,"Under 1/2 Acre,"
9,"$196,900",4 Minnies Lane,"Rock Springs, WY 82901",3.0,,2.0,,2.02 Acres


In [17]:
# Save the multi-page results to CSV without the index column.
# NOTE(review): this reuses the file name written by the single-page export
# earlier in the notebook, so that earlier file is overwritten.
results.to_csv(path_or_buf="House Price Info.csv", index=False)