In [3]:
from bs4 import BeautifulSoup
import requests
from selenium import webdriver
from selenium.webdriver.common.by import By
import time

## PART 1
#### Scrape links, addresses and prices of all the rental properties from the website

In [None]:

headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36",
    "Accept-Language": "en-US,en;q=0.9"
}

response = requests.get("https://appbrewery.github.io/Zillow-Clone/", headers=headers)

data = response.text
soup = BeautifulSoup(data, 'html.parser')
# print(soup.prettify())

In [8]:
# Create a list of all the links on the page using CSS Selector
all_links_elements = soup.select(".StyledPropertyCardDataWrapper a")

all_links = [link["href"] for link in all_links_elements]
print(f"There are {len(all_links)} links to individual listins in total: \n")
print(all_links)

There are 44 links to individual listins in total: 

['https://www.zillow.com/b/747-geary-street-oakland-ca-CYzGVt/', 'https://www.zillow.com/apartments/san-francisco-ca/parkmerced/5XjKHx/', 'https://www.zillow.com/apartments/san-francisco-ca/845-sutter/5XkKMm/', 'https://www.zillow.com/apartments/san-francisco-ca/100-van-ness/5hJ5Sv/', 'https://www.zillow.com/apartments/san-francisco-ca/828-franklin/5XkH2V/', 'https://www.zillow.com/apartments/san-francisco-ca/923-folsom/5Yy6Np/', 'https://www.zillow.com/apartments/san-francisco-ca/hanover-soma-west/9NJsx9/', 'https://www.zillow.com/apartments/san-francisco-ca/slate-residences/9NJxjf/', 'https://www.zillow.com/apartments/san-francisco-ca/northpoint-apartments/5XjLPJ/', 'https://www.zillow.com/apartments/san-francisco-ca/the-landing/9NK3gC/', 'https://www.zillow.com/apartments/san-francisco-ca/1350-washington-street/9NKDS7/', 'https://www.zillow.com/apartments/san-francisco-ca/2775-market-st/5XsQ4D/', 'https://www.zillow.com/apartments

In [10]:
# Create a list of all the addresses on the page by using a CSS Selector
# Then remove newlines i.e \n, pipe symbols i.e |, and all te whitespaces ...
# in order to clean up all the addresses data
all_addresses_elements = soup.select(".StyledPropertyCardDataWrapper address")

all_addresses = [address.get_text().replace("|", " ").strip() for address in all_addresses_elements]
print(f"After cleaning up the addresses data, the {len(all_addresses)} addresses now look like: \n")
print(all_addresses)

After cleaning up the addresses data, the 44 addresses now look like: 

['747 Geary Street, 747 Geary St, Oakland, CA 94609', 'Parkmerced   3711 19th Ave, San Francisco, CA', '845 Sutter, 845 Sutter St APT 509, San Francisco, CA', '100 Van Ness, 100 Van Ness Ave #410, San Francisco, CA 94102', '828 Franklin, 828 Franklin St #606, San Francisco, CA 94102', '923 Folsom, 923 Folsom St APT 506, San Francisco, CA 94107', 'Hanover Soma West, 1140 Harrison St #138, San Francisco, CA 94103', 'Slate Residences, 911 Bryant St #102, San Francisco, CA 94103', 'NorthPoint Apartments, 2211 Stockton St, San Francisco, CA 94133', 'The Landing   1395 22nd St, San Francisco, CA', '1350 Washington Street   1350 Washington St, San Francisco, CA', '2775 Market St, 2775 Market St APT 102, San Francisco, CA 94114', 'Mt. Sutro, 480 Warren Dr #312, San Francisco, CA 94131', 'Konrad on the Park, 971 Eddy St #212, San Francisco, CA 94109', '1188 Mission at Trinity Place   1188 Mission St, San Francisco, CA', 'No

In [11]:
# Create a list of all prices on the page using a CSS Selector
# Remove newlines \n, pipe symbols |, and all the whitespaces ...
# from the prices data
all_prices_elements = soup.select(".PropertyCardWrapper span")

all_prices = [price.get_text().replace("/mo", "").split("+")[0] for price in all_prices_elements]
print(f"After cleaning up the prices dataset, the {len(all_prices)} prices now look as follows: \n")
print(all_prices)

After cleaning up the prices dataset, the 44 prices now look as follows: 

['$2,895', '$2,810', '$2,450', '$2,940', '$2,395', '$2,816', '$2,974', '$2,704', '$2,810', '$2,798', '$2,195', '$2,995', '$2,895', '$2,805', '$1914', '$2,950', '$2,917', '$2,595', '$2,000', '$2,824', '$2,800', '$2,450', '$2,095', '$2,298', '$2,809', '$2,495', '$2,494', '$2,775', '$1,745', '$2,764', '$2,799', '$2,525', '$2,199', '$1,995', '$1,895', '$2,898', '$2,999', '$2,830', '$2,895', '$2,775', '$2,998', '$2,895', '$2,773', '$2,975']


## PART 2
#### Fill in the Google form using selenium

In [13]:
# Open Chrome browser
options = webdriver.ChromeOptions()
options.add_experimental_option("detach", True)
driver = webdriver.Chrome(options=options)

In [21]:
for i in range(len(all_links)):
    # Add your google form link
    driver.get("https://docs.google.com/forms/d/e/1FAIpQLSeyMYtqGBeoxsFN72P_ZfEpzR-6ZZDnfnQ6syB2xzjAx1bT3g/viewform?usp=sf_link")

    # Use xpath to grab the "short answer" fields in the Google form
    address = driver.find_element(by=By.XPATH, value='//*[@id="mG61Hd"]/div[2]/div/div[2]/div[1]/div/div/div[2]/div/div[1]/div/div[1]/input')
    price = driver.find_element(by=By.XPATH, value='//*[@id="mG61Hd"]/div[2]/div/div[2]/div[2]/div/div/div[2]/div/div[1]/div/div[1]/input')
    link = driver.find_element(by=By.XPATH, value='//*[@id="mG61Hd"]/div[2]/div/div[2]/div[3]/div/div/div[2]/div/div[1]/div/div[1]/input')
    submit_button = driver.find_element(by=By.XPATH, value='//*[@id="mG61Hd"]/div[2]/div/div[3]/div[1]/div[1]/div/span')

    address.send_keys(all_addresses[i])
    price.send_keys(all_prices[i])
    link.send_keys(all_links[i])
    submit_button.click()