## 캡스톤 프로젝트 - 데이터 입력 작업 자동화 
- 여러 다른 데이터 입력 작업 형식을 옮기는 작업 
- 여기서 우리가 다룰 작업은 데이터 입력 작업 - 질로우 웹사이트에 부동산 가격 조사 
    - URL: https://www.zillow.com
- 개요 
    - 이번 캡스톤 프로젝트에서는 웹사이트와 웹 스크래핑에 대해 배운 내용을 모두 적용하여, bs4와 Selenium으로 프로젝트 요구사항을 충족시켜 프로젝트를 완성하는 것이 목표이다.
- 과정 
    1. 구글 설문지에 새 입력양식 만들기 - https://docs.google.com/forms/에서 새 입력양식을 만들기
    2. 양식에 '단답형' 질문 3개 추가하기 
        - What's the address of the property?
        - What's the price per month?
        - What's the link to the property?
    3. '보내기'를 클릭하고, 양식의 링크 주소를 복사 하기. 프로그램을 만들 때 해당 주소 필요 
    4. 질로우(Zillow) 사이트의 이 페이지에서 웹사이트 구조를 확인하기. 여기 있는 데이터를 스크래핑하기
        - 프로그램 요구사항
            - BeautifulSoup과 requests를 사용하여 질로우 웹페이지에 있는 목록을 모두 스크래핑
            - 스크래핑한 항목들의 링크를 리스트로 만들기 
            - 스크래핑한 항목들의 가격을 리스트로 만들기
            - 스크래핑한 항목들의 주소를 리스트로 만들기 
            - 셀레니움으로 (위 1, 2, 3단계에서)생성한 입력양식을 채우기. 항목은 각각의 양식으로 저장하며, 양식에는 가격, 주소, 링크가 추가되어야 한다.
            - 데이터를 모두 입력하고 나면, 구글 설문지의 응답 탭에서 '구글 시트' 아이콘을 눌러 시트를 만들기. 부동산 정보가 담긴 스프레드시트가 만들어진다.

### Part 1 - Zillow 스크래핑 하기

In [4]:
# 필요 모듈 불러오기 
import requests
from urllib.request import urlopen
from bs4 import BeautifulSoup

# Constants
# Zillow search-results URL for San Francisco rentals. The searchQueryState
# query parameter is URL-encoded JSON (pagination, map bounds, region and
# filter flags).
ZILLOW = "https://www.zillow.com/san-francisco-ca/rentals/?searchQueryState=%7B%22pagination%22%3A%7B%7D%2C%22usersSearchTerm%22%3A%22San%20Francisco%2C%20CA%22%2C%22mapBounds%22%3A%7B%22west%22%3A-122.5516041887207%2C%22east%22%3A-122.3150548112793%2C%22south%22%3A37.65674456506729%2C%22north%22%3A37.89364955836753%7D%2C%22regionSelection%22%3A%5B%7B%22regionId%22%3A20330%2C%22regionType%22%3A6%7D%5D%2C%22isMapVisible%22%3Atrue%2C%22filterState%22%3A%7B%22fsba%22%3A%7B%22value%22%3Afalse%7D%2C%22fsbo%22%3A%7B%22value%22%3Afalse%7D%2C%22nc%22%3A%7B%22value%22%3Afalse%7D%2C%22fore%22%3A%7B%22value%22%3Afalse%7D%2C%22cmsn%22%3A%7B%22value%22%3Afalse%7D%2C%22auc%22%3A%7B%22value%22%3Afalse%7D%2C%22fr%22%3A%7B%22value%22%3Atrue%7D%2C%22ah%22%3A%7B%22value%22%3Atrue%7D%7D%2C%22isListVisible%22%3Atrue%2C%22mapZoom%22%3A12%7D"

In [5]:
# Fetch the Zillow results page with requests and parse it.
# Browser-like Accept-Language / User-Agent headers are sent with the request.
headers = {
    "Accept-Language": "ko-KR,ko;q=0.9,en-US;q=0.8,en;q=0.7",
    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36",
}
# requests has no default timeout; without one the call can block forever
# if the server never answers.
response = requests.get(ZILLOW, headers=headers, timeout=30)
# Fail loudly on a 4xx/5xx answer instead of silently parsing an error page
# and ending up with empty result lists further down.
response.raise_for_status()
zillow_info = response.content
# Parse the raw bytes with the standard-library HTML parser backend.
soup = BeautifulSoup(zillow_info, "html.parser")
print(soup.prettify())

<!DOCTYPE html>
<html lang="en">
 <head>
  <link href="https://fonts.googleapis.com/css?family=Open+Sans:400,600,700&amp;display=swap" rel="stylesheet"/>
  <style type="text/css">
   @font-face{font-display:swap;font-family:Ivar Headline;font-style:normal;font-weight:600;src:url(//www.zillowstatic.com/static-zsg/LATEST/static-zsg/zsg/z-fonts/ivar/IvarHeadline-SemiBold-extended.woff2) format(&quot;woff2&quot;),url(//www.zillowstatic.com/static-zsg/LATEST/static-zsg/zsg/z-fonts/ivar/IvarHeadline-SemiBold-extended.woff) format(&quot;woff&quot;);unicode-range:u+0000-001f,u+0080-200f,u+2020-faff,u+fb10-ffff}@font-face{font-display:swap;font-family:Ivar Headline;font-style:normal;font-weight:600;src:url(//www.zillowstatic.com/static-zsg/LATEST/static-zsg/zsg/z-fonts/ivar/IvarHeadline-SemiBold-core.woff2) format(&quot;woff2&quot;),url(//www.zillowstatic.com/static-zsg/LATEST/static-zsg/zsg/z-fonts/ivar/IvarHeadline-SemiBold-core.woff) format(&quot;woff&quot;);unicode-range:u+0020-007f,u+201?,

In [97]:
# Addresses
# NOTE(review): rent_address was not defined in any visible cell, so this cell
# raised NameError on a fresh run. The selector below is reconstructed from
# the price/link selectors used elsewhere in this notebook (an <address>
# element inside each card's link) — confirm against the live page markup.
rent_address = soup.select("div.StyledPropertyCardDataWrapper-c11n-8-84-3__sc-1omp4c3-0.bKpguY.property-card-data > a > address")
# Keep only the text after the last " | " separator; text without the
# separator is kept whole.
all_address = [tag.getText().split(" | ")[-1] for tag in rent_address]
all_address

['950 Tennessee St UNIT 124, San Francisco, CA 94107',
 '8 10th St, San Francisco, CA',
 'Metro @ Showplace Square, 670 King St, San Francisco, CA 94107',
 '747 Geary Street, 747 Geary St, San Francisco, CA 94109',
 '2 Townsend St, San Francisco, CA',
 '3711 19th Ave, San Francisco, CA',
 '845 Sutter, 845 Sutter St APT 509, San Francisco, CA 94109',
 '388 Beale St, San Francisco, CA',
 '100 Van Ness Ave, San Francisco, CA']

In [98]:
# Rent prices
# For every matched price element keep the text before the first "+" when the
# text contains one, otherwise the text before the first "/".
all_price = []
rent_price = soup.select("div.StyledPropertyCardDataWrapper-c11n-8-84-3__sc-1omp4c3-0.bKpguY.property-card-data > div.StyledPropertyCardDataArea-c11n-8-84-3__sc-yipmu-0.fDSTNn > div")
for price_tag in rent_price:
    price_text = price_tag.getText()
    separator = "+" if "+" in price_text else "/"
    all_price.append(price_text.split(separator)[0])
all_price

['$4,100',
 '$2,958',
 '$3,500',
 '$2,895',
 '$3,450',
 '$2,810',
 '$2,450',
 '$3,895',
 '$3,027']

In [100]:
# Listing links
# Collect the href of every card link as an absolute URL.
all_url = []
rent_url = soup.select("div.StyledPropertyCardDataWrapper-c11n-8-84-3__sc-1omp4c3-0.bKpguY.property-card-data > a")
for link in rent_url:
    href = link["href"]
    # Test the scheme prefix rather than looking for "https" anywhere in the
    # string: an absolute "http://" link must not get the origin prepended.
    if href.startswith(("http://", "https://")):
        all_url.append(href)
    else:
        # Site-relative path: prepend the Zillow origin.
        all_url.append(f"https://www.zillow.com{href}")

all_url

['https://www.zillow.com/homedetails/950-Tennessee-St-UNIT-124-San-Francisco-CA-94107/335654026_zpid/',
 'https://www.zillow.com/b/nema-san-francisco-ca-9NJxW7/',
 'https://www.zillow.com/apartments/san-francisco-ca/metro-%40-showplace-square/5Yyppp/',
 'https://www.zillow.com/b/747-geary-street-san-francisco-ca-9NKJ3J/',
 'https://www.zillow.com/apartments/san-francisco-ca/south-beach-marina-apartments/5XjQyv/',
 'https://www.zillow.com/apartments/san-francisco-ca/parkmerced/5XjKHx/',
 'https://www.zillow.com/apartments/san-francisco-ca/845-sutter/5XkKMm/',
 'https://www.zillow.com/b/388-beale-san-francisco-ca-5XjQ4f/',
 'https://www.zillow.com/apartments/san-francisco-ca/100-van-ness/5hJ5Sv/']

In [None]:
# part 1 zillow scraping
# 필요 모듈 불러오기 
import time
from fake_useragent import UserAgent

import requests
from urllib.request import urlopen
from bs4 import BeautifulSoup

import undetected_chromedriver as uc 
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.common.exceptions import ElementClickInterceptedException

# Constants
# Zillow search-results URL for San Francisco rentals. The searchQueryState
# query parameter is URL-encoded JSON (pagination, map bounds, region and
# filter flags).
ZILLOW = "https://www.zillow.com/san-francisco-ca/rentals/?searchQueryState=%7B%22pagination%22%3A%7B%7D%2C%22usersSearchTerm%22%3A%22San%20Francisco%2C%20CA%22%2C%22mapBounds%22%3A%7B%22west%22%3A-122.5516041887207%2C%22east%22%3A-122.3150548112793%2C%22south%22%3A37.65674456506729%2C%22north%22%3A37.89364955836753%7D%2C%22regionSelection%22%3A%5B%7B%22regionId%22%3A20330%2C%22regionType%22%3A6%7D%5D%2C%22isMapVisible%22%3Atrue%2C%22filterState%22%3A%7B%22fsba%22%3A%7B%22value%22%3Afalse%7D%2C%22fsbo%22%3A%7B%22value%22%3Afalse%7D%2C%22nc%22%3A%7B%22value%22%3Afalse%7D%2C%22fore%22%3A%7B%22value%22%3Afalse%7D%2C%22cmsn%22%3A%7B%22value%22%3Afalse%7D%2C%22auc%22%3A%7B%22value%22%3Afalse%7D%2C%22fr%22%3A%7B%22value%22%3Atrue%7D%2C%22ah%22%3A%7B%22value%22%3Atrue%7D%7D%2C%22isListVisible%22%3Atrue%2C%22mapZoom%22%3A12%7D"
# Google Forms URL (the form's "viewform" share link).
RESEARCH_URL = "https://docs.google.com/forms/d/e/1FAIpQLSdRAn7IbenPcd_LHRopwM0dY0J_y_3000Lcp9EiB9cZZ8XpCw/viewform?usp=sf_link"

# BeautifulSoup work

# Request headers: browser-like Accept-Language / User-Agent values.
headers = {
    "Accept-Language": "ko-KR,ko;q=0.9,en-US;q=0.8,en;q=0.7",
    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36",
}
# Send the request. requests has no default timeout; without one the call can
# block forever if the server never answers.
response = requests.get(ZILLOW, headers=headers, timeout=30)
# Fail loudly on a 4xx/5xx answer instead of silently parsing an error page
# and ending up with empty result lists further down.
response.raise_for_status()
# Raw bytes of the Zillow page.
zillow_info = response.content
# Parse with the standard-library HTML parser backend.
soup = BeautifulSoup(zillow_info, "html.parser")

# Addresses
# NOTE(review): rent_address was not defined anywhere before this line, so the
# script raised NameError here. The selector below is reconstructed from the
# price/link selectors used in this script (an <address> element inside each
# card's link) — confirm against the live page markup.
rent_address = soup.select("div.StyledPropertyCardDataWrapper-c11n-8-84-3__sc-1omp4c3-0.bKpguY.property-card-data > a > address")
# Keep only the text after the last " | " separator; text without the
# separator is kept whole.
all_address = [tag.getText().split(" | ")[-1] for tag in rent_address]

# Rent prices
# For every matched price element keep the text before the first "+" when the
# text contains one, otherwise the text before the first "/".
all_price = []
rent_price = soup.select("div.StyledPropertyCardDataWrapper-c11n-8-84-3__sc-1omp4c3-0.bKpguY.property-card-data > div.StyledPropertyCardDataArea-c11n-8-84-3__sc-yipmu-0.fDSTNn > div")
for price_tag in rent_price:
    price_text = price_tag.getText()
    separator = "+" if "+" in price_text else "/"
    all_price.append(price_text.split(separator)[0])
        
# Listing links
# Collect the href of every card link as an absolute URL.
all_url = []
rent_url = soup.select("div.StyledPropertyCardDataWrapper-c11n-8-84-3__sc-1omp4c3-0.bKpguY.property-card-data > a")
for link in rent_url:
    href = link["href"]
    # Test the scheme prefix rather than looking for "https" anywhere in the
    # string: an absolute "http://" link must not get the origin prepended.
    if href.startswith(("http://", "https://")):
        all_url.append(href)
    else:
        # Site-relative path: prepend the Zillow origin.
        all_url.append(f"https://www.zillow.com{href}")