# requests, bs4

In [15]:
from bs4 import BeautifulSoup

# NOTE: `html` is assigned in the cell that follows this one, so on a fresh
# kernel that cell has to be executed before this line can run.
# First argument: the object (markup) to parse
# Second argument: the parser to use
soup = BeautifulSoup(html, 'html.parser') 

In [16]:
# Sample HTML document used by the parsing examples: a <head> with a <title>,
# and a <body> holding two <div class='Example'> elements (id 'first' with an
# <h3> and one <p>, id 'second' with three <p> elements).
html = '''
<html>
  <head>
    <title>Example HTML</title>
  </head>
  <body>
    <div id='first' class='Example'>
      <h3 title='Good Content Title'>Contents Title</h3>
      <p>Test contents</p>
    </div>
    <div id='second' class='Example'>
      <p>Text1</p>
      <p>Text2</p>
      <p>Text3</p>
    </div>
  </body>
</html>
'''

In [17]:
# Look up an <h3> element with find(), print its markup, then let the cell
# display the element's text content as its result.
tag = soup.find('h3')
print(tag)
tag.get_text()

<h3 title="Good Content Title">Contents Title</h3>


'Contents Title'

In [18]:
# Collect every <p> and <h3> element. Several tag names are passed as a list,
# which is the documented way to give find_all() more than one name to match
# (the previous set literal only worked because it happens to be iterable).
soup.find_all(['p', 'h3'])

[<h3 title="Good Content Title">Contents Title</h3>,
 <p>Test contents</p>,
 <p>Text1</p>,
 <p>Text2</p>,
 <p>Text3</p>]

In [19]:
# Find a <div> whose id attribute equals 'second' (attribute filter passed as a keyword argument).
soup.find('div', id='second')

<div class="Example" id="second">
<p>Text1</p>
<p>Text2</p>
<p>Text3</p>
</div>

In [20]:
# Same lookup as above, but with the attribute filters bundled in one dict
# so that `class` (a reserved word in Python) can be matched as well.
attrs = {'id': 'second', 'class': 'Example'}
soup.find('div', attrs=attrs)

<div class="Example" id="second">
<p>Text1</p>
<p>Text2</p>
<p>Text3</p>
</div>

# Beautiful Soup 예제

In [30]:
import requests
import re
from bs4 import BeautifulSoup

# HTTP headers sent with each requests.get() call below: a desktop Chrome User-Agent string.
# NOTE(review): presumably needed because the site rejects the default client User-Agent — confirm.
headers = {"User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.89 Safari/537.36"}

In [34]:
# Search-result URL for the query "노트북" (laptop); `{}` receives the page number.
SEARCH_URL = "https://www.coupang.com/np/search?q=%EB%85%B8%ED%8A%B8%EB%B6%81&channel=user&component=&eventCategory=SRP&trcid=&traid=&sorter=scoreDesc&minPrice=&maxPrice=&priceRange=&filterType=&listSize=36&filter=&isPriceRange=false&brand=&offerCondition=&rating=0&page={}&rocketAll=false&searchIndexingToken=1=4&backgroundColor="
PRODUCT_BASE_URL = "https://www.coupang.com"
MIN_RATING = 4.5          # only report products rated at least this high
MIN_REVIEW_COUNT = 100    # ...and with at least this many ratings
REQUEST_TIMEOUT = 10      # seconds; without a timeout requests.get() can block forever


def parse_product(item):
    """Extract the fields of one search-result entry.

    Args:
        item: BeautifulSoup tag of one ``<li>`` whose class starts with
            "search-product".

    Returns:
        A dict with the keys ``name``, ``price`` (text or None), ``rate``
        (rating text), ``rating`` (float), ``review_count`` (int) and ``link``
        (href as found in the page), or None when the entry should be skipped:
        it is an advertisement, its name contains "Apple", or its name,
        rating, rating count or link is missing or not numeric.
    """
    # Skip advertised products.
    if item.find("span", attrs={"class": "ad-badge-text"}) is not None:
        return None

    name_tag = item.find("div", attrs={"class": "name"})  # product name
    if name_tag is None:
        return None
    name = name_tag.get_text()
    # Skip Apple products.
    if "Apple" in name:
        return None

    price_tag = item.find("strong", attrs={"class": "price-value"})
    price = price_tag.get_text() if price_tag is not None else None

    rate_tag = item.find("em", attrs={"class": "rating"})                    # rating
    count_tag = item.find("span", attrs={"class": "rating-total-count"})     # number of ratings
    link_tag = item.find("a", attrs={"class": "search-product-link"})
    if rate_tag is None or count_tag is None or link_tag is None:
        return None
    link = link_tag.get("href")
    if not link:
        return None

    rate = rate_tag.get_text()
    # The count is rendered like "(26)"; keep only the digits so that
    # surrounding whitespace or thousands separators do not break int().
    count_digits = re.sub(r"\D", "", count_tag.get_text())
    if not count_digits:
        return None
    try:
        rating = float(rate)
    except ValueError:
        return None

    return {
        "name": name,
        "price": price,
        "rate": rate,
        "rating": rating,
        "review_count": int(count_digits),
        "link": link,
    }


# Walk result pages 1-5 and print products with a rating of at least
# MIN_RATING backed by at least MIN_REVIEW_COUNT ratings.
for page_no in range(1, 6):
    res = requests.get(SEARCH_URL.format(page_no), headers=headers, timeout=REQUEST_TIMEOUT)
    res.raise_for_status()
    page_soup = BeautifulSoup(res.text, "lxml")

    items = page_soup.find_all("li", attrs={"class": re.compile("^search-product")})

    for item in items:
        product = parse_product(item)
        if product is None:
            continue
        if product["rating"] < MIN_RATING or product["review_count"] < MIN_REVIEW_COUNT:
            continue

        print(f"제품명 : {product['name']}")
        print(f"가격 : {product['price']}")
        print(f"평점 : {product['rate']}점 ({product['review_count']}개)")
        print("바로가기 : {}".format(PRODUCT_BASE_URL + product["link"]))
        print("-"*100)  # separator line
            

제품명 : LG전자 2020 울트라 PC 14, 화이트, 셀러론, 128GB, 4GB, WIN10 Home, 14U390-ME1TK
가격 : 417,000
평점 : 4.5점 (373개)
바로가기 : https://www.coupang.com/vp/products/4841548763?itemId=6257652939&vendorItemId=73553348848
----------------------------------------------------------------------------------------------------
제품명 : 삼성전자 2021 갤럭시북 15.6, 미스틱 블루, 코어i5 11세대, 512GB, 16GB, WIN10 Home, NT750XDZ-A51AU
가격 : 1,161,150
평점 : 5.0점 (614개)
바로가기 : https://www.coupang.com/vp/products/5540996827?itemId=8724803682&vendorItemId=76011850910&pickType=COU_PICK
----------------------------------------------------------------------------------------------------
제품명 : 레노버 2021 노트북 15.6, Abyss Blue, ideaPad Slim3-15ALC R5 82KU, 라이젠5, 256GB, 20GB, Free DOS
가격 : 557,800
평점 : 5.0점 (179개)
바로가기 : https://www.coupang.com/vp/products/6148861528?itemId=11853952978&vendorItemId=79127153424
----------------------------------------------------------------------------------------------------
제품명 : LG전자 2020 울트라 PC 14, 화이트, 셀러론, 128G

제품명 : 한성컴퓨터 2021 TFG 17.3, 블랙, 라이젠7 3세대, 500GB, 16GB, Free DOS, TFG7476HS
가격 : 1,459,000
평점 : 5.0점 (138개)
바로가기 : https://www.coupang.com/vp/products/5196944537?itemId=7232945470&vendorItemId=74524493094
----------------------------------------------------------------------------------------------------
제품명 : LG 울트라PC 15U560 6세대 i5 지포스940M 15.6인치 윈도우10, SSD 128GB + HDD 500GB, 8GB, 포함
가격 : 469,000
평점 : 4.5점 (171개)
바로가기 : https://www.coupang.com/vp/products/6241227771?itemId=12599056071&vendorItemId=76740961824
----------------------------------------------------------------------------------------------------
제품명 : 삼성전자 2020 갤럭시북 이온 15.6, 아우라 실버, 코어i5 10세대, 256GB, 8GB, WIN10 Home, NT950XCR-G58A
가격 : 1,639,000
평점 : 5.0점 (224개)
바로가기 : https://www.coupang.com/vp/products/4672371918?itemId=2098033655&vendorItemId=70096918925
----------------------------------------------------------------------------------------------------
제품명 : 삼성전자 2021 갤럭시북 15.6, 미스틱 블루, 코어i5 11세대, 512GB, 16GB, WIN10 Hom

# Selenium 예제 

In [1]:
from selenium import webdriver
from selenium.webdriver.common.by import By

In [22]:
# Start a Chrome session and open the Naver flight-search page.
browser = webdriver.Chrome()
url = "https://flight.naver.com/"
browser.get(url)  # navigate to the URL
# find_element_by_xpath() was removed in Selenium 4.3; find_element(By.XPATH, ...)
# is the supported form and also works on older releases.
# NOTE(review): the XPath is positional, so it breaks whenever the page layout changes.
browser.find_element(By.XPATH, '//*[@id="__next"]/div/div[1]/div[4]/div/div/div[2]/div[2]/button[1]').click()

In [23]:
# Click the button in row 3, column 5 of the calendar table on the page.
# (Original author's note: "[0] -> this month".)
# find_element(By.XPATH, ...) replaces find_element_by_xpath(), which was removed in Selenium 4.3.
browser.find_element(By.XPATH, '//*[@id="__next"]/div/div[1]/div[9]/div[2]/div[1]/div[2]/div/div[2]/table/tbody/tr[3]/td[5]/button').click()

# Selenium 예제 (과제)

In [39]:
cine21_url = 'http://www.cine21.com/rank/person/'
# NOTE(review): machine-specific absolute path to chromedriver; newer Selenium
# releases also deprecate passing it positionally — confirm against the
# installed Selenium version.
path = '/Applications/chromedriver'
options = webdriver.ChromeOptions()
options.add_argument('headless')
REQUEST_TIMEOUT = 10  # seconds; without a timeout requests.get() can block forever


def fetch_actor_details(actor_link):
    """Download one actor's profile page and return its "default info" entries.

    Args:
        actor_link: absolute URL of the actor's profile page.

    Returns:
        dict mapping the text of each entry's ``span.tit`` label to the
        remaining text of that ``<li>`` (markup stripped). Empty when the page
        has no ``ul.default_info`` block.

    Raises:
        requests.HTTPError: when the profile page answers with an error status.
    """
    response_actor = requests.get(actor_link, timeout=REQUEST_TIMEOUT)
    response_actor.raise_for_status()
    soup_actor = BeautifulSoup(response_actor.text, 'html.parser')

    details = dict()
    default_info = soup_actor.select_one('ul.default_info')
    if default_info is None:
        return details

    for actor_detail in default_info.select('li'):
        label = actor_detail.select_one('span.tit')
        if label is None:
            continue
        # Drop the label <span>, then strip every remaining tag to keep only the value text.
        actor_detail_value = re.sub('<span.*>.*</span>', '', str(actor_detail))
        actor_detail_value = re.sub('<.*?>', '', actor_detail_value)
        details[label.text] = actor_detail_value
    return details


actors_info_list = list()

# A single driver is created (the setup used to be pasted twice, which started
# two headless Chrome processes) and it is always shut down afterwards.
driver = webdriver.Chrome(path, options = options)
try:
    driver.get(cine21_url)

    for page_no in range(1, 11):
        # find_element(By.XPATH, ...) replaces find_element_by_xpath(), removed in Selenium 4.3.
        page = driver.find_element(By.XPATH, '//*[@id="rank_holder"]/div/div/a[{}]'.format(page_no))
        page.click()
        # NOTE(review): page_source is read right after the click; if the ranking
        # list is loaded asynchronously this may still be the previous page — confirm.
        page_soup = BeautifulSoup(driver.page_source, 'html.parser')
        actors = page_soup.select('li.people_li div.name')
        performance = [node.find('strong').text
                       for node in page_soup.select('li.people_li ul.num_info')]
        rank = [node.text for node in page_soup.select('li.people_li span.grade')]

        # The three selections are assumed to line up one-to-one per actor.
        for actor, actor_performance, actor_rank in zip(actors, performance, rank):
            actor_info_dict = dict()
            # Remove a parenthesised suffix from the displayed name.
            actor_info_dict['이름'] = re.sub(r'\(\w*\)', '', actor.text)
            actor_info_dict['흥행지수'] = actor_performance
            actor_info_dict['랭킹'] = actor_rank

            actor_link = 'http://www.cine21.com' + actor.select_one('a')['href']
            actor_info_dict.update(fetch_actor_details(actor_link))

            actors_info_list.append(actor_info_dict)
finally:
    driver.quit()

In [40]:
# Display the collected records: one dict per actor, as appended by the scraping cell above.
actors_info_list

[{'이름': '강하늘',
  '흥행지수': '79,188',
  '랭킹': '1',
  '다른 이름': '김하늘',
  '직업': '배우',
  '생년월일': '1990-02-21',
  '성별': '남',
  '홈페이지': '\nhttp://weibo.com/galpos3?is_hot=1\n',
  '신장/체중': '181cm, 70kg',
  '학교': '중앙대학교 연극학과'},
 {'이름': '한효주',
  '흥행지수': '67,411',
  '랭킹': '2',
  '직업': '배우',
  '생년월일': '1987-02-22',
  '성별': '여',
  '홈페이지': '\nhttps://www.facebook.com/hhj.official\n',
  '신장/체중': '170cm',
  '학교': '동국대학교 연극영화',
  '취미': '영화감상'},
 {'이름': '이광수',
  '흥행지수': '59,525',
  '랭킹': '3',
  '직업': '배우',
  '생년월일': '1985-07-14',
  '성별': '남',
  '홈페이지': '\nhttps://twitter.com/masijacoke85\nhttps://www.instagram.com/masijacoke850714/\n',
  '신장/체중': '190cm',
  '소속사': '킹콩엔터테인먼트'},
 {'이름': '권상우',
  '흥행지수': '48,151',
  '랭킹': '4',
  '다른 이름': 'Kwon Sang Woo',
  '직업': '배우',
  '생년월일': '1976-08-05',
  '성별': '남',
  '신장/체중': '183cm, 72kg',
  '학교': '한남대학교 미술교육학 학사',
  '취미': '수영, 헬스, 복싱',
  '특기': '농구',
  '소속사': '벨액터스 엔터테인먼트'},
 {'이름': '조진웅',
  '흥행지수': '45,664',
  '랭킹': '5',
  '다른 이름': '조원준',
  '직업': '배우',
  '생년월일': '197