# RDBMS (관계형 데이터베이스)
- SQL 언어를 사용
- 스키마(규격)가 엄격해서, 데이터의 형식이 달라지면 그때마다 스키마를 수정해 주어야 한다
- MySQL, Oracle, PostgreSQL, SQLite, ...

### NoSQL
- sql을 사용하지 않음
- 고정된 스키마가 없다
- 정해진 규격이 엄격하지 않다
- MongoDB, Redis, HBase, Cassandra

### MongoDB
- JSON 구조로 data(document)를 관리
- SQL : database > table > data(row, column)
- MongoDB : database > collection > document



In [1]:
!pip install pymongo

Collecting pymongo
  Downloading pymongo-4.0.1-cp39-cp39-win_amd64.whl (354 kB)
Installing collected packages: pymongo
Successfully installed pymongo-4.0.1


In [1]:
import pymongo

In [2]:
conn = pymongo.MongoClient()

In [3]:
tdb = conn['testdb']

In [4]:
col_it = tdb['it']    # handle to the 'it' collection; no separate create step is needed before using it

In [5]:
# One blog-post document; the 'tags' value shows that a field may hold a list.
post = {
    'author': 'Mike',
    'text': 'my first blog post',
    'tags': ['mongodb', 'python', 'pymongo'],
}
col_it.insert_one(post)

<pymongo.results.InsertOneResult at 0x2b628463640>

In [6]:
results = col_it.find()
for r in results:
    print(r)

{'_id': ObjectId('62034ec71f231e121c530c3e'), 'author': 'Mike', 'text': 'my first blog post', 'tags': ['mongodb', 'python', 'pymongo']}
{'_id': ObjectId('62034f961f231e121c530c40'), 'author': 'Dave Ahn', 'age': 25}
{'_id': ObjectId('620351061f231e121c530c43'), 'title': '암살', 'castings': ['이정재', '전지현', '하정우']}
{'_id': ObjectId('6203518a1f231e121c530c44'), 'title': '실미도', 'castings': ['설경구', '안성기'], 'datetime': {'year': '2003', 'month': 3, 'val': {'a': {'b': 1}}}}
{'_id': ObjectId('620352121f231e121c530c45'), 'name': 'aaron', 'age': 20}
{'_id': ObjectId('620352121f231e121c530c46'), 'name': 'bob', 'age': 30}
{'_id': ObjectId('620352121f231e121c530c47'), 'name': 'cathy', 'age': 25}
{'_id': ObjectId('620352121f231e121c530c48'), 'name': 'david', 'age': 27}
{'_id': ObjectId('620352121f231e121c530c49'), 'name': 'erick', 'age': 28}
{'_id': ObjectId('620352121f231e121c530c4a'), 'name': 'fox', 'age': 32}
{'_id': ObjectId('620352121f231e121c530c4b'), 'name': 'hmm'}
{'_id': ObjectId('62046b14a31950

In [10]:
col_it.insert_one({'author' : 'Dave Lee', 'age' : 45})

<pymongo.results.InsertOneResult at 0x26149744d00>

In [11]:
results = col_it.find()
for r in results:
    print(r)

{'_id': ObjectId('62034ec71f231e121c530c3e'), 'author': 'Mike', 'text': 'my first blog post', 'tags': ['mongodb', 'python', 'pymongo']}
{'_id': ObjectId('62034f221f231e121c530c3f'), 'author': 'Dave Lee', 'age': 45}


* insert_many()

In [7]:
# Insert several documents with a single call by passing a list of dicts.
col_it.insert_many([
    {'author': 'Dave Ahn', 'age': 25},
    {'author': 'Dave', 'age': 35},
])

<pymongo.results.InsertManyResult at 0x2b627f25d00>

In [13]:
results = col_it.find()
for r in results:
    print(r)

{'_id': ObjectId('62034ec71f231e121c530c3e'), 'author': 'Mike', 'text': 'my first blog post', 'tags': ['mongodb', 'python', 'pymongo']}
{'_id': ObjectId('62034f221f231e121c530c3f'), 'author': 'Dave Lee', 'age': 45}
{'_id': ObjectId('62034f961f231e121c530c40'), 'author': 'Dave Ahn', 'age': 25}
{'_id': ObjectId('62034f961f231e121c530c41'), 'author': 'Dave', 'age': 35}


* document insert 하면서, _id(primary key)를 확인하는 법

In [14]:
post = {'author' : 'Dave', 'text' : 'my first blog post'}

# NOTE: despite its name, post_id holds the InsertOneResult returned by
# insert_one(); the generated _id is read from its .inserted_id attribute.
post_id = col_it.insert_one(post)
post_id

<pymongo.results.InsertOneResult at 0x2614a735f00>

In [15]:
post_id.inserted_id

ObjectId('6203501a1f231e121c530c42')

* document count

In [17]:
col_it.count_documents({})

5

In [19]:
# Collection.count() is not available in the PyMongo 4.x installed above;
# count_documents({}) is used instead.
# col_it.count()

* 입력 가능한 값 : 딕셔너리({}), 리스트, 중첩 딕셔너리

In [20]:
col_it.insert_one({'title' : '암살', 'castings' : ['이정재', '전지현', '하정우']})

<pymongo.results.InsertOneResult at 0x2614974c100>

In [21]:
# A document may nest dictionaries to any depth
# (here: 'datetime' -> 'val' -> 'a' -> 'b').
col_it.insert_one({
    'title': '실미도',
    'castings': ['설경구', '안성기'],
    'datetime': {
        'year': '2003',
        'month': 3,
        'val': {'a': {'b': 1}},
    },
})

<pymongo.results.InsertOneResult at 0x2614a73b900>

In [22]:
# Sample people documents; the last one has no 'age' field.
data = [
    {'name': 'aaron', 'age': 20},
    {'name': 'bob', 'age': 30},
    {'name': 'cathy', 'age': 25},
    {'name': 'david', 'age': 27},
    {'name': 'erick', 'age': 28},
    {'name': 'fox', 'age': 32},
    {'name': 'hmm'},
]

col_it.insert_many(data)

<pymongo.results.InsertManyResult at 0x26149741340>

In [23]:
col_it.count_documents({})

14

### document 검색하기

* find_one( {key : value} )

In [24]:
col_it.find_one()

{'_id': ObjectId('62034ec71f231e121c530c3e'),
 'author': 'Mike',
 'text': 'my first blog post',
 'tags': ['mongodb', 'python', 'pymongo']}

In [26]:
results = col_it.find()
for r in results:
    print(r)

{'_id': ObjectId('62034ec71f231e121c530c3e'), 'author': 'Mike', 'text': 'my first blog post', 'tags': ['mongodb', 'python', 'pymongo']}
{'_id': ObjectId('62034f221f231e121c530c3f'), 'author': 'Dave Lee', 'age': 45}
{'_id': ObjectId('62034f961f231e121c530c40'), 'author': 'Dave Ahn', 'age': 25}
{'_id': ObjectId('62034f961f231e121c530c41'), 'author': 'Dave', 'age': 35}
{'_id': ObjectId('6203501a1f231e121c530c42'), 'author': 'Dave', 'text': 'my first blog post'}
{'_id': ObjectId('620351061f231e121c530c43'), 'title': '암살', 'castings': ['이정재', '전지현', '하정우']}
{'_id': ObjectId('6203518a1f231e121c530c44'), 'title': '실미도', 'castings': ['설경구', '안성기'], 'datetime': {'year': '2003', 'month': 3, 'val': {'a': {'b': 1}}}}
{'_id': ObjectId('620352121f231e121c530c45'), 'name': 'aaron', 'age': 20}
{'_id': ObjectId('620352121f231e121c530c46'), 'name': 'bob', 'age': 30}
{'_id': ObjectId('620352121f231e121c530c47'), 'name': 'cathy', 'age': 25}
{'_id': ObjectId('620352121f231e121c530c48'), 'name': 'david', 'a

In [31]:
col_it.find_one({'author' : 'Dave'})

{'_id': ObjectId('62034f961f231e121c530c41'), 'author': 'Dave', 'age': 35}

In [29]:
results = col_it.find({'author' : 'Dave'})

for r in results:
    print(r)

{'_id': ObjectId('62034f961f231e121c530c41'), 'author': 'Dave', 'age': 35}
{'_id': ObjectId('6203501a1f231e121c530c42'), 'author': 'Dave', 'text': 'my first blog post'}


In [32]:
col_it.count_documents( {'author' : 'Dave'})

2

In [33]:
# Sort by 'age' (ascending order, as the output below shows); documents that
# have no 'age' field are listed first.
for r in col_it.find().sort('age'):
    print(r)

{'_id': ObjectId('62034ec71f231e121c530c3e'), 'author': 'Mike', 'text': 'my first blog post', 'tags': ['mongodb', 'python', 'pymongo']}
{'_id': ObjectId('6203501a1f231e121c530c42'), 'author': 'Dave', 'text': 'my first blog post'}
{'_id': ObjectId('620351061f231e121c530c43'), 'title': '암살', 'castings': ['이정재', '전지현', '하정우']}
{'_id': ObjectId('6203518a1f231e121c530c44'), 'title': '실미도', 'castings': ['설경구', '안성기'], 'datetime': {'year': '2003', 'month': 3, 'val': {'a': {'b': 1}}}}
{'_id': ObjectId('620352121f231e121c530c4b'), 'name': 'hmm'}
{'_id': ObjectId('620352121f231e121c530c45'), 'name': 'aaron', 'age': 20}
{'_id': ObjectId('62034f961f231e121c530c40'), 'author': 'Dave Ahn', 'age': 25}
{'_id': ObjectId('620352121f231e121c530c47'), 'name': 'cathy', 'age': 25}
{'_id': ObjectId('620352121f231e121c530c48'), 'name': 'david', 'age': 27}
{'_id': ObjectId('620352121f231e121c530c49'), 'name': 'erick', 'age': 28}
{'_id': ObjectId('620352121f231e121c530c46'), 'name': 'bob', 'age': 30}
{'_id': Ob

### document update : update_one(), update_many()

In [34]:
col_it.find_one( {'author' : 'Dave'})

{'_id': ObjectId('62034f961f231e121c530c41'), 'author': 'Dave', 'age': 35}

In [35]:
# Only the first document matching the filter is modified; '$set' adds the
# 'text' field because that document did not have one yet.
col_it.update_one( {'author' : 'Dave'}, 
                   {'$set' : {'text' : 'Hi Dave'}})

<pymongo.results.UpdateResult at 0x2614a72cd80>

In [43]:
for d in col_it.find( {'author' : 'Dave'} ):
    print(d)

{'_id': ObjectId('62034f961f231e121c530c41'), 'author': 'Dave', 'age': 40, 'text': 'Hi Dave'}
{'_id': ObjectId('6203501a1f231e121c530c42'), 'author': 'Dave', 'text': 'my first blog post'}


In [40]:
col_it.update_one( {'author' : 'Dave'},
                  {'$set' : {'age' : 40}}) 

<pymongo.results.UpdateResult at 0x26149733b80>

In [42]:
for d in col_it.find( {'author' : 'Dave'} ):
    print(d)

{'_id': ObjectId('62034f961f231e121c530c41'), 'author': 'Dave', 'age': 40, 'text': 'Hi Dave'}
{'_id': ObjectId('6203501a1f231e121c530c42'), 'author': 'Dave', 'text': 'my first blog post'}


In [44]:
# update_many() applies the '$set' to every document matching the filter.
col_it.update_many( {'author' : 'Dave'},
                  {'$set' : {'text' : 'hi dave'}})

<pymongo.results.UpdateResult at 0x2614a72c140>

In [45]:
for d in col_it.find( {'author' : 'Dave'} ):
    print(d)

{'_id': ObjectId('62034f961f231e121c530c41'), 'author': 'Dave', 'age': 40, 'text': 'hi dave'}
{'_id': ObjectId('6203501a1f231e121c530c42'), 'author': 'Dave', 'text': 'hi dave'}


### document delete : delete_one(), delete_many()

In [46]:
for d in col_it.find( {'author' : 'Dave Lee'} ):
    print(d)

{'_id': ObjectId('62034f221f231e121c530c3f'), 'author': 'Dave Lee', 'age': 45}


In [47]:
col_it.delete_one( {'author' : 'Dave Lee'})

<pymongo.results.DeleteResult at 0x2614a7dd600>

In [49]:
for d in col_it.find( {'author' : 'Dave Lee'} ):
    print(d)
    
# Nothing is printed: the only matching document was deleted above.

In [50]:
for d in col_it.find( {'author' : 'Dave'} ):
    print(d)

{'_id': ObjectId('62034f961f231e121c530c41'), 'author': 'Dave', 'age': 40, 'text': 'hi dave'}
{'_id': ObjectId('6203501a1f231e121c530c42'), 'author': 'Dave', 'text': 'hi dave'}


In [51]:
col_it.delete_many( {'author' : 'Dave'})

<pymongo.results.DeleteResult at 0x261490f70c0>

In [53]:
for d in col_it.find( {'author' : 'Dave'} ):
    print(d)
    
# Nothing is printed: every matching document was deleted above.

In [59]:
boos = conn.books   # new database 'books' ('boos' looks like a typo for 'books'; kept because a later cell uses this name)

In [60]:
it_book = boos.it_books    # new collection 'it_books' inside that database

In [61]:
# Build 100 sample book documents that differ only in 'number' (0..99).
# A comprehension replaces the append loop and keeps the loop index out of the
# notebook's global namespace.
data = [
    {'author': 'Dave Lee', 'publisher': 'bit_company', 'number': index}
    for index in range(100)
]

In [62]:
data

[{'author': 'Dave Lee', 'publisher': 'bit_company', 'number': 0},
 {'author': 'Dave Lee', 'publisher': 'bit_company', 'number': 1},
 {'author': 'Dave Lee', 'publisher': 'bit_company', 'number': 2},
 {'author': 'Dave Lee', 'publisher': 'bit_company', 'number': 3},
 {'author': 'Dave Lee', 'publisher': 'bit_company', 'number': 4},
 {'author': 'Dave Lee', 'publisher': 'bit_company', 'number': 5},
 {'author': 'Dave Lee', 'publisher': 'bit_company', 'number': 6},
 {'author': 'Dave Lee', 'publisher': 'bit_company', 'number': 7},
 {'author': 'Dave Lee', 'publisher': 'bit_company', 'number': 8},
 {'author': 'Dave Lee', 'publisher': 'bit_company', 'number': 9},
 {'author': 'Dave Lee', 'publisher': 'bit_company', 'number': 10},
 {'author': 'Dave Lee', 'publisher': 'bit_company', 'number': 11},
 {'author': 'Dave Lee', 'publisher': 'bit_company', 'number': 12},
 {'author': 'Dave Lee', 'publisher': 'bit_company', 'number': 13},
 {'author': 'Dave Lee', 'publisher': 'bit_company', 'number': 14},
 {'au

In [63]:
it_book.insert_many(data)

<pymongo.results.InsertManyResult at 0x261490c02c0>

In [64]:
docs = it_book.find()

for doc in docs:
    print(doc)

{'_id': ObjectId('620356ea1f231e121c530c4c'), 'author': 'Dave Lee', 'publisher': 'bit_company', 'number': 0}
{'_id': ObjectId('620356ea1f231e121c530c4d'), 'author': 'Dave Lee', 'publisher': 'bit_company', 'number': 1}
{'_id': ObjectId('620356ea1f231e121c530c4e'), 'author': 'Dave Lee', 'publisher': 'bit_company', 'number': 2}
{'_id': ObjectId('620356ea1f231e121c530c4f'), 'author': 'Dave Lee', 'publisher': 'bit_company', 'number': 3}
{'_id': ObjectId('620356ea1f231e121c530c50'), 'author': 'Dave Lee', 'publisher': 'bit_company', 'number': 4}
{'_id': ObjectId('620356ea1f231e121c530c51'), 'author': 'Dave Lee', 'publisher': 'bit_company', 'number': 5}
{'_id': ObjectId('620356ea1f231e121c530c52'), 'author': 'Dave Lee', 'publisher': 'bit_company', 'number': 6}
{'_id': ObjectId('620356ea1f231e121c530c53'), 'author': 'Dave Lee', 'publisher': 'bit_company', 'number': 7}
{'_id': ObjectId('620356ea1f231e121c530c54'), 'author': 'Dave Lee', 'publisher': 'bit_company', 'number': 8}
{'_id': ObjectId('6

In [66]:
it_book.update_many({}, {'$set' : {'publisher' : 'bit_camp_pub'}})

<pymongo.results.UpdateResult at 0x26149208a80>

In [67]:
docs = it_book.find()

for doc in docs:
    print(doc)

{'_id': ObjectId('620356ea1f231e121c530c4c'), 'author': 'Dave Lee', 'publisher': 'bit_camp_pub', 'number': 0}
{'_id': ObjectId('620356ea1f231e121c530c4d'), 'author': 'Dave Lee', 'publisher': 'bit_camp_pub', 'number': 1}
{'_id': ObjectId('620356ea1f231e121c530c4e'), 'author': 'Dave Lee', 'publisher': 'bit_camp_pub', 'number': 2}
{'_id': ObjectId('620356ea1f231e121c530c4f'), 'author': 'Dave Lee', 'publisher': 'bit_camp_pub', 'number': 3}
{'_id': ObjectId('620356ea1f231e121c530c50'), 'author': 'Dave Lee', 'publisher': 'bit_camp_pub', 'number': 4}
{'_id': ObjectId('620356ea1f231e121c530c51'), 'author': 'Dave Lee', 'publisher': 'bit_camp_pub', 'number': 5}
{'_id': ObjectId('620356ea1f231e121c530c52'), 'author': 'Dave Lee', 'publisher': 'bit_camp_pub', 'number': 6}
{'_id': ObjectId('620356ea1f231e121c530c53'), 'author': 'Dave Lee', 'publisher': 'bit_camp_pub', 'number': 7}
{'_id': ObjectId('620356ea1f231e121c530c54'), 'author': 'Dave Lee', 'publisher': 'bit_camp_pub', 'number': 8}
{'_id': Ob

In [68]:
# '$gte' matches number >= 6, so only the documents numbered 0-5 remain afterwards.
it_book.delete_many( {'number' : {'$gte' : 6}})

<pymongo.results.DeleteResult at 0x2614a73b880>

In [69]:
docs = it_book.find()

for doc in docs:
    print(doc)

{'_id': ObjectId('620356ea1f231e121c530c4c'), 'author': 'Dave Lee', 'publisher': 'bit_camp_pub', 'number': 0}
{'_id': ObjectId('620356ea1f231e121c530c4d'), 'author': 'Dave Lee', 'publisher': 'bit_camp_pub', 'number': 1}
{'_id': ObjectId('620356ea1f231e121c530c4e'), 'author': 'Dave Lee', 'publisher': 'bit_camp_pub', 'number': 2}
{'_id': ObjectId('620356ea1f231e121c530c4f'), 'author': 'Dave Lee', 'publisher': 'bit_camp_pub', 'number': 3}
{'_id': ObjectId('620356ea1f231e121c530c50'), 'author': 'Dave Lee', 'publisher': 'bit_camp_pub', 'number': 4}
{'_id': ObjectId('620356ea1f231e121c530c51'), 'author': 'Dave Lee', 'publisher': 'bit_camp_pub', 'number': 5}


In [70]:
# crawling cine21

In [10]:
import requests
from bs4 import BeautifulSoup

In [126]:
url = 'http://www.cine21.com/rank/person'

In [127]:
# Fetch and parse the ranking page. The timeout keeps the cell from hanging
# forever on an unresponsive server, and raise_for_status() stops on an HTTP
# error instead of silently parsing an error page.
res = requests.get(url, timeout=10)
res.raise_for_status()
main_soup = BeautifulSoup(res.text, 'html.parser')
main_soup

<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">

<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta content="1641311652800771" property="fb:pages"/>
<meta content="vTM0gmeRzJwn1MIM1LMSp3cxP_SaBzch1ziRY255RHw" name="google-site-verification"/>
<meta content="5yOe6b_e_3rr7vNDwgXJw_8wLZQGx4lJ_V48KNPrqkA" name="google-site-verification"/>
<meta content="20defde86fc4464f2693891567a98905bd0a60d1" name="naver-site-verification"/>
<meta content="dmds9ks357rhqvdnk" name="dailymotion-domain-verification"/>
<meta content="text/html; charset=utf-8" http-equiv="Content-Type"/>
<meta content="IE=edge" http-equiv="X-UA-Compatible"/>
<title>씨네21</title>
<link href="/inc/www/css/default1.css" media="all" rel="stylesheet" type="text/css"/>
<link href="/inc/www/css/content1.css" media="all" rel="stylesheet" type="text/css"/>
<script src="https://ajax.googleapis.com/ajax/libs/jquery/1.12.0/jquery.min.js"></script>
<meta conten

In [129]:
main_soup.select('div#rank_holder')

[<div id="rank_holder"></div>]

##### post crawling

In [8]:
import re

In [131]:
url = 'http://www.cine21.com/rank/person/content'

In [132]:
# Form fields sent with the ranking request; 'period_start' takes the month
# chosen here.
month = '2022-01'
data = dict(section='actor', period_start=month, gender='all', page=1)

In [133]:
# POST the form fields and parse the returned HTML fragment. The timeout keeps
# the cell from hanging forever, and raise_for_status() stops on an HTTP error
# instead of silently parsing an error page.
res = requests.post(url, data=data, timeout=10)
res.raise_for_status()
main_soup = BeautifulSoup(res.text, 'html.parser')
main_soup

 <ul class="people_list">
<li class="people_li">
<a href="/db/person/info/?person_id=78487"><img alt="" class="people_thumb" src="https://image.cine21.com/resize/cine21/still/2017/1207/15_06_46__5a28da76c2e01[X145,145].jpg" target="_blank"/></a>
<div class="name"><a href="/db/person/info/?person_id=78487">강하늘(2편)</a></div>
<ul class="num_info">
<li><span class="tit">흥행지수</span><strong>80,090</strong></li>
<!--
						<li><a href="#" class="btn_graph"><span class="ico"></span><span>흥행성적<br />그래프로 보기</span></a></li>
						-->
</ul>
<!-- 영화포스터는 최대 5개까지만 -->
<ul class="mov_list">
<li>
<a href="/movie/info/?movie_id=56540">
<img alt="" class="thumb" src="https://image.cine21.com/resize/cine21/poster/2022/0127/56540_61f1fcfdd84ce[X85,120].jpg" target="_blank"/>
<span>해적: 도깨비 깃발</span>
</a>
</li>
<li>
<a href="/movie/info/?movie_id=57948">
<img alt="" class="thumb" src="https://image.cine21.com/resize/cine21/poster/2021/1213/11_07_08__61b6aacc130e8[X85,120].jpg" target="_blank"/>
<span>해피 뉴 이어

In [88]:
# Select the name <div> of every ranked person. This must query main_soup (the
# parsed ranking response); `soup` is not assigned until a later cell, so using
# it here fails with NameError when the notebook is run from top to bottom.
tags = main_soup.select('li.people_li > div.name')
len(tags)

7

In [89]:
tags

[<div class="name"><a href="/db/person/info/?person_id=78487">강하늘(2편)</a></div>,
 <div class="name"><a href="/db/person/info/?person_id=56311">한효주(1편)</a></div>,
 <div class="name"><a href="/db/person/info/?person_id=71308">이광수(2편)</a></div>,
 <div class="name"><a href="/db/person/info/?person_id=15225">권상우(1편)</a></div>,
 <div class="name"><a href="/db/person/info/?person_id=60358">조진웅(2편)</a></div>,
 <div class="name"><a href="/db/person/info/?person_id=20772">박희순(1편)</a></div>,
 <div class="name"><a href="/db/person/info/?person_id=95811">채수빈(1편)</a></div>]

In [101]:
main_url = 'http://www.cine21.com'

for t in tags:
    # The href is site-relative, so prefix it with the site root.
    print(main_url + t.select('a')[0]['href'])
    # Drop the parenthesised suffix that follows each name. The pattern is a raw
    # string because '\(' and '\w' are invalid escapes in a normal literal.
    print(re.sub(r'\(\w+\)', '', t.text))

http://www.cine21.com/db/person/info/?person_id=78487
강하늘
http://www.cine21.com/db/person/info/?person_id=56311
한효주
http://www.cine21.com/db/person/info/?person_id=71308
이광수
http://www.cine21.com/db/person/info/?person_id=15225
권상우
http://www.cine21.com/db/person/info/?person_id=60358
조진웅
http://www.cine21.com/db/person/info/?person_id=20772
박희순
http://www.cine21.com/db/person/info/?person_id=95811
채수빈


In [105]:
actor_url = 'http://www.cine21.com/db/person/info/?person_id=78487'

In [107]:
# Fetch one actor's detail page and pick out the profile list(s). The timeout
# keeps the cell from hanging forever, and raise_for_status() stops on an HTTP
# error instead of silently parsing an error page.
res = requests.get(actor_url, timeout=10)
res.raise_for_status()
soup = BeautifulSoup(res.text, 'html.parser')
actor_datas = soup.select('ul.default_info')
actor_datas

[<ul class="default_info">
 <li><span class="tit">다른 이름</span>김하늘</li>
 <li><span class="tit">직업</span>배우</li>
 <li><span class="tit">생년월일</span>1990-02-21</li>
 <li><span class="tit">성별</span>남</li>
 <li><span class="tit">홈페이지</span>
 <a href="http://weibo.com/galpos3?is_hot=1" target="_blank">http://weibo.com/galpos3?is_hot=1</a><br/>
 </li>
 <li><span class="tit">신장/체중</span>181cm, 70kg</li>
 <li><span class="tit">학교</span>중앙대학교 연극학과</li>
 </ul>]

In [110]:
actor_datas[0].select('li')

[<li><span class="tit">다른 이름</span>김하늘</li>,
 <li><span class="tit">직업</span>배우</li>,
 <li><span class="tit">생년월일</span>1990-02-21</li>,
 <li><span class="tit">성별</span>남</li>,
 <li><span class="tit">홈페이지</span>
 <a href="http://weibo.com/galpos3?is_hot=1" target="_blank">http://weibo.com/galpos3?is_hot=1</a><br/>
 </li>,
 <li><span class="tit">신장/체중</span>181cm, 70kg</li>,
 <li><span class="tit">학교</span>중앙대학교 연극학과</li>]

In [120]:
# Build {label: value} from the <li> items of the first profile list.
# The patterns are compiled once, outside the loop.
span_pattern = re.compile('<span.*?>.*?</span>')
tag_pattern = re.compile('<.+?>')

actor_info_dict = dict()

for li in actor_datas[0].select('li'):
    label = li.select_one('span.tit')
    if label is None:
        # No label span in this <li>, so there is no key to store a value under.
        continue

    # Remove the label span first, then every remaining tag, which leaves only
    # the value text. A separate name is used so the loop variable `li` is not
    # rebound from a tag object to a string.
    html = span_pattern.sub('', str(li))
    actor_info_dict[label.text] = tag_pattern.sub('', html).strip()

actor_info_dict

{'다른 이름': '김하늘',
 '직업': '배우',
 '생년월일': '1990-02-21',
 '성별': '남',
 '홈페이지': 'http://weibo.com/galpos3?is_hot=1',
 '신장/체중': '181cm, 70kg',
 '학교': '중앙대학교 연극학과'}

##### 흥행지수 뽑기

In [134]:
# The score text uses thousands separators (e.g. '80,090'), so the commas are
# removed before converting to int.
for s in main_soup.select('li.people_li ul.num_info strong'):
    print(int(s.text.replace(',', '')))

80090
68142
60206
48673
47173
41931
38939


In [135]:
#### Extract the movie list

In [143]:
# One inner list of stripped movie titles per 'ul.mov_list' element, in the
# order the elements appear in main_soup. A nested comprehension replaces the
# two append loops and the ambiguous single-letter name `l`.
movie_list = [
    [title.text.strip() for title in movies.select('span')]
    for movies in main_soup.select('li.people_li ul.mov_list')
]

In [144]:
movie_list

[['해적: 도깨비 깃발', '해피 뉴 이어'],
 ['해적: 도깨비 깃발'],
 ['해적: 도깨비 깃발', '해피 뉴 이어'],
 ['해적: 도깨비 깃발'],
 ['경관의 피', '1984 최동원'],
 ['경관의 피'],
 ['해적: 도깨비 깃발']]