In [4]:
import pandas as pd
from sklearn.datasets import fetch_20newsgroups
import nltk
from nltk.corpus import stopwords

In [6]:
dataset = fetch_20newsgroups(shuffle=True, random_state=42, remove=('headers', 'footers', 'quotes'))
documents = dataset.data
print('샘플의 수 :', len(documents))

샘플의 수 : 11314


In [9]:
documents[0]

'I was wondering if anyone out there could enlighten me on this car I saw\nthe other day. It was a 2-door sports car, looked to be from the late 60s/\nearly 70s. It was called a Bricklin. The doors were really small. In addition,\nthe front bumper was separate from the rest of the body. This is \nall I know. If anyone can tellme a model name, engine specs, years\nof production, where this car is made, history, or whatever info you\nhave on this funky looking car, please e-mail.'

In [10]:
dataset.target_names

['alt.atheism',
 'comp.graphics',
 'comp.os.ms-windows.misc',
 'comp.sys.ibm.pc.hardware',
 'comp.sys.mac.hardware',
 'comp.windows.x',
 'misc.forsale',
 'rec.autos',
 'rec.motorcycles',
 'rec.sport.baseball',
 'rec.sport.hockey',
 'sci.crypt',
 'sci.electronics',
 'sci.med',
 'sci.space',
 'soc.religion.christian',
 'talk.politics.guns',
 'talk.politics.mideast',
 'talk.politics.misc',
 'talk.religion.misc']

In [13]:
# 전처리 - Cleaning(정제)

news_df = pd.DataFrame({'document':documents})
# 특수 문자 제거
news_df['cleaned_doc'] = news_df['document'].str.replace("[^a-zA-Z]"," ")
# 길이가 3 이하인 단어는 제거 (길이가 짧은 단어 제거)
news_df['cleaned_doc'] = news_df['cleaned_doc'].apply(lambda x: ' '.join([w for w in x.split() if len(w)>3]))
# 전체 단어에 대한 소문자 변환
news_df['cleaned_doc'] = news_df['cleaned_doc'].apply(lambda x: x.lower())

  news_df['cleaned_doc'] = news_df['document'].str.replace("[^a-zA-Z]"," ")


In [14]:
news_df['cleaned_doc'][0]

'wondering anyone there could enlighten this other door sports looked from late early called bricklin doors were really small addition front bumper separate from rest body this know anyone tellme model name engine specs years production where this made history whatever info have this funky looking please mail'

In [16]:
nltk.download('stopwords')

[nltk_data] Downloading package stopwords to /home/hyejin/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


True

In [18]:
stop_words = stopwords.words('english')
tokenized_doc = news_df['cleaned_doc'].apply(lambda x: x.split()) # 토큰화
tokenized_doc = tokenized_doc.apply(lambda x: [item for item in x if item not in stop_words])
# 불용어 제거

In [20]:
print(tokenized_doc[0])

['wondering', 'anyone', 'could', 'enlighten', 'door', 'sports', 'looked', 'late', 'early', 'called', 'bricklin', 'doors', 'really', 'small', 'addition', 'front', 'bumper', 'separate', 'rest', 'body', 'know', 'anyone', 'tellme', 'model', 'name', 'engine', 'specs', 'years', 'production', 'made', 'history', 'whatever', 'info', 'funky', 'looking', 'please', 'mail']


In [22]:
print(tokenized_doc.head())

0    [wondering, anyone, could, enlighten, door, sp...
1    [fair, number, brave, souls, upgraded, clock, ...
2    [well, folks, plus, finally, gave, ghost, week...
3    [weitek, address, phone, number, like, informa...
4    [article, owcb, world, tombaker, world, baker,...
Name: cleaned_doc, dtype: object


In [23]:
# 정수 인코딩, 단어 집합 만들기

from gensim import corpora
dictionary = corpora.Dictionary(tokenized_doc)
corpus = [dictionary.doc2bow(text) for text in tokenized_doc]
print(corpus[0])

[(0, 1), (1, 2), (2, 1), (3, 1), (4, 1), (5, 1), (6, 1), (7, 1), (8, 1), (9, 1), (10, 1), (11, 1), (12, 1), (13, 1), (14, 1), (15, 1), (16, 1), (17, 1), (18, 1), (19, 1), (20, 1), (21, 1), (22, 1), (23, 1), (24, 1), (25, 1), (26, 1), (27, 1), (28, 1), (29, 1), (30, 1), (31, 1), (32, 1), (33, 1), (34, 1), (35, 1)]


In [24]:
print(corpus[1])

[(24, 2), (36, 1), (37, 1), (38, 1), (39, 1), (40, 1), (41, 1), (42, 1), (43, 2), (44, 1), (45, 1), (46, 1), (47, 1), (48, 1), (49, 2), (50, 1), (51, 1), (52, 1), (53, 1), (54, 1), (55, 1), (56, 1), (57, 1), (58, 1), (59, 1), (60, 1), (61, 1), (62, 2), (63, 1), (64, 1), (65, 1), (66, 1), (67, 1), (68, 1), (69, 1), (70, 2), (71, 1), (72, 1), (73, 1), (74, 1), (75, 1)]


In [26]:
print('24로 정수 인코딩이 되기 전 단어 :', dictionary[24])

24로 정수 인코딩이 되기 전 단어 : please


In [29]:
print('총 학습된 단어의 개수 :', len(dictionary))
print(dictionary)

총 학습된 단어의 개수 : 64281
Dictionary(64281 unique tokens: ['addition', 'anyone', 'body', 'bricklin', 'bumper']...)


In [30]:
import gensim
NUM_TOPICS = 20    # 토픽의 수
ldamodel = gensim.models.ldamodel.LdaModel(corpus, num_topics = NUM_TOPICS, id2word=dictionary, passes=15)
# passes : epoch와 유사
topics = ldamodel.print_topics(num_words=4)    # 디폴트 : 10
for topic in topics:
    print(topic)

(0, '0.020*"game" + 0.020*"team" + 0.016*"year" + 0.013*"games"')
(1, '0.035*"israel" + 0.020*"period" + 0.016*"israeli" + 0.012*"water"')
(2, '0.014*"would" + 0.014*"people" + 0.007*"think" + 0.005*"right"')
(3, '0.011*"armenian" + 0.011*"turkish" + 0.010*"jews" + 0.008*"armenians"')
(4, '0.019*"space" + 0.010*"data" + 0.009*"encryption" + 0.008*"chip"')
(5, '0.009*"temple" + 0.009*"tyre" + 0.008*"height" + 0.008*"bobby"')
(6, '0.015*"mail" + 0.013*"file" + 0.012*"please" + 0.012*"information"')
(7, '0.007*"science" + 0.006*"evidence" + 0.005*"theory" + 0.005*"book"')
(8, '0.016*"would" + 0.016*"like" + 0.011*"good" + 0.009*"much"')
(9, '0.012*"drive" + 0.009*"system" + 0.009*"card" + 0.007*"windows"')
(10, '0.017*"said" + 0.013*"went" + 0.008*"told" + 0.008*"came"')
(11, '0.026*"food" + 0.016*"nrhj" + 0.011*"wwiz" + 0.009*"gizw"')
(12, '0.032*"church" + 0.014*"catholic" + 0.010*"metal" + 0.009*"pope"')
(13, '0.012*"jesus" + 0.009*"believe" + 0.009*"people" + 0.009*"would"')
(14, '0.0

In [32]:
# 토픽 별 단어 분포 시각화
import pyLDAvis.gensim_models

pyLDAvis.enable_notebook()    # 넣고 안넣고 차이가 뭐지..????????????
vis = pyLDAvis.gensim_models.prepare(ldamodel, corpus, dictionary)
pyLDAvis.display(vis)

In [36]:
# 문서 별 토픽 분포 확인
for i, topic_list in enumerate(ldamodel[corpus]):
    print(i, '번째 문서의 topic 비율은', topic_list)
# (topic 번호, 비중)

0 번째 문서의 topic 비율은 [(2, 0.15157491), (6, 0.16129908), (8, 0.38359785), (9, 0.07096366), (10, 0.13574687), (17, 0.07776317)]
1 번째 문서의 topic 비율은 [(6, 0.14701205), (7, 0.2321858), (8, 0.12579112), (9, 0.34747034), (15, 0.09035778), (18, 0.042273425)]
2 번째 문서의 topic 비율은 [(0, 0.04179654), (2, 0.050975926), (6, 0.07779689), (8, 0.55116653), (9, 0.22781907), (14, 0.03641978)]
3 번째 문서의 topic 비율은 [(4, 0.28464866), (6, 0.39774343), (8, 0.2112044)]
4 번째 문서의 topic 비율은 [(3, 0.14372194), (4, 0.09284305), (7, 0.35100123), (9, 0.30992088), (18, 0.07827826)]
5 번째 문서의 topic 비율은 [(2, 0.32280183), (7, 0.30767158), (13, 0.34770632)]
6 번째 문서의 topic 비율은 [(2, 0.35512137), (3, 0.060433704), (6, 0.27821603), (9, 0.2795039)]
7 번째 문서의 topic 비율은 [(9, 0.99128217)]
8 번째 문서의 topic 비율은 [(6, 0.9208158)]
9 번째 문서의 topic 비율은 [(2, 0.045504805), (6, 0.3233953), (8, 0.11215717), (9, 0.48105717), (17, 0.026983302)]
10 번째 문서의 topic 비율은 [(5, 0.33099797), (6, 0.048676725), (8, 0.5721641), (14, 0.031101324)]
11 번째 문서의 topic 비율은 [

231 번째 문서의 topic 비율은 [(2, 0.26786363), (6, 0.18451591), (8, 0.26289555), (9, 0.14487085), (18, 0.12316632)]
232 번째 문서의 topic 비율은 [(4, 0.22813162), (9, 0.6818354)]
233 번째 문서의 topic 비율은 [(0, 0.21330862), (2, 0.091712244), (7, 0.2681555), (8, 0.3405137), (14, 0.045516443), (17, 0.026751839)]
234 번째 문서의 topic 비율은 [(4, 0.12354706), (8, 0.16681136), (9, 0.6065375), (16, 0.055804577)]
235 번째 문서의 topic 비율은 [(6, 0.6947738), (9, 0.26422042)]
236 번째 문서의 topic 비율은 [(0, 0.10537821), (6, 0.106584415), (8, 0.3098769), (10, 0.14538193), (14, 0.16954495), (18, 0.08239547), (19, 0.049753357)]
237 번째 문서의 topic 비율은 [(8, 0.58793026), (15, 0.25457713), (16, 0.08651396)]
238 번째 문서의 topic 비율은 [(0, 0.05), (1, 0.05), (2, 0.05), (3, 0.05), (4, 0.05), (5, 0.05), (6, 0.05), (7, 0.05), (8, 0.05), (9, 0.05), (10, 0.05), (11, 0.05), (12, 0.05), (13, 0.05), (14, 0.05), (15, 0.05), (16, 0.05), (17, 0.05), (18, 0.05), (19, 0.05)]
239 번째 문서의 topic 비율은 [(5, 0.032236837), (6, 0.32914904), (7, 0.09569494), (9, 0.35787973), 

472 번째 문서의 topic 비율은 [(0, 0.4068522), (2, 0.11262824), (4, 0.027935673), (6, 0.043179326), (8, 0.27536732), (14, 0.05572486), (17, 0.04677009), (18, 0.022414524)]
473 번째 문서의 topic 비율은 [(0, 0.05), (1, 0.05), (2, 0.05), (3, 0.05), (4, 0.05), (5, 0.05), (6, 0.05), (7, 0.05), (8, 0.05), (9, 0.05), (10, 0.05), (11, 0.05), (12, 0.05), (13, 0.05), (14, 0.05), (15, 0.05), (16, 0.05), (17, 0.05), (18, 0.05), (19, 0.05)]
474 번째 문서의 topic 비율은 [(2, 0.09708372), (4, 0.29821384), (7, 0.08788598), (8, 0.42131132), (15, 0.05971216)]
475 번째 문서의 topic 비율은 [(1, 0.06722296), (3, 0.32938978), (8, 0.16164608), (13, 0.27820307), (14, 0.14565289)]
476 번째 문서의 topic 비율은 [(2, 0.25670278), (7, 0.05622493), (12, 0.3307874), (13, 0.34627736)]
477 번째 문서의 topic 비율은 [(0, 0.05), (1, 0.05), (2, 0.05), (3, 0.05), (4, 0.05), (5, 0.05), (6, 0.05), (7, 0.05), (8, 0.05), (9, 0.05), (10, 0.05), (11, 0.05), (12, 0.05), (13, 0.05), (14, 0.05), (15, 0.05), (16, 0.05), (17, 0.05), (18, 0.05), (19, 0.05)]
478 번째 문서의 topic 비율은 [(8,

714 번째 문서의 topic 비율은 [(0, 0.595668), (8, 0.2596347), (19, 0.10217352)]
715 번째 문서의 topic 비율은 [(0, 0.058015622), (7, 0.09264771), (12, 0.050704245), (13, 0.7790634)]
716 번째 문서의 topic 비율은 [(2, 0.27055836), (7, 0.4513054), (8, 0.1603059), (10, 0.1069889)]
717 번째 문서의 topic 비율은 [(8, 0.6450479), (9, 0.3019398)]
718 번째 문서의 topic 비율은 [(0, 0.05186327), (13, 0.6871256), (14, 0.122965805), (18, 0.10156827)]
719 번째 문서의 topic 비율은 [(2, 0.09562404), (7, 0.029959684), (8, 0.12814876), (13, 0.7297306)]
720 번째 문서의 topic 비율은 [(0, 0.32488936), (5, 0.070464864), (8, 0.2676084), (10, 0.06780044), (14, 0.10228724), (15, 0.120251454)]
721 번째 문서의 topic 비율은 [(12, 0.14997412), (13, 0.72143126)]
722 번째 문서의 topic 비율은 [(0, 0.06223252), (6, 0.23749797), (8, 0.5977375), (16, 0.06023544)]
723 번째 문서의 topic 비율은 [(2, 0.17627716), (6, 0.15220954), (7, 0.09684584), (8, 0.14654963), (13, 0.02003078), (14, 0.069132), (16, 0.08938809), (18, 0.21930657)]
724 번째 문서의 topic 비율은 [(0, 0.46516997), (2, 0.29561013), (7, 0.08833246), (

919 번째 문서의 topic 비율은 [(2, 0.22693944), (3, 0.08059575), (8, 0.027057646), (10, 0.63663644), (13, 0.027057465)]
920 번째 문서의 topic 비율은 [(0, 0.46694896), (2, 0.12833013), (3, 0.011369942), (8, 0.23361148), (18, 0.09675038), (19, 0.057858143)]
921 번째 문서의 topic 비율은 [(2, 0.68879426), (6, 0.054729342), (8, 0.19601925), (16, 0.047944374)]
922 번째 문서의 topic 비율은 [(5, 0.040442377), (6, 0.07696802), (7, 0.14848855), (8, 0.34759012), (10, 0.06442326), (16, 0.29511184)]
923 번째 문서의 topic 비율은 [(0, 0.43294048), (2, 0.28823864), (7, 0.04469857), (8, 0.116427496), (13, 0.10178323)]
924 번째 문서의 topic 비율은 [(1, 0.33879486), (3, 0.015943406), (8, 0.4640954), (10, 0.06047951), (14, 0.020606566), (18, 0.028524179), (19, 0.062240887)]
925 번째 문서의 topic 비율은 [(4, 0.06297197), (8, 0.5459087), (9, 0.3617142)]
926 번째 문서의 topic 비율은 [(2, 0.7260347), (15, 0.23482485)]
927 번째 문서의 topic 비율은 [(5, 0.019944873), (6, 0.1120088), (8, 0.16839465), (9, 0.6680468), (15, 0.017647337)]
928 번째 문서의 topic 비율은 [(2, 0.23510455), (3, 0.7182

1127 번째 문서의 topic 비율은 [(1, 0.011867443), (2, 0.3611409), (4, 0.02985741), (5, 0.02458028), (6, 0.016798615), (7, 0.11973424), (10, 0.09034203), (12, 0.074595705), (13, 0.19513054), (19, 0.071027726)]
1128 번째 문서의 topic 비율은 [(0, 0.21230383), (2, 0.037479907), (3, 0.03442914), (4, 0.06247317), (6, 0.32201138), (7, 0.012128036), (9, 0.11175454), (13, 0.059485186), (14, 0.018962612), (16, 0.019569507), (18, 0.047632765), (19, 0.033750247)]
1129 번째 문서의 topic 비율은 [(2, 0.10592127), (6, 0.02361499), (8, 0.18278131), (10, 0.040838696), (12, 0.16046776), (13, 0.46429697), (16, 0.015501928)]
1130 번째 문서의 topic 비율은 [(0, 0.4628568), (2, 0.34989473), (15, 0.030225841), (16, 0.045641568), (19, 0.10618834)]
1131 번째 문서의 topic 비율은 [(2, 0.6884477), (7, 0.21150607)]
1132 번째 문서의 topic 비율은 [(2, 0.18916348), (3, 0.07313403), (5, 0.018010974), (6, 0.026592785), (7, 0.41899672), (13, 0.25885078)]
1133 번째 문서의 topic 비율은 [(1, 0.059709873), (2, 0.2686494), (3, 0.2673546), (5, 0.041408516), (7, 0.044881392), (13, 0.1

1329 번째 문서의 topic 비율은 [(8, 0.38702738), (10, 0.5838859)]
1330 번째 문서의 topic 비율은 [(8, 0.51968265), (9, 0.2616779), (19, 0.15320961)]
1331 번째 문서의 topic 비율은 [(1, 0.22536533), (2, 0.3369327), (4, 0.04821138), (8, 0.16977069), (9, 0.14723156), (13, 0.017886974), (17, 0.036592714)]
1332 번째 문서의 topic 비율은 [(0, 0.02076421), (2, 0.41838345), (3, 0.17786422), (7, 0.013831548), (8, 0.12048371), (13, 0.20161772), (14, 0.038650297)]
1333 번째 문서의 topic 비율은 [(3, 0.094753094), (5, 0.10741353), (7, 0.14961067), (10, 0.040778074), (13, 0.39268038), (15, 0.19580173)]
1334 번째 문서의 topic 비율은 [(3, 0.39365876), (13, 0.5688314)]
1335 번째 문서의 topic 비율은 [(0, 0.025000582), (1, 0.025000582), (2, 0.025000582), (3, 0.025000582), (4, 0.025000582), (5, 0.025000582), (6, 0.025000582), (7, 0.025000582), (8, 0.025000582), (9, 0.025000582), (10, 0.025000582), (11, 0.025000582), (12, 0.025000582), (13, 0.025000582), (14, 0.52498895), (15, 0.025000582), (16, 0.025000582), (17, 0.025000582), (18, 0.025000582), (19, 0.025000582)]

1553 번째 문서의 topic 비율은 [(0, 0.35751247), (2, 0.58621854)]
1554 번째 문서의 topic 비율은 [(0, 0.011698751), (4, 0.053271957), (6, 0.6043112), (7, 0.08681456), (8, 0.010127733), (9, 0.049107186), (14, 0.13850655), (16, 0.016292095), (18, 0.0131567)]
1555 번째 문서의 topic 비율은 [(0, 0.2560932), (6, 0.33078134), (7, 0.07033978), (15, 0.14001827), (16, 0.09978921), (17, 0.061773017)]
1556 번째 문서의 topic 비율은 [(2, 0.48202518), (4, 0.13582897), (8, 0.2118086), (9, 0.034557037), (12, 0.101024), (14, 0.013271681), (18, 0.014170118)]
1557 번째 문서의 topic 비율은 [(4, 0.030551568), (5, 0.16821781), (7, 0.18857771), (13, 0.5904051)]
1558 번째 문서의 topic 비율은 [(2, 0.33227247), (8, 0.5676415), (12, 0.05526624)]
1559 번째 문서의 topic 비율은 [(2, 0.8686755), (3, 0.06305963), (15, 0.059114754)]
1560 번째 문서의 topic 비율은 [(2, 0.71079296), (4, 0.18898514)]
1561 번째 문서의 topic 비율은 [(2, 0.2932714), (3, 0.45995304), (7, 0.23305792)]
1562 번째 문서의 topic 비율은 [(2, 0.58573323), (7, 0.055092398), (8, 0.14841564), (10, 0.18212092)]
1563 번째 문서의 topic 비율은 [(

1793 번째 문서의 topic 비율은 [(6, 0.033567198), (8, 0.0738025), (9, 0.027706025), (10, 0.07990134), (13, 0.6912102), (19, 0.07817675)]
1794 번째 문서의 topic 비율은 [(9, 0.83386636), (10, 0.11873232)]
1795 번째 문서의 topic 비율은 [(6, 0.048397623), (8, 0.37017468), (9, 0.51044893), (14, 0.017151145), (18, 0.04031159)]
1796 번째 문서의 topic 비율은 [(1, 0.022490626), (6, 0.19207548), (7, 0.03818127), (8, 0.19186446), (9, 0.48600012), (10, 0.054150637)]
1797 번째 문서의 topic 비율은 [(2, 0.23122242), (4, 0.5027131), (8, 0.1733553), (9, 0.036457133), (11, 0.048880152)]
1798 번째 문서의 topic 비율은 [(2, 0.20198746), (6, 0.6518437), (12, 0.080639504)]
1799 번째 문서의 topic 비율은 [(2, 0.2164866), (3, 0.13039176), (8, 0.5175296), (10, 0.029469581), (15, 0.09317147)]
1800 번째 문서의 topic 비율은 [(6, 0.081253126), (7, 0.22349747), (8, 0.31075546), (9, 0.25731108), (15, 0.04734893), (19, 0.06581205)]
1801 번째 문서의 topic 비율은 [(0, 0.05), (1, 0.05), (2, 0.05), (3, 0.05), (4, 0.05), (5, 0.05), (6, 0.05), (7, 0.05), (8, 0.05), (9, 0.05), (10, 0.05), (11, 0.0

2010 번째 문서의 topic 비율은 [(0, 0.01667173), (1, 0.68323714), (2, 0.01667173), (3, 0.01667173), (4, 0.01667173), (5, 0.01667173), (6, 0.01667173), (7, 0.01667173), (8, 0.01667173), (9, 0.01667173), (10, 0.01667173), (11, 0.01667173), (12, 0.01667173), (13, 0.01667173), (14, 0.01667173), (15, 0.01667173), (16, 0.01667173), (17, 0.01667173), (18, 0.01667173), (19, 0.01667173)]
2011 번째 문서의 topic 비율은 [(8, 0.19911632), (9, 0.7376115), (19, 0.03492301)]
2012 번째 문서의 topic 비율은 [(2, 0.07724151), (3, 0.020728331), (8, 0.042819824), (12, 0.016378287), (13, 0.8183489), (18, 0.02167915)]
2013 번째 문서의 topic 비율은 [(2, 0.30745575), (6, 0.2699163), (12, 0.08818811), (13, 0.122625545), (16, 0.051459104), (19, 0.14823629)]
2014 번째 문서의 topic 비율은 [(4, 0.037154034), (6, 0.18145333), (8, 0.32136226), (9, 0.30365396), (10, 0.03806176), (14, 0.027347844), (17, 0.07581652)]
2015 번째 문서의 topic 비율은 [(0, 0.24043588), (2, 0.5860146), (7, 0.10266882)]
2016 번째 문서의 topic 비율은 [(6, 0.3014069), (8, 0.3148323), (9, 0.2732602), (1

2213 번째 문서의 topic 비율은 [(9, 0.81944686), (13, 0.11153374), (18, 0.05199303)]
2214 번째 문서의 topic 비율은 [(2, 0.29938185), (7, 0.16368617), (8, 0.3730275), (11, 0.1115604), (15, 0.017520446), (19, 0.021758517)]
2215 번째 문서의 topic 비율은 [(1, 0.4553934), (2, 0.12787466), (4, 0.084282555), (8, 0.32466993)]
2216 번째 문서의 topic 비율은 [(8, 0.7121512), (10, 0.14584917), (12, 0.03726738), (18, 0.07612028)]
2217 번째 문서의 topic 비율은 [(1, 0.059342396), (4, 0.05402034), (8, 0.1723326), (9, 0.656289), (10, 0.037516244)]
2218 번째 문서의 topic 비율은 [(4, 0.16956495), (8, 0.6321733), (9, 0.12736866)]
2219 번째 문서의 topic 비율은 [(2, 0.3605524), (10, 0.1741727), (13, 0.43125245)]
2220 번째 문서의 topic 비율은 [(3, 0.54302794), (6, 0.3283797)]
2221 번째 문서의 topic 비율은 [(2, 0.5172446), (5, 0.015875388), (6, 0.2750518), (8, 0.042754505), (10, 0.021408161), (11, 0.015877469), (13, 0.10192757)]
2222 번째 문서의 topic 비율은 [(0, 0.062499497), (3, 0.11791011), (4, 0.06881695), (8, 0.05148164), (13, 0.65977085)]
2223 번째 문서의 topic 비율은 [(2, 0.16775945), (3, 

2406 번째 문서의 topic 비율은 [(0, 0.10973902), (2, 0.0760449), (6, 0.028905945), (7, 0.048415188), (8, 0.27755964), (9, 0.39709547), (14, 0.041458566), (17, 0.01476628)]
2407 번째 문서의 topic 비율은 [(2, 0.21821079), (8, 0.35775572), (9, 0.18912642), (10, 0.1681647), (13, 0.046969708)]
2408 번째 문서의 topic 비율은 [(0, 0.027062934), (2, 0.52397096), (7, 0.057243742), (8, 0.07148923), (10, 0.088916555), (13, 0.15597309), (14, 0.017499167), (15, 0.05059784)]
2409 번째 문서의 topic 비율은 [(0, 0.05), (1, 0.05), (2, 0.05), (3, 0.05), (4, 0.05), (5, 0.05), (6, 0.05), (7, 0.05), (8, 0.05), (9, 0.05), (10, 0.05), (11, 0.05), (12, 0.05), (13, 0.05), (14, 0.05), (15, 0.05), (16, 0.05), (17, 0.05), (18, 0.05), (19, 0.05)]
2410 번째 문서의 topic 비율은 [(0, 0.29472297), (8, 0.3855748), (9, 0.083811186), (10, 0.19422565), (13, 0.029357048)]
2411 번째 문서의 topic 비율은 [(2, 0.22167288), (3, 0.037944112), (4, 0.26922196), (6, 0.32237512), (9, 0.137581)]
2412 번째 문서의 topic 비율은 [(0, 0.026832893), (2, 0.09871332), (3, 0.034577496), (7, 0.0795111

2618 번째 문서의 topic 비율은 [(0, 0.16705167), (2, 0.12238255), (8, 0.42086542), (10, 0.14874859), (11, 0.014111083), (12, 0.017666707), (13, 0.02780648), (19, 0.07324276)]
2619 번째 문서의 topic 비율은 [(6, 0.114297025), (7, 0.05849736), (8, 0.20398034), (9, 0.50660986), (16, 0.043506354), (17, 0.04390162)]
2620 번째 문서의 topic 비율은 [(6, 0.17960268), (8, 0.7203678)]
2621 번째 문서의 topic 비율은 [(2, 0.43766552), (3, 0.092121966), (8, 0.096476205), (9, 0.01802717), (13, 0.2951996), (14, 0.018700212), (16, 0.014268877), (17, 0.018951036)]
2622 번째 문서의 topic 비율은 [(2, 0.44179317), (3, 0.0240453), (7, 0.14486322), (8, 0.04936468), (9, 0.011950055), (12, 0.022517549), (13, 0.25403652), (15, 0.02103036)]
2623 번째 문서의 topic 비율은 [(7, 0.4564231), (9, 0.41538417), (14, 0.047682736), (18, 0.04569101)]
2624 번째 문서의 topic 비율은 [(4, 0.15444519), (6, 0.4713446), (7, 0.091847315), (8, 0.17968972), (9, 0.06990039)]
2625 번째 문서의 topic 비율은 [(2, 0.76274616), (7, 0.10142762), (19, 0.075039245)]
2626 번째 문서의 topic 비율은 [(0, 0.016741406), (

2835 번째 문서의 topic 비율은 [(2, 0.26506886), (6, 0.015411567), (7, 0.17762956), (8, 0.2782365), (9, 0.012704851), (10, 0.06189508), (12, 0.011064181), (16, 0.024522439), (19, 0.15276162)]
2836 번째 문서의 topic 비율은 [(6, 0.35389537), (8, 0.3569406), (9, 0.059369765), (13, 0.10771902), (18, 0.08937382)]
2837 번째 문서의 topic 비율은 [(0, 0.033272415), (2, 0.25003642), (4, 0.35000664), (7, 0.08181955), (9, 0.046101388), (12, 0.018087119), (14, 0.013043756), (15, 0.11401681), (17, 0.07661035)]
2838 번째 문서의 topic 비율은 [(0, 0.1076835), (2, 0.32835838), (8, 0.4072769), (13, 0.09733743), (14, 0.036568563)]
2839 번째 문서의 topic 비율은 [(0, 0.14402395), (4, 0.024777481), (6, 0.12342258), (8, 0.40660194), (9, 0.16981377), (10, 0.1202405)]
2840 번째 문서의 topic 비율은 [(2, 0.06585153), (7, 0.059031554), (8, 0.13867655), (13, 0.7131715)]
2841 번째 문서의 topic 비율은 [(0, 0.010000928), (1, 0.010000928), (2, 0.010000928), (3, 0.010000928), (4, 0.010000928), (5, 0.010000928), (6, 0.18715836), (7, 0.010000928), (8, 0.29086468), (9, 0.0100009

3064 번째 문서의 topic 비율은 [(4, 0.06684076), (5, 0.038587812), (8, 0.09581463), (12, 0.018120816), (13, 0.7304321), (18, 0.03810366)]
3065 번째 문서의 topic 비율은 [(0, 0.05), (1, 0.05), (2, 0.05), (3, 0.05), (4, 0.05), (5, 0.05), (6, 0.05), (7, 0.05), (8, 0.05), (9, 0.05), (10, 0.05), (11, 0.05), (12, 0.05), (13, 0.05), (14, 0.05), (15, 0.05), (16, 0.05), (17, 0.05), (18, 0.05), (19, 0.05)]
3066 번째 문서의 topic 비율은 [(2, 0.11674), (6, 0.5791449), (12, 0.06839273), (14, 0.13034254), (16, 0.08448506)]
3067 번째 문서의 topic 비율은 [(6, 0.21546528), (7, 0.0391002), (8, 0.6815958), (12, 0.03622424)]
3068 번째 문서의 topic 비율은 [(0, 0.010014441), (1, 0.010014441), (2, 0.60744715), (3, 0.010014441), (4, 0.010014441), (5, 0.010014441), (6, 0.010014441), (7, 0.010014441), (8, 0.010014441), (9, 0.010014441), (10, 0.21229294), (11, 0.010014441), (12, 0.010014441), (13, 0.010014441), (14, 0.010014443), (15, 0.010014441), (16, 0.010014441), (17, 0.010014441), (18, 0.010014441), (19, 0.010014441)]
3069 번째 문서의 topic 비율은 [(10, 0.

3273 번째 문서의 topic 비율은 [(7, 0.117402926), (8, 0.49250922), (9, 0.29774565), (12, 0.052309312)]
3274 번째 문서의 topic 비율은 [(7, 0.08510882), (8, 0.64122117), (10, 0.1805847), (19, 0.053064685)]
3275 번째 문서의 topic 비율은 [(0, 0.015287784), (2, 0.2967638), (4, 0.35647133), (7, 0.05326805), (8, 0.219621), (11, 0.010984588), (15, 0.011114443), (19, 0.011327207)]
3276 번째 문서의 topic 비율은 [(0, 0.012509624), (1, 0.012509624), (2, 0.012509624), (3, 0.012509624), (4, 0.012509624), (5, 0.012509624), (6, 0.2626862), (7, 0.012509624), (8, 0.012509624), (9, 0.012509624), (10, 0.012509624), (11, 0.012509624), (12, 0.5121406), (13, 0.012509624), (14, 0.012509624), (15, 0.012509624), (16, 0.012509624), (17, 0.012509624), (18, 0.012509624), (19, 0.012509624)]
3277 번째 문서의 topic 비율은 [(5, 0.06583267), (6, 0.8778996)]
3278 번째 문서의 topic 비율은 [(0, 0.025550585), (2, 0.6611238), (3, 0.08670247), (4, 0.020808604), (5, 0.042085044), (7, 0.0721765), (14, 0.025704691), (15, 0.028719638), (17, 0.026099876)]
3279 번째 문서의 topic 비율은 

3493 번째 문서의 topic 비율은 [(0, 0.01256115), (2, 0.050690465), (8, 0.08700119), (12, 0.07992567), (13, 0.754245)]
3494 번째 문서의 topic 비율은 [(0, 0.17788547), (3, 0.13923393), (8, 0.14700982), (9, 0.2616808), (13, 0.090722404), (16, 0.04349237), (17, 0.07120461), (19, 0.04370923)]
3495 번째 문서의 topic 비율은 [(3, 0.86668), (13, 0.035381794), (15, 0.07917639), (19, 0.014019517)]
3496 번째 문서의 topic 비율은 [(0, 0.124856435), (6, 0.41861805), (9, 0.3856331)]
3497 번째 문서의 topic 비율은 [(8, 0.40367046), (9, 0.5400426)]
3498 번째 문서의 topic 비율은 [(2, 0.48872128), (8, 0.4612456)]
3499 번째 문서의 topic 비율은 [(8, 0.22910827), (9, 0.6515403), (10, 0.093440436), (14, 0.017652437)]
3500 번째 문서의 topic 비율은 [(2, 0.8504371), (7, 0.06994869), (14, 0.055311106)]
3501 번째 문서의 topic 비율은 [(6, 0.19960149), (9, 0.47988436), (13, 0.16027662), (17, 0.062196348), (18, 0.060487226)]
3502 번째 문서의 topic 비율은 [(6, 0.28626594), (8, 0.19057457), (9, 0.46239662)]
3503 번째 문서의 topic 비율은 [(6, 0.35318998), (7, 0.062705636), (8, 0.15774001), (9, 0.167837), (18

3721 번째 문서의 topic 비율은 [(2, 0.7729184), (7, 0.032800097), (8, 0.047522485), (9, 0.03147743), (13, 0.095773734), (19, 0.012481508)]
3722 번째 문서의 topic 비율은 [(0, 0.08701292), (2, 0.3519768), (8, 0.21370643), (9, 0.18247461), (17, 0.12066035)]
3723 번째 문서의 topic 비율은 [(8, 0.17596155), (17, 0.674032)]
3724 번째 문서의 topic 비율은 [(6, 0.04795685), (8, 0.2282539), (10, 0.13496837), (12, 0.55543476)]
3725 번째 문서의 topic 비율은 [(9, 0.9907724)]
3726 번째 문서의 topic 비율은 [(0, 0.44825032), (8, 0.28182596), (19, 0.23127578)]
3727 번째 문서의 topic 비율은 [(2, 0.13230662), (4, 0.40132642), (7, 0.062278267), (8, 0.20148616), (10, 0.017811287), (14, 0.08083645), (16, 0.0473527), (17, 0.0474816)]
3728 번째 문서의 topic 비율은 [(2, 0.5133473), (3, 0.27254426), (7, 0.0887978), (12, 0.011865228), (15, 0.051833473), (16, 0.050630998)]
3729 번째 문서의 topic 비율은 [(2, 0.6541117), (8, 0.056196313), (10, 0.04697626), (13, 0.1784069), (19, 0.037493404)]
3730 번째 문서의 topic 비율은 [(0, 0.025032526), (1, 0.025032526), (2, 0.524382), (3, 0.025032526), (4, 0

3929 번째 문서의 topic 비율은 [(2, 0.041649826), (3, 0.032034896), (6, 0.7407815), (9, 0.13347913), (18, 0.031108428)]
3930 번째 문서의 topic 비율은 [(0, 0.22959433), (6, 0.16734324), (8, 0.5085234)]
3931 번째 문서의 topic 비율은 [(5, 0.03153892), (8, 0.1911481), (9, 0.2828748), (11, 0.043107122), (14, 0.046167105), (18, 0.34998235), (19, 0.045885626)]
3932 번째 문서의 topic 비율은 [(2, 0.5295487), (4, 0.079008006), (5, 0.017129207), (7, 0.05241424), (8, 0.15454881), (12, 0.04643015), (15, 0.110594936)]
3933 번째 문서의 topic 비율은 [(2, 0.24955246), (6, 0.6068299), (17, 0.07792653)]
3934 번째 문서의 topic 비율은 [(1, 0.018195346), (2, 0.08768924), (6, 0.062097277), (7, 0.020507012), (8, 0.46211395), (9, 0.26358166), (16, 0.021130992), (19, 0.06118149)]
3935 번째 문서의 topic 비율은 [(2, 0.7508134), (7, 0.13223656), (13, 0.09668967)]
3936 번째 문서의 topic 비율은 [(8, 0.25103977), (10, 0.17294621), (12, 0.084085844), (13, 0.35168192), (14, 0.08252706)]
3937 번째 문서의 topic 비율은 [(6, 0.5964765), (9, 0.3342728)]
3938 번째 문서의 topic 비율은 [(2, 0.7951623), (3,

4128 번째 문서의 topic 비율은 [(2, 0.21704811), (4, 0.04324695), (5, 0.049861994), (6, 0.018971426), (7, 0.65667105)]
4129 번째 문서의 topic 비율은 [(6, 0.48714674), (8, 0.4002815)]
4130 번째 문서의 topic 비율은 [(8, 0.69577914), (10, 0.1755955)]
4131 번째 문서의 topic 비율은 [(8, 0.2563464), (9, 0.7263339)]
4132 번째 문서의 topic 비율은 [(2, 0.023887109), (3, 0.13513026), (7, 0.22041532), (9, 0.10031075), (10, 0.065491006), (13, 0.4453755)]
4133 번째 문서의 topic 비율은 [(6, 0.22087336), (7, 0.16199662), (9, 0.21616201), (18, 0.37512138)]
4134 번째 문서의 topic 비율은 [(0, 0.010000977), (1, 0.010000977), (2, 0.010000977), (3, 0.010000977), (4, 0.010000977), (5, 0.010000977), (6, 0.010000977), (7, 0.010000977), (8, 0.5809241), (9, 0.010000977), (10, 0.23905836), (11, 0.010000977), (12, 0.010000977), (13, 0.010000977), (14, 0.010000977), (15, 0.010000977), (16, 0.010000977), (17, 0.010000977), (18, 0.010000977), (19, 0.010000977)]
4135 번째 문서의 topic 비율은 [(1, 0.2278551), (3, 0.26917413), (8, 0.32200226), (13, 0.14460035), (15, 0.021355517)]
41

4327 번째 문서의 topic 비율은 [(3, 0.07194661), (4, 0.39095786), (8, 0.397208), (12, 0.014466682), (15, 0.11467168)]
4328 번째 문서의 topic 비율은 [(2, 0.13919108), (6, 0.20575714), (9, 0.54969144), (14, 0.06087136)]
4329 번째 문서의 topic 비율은 [(9, 0.9400318), (14, 0.038003206)]
4330 번째 문서의 topic 비율은 [(6, 0.10816665), (8, 0.58223206), (10, 0.14019787), (14, 0.06350017), (16, 0.06172322)]
4331 번째 문서의 topic 비율은 [(4, 0.09582776), (7, 0.17543975), (9, 0.7098278)]
4332 번째 문서의 topic 비율은 [(8, 0.60096455), (9, 0.3630276)]
4333 번째 문서의 topic 비율은 [(0, 0.05), (1, 0.05), (2, 0.05), (3, 0.05), (4, 0.05), (5, 0.05), (6, 0.05), (7, 0.05), (8, 0.05), (9, 0.05), (10, 0.05), (11, 0.05), (12, 0.05), (13, 0.05), (14, 0.05), (15, 0.05), (16, 0.05), (17, 0.05), (18, 0.05), (19, 0.05)]
4334 번째 문서의 topic 비율은 [(8, 0.28244257), (9, 0.5048953), (17, 0.17569476)]
4335 번째 문서의 topic 비율은 [(1, 0.055231538), (2, 0.11039188), (4, 0.034227543), (5, 0.019931505), (6, 0.015950594), (7, 0.10851709), (8, 0.47561085), (9, 0.07244251), (10, 0.0289

4541 번째 문서의 topic 비율은 [(0, 0.49527702), (2, 0.37490764), (7, 0.020841923), (8, 0.050068416), (10, 0.03203154), (14, 0.01800187)]
4542 번째 문서의 topic 비율은 [(2, 0.40405643), (8, 0.15198931), (9, 0.36541283), (12, 0.06219471)]
4543 번째 문서의 topic 비율은 [(5, 0.05750379), (6, 0.14288567), (7, 0.12359212), (8, 0.21384862), (9, 0.16780676), (18, 0.26388863)]
4544 번째 문서의 topic 비율은 [(0, 0.47616515), (2, 0.21871576), (3, 0.010173088), (7, 0.086708985), (8, 0.19416593)]
4545 번째 문서의 topic 비율은 [(6, 0.38843954), (7, 0.055193506), (8, 0.28450656), (14, 0.1405538), (18, 0.074865736), (19, 0.03384318)]
4546 번째 문서의 topic 비율은 [(6, 0.16341059), (8, 0.4068136), (9, 0.21788274), (13, 0.063658416), (17, 0.114117056)]
4547 번째 문서의 topic 비율은 [(0, 0.5427129), (2, 0.12526107), (7, 0.0335312), (8, 0.25823522), (19, 0.035536878)]
4548 번째 문서의 topic 비율은 [(2, 0.20574227), (3, 0.11228767), (8, 0.47296172), (12, 0.037446696), (15, 0.10721678), (18, 0.040059086)]
4549 번째 문서의 topic 비율은 [(2, 0.19315132), (7, 0.13664646), (9, 0.17

4751 번째 문서의 topic 비율은 [(0, 0.56948704), (9, 0.35550165)]
4752 번째 문서의 topic 비율은 [(6, 0.2576414), (9, 0.6900861), (18, 0.029293794)]
4753 번째 문서의 topic 비율은 [(2, 0.10288087), (5, 0.0328181), (6, 0.57401305), (9, 0.26673642)]
4754 번째 문서의 topic 비율은 [(0, 0.08363062), (2, 0.22754335), (6, 0.03793484), (9, 0.6326875)]
4755 번째 문서의 topic 비율은 [(1, 0.031792782), (2, 0.04188031), (7, 0.034429953), (9, 0.8676082)]
4756 번째 문서의 topic 비율은 [(0, 0.61272424), (2, 0.18202174), (3, 0.03081668), (8, 0.15904371)]
4757 번째 문서의 topic 비율은 [(5, 0.024066957), (7, 0.028486656), (8, 0.6859311), (10, 0.16098884), (17, 0.083060816)]
4758 번째 문서의 topic 비율은 [(2, 0.61666334), (8, 0.14687958), (10, 0.10487273), (13, 0.029454356), (16, 0.044700343), (19, 0.043940913)]
4759 번째 문서의 topic 비율은 [(2, 0.25853747), (3, 0.038008794), (4, 0.26919973), (7, 0.14832665), (8, 0.13920134), (12, 0.029203743), (15, 0.106857546)]
4760 번째 문서의 topic 비율은 [(0, 0.034212846), (1, 0.062457863), (2, 0.5408938), (4, 0.08786444), (8, 0.19402073), (9, 0.

4964 번째 문서의 topic 비율은 [(0, 0.07344513), (6, 0.15180668), (9, 0.7216086)]
4965 번째 문서의 topic 비율은 [(9, 0.88124156)]
4966 번째 문서의 topic 비율은 [(0, 0.02500095), (1, 0.02500095), (2, 0.02500095), (3, 0.02500095), (4, 0.02500095), (5, 0.02500095), (6, 0.025000958), (7, 0.02500095), (8, 0.02500095), (9, 0.02500095), (10, 0.02500095), (11, 0.02500095), (12, 0.02500095), (13, 0.02500095), (14, 0.02500095), (15, 0.02500095), (16, 0.02500095), (17, 0.5249819), (18, 0.02500095), (19, 0.02500095)]
4967 번째 문서의 topic 비율은 [(0, 0.17095922), (2, 0.6638145), (12, 0.10442773)]
4968 번째 문서의 topic 비율은 [(2, 0.9219135), (3, 0.048062604)]
4969 번째 문서의 topic 비율은 [(0, 0.0100051565), (1, 0.0100051565), (2, 0.0100051565), (3, 0.0100051565), (4, 0.0100051565), (5, 0.0100051565), (6, 0.0100051565), (7, 0.0100051565), (8, 0.8099021), (9, 0.0100051565), (10, 0.0100051565), (11, 0.0100051565), (12, 0.0100051565), (13, 0.0100051565), (14, 0.0100051565), (15, 0.0100051565), (16, 0.0100051565), (17, 0.0100051565), (18, 0.010005

5189 번째 문서의 topic 비율은 [(0, 0.042859573), (2, 0.77742594), (15, 0.14698796)]
5190 번째 문서의 topic 비율은 [(2, 0.38990554), (8, 0.16584398), (10, 0.408145), (17, 0.02621072)]
5191 번째 문서의 topic 비율은 [(7, 0.24213603), (8, 0.43057415), (13, 0.29984397)]
5192 번째 문서의 topic 비율은 [(1, 0.016446177), (6, 0.116798736), (7, 0.05101635), (9, 0.7868892), (18, 0.017640792)]
5193 번째 문서의 topic 비율은 [(2, 0.18514894), (6, 0.10623219), (7, 0.053417925), (8, 0.53576726), (9, 0.07833757), (18, 0.023586448)]
5194 번째 문서의 topic 비율은 [(7, 0.20699085), (8, 0.3470449), (9, 0.4144467)]
5195 번째 문서의 topic 비율은 [(0, 0.26234654), (2, 0.2630308), (8, 0.27736783), (9, 0.07316578), (10, 0.10432117)]
5196 번째 문서의 topic 비율은 [(0, 0.03522605), (2, 0.0563195), (7, 0.050708387), (9, 0.8310266)]
5197 번째 문서의 topic 비율은 [(2, 0.36875355), (4, 0.043981627), (6, 0.05937494), (8, 0.41817114), (15, 0.097415075)]
5198 번째 문서의 topic 비율은 [(2, 0.0928608), (9, 0.5299916), (13, 0.04057495), (15, 0.031837363), (18, 0.28911686)]
5199 번째 문서의 topic 비율은 [(0, 0

5379 번째 문서의 topic 비율은 [(6, 0.093341924), (9, 0.4210804), (14, 0.20558172), (18, 0.2666538)]
5380 번째 문서의 topic 비율은 [(0, 0.10849601), (2, 0.57364297), (3, 0.04058608), (13, 0.05608598), (15, 0.1454532), (18, 0.04220817), (19, 0.020743692)]
5381 번째 문서의 topic 비율은 [(2, 0.17051041), (7, 0.21589497), (8, 0.46595046), (10, 0.0704525), (16, 0.06780162)]
5382 번째 문서의 topic 비율은 [(3, 0.040911645), (7, 0.12840186), (8, 0.35467973), (9, 0.09454684), (13, 0.2962774), (16, 0.075581)]
5383 번째 문서의 topic 비율은 [(2, 0.066244125), (9, 0.7884933), (14, 0.12592483)]
5384 번째 문서의 topic 비율은 [(2, 0.30547777), (4, 0.06861653), (8, 0.41549844), (9, 0.17224644)]
5385 번째 문서의 topic 비율은 [(6, 0.30931535), (8, 0.6478043)]
5386 번째 문서의 topic 비율은 [(2, 0.61741376), (9, 0.11866471), (13, 0.1181431), (15, 0.121509254)]
5387 번째 문서의 topic 비율은 [(2, 0.2119762), (7, 0.28630665), (10, 0.23612885), (12, 0.14639084), (15, 0.065484345)]
5388 번째 문서의 topic 비율은 [(7, 0.10911535), (8, 0.26179075), (9, 0.4275122), (16, 0.16145834)]
5389 번째 문서의

5590 번째 문서의 topic 비율은 [(0, 0.010009811), (1, 0.010009811), (2, 0.6101534), (3, 0.010009811), (4, 0.010009811), (5, 0.010009811), (6, 0.010009812), (7, 0.010009811), (8, 0.010009811), (9, 0.010009811), (10, 0.010009811), (11, 0.010009811), (12, 0.010009811), (13, 0.010009811), (14, 0.010009811), (15, 0.010009811), (16, 0.010009811), (17, 0.010009811), (18, 0.20967004), (19, 0.010009811)]
5591 번째 문서의 topic 비율은 [(2, 0.43098548), (3, 0.10273734), (4, 0.20646025), (9, 0.1096252), (10, 0.09639799)]
5592 번째 문서의 topic 비율은 [(2, 0.11235385), (5, 0.018576076), (8, 0.7860354), (17, 0.0525506), (18, 0.018561756)]
5593 번째 문서의 topic 비율은 [(6, 0.048164684), (8, 0.4229302), (9, 0.49193713), (17, 0.020959403)]
5594 번째 문서의 topic 비율은 [(0, 0.0561645), (2, 0.071903), (8, 0.2824334), (9, 0.56445855)]
5595 번째 문서의 topic 비율은 [(1, 0.019646216), (2, 0.021520363), (8, 0.23847273), (9, 0.6612766), (13, 0.024576379), (14, 0.021223668)]
5596 번째 문서의 topic 비율은 [(2, 0.7361154), (6, 0.2076035)]
5597 번째 문서의 topic 비율은 [(0, 

5826 번째 문서의 topic 비율은 [(0, 0.023223065), (2, 0.30743375), (3, 0.03361283), (5, 0.034151398), (7, 0.0643091), (8, 0.07606363), (10, 0.013728552), (13, 0.38904762), (19, 0.021522062)]
5827 번째 문서의 topic 비율은 [(2, 0.050388366), (9, 0.69264144), (18, 0.236719)]
5828 번째 문서의 topic 비율은 [(2, 0.5580647), (3, 0.19828989), (13, 0.16017891), (15, 0.040833376), (17, 0.025152398)]
5829 번째 문서의 topic 비율은 [(2, 0.15822046), (3, 0.033701513), (8, 0.03741133), (10, 0.7453902), (13, 0.010826516)]
5830 번째 문서의 topic 비율은 [(2, 0.025931811), (4, 0.026738161), (8, 0.27905783), (9, 0.6315816), (19, 0.02136097)]
5831 번째 문서의 topic 비율은 [(2, 0.12783805), (4, 0.025088476), (5, 0.017559022), (7, 0.048187647), (8, 0.20144367), (13, 0.5654353)]
5832 번째 문서의 topic 비율은 [(13, 0.9405855)]
5833 번째 문서의 topic 비율은 [(0, 0.05), (1, 0.05), (2, 0.05), (3, 0.05), (4, 0.05), (5, 0.05), (6, 0.05), (7, 0.05), (8, 0.05), (9, 0.05), (10, 0.05), (11, 0.05), (12, 0.05), (13, 0.05), (14, 0.05), (15, 0.05), (16, 0.05), (17, 0.05), (18, 0.05), (1

6042 번째 문서의 topic 비율은 [(0, 0.02579907), (2, 0.14437923), (4, 0.028017446), (7, 0.015680533), (8, 0.41993713), (9, 0.019387066), (10, 0.24168146), (17, 0.09747599)]
6043 번째 문서의 topic 비율은 [(2, 0.5164057), (3, 0.22422203), (9, 0.016705846), (15, 0.09529574), (19, 0.13388903)]
6044 번째 문서의 topic 비율은 [(2, 0.16061598), (6, 0.51074654), (18, 0.267895)]
6045 번째 문서의 topic 비율은 [(4, 0.049547553), (6, 0.52756226), (8, 0.087523594), (9, 0.12786742), (10, 0.18160658)]
6046 번째 문서의 topic 비율은 [(2, 0.39778897), (7, 0.4645084), (8, 0.11268552)]
6047 번째 문서의 topic 비율은 [(4, 0.31897092), (6, 0.25177664), (8, 0.17516924), (9, 0.15410736), (16, 0.07406992)]
6048 번째 문서의 topic 비율은 [(6, 0.16710629), (8, 0.0493779), (9, 0.75492305), (15, 0.020666366)]
6049 번째 문서의 topic 비율은 [(2, 0.6059077), (4, 0.059908867), (10, 0.10122724), (13, 0.15577987), (14, 0.02930237), (15, 0.014619261), (17, 0.02142266)]
6050 번째 문서의 topic 비율은 [(3, 0.16384387), (6, 0.2770918), (8, 0.48194775), (17, 0.04373928)]
6051 번째 문서의 topic 비율은 [(6, 0.

6273 번째 문서의 topic 비율은 [(2, 0.6811211), (4, 0.014030976), (8, 0.14044736), (9, 0.01343432), (12, 0.02248312), (13, 0.10999569), (19, 0.011550574)]
6274 번째 문서의 topic 비율은 [(8, 0.7558274), (12, 0.06603545), (13, 0.043114502), (16, 0.03382996), (18, 0.07698003)]
6275 번째 문서의 topic 비율은 [(2, 0.04847364), (4, 0.04514705), (8, 0.12770936), (9, 0.75064695), (15, 0.013376653)]
6276 번째 문서의 topic 비율은 [(8, 0.18549679), (12, 0.20586936), (13, 0.25614062), (16, 0.05055655), (19, 0.2892169)]
6277 번째 문서의 topic 비율은 [(0, 0.010004428), (1, 0.010004428), (2, 0.010004428), (3, 0.010004428), (4, 0.010004428), (5, 0.010004429), (6, 0.010004428), (7, 0.010004428), (8, 0.4023156), (9, 0.010004428), (10, 0.010004428), (11, 0.010004428), (12, 0.010004428), (13, 0.24819058), (14, 0.1794186), (15, 0.010004428), (16, 0.010004428), (17, 0.010004428), (18, 0.010004428), (19, 0.010004428)]
6278 번째 문서의 topic 비율은 [(6, 0.44351202), (9, 0.5314776)]
6279 번째 문서의 topic 비율은 [(2, 0.75001025), (4, 0.14521256), (18, 0.027334128), (

6538 번째 문서의 topic 비율은 [(2, 0.30232581), (6, 0.19163536), (8, 0.17010956), (9, 0.20056994), (10, 0.11114648)]
6539 번째 문서의 topic 비율은 [(1, 0.065752216), (2, 0.22593239), (3, 0.4505252), (8, 0.026509179), (13, 0.026831502), (14, 0.014432022), (15, 0.16678374)]
6540 번째 문서의 topic 비율은 [(2, 0.068718754), (6, 0.037826605), (13, 0.8306769), (18, 0.034068007), (19, 0.017158853)]
6541 번째 문서의 topic 비율은 [(2, 0.13883825), (6, 0.22652765), (8, 0.3740683), (9, 0.09177227), (13, 0.0550193), (16, 0.038006157), (17, 0.07239555)]
6542 번째 문서의 topic 비율은 [(6, 0.2943698), (8, 0.33988416), (9, 0.29490268)]
6543 번째 문서의 topic 비율은 [(2, 0.64613414), (8, 0.242363), (9, 0.045504592), (14, 0.013060756), (15, 0.013943988), (16, 0.012402177)]
6544 번째 문서의 topic 비율은 [(8, 0.5691256), (18, 0.28082982)]
6545 번째 문서의 topic 비율은 [(6, 0.24528696), (8, 0.4511987), (16, 0.20500757), (17, 0.058481574)]
6546 번째 문서의 topic 비율은 [(5, 0.013066774), (6, 0.49589136), (8, 0.049102876), (9, 0.15204324), (10, 0.07372768), (18, 0.20740281)]
654

6767 번째 문서의 topic 비율은 [(9, 0.94061196)]
6768 번째 문서의 topic 비율은 [(4, 0.13034017), (6, 0.48051393), (8, 0.12649147), (9, 0.17281306), (10, 0.03795558), (16, 0.032366086)]
6769 번째 문서의 topic 비율은 [(0, 0.01666932), (1, 0.01666932), (2, 0.01666932), (3, 0.01666932), (4, 0.01666932), (5, 0.01666932), (6, 0.01666932), (7, 0.01666932), (8, 0.01666932), (9, 0.01666932), (10, 0.34992316), (11, 0.01666932), (12, 0.01666932), (13, 0.01666932), (14, 0.01666932), (15, 0.01666932), (16, 0.01666932), (17, 0.35002908), (18, 0.01666932), (19, 0.01666932)]
6770 번째 문서의 topic 비율은 [(0, 0.1182181), (2, 0.5474361), (4, 0.16113219), (9, 0.033758048), (15, 0.12157832)]
6771 번째 문서의 topic 비율은 [(6, 0.61899626), (9, 0.33096075)]
6772 번째 문서의 topic 비율은 [(6, 0.11334896), (7, 0.02638276), (8, 0.322038), (9, 0.3852655), (14, 0.1099257), (16, 0.027785312)]
6773 번째 문서의 topic 비율은 [(0, 0.023467312), (2, 0.25221768), (4, 0.11961923), (6, 0.05528533), (7, 0.14226809), (8, 0.1994679), (9, 0.09406534), (12, 0.01137533), (15, 0.044

6989 번째 문서의 topic 비율은 [(4, 0.0698297), (6, 0.2352081), (8, 0.644909)]
6990 번째 문서의 topic 비율은 [(6, 0.06776496), (9, 0.35557818), (10, 0.019062009), (14, 0.42955798), (15, 0.018419221), (16, 0.02330979), (18, 0.07488256)]
6991 번째 문서의 topic 비율은 [(3, 0.7722797), (10, 0.13678329), (13, 0.07652343)]
6992 번째 문서의 topic 비율은 [(3, 0.6285347), (11, 0.04602975), (13, 0.27228802)]
6993 번째 문서의 topic 비율은 [(4, 0.114699244), (7, 0.1560315), (8, 0.15227042), (9, 0.50611144), (16, 0.05644809)]
6994 번째 문서의 topic 비율은 [(0, 0.012503129), (1, 0.012503129), (2, 0.01250313), (3, 0.012503129), (4, 0.012503129), (5, 0.012503129), (6, 0.7624405), (7, 0.012503129), (8, 0.012503129), (9, 0.012503129), (10, 0.012503129), (11, 0.012503129), (12, 0.012503129), (13, 0.012503129), (14, 0.012503129), (15, 0.012503129), (16, 0.012503129), (17, 0.012503129), (18, 0.012503129), (19, 0.012503129)]
6995 번째 문서의 topic 비율은 [(1, 0.07502951), (2, 0.7815329), (15, 0.08267249)]
6996 번째 문서의 topic 비율은 [(0, 0.12783973), (2, 0.6187743), (6

7156 번째 문서의 topic 비율은 [(2, 0.5308524), (3, 0.026256932), (4, 0.18351525), (7, 0.017885221), (8, 0.14460246), (9, 0.017730732), (10, 0.027240701), (14, 0.027205588), (19, 0.013136823)]
7157 번째 문서의 topic 비율은 [(2, 0.47645798), (5, 0.03879331), (6, 0.3506159), (9, 0.10445958)]
7158 번째 문서의 topic 비율은 [(0, 0.010005794), (1, 0.010005794), (2, 0.010005794), (3, 0.010005794), (4, 0.010005794), (5, 0.010005794), (6, 0.6099723), (7, 0.010005794), (8, 0.010005794), (9, 0.010005794), (10, 0.010005794), (11, 0.010005794), (12, 0.20992343), (13, 0.010005794), (14, 0.010005794), (15, 0.010005794), (16, 0.010005794), (17, 0.010005794), (18, 0.010005794), (19, 0.010005794)]
7159 번째 문서의 topic 비율은 [(2, 0.4175116), (4, 0.25852108), (7, 0.2584721)]
7160 번째 문서의 topic 비율은 [(0, 0.5957537), (2, 0.3732024)]
7161 번째 문서의 topic 비율은 [(6, 0.41359076), (9, 0.4530623), (12, 0.094697855)]
7162 번째 문서의 topic 비율은 [(2, 0.6913929), (7, 0.029677982), (8, 0.2419455), (9, 0.027383016)]
7163 번째 문서의 topic 비율은 [(7, 0.10024167), (8,

7366 번째 문서의 topic 비율은 [(8, 0.4456812), (9, 0.3937945), (15, 0.095104545)]
7367 번째 문서의 topic 비율은 [(2, 0.42210203), (3, 0.15816718), (7, 0.17265974), (8, 0.026064582), (9, 0.026994176), (13, 0.16073659), (19, 0.017888017)]
7368 번째 문서의 topic 비율은 [(2, 0.051361002), (7, 0.14841238), (10, 0.07877312), (12, 0.022217698), (13, 0.6812319), (15, 0.0103662135)]
7369 번째 문서의 topic 비율은 [(2, 0.48345277), (6, 0.15749022), (7, 0.08087182), (9, 0.2359918)]
7370 번째 문서의 topic 비율은 [(0, 0.05), (1, 0.05), (2, 0.05), (3, 0.05), (4, 0.05), (5, 0.05), (6, 0.05), (7, 0.05), (8, 0.05), (9, 0.05), (10, 0.05), (11, 0.05), (12, 0.05), (13, 0.05), (14, 0.05), (15, 0.05), (16, 0.05), (17, 0.05), (18, 0.05), (19, 0.05)]
7371 번째 문서의 topic 비율은 [(0, 0.14653401), (2, 0.7199873), (9, 0.0504941), (10, 0.052130252)]
7372 번째 문서의 topic 비율은 [(2, 0.23776178), (4, 0.079420455), (5, 0.021299886), (6, 0.22451863), (8, 0.15237823), (9, 0.26737794), (17, 0.012350176)]
7373 번째 문서의 topic 비율은 [(8, 0.7377805), (9, 0.1080271), (16, 0.05253

7607 번째 문서의 topic 비율은 [(0, 0.012507431), (1, 0.012507431), (2, 0.51230097), (3, 0.012507431), (4, 0.012507431), (5, 0.012507431), (6, 0.012507431), (7, 0.012507431), (8, 0.012507432), (9, 0.012507431), (10, 0.012507432), (11, 0.012507431), (12, 0.012507431), (13, 0.012507431), (14, 0.012507431), (15, 0.012507431), (16, 0.012507431), (17, 0.2625653), (18, 0.012507431), (19, 0.012507431)]
7608 번째 문서의 topic 비율은 [(3, 0.05799106), (4, 0.3356737), (7, 0.06675593), (8, 0.19230485), (9, 0.2435824), (18, 0.06218278)]
7609 번째 문서의 topic 비율은 [(5, 0.09303673), (8, 0.17557171), (9, 0.53163695), (11, 0.04772189), (17, 0.11791147)]
7610 번째 문서의 topic 비율은 [(0, 0.046019427), (4, 0.020089256), (6, 0.36720258), (8, 0.2469146), (9, 0.24799603), (10, 0.059679598)]
7611 번째 문서의 topic 비율은 [(6, 0.061090086), (8, 0.117823906), (9, 0.7525687), (16, 0.039926838)]
7612 번째 문서의 topic 비율은 [(2, 0.7308579), (12, 0.024459723), (13, 0.22489187)]
7613 번째 문서의 topic 비율은 [(8, 0.14255278), (9, 0.6478279), (10, 0.15642598)]
7614

7808 번째 문서의 topic 비율은 [(3, 0.0638415), (7, 0.09017457), (8, 0.07885262), (9, 0.71712035), (15, 0.029142391)]
7809 번째 문서의 topic 비율은 [(2, 0.7126782), (3, 0.18504539), (4, 0.025216576), (5, 0.0407324), (19, 0.021285053)]
7810 번째 문서의 topic 비율은 [(0, 0.3250157), (1, 0.024224926), (2, 0.3435302), (3, 0.086043485), (8, 0.18512592), (9, 0.018041482), (14, 0.012260727)]
7811 번째 문서의 topic 비율은 [(6, 0.546815), (7, 0.3781693)]
7812 번째 문서의 topic 비율은 [(2, 0.10699417), (6, 0.066012666), (8, 0.19344549), (9, 0.6189944)]
7813 번째 문서의 topic 비율은 [(2, 0.21482253), (4, 0.07645532), (9, 0.6682072)]
7814 번째 문서의 topic 비율은 [(0, 0.04302575), (1, 0.024278099), (3, 0.03818243), (8, 0.2831654), (12, 0.03282601), (13, 0.45053023), (16, 0.05440982), (17, 0.054124072)]
7815 번째 문서의 topic 비율은 [(0, 0.42188135), (2, 0.22931005), (8, 0.2756463), (9, 0.013783843), (12, 0.018550074), (19, 0.02814546)]
7816 번째 문서의 topic 비율은 [(9, 0.64279747), (17, 0.13570035), (18, 0.13646953)]
7817 번째 문서의 topic 비율은 [(2, 0.6159909), (4, 0.101486

8021 번째 문서의 topic 비율은 [(2, 0.24835272), (5, 0.09121418), (7, 0.17504622), (8, 0.3723131), (10, 0.084584504), (19, 0.017870763)]
8022 번째 문서의 topic 비율은 [(2, 0.07501964), (8, 0.7398683), (9, 0.10143426), (19, 0.06986152)]
8023 번째 문서의 topic 비율은 [(2, 0.17626627), (6, 0.16931373), (9, 0.6269785)]
8024 번째 문서의 topic 비율은 [(2, 0.24304016), (6, 0.037548058), (7, 0.01957469), (10, 0.044829793), (12, 0.014535182), (13, 0.49582526), (16, 0.08702353), (19, 0.035152707)]
8025 번째 문서의 topic 비율은 [(0, 0.07396648), (1, 0.020227881), (2, 0.042729665), (3, 0.070261344), (4, 0.03685294), (7, 0.046858538), (8, 0.5285518), (10, 0.050032746), (16, 0.034286033), (17, 0.087431915)]
8026 번째 문서의 topic 비율은 [(0, 0.05), (1, 0.05), (2, 0.05), (3, 0.05), (4, 0.05), (5, 0.05), (6, 0.05), (7, 0.05), (8, 0.05), (9, 0.05), (10, 0.05), (11, 0.05), (12, 0.05), (13, 0.05), (14, 0.05), (15, 0.05), (16, 0.05), (17, 0.05), (18, 0.05), (19, 0.05)]
8027 번째 문서의 topic 비율은 [(2, 0.6896136), (8, 0.26985544), (9, 0.031935837)]
8028 번째 문서의

8217 번째 문서의 topic 비율은 [(2, 0.09402162), (4, 0.14035356), (6, 0.42489842), (8, 0.25497758), (14, 0.04838082), (18, 0.025680687)]
8218 번째 문서의 topic 비율은 [(0, 0.016786737), (2, 0.48945606), (3, 0.16737424), (7, 0.037216358), (8, 0.12596378), (10, 0.014961227), (15, 0.11118748), (19, 0.018686166)]
8219 번째 문서의 topic 비율은 [(2, 0.65511036), (7, 0.22699867), (8, 0.07635108), (15, 0.027234092)]
8220 번째 문서의 topic 비율은 [(6, 0.31237862), (8, 0.21738909), (9, 0.24410886), (14, 0.10229501), (19, 0.07379407)]
8221 번째 문서의 topic 비율은 [(2, 0.21502328), (3, 0.028056607), (5, 0.018677149), (8, 0.13819434), (10, 0.024316257), (12, 0.054519463), (13, 0.5040156), (19, 0.010870446)]
8222 번째 문서의 topic 비율은 [(2, 0.07820797), (3, 0.03785589), (5, 0.024889255), (12, 0.059978954), (13, 0.75287926), (19, 0.03747543)]
8223 번째 문서의 topic 비율은 [(6, 0.16106236), (9, 0.7639087)]
8224 번째 문서의 topic 비율은 [(2, 0.38598952), (3, 0.41263503), (7, 0.0881998), (8, 0.037614804), (12, 0.013598526), (15, 0.016139705)]
8225 번째 문서의 topic 비율은

8439 번째 문서의 topic 비율은 [(2, 0.191177), (4, 0.17950757), (6, 0.30286658), (8, 0.15501927), (9, 0.0775636), (18, 0.08256225)]
8440 번째 문서의 topic 비율은 [(4, 0.27422696), (7, 0.17162596), (9, 0.48053452), (18, 0.0572765)]
8441 번째 문서의 topic 비율은 [(5, 0.017434197), (6, 0.24259758), (7, 0.44763556), (8, 0.16963969), (11, 0.08797869), (12, 0.023769137)]
8442 번째 문서의 topic 비율은 [(0, 0.036193274), (1, 0.20334244), (8, 0.31004214), (9, 0.3071566), (12, 0.034020085), (17, 0.086625874)]
8443 번째 문서의 topic 비율은 [(0, 0.05), (1, 0.05), (2, 0.05), (3, 0.05), (4, 0.05), (5, 0.05), (6, 0.05), (7, 0.05), (8, 0.05), (9, 0.05), (10, 0.05), (11, 0.05), (12, 0.05), (13, 0.05), (14, 0.05), (15, 0.05), (16, 0.05), (17, 0.05), (18, 0.05), (19, 0.05)]
8444 번째 문서의 topic 비율은 [(9, 0.08605498), (10, 0.26250973), (13, 0.546464), (19, 0.064941585)]
8445 번째 문서의 topic 비율은 [(2, 0.09816791), (4, 0.7564243), (8, 0.012054481), (9, 0.022067318), (15, 0.097778864)]
8446 번째 문서의 topic 비율은 [(8, 0.4926632), (13, 0.13075499), (18, 0.1452381

8637 번째 문서의 topic 비율은 [(4, 0.5672948), (7, 0.24900618), (8, 0.15097839)]
8638 번째 문서의 topic 비율은 [(0, 0.09849006), (1, 0.0683216), (3, 0.026467558), (5, 0.024388377), (6, 0.13660426), (8, 0.2628445), (9, 0.06678397), (16, 0.057751622), (17, 0.22241221), (18, 0.024291625)]
8639 번째 문서의 topic 비율은 [(3, 0.04196448), (5, 0.03889397), (6, 0.20401894), (9, 0.68546104)]
8640 번째 문서의 topic 비율은 [(10, 0.14526111), (13, 0.59102213), (17, 0.18638384)]
8641 번째 문서의 topic 비율은 [(0, 0.4422793), (2, 0.2284884), (5, 0.04164085), (8, 0.21960722), (10, 0.015210549), (14, 0.030598553)]
8642 번째 문서의 topic 비율은 [(0, 0.34887117), (1, 0.60006845), (8, 0.026337931), (10, 0.022581695)]
8643 번째 문서의 topic 비율은 [(8, 0.6142358), (9, 0.23962857), (11, 0.080738075)]
8644 번째 문서의 topic 비율은 [(0, 0.0258921), (2, 0.17809016), (4, 0.041868303), (6, 0.06149153), (7, 0.092962526), (8, 0.17765845), (9, 0.29272163), (10, 0.04876491), (14, 0.034899604), (18, 0.02201163), (19, 0.016804358)]
8645 번째 문서의 topic 비율은 [(8, 0.5071785), (9, 0.460

8884 번째 문서의 topic 비율은 [(0, 0.025023129), (1, 0.025023129), (2, 0.5245605), (3, 0.025023129), (4, 0.025023129), (5, 0.025023129), (6, 0.025023129), (7, 0.025023129), (8, 0.025023129), (9, 0.025023129), (10, 0.025023129), (11, 0.025023129), (12, 0.025023129), (13, 0.02502313), (14, 0.025023129), (15, 0.025023129), (16, 0.025023129), (17, 0.025023129), (18, 0.025023129), (19, 0.025023129)]
8885 번째 문서의 topic 비율은 [(2, 0.23651022), (6, 0.30278936), (7, 0.033223547), (9, 0.31948635), (17, 0.063136905), (19, 0.02727393)]
8886 번째 문서의 topic 비율은 [(2, 0.6878764), (4, 0.1815655), (14, 0.02187375), (15, 0.037110023), (17, 0.057665702)]
8887 번째 문서의 topic 비율은 [(2, 0.17508627), (6, 0.055345837), (9, 0.0926904), (11, 0.010765979), (14, 0.03795666), (18, 0.62053764)]
8888 번째 문서의 topic 비율은 [(6, 0.51318324), (8, 0.2817058), (9, 0.17853431)]
8889 번째 문서의 topic 비율은 [(2, 0.50763535), (3, 0.23360926), (8, 0.16503333), (10, 0.061690614)]
8890 번째 문서의 topic 비율은 [(2, 0.20024987), (6, 0.7179084)]
8891 번째 문서의 topic 비

9102 번째 문서의 topic 비율은 [(4, 0.111247726), (8, 0.2695067), (10, 0.5302867), (13, 0.05076525)]
9103 번째 문서의 topic 비율은 [(0, 0.24559768), (1, 0.05797873), (6, 0.16973208), (13, 0.2297814), (14, 0.12352126), (19, 0.13444556)]
9104 번째 문서의 topic 비율은 [(6, 0.84356076), (12, 0.032506313), (17, 0.032710448), (18, 0.066193)]
9105 번째 문서의 topic 비율은 [(6, 0.25631344), (9, 0.42352864), (18, 0.28319663)]
9106 번째 문서의 topic 비율은 [(0, 0.34093338), (3, 0.2565324), (6, 0.03342813), (7, 0.055361275), (8, 0.04718985), (9, 0.18091169), (13, 0.03837772), (17, 0.02203043), (18, 0.022241512)]
9107 번째 문서의 topic 비율은 [(6, 0.5828815), (9, 0.27232352), (12, 0.08807448)]
9108 번째 문서의 topic 비율은 [(5, 0.06984061), (8, 0.5793559), (15, 0.29409266)]
9109 번째 문서의 topic 비율은 [(2, 0.16326785), (8, 0.51440233), (12, 0.25146055)]
9110 번째 문서의 topic 비율은 [(0, 0.2284727), (2, 0.10177962), (8, 0.35614318), (9, 0.043079477), (10, 0.124142766), (17, 0.12792437)]
9111 번째 문서의 topic 비율은 [(0, 0.500556), (8, 0.3494074), (14, 0.08462436)]
9112 번째 문

9299 번째 문서의 topic 비율은 [(2, 0.05452977), (4, 0.023729831), (7, 0.30083555), (8, 0.12047816), (9, 0.06893955), (13, 0.4143186)]
9300 번째 문서의 topic 비율은 [(0, 0.05), (1, 0.05), (2, 0.05), (3, 0.05), (4, 0.05), (5, 0.05), (6, 0.05), (7, 0.05), (8, 0.05), (9, 0.05), (10, 0.05), (11, 0.05), (12, 0.05), (13, 0.05), (14, 0.05), (15, 0.05), (16, 0.05), (17, 0.05), (18, 0.05), (19, 0.05)]
9301 번째 문서의 topic 비율은 [(3, 0.054910097), (4, 0.47882202), (6, 0.027015332), (8, 0.23110412), (9, 0.14828368), (12, 0.014117323), (18, 0.036702562)]
9302 번째 문서의 topic 비율은 [(1, 0.01887484), (2, 0.081312485), (8, 0.38737357), (9, 0.44844604), (10, 0.051267702)]
9303 번째 문서의 topic 비율은 [(3, 0.043022115), (4, 0.052567407), (7, 0.03439017), (8, 0.593292), (10, 0.018467441), (13, 0.12291545), (14, 0.058590602), (15, 0.031369764), (18, 0.013385703), (19, 0.025132678)]
9304 번째 문서의 topic 비율은 [(2, 0.08370275), (3, 0.052748688), (8, 0.3757557), (10, 0.29531145), (17, 0.1422736), (19, 0.03852338)]
9305 번째 문서의 topic 비율은 [(2, 0.36

9517 번째 문서의 topic 비율은 [(2, 0.13664427), (3, 0.011811816), (7, 0.020039437), (8, 0.067741185), (10, 0.11517552), (12, 0.036238905), (13, 0.5813989), (15, 0.01617341)]
9518 번째 문서의 topic 비율은 [(4, 0.6904065), (9, 0.26865613)]
9519 번째 문서의 topic 비율은 [(2, 0.22966287), (4, 0.21277703), (5, 0.01727297), (7, 0.28873932), (8, 0.124724396), (13, 0.08923767), (19, 0.026734643)]
9520 번째 문서의 topic 비율은 [(0, 0.05), (1, 0.05), (2, 0.05), (3, 0.05), (4, 0.05), (5, 0.05), (6, 0.05), (7, 0.05), (8, 0.05), (9, 0.05), (10, 0.05), (11, 0.05), (12, 0.05), (13, 0.05), (14, 0.05), (15, 0.05), (16, 0.05), (17, 0.05), (18, 0.05), (19, 0.05)]
9521 번째 문서의 topic 비율은 [(2, 0.7675949), (6, 0.12049698), (12, 0.079188816)]
9522 번째 문서의 topic 비율은 [(7, 0.5489333), (8, 0.39104807)]
9523 번째 문서의 topic 비율은 [(7, 0.11169163), (8, 0.58518314), (9, 0.1750199), (14, 0.08429574), (16, 0.027483813)]
9524 번째 문서의 topic 비율은 [(2, 0.38224238), (6, 0.45769328), (13, 0.1213942)]
9525 번째 문서의 topic 비율은 [(1, 0.017633839), (2, 0.23178688), (6, 0.

9708 번째 문서의 topic 비율은 [(8, 0.40731552), (13, 0.5108241)]
9709 번째 문서의 topic 비율은 [(2, 0.9331974), (12, 0.016351433), (13, 0.037163038)]
9710 번째 문서의 topic 비율은 [(0, 0.43971023), (2, 0.05738356), (5, 0.029763725), (7, 0.035972107), (8, 0.085217744), (10, 0.27097228), (14, 0.06229238)]
9711 번째 문서의 topic 비율은 [(2, 0.23949252), (3, 0.035121977), (8, 0.33458778), (13, 0.29118657), (17, 0.033088412), (18, 0.05786774)]
9712 번째 문서의 topic 비율은 [(1, 0.033661008), (2, 0.075294994), (4, 0.015047556), (6, 0.018081661), (7, 0.0629564), (8, 0.6131467), (9, 0.053895433), (10, 0.047850013), (17, 0.033295017), (19, 0.027053425)]
9713 번째 문서의 topic 비율은 [(1, 0.045626033), (4, 0.14815818), (7, 0.29947403), (8, 0.23977888), (9, 0.19028771), (15, 0.04619975)]
9714 번째 문서의 topic 비율은 [(8, 0.3974977), (9, 0.14327009), (13, 0.29101187), (15, 0.06490008), (17, 0.030095607), (18, 0.055244137)]
9715 번째 문서의 topic 비율은 [(0, 0.4941161), (1, 0.027093375), (6, 0.21909678), (7, 0.06780299), (8, 0.09070162), (16, 0.08367261)]
9716

9915 번째 문서의 topic 비율은 [(2, 0.5125018), (3, 0.025838837), (7, 0.02882692), (8, 0.20797522), (12, 0.018889206), (13, 0.049666625), (15, 0.14582935)]
9916 번째 문서의 topic 비율은 [(0, 0.02624341), (2, 0.43983757), (4, 0.017495092), (7, 0.017270565), (8, 0.30054262), (15, 0.13531575), (16, 0.0322446), (19, 0.011536579)]
9917 번째 문서의 topic 비율은 [(0, 0.016667692), (1, 0.01666769), (2, 0.01666769), (3, 0.01666769), (4, 0.01666769), (5, 0.01666769), (6, 0.01666769), (7, 0.01666769), (8, 0.01666769), (9, 0.01666769), (10, 0.01666769), (11, 0.01666769), (12, 0.01666769), (13, 0.01666769), (14, 0.01666769), (15, 0.01666769), (16, 0.68331385), (17, 0.01666769), (18, 0.01666769), (19, 0.01666769)]
9918 번째 문서의 topic 비율은 [(8, 0.37560263), (9, 0.32153225), (10, 0.07147372), (15, 0.10266062), (16, 0.11898073)]
9919 번째 문서의 topic 비율은 [(5, 0.045629647), (6, 0.30083942), (9, 0.51785934), (18, 0.100872874)]
9920 번째 문서의 topic 비율은 [(2, 0.26546568), (3, 0.3975846), (6, 0.092126764), (7, 0.054787863), (8, 0.065408215), 

10140 번째 문서의 topic 비율은 [(6, 0.15858382), (7, 0.09196716), (8, 0.68393713)]
10141 번째 문서의 topic 비율은 [(4, 0.07121149), (8, 0.06924923), (9, 0.83523333), (13, 0.015208787)]
10142 번째 문서의 topic 비율은 [(0, 0.19426945), (2, 0.045760125), (4, 0.0825459), (8, 0.42794177), (10, 0.09407197), (12, 0.07225747), (15, 0.023265392), (17, 0.023860054), (19, 0.023486536)]
10143 번째 문서의 topic 비율은 [(2, 0.43552244), (3, 0.116384745), (5, 0.04768009), (8, 0.1955373), (13, 0.07753369), (14, 0.049375203), (18, 0.048370782)]
10144 번째 문서의 topic 비율은 [(2, 0.09604949), (3, 0.020217821), (8, 0.7843542), (9, 0.051998887), (19, 0.034628466)]
10145 번째 문서의 topic 비율은 [(2, 0.41025278), (4, 0.40436164), (5, 0.026204238), (6, 0.13916671)]
10146 번째 문서의 topic 비율은 [(6, 0.045316737), (8, 0.23695582), (9, 0.70253736)]
10147 번째 문서의 topic 비율은 [(0, 0.05), (1, 0.05), (2, 0.05), (3, 0.05), (4, 0.05), (5, 0.05), (6, 0.05), (7, 0.05), (8, 0.05), (9, 0.05), (10, 0.05), (11, 0.05), (12, 0.05), (13, 0.05), (14, 0.05), (15, 0.05), (16, 0.05),

10371 번째 문서의 topic 비율은 [(2, 0.7993642), (4, 0.12560663)]
10372 번째 문서의 topic 비율은 [(1, 0.052115023), (2, 0.636722), (3, 0.26863483)]
10373 번째 문서의 topic 비율은 [(7, 0.02094594), (9, 0.92965454), (19, 0.035223044)]
10374 번째 문서의 topic 비율은 [(0, 0.016669001), (1, 0.016669001), (2, 0.016669001), (3, 0.016669001), (4, 0.016669001), (5, 0.016669001), (6, 0.016669001), (7, 0.016669001), (8, 0.683289), (9, 0.016669001), (10, 0.016669001), (11, 0.016669001), (12, 0.016669001), (13, 0.016669001), (14, 0.016669001), (15, 0.016669001), (16, 0.016669001), (17, 0.016669001), (18, 0.016669001), (19, 0.016669001)]
10375 번째 문서의 topic 비율은 [(2, 0.7196248), (4, 0.022817576), (7, 0.06102413), (8, 0.115818635), (16, 0.016955882), (19, 0.052622568)]
10376 번째 문서의 topic 비율은 [(2, 0.5451696), (4, 0.070246115), (7, 0.24930097), (10, 0.11621764)]
10377 번째 문서의 topic 비율은 [(3, 0.10987709), (4, 0.116964795), (8, 0.5702419), (14, 0.122834474)]
10378 번째 문서의 topic 비율은 [(6, 0.16195825), (7, 0.09516287), (8, 0.3620524), (16, 0.30

10574 번째 문서의 topic 비율은 [(0, 0.5384162), (1, 0.028331412), (2, 0.079414554), (3, 0.031250857), (5, 0.02778029), (6, 0.08990062), (14, 0.04750818), (16, 0.07358129), (17, 0.040359348), (19, 0.029450279)]
10575 번째 문서의 topic 비율은 [(2, 0.572672), (4, 0.15979049), (7, 0.051725868), (8, 0.11366516), (9, 0.044358555), (13, 0.026514648)]
10576 번째 문서의 topic 비율은 [(6, 0.38720804), (8, 0.27175635), (9, 0.16241895), (14, 0.02358464), (17, 0.10596264), (19, 0.03655098)]
10577 번째 문서의 topic 비율은 [(3, 0.15336296), (6, 0.42453676), (16, 0.3157783)]
10578 번째 문서의 topic 비율은 [(2, 0.10204259), (6, 0.037061065), (7, 0.45218566), (13, 0.39490154)]
10579 번째 문서의 topic 비율은 [(0, 0.3864868), (7, 0.10072647), (9, 0.06664816), (10, 0.3173684), (17, 0.10901436)]
10580 번째 문서의 topic 비율은 [(0, 0.0125030065), (1, 0.0125030065), (2, 0.0125030065), (3, 0.0125030065), (4, 0.0125030065), (5, 0.0125030065), (6, 0.012503008), (7, 0.0125030065), (8, 0.26251918), (9, 0.0125030065), (10, 0.0125030065), (11, 0.0125030065), (12, 0.01250

10785 번째 문서의 topic 비율은 [(2, 0.43286178), (4, 0.4410959), (7, 0.026507821), (8, 0.07103834), (19, 0.016583204)]
10786 번째 문서의 topic 비율은 [(2, 0.52768296), (4, 0.057919867), (8, 0.3703472), (19, 0.037207495)]
10787 번째 문서의 topic 비율은 [(2, 0.17945895), (4, 0.44139785), (8, 0.072096236), (9, 0.2678793), (11, 0.022827601)]
10788 번째 문서의 topic 비율은 [(9, 0.92689973)]
10789 번째 문서의 topic 비율은 [(2, 0.42163768), (4, 0.32165778), (8, 0.19606608), (9, 0.031229805), (19, 0.017095175)]
10790 번째 문서의 topic 비율은 [(1, 0.06222794), (2, 0.21292105), (7, 0.2019387), (8, 0.33610907), (10, 0.11024288), (11, 0.062252395)]
10791 번째 문서의 topic 비율은 [(0, 0.05), (1, 0.05), (2, 0.05), (3, 0.05), (4, 0.05), (5, 0.05), (6, 0.05), (7, 0.05), (8, 0.05), (9, 0.05), (10, 0.05), (11, 0.05), (12, 0.05), (13, 0.05), (14, 0.05), (15, 0.05), (16, 0.05), (17, 0.05), (18, 0.05), (19, 0.05)]
10792 번째 문서의 topic 비율은 [(0, 0.377085), (1, 0.042815156), (8, 0.4655511), (19, 0.08253388)]
10793 번째 문서의 topic 비율은 [(0, 0.10850009), (2, 0.24869432), 

10994 번째 문서의 topic 비율은 [(4, 0.029825855), (6, 0.061859123), (8, 0.45308942), (9, 0.4448281)]
10995 번째 문서의 topic 비율은 [(0, 0.021236807), (2, 0.16639648), (4, 0.019338768), (6, 0.01666211), (8, 0.07767616), (12, 0.026669867), (13, 0.6117805), (19, 0.041878678)]
10996 번째 문서의 topic 비율은 [(8, 0.15894929), (9, 0.7762919), (10, 0.038178552)]
10997 번째 문서의 topic 비율은 [(2, 0.12802759), (6, 0.6458736), (8, 0.20108075)]
10998 번째 문서의 topic 비율은 [(1, 0.115665525), (2, 0.40574932), (3, 0.20037518), (7, 0.10572464), (9, 0.024696145), (16, 0.13648921)]
10999 번째 문서의 topic 비율은 [(9, 0.9712045)]
11000 번째 문서의 topic 비율은 [(0, 0.016674122), (1, 0.016674122), (2, 0.016674122), (3, 0.016674122), (4, 0.016674122), (5, 0.34987763), (6, 0.016674122), (7, 0.016674122), (8, 0.016674122), (9, 0.016674122), (10, 0.016674122), (11, 0.016674122), (12, 0.34998813), (13, 0.016674122), (14, 0.016674122), (15, 0.016674122), (16, 0.016674122), (17, 0.016674122), (18, 0.016674122), (19, 0.016674122)]
11001 번째 문서의 topic 비율은 [(6, 0.

11206 번째 문서의 topic 비율은 [(2, 0.20772558), (3, 0.013449002), (7, 0.1714859), (8, 0.042226683), (12, 0.01566153), (13, 0.53399014), (14, 0.012105775)]
11207 번째 문서의 topic 비율은 [(2, 0.3716653), (4, 0.03196004), (5, 0.021865176), (6, 0.03750748), (8, 0.20197678), (10, 0.025416987), (12, 0.046826363), (13, 0.21215901), (15, 0.030020393), (18, 0.010109709)]
11208 번째 문서의 topic 비율은 [(7, 0.10515436), (8, 0.75024337), (12, 0.0678714), (19, 0.04471181)]
11209 번째 문서의 topic 비율은 [(2, 0.19818538), (8, 0.5714323), (10, 0.037832946), (12, 0.035788357), (15, 0.13085201)]
11210 번째 문서의 topic 비율은 [(4, 0.6621293), (9, 0.3166592), (18, 0.0125299385)]
11211 번째 문서의 topic 비율은 [(2, 0.03281652), (6, 0.05775285), (9, 0.7886725), (14, 0.03657344), (17, 0.059907593)]
11212 번째 문서의 topic 비율은 [(0, 0.82107127), (18, 0.097059734)]
11213 번째 문서의 topic 비율은 [(2, 0.21828137), (4, 0.20382285), (6, 0.28238112), (7, 0.110810764), (12, 0.02405367), (16, 0.045771815), (18, 0.10041678)]
11214 번째 문서의 topic 비율은 [(0, 0.2613898), (2, 0.46

In [37]:
# 더 깔끔하게 출력.
def make_topictable_per_doc(ldamodel, corpus):
    topic_table = pd.DataFrame()
    
    for i, topic_list in enumerate(ldamodel[corpus]):
        doc = topic_list[0] if ldamodel.per_word_topics else topic_list
        doc = sorted(doc, key=lambda x: (x[1]), reverse=True)    # 비중이 높은 순으로 토픽 정렬
        
        # 가장 비중이 높은 토픽 인덱스, 비중 값, 전체 토픽 리스트
        topic_table = topic_table.append(pd.Series([doc[0][0], round(doc[0][1], 4), topic_list]), ignore_index=True)
    
    return topic_table

In [39]:
topictable = make_topictable_per_doc(ldamodel, corpus)
topictable = topictable.reset_index()    # 문서 번호를 의미하는 열로 사용
topictable.columns = ['문서 번호', '가장 비중이 높은 토픽', '가장 높은 토픽의 비중', '각 토픽의 비중']
topictable[:10]

Unnamed: 0,문서 번호,가장 비중이 높은 토픽,가장 높은 토픽의 비중,각 토픽의 비중
0,0,8.0,0.3836,"[(2, 0.15162021), (6, 0.16131221), (8, 0.38361..."
1,1,9.0,0.3475,"[(6, 0.14696093), (7, 0.2321774), (8, 0.125817..."
2,2,8.0,0.5512,"[(0, 0.04179684), (2, 0.050970294), (6, 0.0777..."
3,3,6.0,0.3977,"[(4, 0.28465807), (6, 0.3977327), (8, 0.2112057)]"
4,4,7.0,0.351,"[(3, 0.14370409), (4, 0.09284805), (7, 0.35103..."
5,5,13.0,0.3478,"[(2, 0.32270494), (7, 0.30769378), (13, 0.3477..."
6,6,2.0,0.3551,"[(2, 0.35513243), (3, 0.060435), (6, 0.2782454..."
7,7,9.0,0.9913,"[(9, 0.99128217)]"
8,8,6.0,0.9208,"[(6, 0.9208158)]"
9,9,9.0,0.4811,"[(2, 0.04537963), (6, 0.3233918), (8, 0.112225..."
