# 모바일 게임 리뷰 토픽 모델링


## 분석 대상
: 블루아카이브, 니케, 원신, 붕괴:스타레일

### 토픽 모델링 기법
- LDA
- KeyBERT
- CTM(BERT-base)
- BERTopic

## 공통 전처리

- 접속사 제거 
- 조사 제거
- 한글,숫자 제외 텍스트 제거

In [2]:
import re
import pandas as pd
from soynlp.normalizer import repeat_normalize

In [3]:
with open('./data/stopwords_post_position.txt', 'r') as f:
    josa_lst = f.readlines()

with open('./data/stopword_conjunction.txt', 'r') as f:
    conjunction_lst = f.readline().split(', ')

# 불용어 처리
stopwords_pPosition = []
for josa in josa_lst:
    josa = re.sub('\n|\t', '', josa)
    if '/' in josa:
        josa_words = josa.split('/')
    else:
        josa_words = [josa]

    [stopwords_pPosition.append(word) for word in josa_words]

In [4]:
def pp_stopwords_pposition(txt, stopwords = stopwords_pPosition):
    
    split_words = txt.split()

    result = []
    for word in split_words:
        for length in range(max(map(len, stopwords)),0 , -1):
            if word[-length:] in stopwords:
                result.append(word[:-length])
                break
            elif length == 1:
                result.append(word)

    result = ' '.join(result)

    return result



def pp_stopwords_conjunction(txt, stopwords = conjunction_lst):
    for stopword in stopwords:
        if stopword in txt:

            # Stopword의 위치 찾기
            check_before_idx = re.search(stopword, txt).start() -1
            check_after_idx = re.search(stopword, txt).end() # idx가 아니라 번째 개념으로 자동으로 +1 되어있음

            # 시작위치가 첫번째일떄 예외처리
            if check_before_idx == -1:
                check_before_blank = True
            else:
                check_before_blank = True if txt[check_before_idx] == ' ' else False
            
            #종료지점이 끝위치일떄 예외처리
            if check_after_idx == len(txt):
                check_after_blank = True
            else:
                check_after_blank = True if txt[check_after_idx] == ' ' else False
            
            if check_before_blank and check_after_blank:
                txt = re.sub(stopword, ' ', txt).strip()
        
    return txt

def del_stopwords(txt):
    txt = pp_stopwords_conjunction(txt) # 접속사 제거
    txt = pp_stopwords_pposition(txt) # 조사 제거
    txt = re.sub('[^가-힣]', ' ', txt).strip() # 한글 제외 제거
    txt = repeat_normalize(txt, num_repeats=3)

    return txt

data = pd.read_csv('./data/reivews_df_preprocssing_ver.csv')
data['content'] = data['content'].apply(del_stopwords)

In [5]:
data = pd.read_csv('./data/reivews_df_preprocssing_ver.csv')
data['content'] = data['content'].apply(del_stopwords)

## LDA

상위 1,000개의 단어로 LDA진행

In [6]:
from konlpy.tag import Okt, Mecab
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.decomposition import LatentDirichletAllocation as LDA

mecab = Mecab()
okt = Okt()

In [7]:
review_content = data['content']
review_token_mecab = review_content.apply(mecab.nouns)
review_token_okt = review_content.apply(okt.pos)

def okt_token_div(tagging_lst):
    return [word[0] for word in tagging_lst if word[1] == 'Noun']

review_token_okt = review_token_okt = review_token_okt.apply(okt_token_div)


In [8]:
stopwords = ['도', '는', '다', '의', '가', '이', '은', '한', '에', '하', '고', '을', '를', '인', '듯',
              '과', '와', '네', '들', '듯', '지', '임', '게', '만', '되', '음', '면']

review_token_mecab = review_token_mecab.apply(lambda x: [item for item in x if item not in stopwords])
review_token_okt = review_token_okt.apply(lambda x: [item for item in x if item not in stopwords])

In [9]:
detokenized_mecab = []
detokenized_okt = []


for i in range(len(review_content)):
    t1 = ' '.join(review_token_mecab[i])
    t2 = ' '.join(review_token_okt[i])

    detokenized_mecab.append(t1)
    detokenized_okt.append(t2)


In [10]:
# Tfidf
vectorizer_m = TfidfVectorizer(max_features=1000)
vectorizer_o = TfidfVectorizer(max_features=1000)

X_1 = vectorizer_m.fit_transform(detokenized_mecab)
X_2 = vectorizer_o.fit_transform(detokenized_okt)

In [15]:
# LDA 모델링
lda_model_1 = LDA(n_components=30, learning_method='online', random_state=777, max_iter=1)
lda_model_1.fit_transform(X_1)

lda_model_2 = LDA(n_components=30, learning_method='online', random_state=777, max_iter=1)
lda_model_2.fit_transform(X_2)


array([[0.00709613, 0.00709613, 0.00709613, ..., 0.00709613, 0.00709613,
        0.00709613],
       [0.01427196, 0.01427196, 0.01427196, ..., 0.01427196, 0.01427196,
        0.01427196],
       [0.03333333, 0.03333333, 0.03333333, ..., 0.03333333, 0.03333333,
        0.03333333],
       ...,
       [0.03333333, 0.03333333, 0.03333333, ..., 0.03333333, 0.03333333,
        0.03333333],
       [0.01666667, 0.01666667, 0.01666667, ..., 0.01666667, 0.01666667,
        0.01666667],
       [0.01666667, 0.01666667, 0.01666667, ..., 0.01666667, 0.01666667,
        0.01666667]])

In [16]:
# 결과 확인
terms_m = vectorizer_m.get_feature_names_out()
terms_o = vectorizer_o.get_feature_names_out()

def get_topics(components, feature_names, n=5): 
    for idx, topic in enumerate(components):
        print("Topic %d:" % (idx+1), [(feature_names[i], topic[i].round(2)) for i in topic.argsort()[:-n - 1:-1]])

In [22]:
# LDA - Topics (Mecab)
get_topics(lda_model_1.components_,terms_m)

Topic 1: [('스마트폰', 843.31), ('게임', 175.54), ('다음', 137.63), ('어디', 120.49), ('이야기', 111.34)]
Topic 2: [('백도어', 585.76), ('개발', 427.99), ('스타', 372.62), ('출시', 270.12), ('게임', 227.46)]
Topic 3: [('번역', 3314.37), ('최고', 685.93), ('게임', 324.19), ('업체', 289.25), ('처음', 205.5)]
Topic 4: [('업데이트', 451.26), ('오류', 332.61), ('데이터', 146.94), ('명작', 71.82), ('고치', 67.57)]
Topic 5: [('개꿀', 317.9), ('배터리', 120.75), ('주기', 69.29), ('조아', 66.39), ('자기', 62.66)]
Topic 6: [('소리', 287.03), ('필요', 280.96), ('신경', 194.62), ('내용', 169.53), ('사운드', 118.39)]
Topic 7: [('확률', 367.08), ('이상', 353.04), ('조작', 303.22), ('가능', 150.08), ('얼마', 114.23)]
Topic 8: [('자체', 212.12), ('번역기', 183.62), ('기분', 84.12), ('울트라', 73.39), ('우리', 67.8)]
Topic 9: [('스킵', 519.77), ('시간', 488.78), ('개인', 372.44), ('자동', 256.26), ('게임', 231.36)]
Topic 10: [('표절', 450.08), ('이게', 429.87), ('정보', 417.01), ('이해', 400.58), ('삭제', 309.41)]
Topic 11: [('감사', 237.94), ('제발', 122.15), ('오랜만', 111.29), ('경험', 75.97), ('요즘', 51.27)]
Topic 12

In [23]:
# LDA - Topics (Okt)
get_topics(lda_model_2.components_,terms_o)

Topic 1: [('모험', 174.91), ('인생게임', 166.26), ('컨트롤', 126.01), ('하나', 119.42), ('간만', 91.29)]
Topic 2: [('스토리', 613.45), ('레일', 377.99), ('스타', 363.0), ('개발', 358.1), ('업데이트', 334.0)]
Topic 3: [('초반', 274.48), ('사운드', 131.1), ('지지', 59.47), ('시진핑', 33.8), ('만세', 23.35)]
Topic 4: [('재밋음', 103.6), ('잼남', 72.25), ('플스', 62.38), ('여러분', 58.84), ('바람', 55.36)]
Topic 5: [('퀄리티', 489.82), ('수준', 347.1), ('오픈월드', 198.98), ('자유도', 164.52), ('단점', 113.63)]
Topic 6: [('콘솔', 138.11), ('어디', 95.1), ('기분', 80.24), ('인정', 71.62), ('차라리', 62.82)]
Topic 7: [('뽑기', 517.15), ('제발', 457.84), ('확률', 322.96), ('조작', 284.89), ('삭제', 238.9)]
Topic 8: [('처음', 262.4), ('진행', 242.21), ('이상', 168.55), ('엉망', 167.29), ('실망', 133.68)]
Topic 9: [('그래픽', 914.31), ('젤다', 576.46), ('게임', 390.51), ('플레이', 290.49), ('정도', 266.0)]
Topic 10: [('꿀잼', 444.98), ('로딩', 163.92), ('고치', 150.79), ('다시', 126.79), ('굿굿', 110.34)]
Topic 11: [('진짜', 759.74), ('노잼', 220.75), ('사랑', 181.12), ('게임', 92.78), ('여기', 86.15)]
Topic 12: [('존잼'

#### 게임별 토픽

In [26]:
class custom_LDA():
    def __init__(self, content):
        self.content = content
        self.mecab = Mecab()
        self.okt = Okt()
        self.preprocessing()
    
    def preprocessing(self, n_features = 1000):
        review_token_mecab = self.content.apply(self.mecab.nouns)
        review_token_okt = self.content.apply(self.okt.pos)
        def okt_token_div(tagging_lst):
            return [word[0] for word in tagging_lst if word[1] == 'Noun']

        review_token_okt = review_token_okt.apply(okt_token_div)

        stopwords = ['도', '는', '다', '의', '가', '이', '은', '한', '에', '하', '고', '을', '를', '인', '듯',
              '과', '와', '네', '들', '듯', '지', '임', '게', '만', '되', '음', '면']

        review_token_mecab = review_token_mecab.apply(lambda x: [item for item in x if item not in stopwords])
        review_token_okt = review_token_okt.apply(lambda x: [item for item in x if item not in stopwords])

        self.detokenized_mecab = []
        self.detokenized_okt = []

        for i in range(len(self.content)):
            t1 = ' '.join(review_token_mecab[i])
            t2 = ' '.join(review_token_okt[i])

            self.detokenized_mecab.append(t1)
            self.detokenized_okt.append(t2)

        # Tfidf
        self.vectorizer_mecab = TfidfVectorizer(max_features=n_features)
        self.vectorizer_okt = TfidfVectorizer(max_features=n_features)

        self.modeling()
        
    def modeling(self):
        X_1 = self.vectorizer_mecab.fit_transform(self.detokenized_mecab)
        X_2 = self.vectorizer_okt.fit_transform(self.detokenized_okt)

        # LDA 모델링
        lda_model_1 = LDA(n_components=30, learning_method='online', random_state=777, max_iter=1)
        lda_model_1.fit_transform(X_1)
        self.lda_model_1_components = lda_model_1.components_

        lda_model_2 = LDA(n_components=30, learning_method='online', random_state=777, max_iter=1)
        lda_model_2.fit_transform(X_2)
        self.lda_model_2_components = lda_model_2.components_

        self.terms_mecab = vectorizer_m.get_feature_names_out()
        self.terms_okt = vectorizer_o.get_feature_names_out()

    def get_topics(self, tagger, keyword_num=5): 
        if tagger == 0:
            components = self.lda_model_1_components
            feature_names = self.terms_mecab
        elif tagger == 1:
            components = self.lda_model_2_components
            feature_names = self.terms_okt

        for idx, topic in enumerate(components):
            print("Topic %d:" % (idx+1), [(feature_names[i], topic[i].round(2)) for i in topic.argsort()[:-keyword_num - 1:-1]])

In [27]:
# b_a : 블루아카이브 / n_k : 니케 / o_g : 원신 / d_s : 붕괴:스타레일

b_a = data[data['app_name'] == '블루아카이브'].reset_index()['content']
n_k = data[data['app_name'] == '니케'].reset_index()['content']
o_g = data[data['app_name'] == '원신'].reset_index()['content']
d_s = data[data['app_name'] == '붕괴:스타레일'].reset_index()['content']

In [28]:
b_a_LDA = custom_LDA(b_a)
n_k_LDA = custom_LDA(n_k)
o_g_LDA = custom_LDA(o_g)
d_s_LDA = custom_LDA(d_s)

In [36]:
print('::: Topics - Mecab :::')
b_a_LDA.get_topics(0, keyword_num = 5)
print()
print('::: Topics - Okt :::')
b_a_LDA.get_topics(1, keyword_num= 5)
print()

::: Topics - Mecab :::
Topic 1: [('확률', 19.02), ('없음', 17.33), ('인터페이스', 10.75), ('아쉬움', 3.89), ('가요', 2.98)]
Topic 2: [('여러분', 49.93), ('상품', 43.02), ('절대', 32.41), ('제작자', 26.56), ('자연', 22.94)]
Topic 3: [('최소', 57.87), ('상담', 23.1), ('기종', 15.37), ('선정', 13.86), ('이걸', 12.65)]
Topic 4: [('방송', 141.31), ('접속', 43.78), ('숨결', 20.02), ('파악', 16.01), ('오역', 13.83)]
Topic 5: [('개돼지', 281.72), ('상당', 41.58), ('방랑자', 38.91), ('검열', 31.32), ('우분', 16.63)]
Topic 6: [('상자', 36.01), ('오랜만', 16.47), ('분위기', 15.92), ('구글', 13.22), ('보이', 12.46)]
Topic 7: [('홍보', 188.21), ('리소스', 121.28), ('상품', 74.08), ('조합', 44.21), ('공개', 40.43)]
Topic 8: [('강제', 24.89), ('버튜', 14.43), ('번역기', 11.13), ('별점', 3.69), ('엉망', 1.85)]
Topic 9: [('최적', 29.24), ('시야', 26.11), ('환경', 14.65), ('상성', 11.38), ('개추', 11.34)]
Topic 10: [('안하', 72.74), ('비추', 45.33), ('호우', 35.1), ('텍스트', 33.34), ('추천', 27.64)]
Topic 11: [('경우', 35.74), ('미래', 31.72), ('일본', 28.15), ('설문', 22.92), ('게임', 21.37)]
Topic 12: [('보스', 35.49), ('정

In [37]:
print('::: Topics - Mecab :::')
n_k_LDA.get_topics(0, keyword_num = 5)
print()
print('::: Topics - Okt :::')
n_k_LDA.get_topics(1, keyword_num= 5)
print()

::: Topics - Mecab :::
Topic 1: [('콘솔', 101.52), ('성장', 27.7), ('검색', 19.77), ('테러', 11.19), ('네트워크', 10.54)]
Topic 2: [('효율', 33.47), ('다행', 26.28), ('특성', 19.66), ('상담', 16.36), ('무지', 15.16)]
Topic 3: [('불가', 61.37), ('캐시', 53.19), ('청불', 47.66), ('최소한', 37.93), ('우분', 34.43)]
Topic 4: [('코코미', 46.55), ('무기', 29.53), ('신작', 26.72), ('퀘스트', 26.57), ('나오', 20.97)]
Topic 5: [('우분', 260.18), ('마다', 46.76), ('다인', 35.07), ('계정', 32.26), ('돌파', 31.33)]
Topic 6: [('전투', 178.66), ('오랫동안', 87.2), ('원소', 37.54), ('축하', 36.6), ('오픈', 22.87)]
Topic 7: [('서브', 61.95), ('오랜만', 33.87), ('역대', 25.06), ('주인공', 13.31), ('눈물', 12.26)]
Topic 8: [('다양', 83.51), ('하늘', 31.46), ('준비', 24.41), ('우분', 20.14), ('희망', 15.45)]
Topic 9: [('게임패드', 481.8), ('다이아', 50.06), ('가격', 48.84), ('저격', 37.05), ('보석', 35.98)]
Topic 10: [('최적', 86.85), ('생김', 68.97), ('아쉬움', 43.52), ('이정', 33.4), ('처음', 15.16)]
Topic 11: [('취급', 73.88), ('칭찬', 56.61), ('그대', 49.9), ('버튼', 33.61), ('게임패드', 28.24)]
Topic 12: [('갤럭시', 11.19), 

In [38]:
print('::: Topics - Mecab :::')
o_g_LDA.get_topics(0, keyword_num = 5)
print()
print('::: Topics - Okt :::')
o_g_LDA.get_topics(1, keyword_num= 5)
print()

::: Topics - Mecab :::
Topic 1: [('원하', 229.7), ('조사', 205.81), ('혼자', 189.01), ('에피소드', 154.39), ('게이', 107.97)]
Topic 2: [('스트레스', 189.12), ('사람', 43.5), ('상향', 33.72), ('시기', 24.02), ('축제', 13.19)]
Topic 3: [('전환', 175.68), ('하드', 96.98), ('사운드', 84.71), ('시즌', 75.41), ('점수', 72.25)]
Topic 4: [('최저', 427.8), ('만족', 115.41), ('게이', 75.05), ('호시노', 59.99), ('아래', 32.3)]
Topic 5: [('게이', 1696.05), ('개판', 285.81), ('목소리', 128.8), ('부담', 82.38), ('힐링', 63.98)]
Topic 6: [('건지', 102.19), ('코드', 92.04), ('케릭', 71.74), ('주인공', 68.51), ('단계', 54.98)]
Topic 7: [('사실', 185.58), ('불편', 179.24), ('이전', 151.16), ('나부', 103.66), ('이름', 71.32)]
Topic 8: [('설정', 317.95), ('우리', 274.48), ('공개', 159.06), ('전설', 155.7), ('부재', 136.2)]
Topic 9: [('다인', 236.9), ('옵션', 178.17), ('뉴비', 162.86), ('유도', 146.46), ('게이', 80.4)]
Topic 10: [('센터', 279.95), ('로비', 228.26), ('로그인', 175.02), ('감안', 111.49), ('배경', 103.49)]
Topic 11: [('삭제', 236.26), ('현실', 146.1), ('해명', 104.04), ('순간', 90.92), ('진짜', 81.45)]
Topic 

In [39]:
print('::: Topics - Mecab :::')
d_s_LDA.get_topics(0, keyword_num = 5)
print()
print('::: Topics - Okt :::')
d_s_LDA.get_topics(1, keyword_num= 5)
print()

::: Topics - Mecab :::
Topic 1: [('복구', 118.4), ('개선', 51.62), ('식량', 30.46), ('설문', 15.78), ('하거', 12.93)]
Topic 2: [('감사', 17.4), ('단점', 16.2), ('반천', 10.53), ('가능', 10.09), ('예약', 10.01)]
Topic 3: [('개발', 31.37), ('필드', 19.5), ('미카', 19.21), ('한마디', 17.38), ('복구', 15.93)]
Topic 4: [('해결', 2.14), ('중복', 1.38), ('성우진', 0.83), ('분량', 0.82), ('명작', 0.05)]
Topic 5: [('치명', 27.93), ('서비스', 23.99), ('검수', 21.44), ('이걸', 20.73), ('마리안', 19.41)]
Topic 6: [('실력', 27.74), ('나락', 13.16), ('엔드', 5.65), ('밸런스', 5.62), ('엉망', 4.9)]
Topic 7: [('업적', 16.6), ('복구', 8.91), ('종려', 7.27), ('제미', 3.19), ('구매', 3.07)]
Topic 8: [('오류', 31.21), ('실화', 12.9), ('예상', 6.89), ('성인', 6.4), ('인연', 2.87)]
Topic 9: [('유지', 10.19), ('아래', 7.68), ('우분', 7.39), ('인증', 4.45), ('도박', 3.7)]
Topic 10: [('지금', 6.65), ('챕터', 4.66), ('오역', 2.4), ('몰드', 1.43), ('정작', 1.32)]
Topic 11: [('요청', 23.38), ('코코미', 19.73), ('업데이트', 18.24), ('취급', 13.7), ('소비', 11.67)]
Topic 12: [('원석', 96.52), ('시일', 91.55), ('축하', 75.4), ('이동', 37.5

In [43]:
ba = data[data['app_name'] == '블루아카이브']
b_a_pos = ba[ba['score'] > 3].reset_index()['content']
b_a_na = ba[ba['score'] < 3].reset_index()['content']

nk = data[data['app_name'] == '니케'].reset_index()
n_k_pos = nk[nk['score'] > 3].reset_index()['content']
n_k_na = nk[nk['score'] < 3].reset_index()['content']

og = data[data['app_name'] == '원신'].reset_index()
o_g_pos = og[og['score'] > 3].reset_index()['content']
o_g_na = og[og['score'] < 3].reset_index()['content']

ds = data[data['app_name'] == '붕괴:스타레일'].reset_index()
d_s_pos = ds[ds['score'] > 3].reset_index()['content']
d_s_na = ds[ds['score'] < 3].reset_index()['content']

In [44]:
ba_LDA_pos = custom_LDA(b_a_pos)
ba_LDA_na = custom_LDA(b_a_na)

nk_LDA_pos = custom_LDA(n_k_pos)
nk_LDA_na = custom_LDA(n_k_na)

og_LDA_pos = custom_LDA(o_g_pos)
og_LDA_na = custom_LDA(o_g_na)

ds_LDA_pos = custom_LDA(d_s_pos)
ds_LDA_na = custom_LDA(d_s_na)

In [45]:
print('::: Topics - Mecab :::')
ba_LDA_pos.get_topics(0, keyword_num = 5)
ba_LDA_na.get_topics(0, keyword_num=5)
print()
print('::: Topics - Okt :::')
ba_LDA_pos.get_topics(1, keyword_num= 5)
ba_LDA_na.get_topics(1, keyword_num=5)
print()

::: Topics - Mecab :::
Topic 1: [('무과', 7.72), ('현금', 6.38), ('스타일', 5.03), ('사용', 4.99), ('발전', 3.88)]
Topic 2: [('시간', 36.97), ('방치', 7.08), ('기념', 5.9), ('음식', 3.98), ('기믹', 3.18)]
Topic 3: [('버프', 16.44), ('본인', 5.12), ('파일', 4.37), ('통제', 2.07), ('체력', 2.06)]
Topic 4: [('티켓', 32.99), ('컨셉', 16.83), ('이건', 11.41), ('나락', 8.68), ('삼성', 7.17)]
Topic 5: [('오역', 31.74), ('겁니다', 16.6), ('형식', 13.86), ('이야기', 12.37), ('그대', 11.24)]
Topic 6: [('무시', 7.36), ('가입', 6.59), ('쿠폰', 6.08), ('맞음', 5.81), ('패스', 4.26)]
Topic 7: [('기기', 100.14), ('아야카', 70.62), ('부실', 44.93), ('불안', 43.11), ('겁니다', 31.24)]
Topic 8: [('욕심', 18.81), ('실패', 10.53), ('이틀', 7.78), ('다인', 4.64), ('정지', 3.81)]
Topic 9: [('불편', 38.61), ('부재', 21.22), ('거부', 6.54), ('희망', 5.57), ('대가리', 3.0)]
Topic 10: [('키우', 22.17), ('호불호', 10.96), ('컴퓨터', 10.35), ('마찬가지', 10.06), ('주기', 10.0)]
Topic 11: [('감사', 27.35), ('혼자', 22.5), ('시도', 21.31), ('최소', 16.64), ('호요', 13.8)]
Topic 12: [('일부', 15.7), ('의견', 11.29), ('일반', 9.64), ('언제', 

In [59]:
print('::: Topics - Mecab :::')
og_LDA_pos.get_topics(0, keyword_num = 5)
og_LDA_na.get_topics(0, keyword_num=5)
print()
print('::: Topics - Okt :::')
og_LDA_pos.get_topics(1, keyword_num= 5)
og_LDA_na.get_topics(1, keyword_num=5)
print()

::: Topics - Mecab :::
Topic 1: [('요구', 112.08), ('말씀', 83.37), ('쿠폰', 68.68), ('리뷰', 54.96), ('게임패드', 54.8)]
Topic 2: [('탄압', 166.9), ('개인', 112.81), ('로비', 54.48), ('정지', 49.97), ('숙제', 44.88)]
Topic 3: [('충전', 150.69), ('언어', 62.87), ('카프카', 43.22), ('작동', 27.96), ('동일', 24.06)]
Topic 4: [('최애', 415.17), ('게임패드', 96.02), ('니다', 74.9), ('불가', 50.52), ('흥미', 38.77)]
Topic 5: [('글로벌', 459.61), ('예상', 138.48), ('시간', 123.35), ('우분', 105.1), ('게임패드', 99.99)]
Topic 6: [('풀돌', 205.62), ('게임패드', 166.37), ('인증', 144.19), ('다운로드', 111.99), ('최소한', 97.57)]
Topic 7: [('방법', 212.89), ('추후', 162.4), ('게임패드', 145.19), ('문장', 124.67), ('스킬', 123.15)]
Topic 8: [('해당', 70.43), ('미안', 45.44), ('집중', 39.26), ('페이', 33.93), ('걸리', 28.13)]
Topic 9: [('시스템', 196.26), ('개월', 196.07), ('작용', 152.45), ('엉망', 70.72), ('형식', 65.01)]
Topic 10: [('위치', 175.02), ('마지막', 53.92), ('스킵', 49.5), ('게임패드', 36.07), ('무기', 33.97)]
Topic 11: [('스킬', 368.52), ('종류', 129.58), ('오타', 71.36), ('연차', 71.03), ('사양', 46.63)]
Top

In [60]:
print('::: Topics - Mecab :::')
ds_LDA_pos.get_topics(0, keyword_num = 5)
ds_LDA_na.get_topics(0, keyword_num=5)
print()
print('::: Topics - Okt :::')
ds_LDA_pos.get_topics(1, keyword_num= 5)
ds_LDA_na.get_topics(1, keyword_num=5)
print()

::: Topics - Mecab :::
Topic 1: [('기존', 3.81), ('이오리', 0.36), ('블루아', 0.18), ('한국', 0.17), ('유료', 0.17)]


Topic 2: [('개성', 25.46), ('답변', 5.08), ('컨트롤러', 3.84), ('선택', 3.75), ('일정', 2.49)]
Topic 3: [('버프', 22.38), ('풀돌', 8.4), ('스테이지', 5.56), ('울트라', 5.16), ('경우', 4.93)]
Topic 4: [('이게', 3.8), ('시로코', 1.94), ('모델', 0.08), ('명작', 0.08), ('신작', 0.08)]
Topic 5: [('기회', 100.34), ('참고', 1.28), ('특성', 1.04), ('거부', 0.67), ('보이', 0.63)]
Topic 6: [('사태', 4.77), ('종류', 2.65), ('필요', 2.27), ('흑우', 2.14), ('최대', 1.87)]
Topic 7: [('이거', 4.01), ('고치', 3.44), ('역대', 3.22), ('치명', 2.4), ('전환', 2.33)]
Topic 8: [('뉴비', 4.26), ('라이트', 3.63), ('부탁', 3.45), ('센터', 2.23), ('경험', 1.64)]
Topic 9: [('패드', 2.49), ('클릭', 2.37), ('낭비', 2.14), ('할만', 1.99), ('엉망', 1.31)]
Topic 10: [('블루아', 35.48), ('스타일', 33.15), ('레전드', 32.94), ('유료', 15.82), ('경험', 9.48)]
Topic 11: [('카드', 35.05), ('사랑', 22.88), ('컨셉', 10.81), ('리어', 5.07), ('시점', 4.62)]
Topic 12: [('적대', 32.22), ('모델', 5.13), ('경험', 3.51), ('오랜만', 3.39), ('콘텐츠', 3.09)]
Topic 13: [('추가', 49.58), ('마찬가지', 8.24), ('경험', 6.91), ('지급', 4.92), ('지속', 4.23)]
Topic 14: [(

In [62]:
len(n_k_pos), len(n_k_na)

(5033, 5255)