## KOSAC 감성사전 데이터셋을 활용한 감정 단어 추출
http://word.snu.ac.kr/kosac/lexicon.php

In [1]:
import pandas as pd
import random
import nltk

nltk.download('punkt')

import numpy as np
from numpy import dot
from sklearn.manifold import TSNE
import gensim 
import gensim.models as g
import nltk
from gensim.models import Word2Vec
from nltk.tokenize import RegexpTokenizer
from nltk.tokenize import word_tokenize, WordPunctTokenizer, TreebankWordTokenizer, RegexpTokenizer, sent_tokenize

[nltk_data] Downloading package punkt to /Users/lifeofpy/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [2]:
pre_type = pd.read_csv('./subjectivity-type.csv')
pre_type

Unnamed: 0,ngram,freq,Agreement,Argument,Emotion,Intention,Judgment,Others,Speculation,max.value,max.prop
0,가*/JKS,1,0.000000,1.000000,0.000000,0.000000,0.000000,0.000000,0.0,Argument,1.000000
1,가*/VV,3,0.000000,0.333333,0.000000,0.000000,0.666667,0.000000,0.0,Judgment,0.666667
2,가/JKC,17,0.058824,0.352941,0.000000,0.000000,0.588235,0.000000,0.0,Judgment,0.588235
3,가/JKS,112,0.008929,0.330357,0.053571,0.008929,0.571429,0.026786,0.0,Judgment,0.571429
4,가/VV,11,0.000000,0.727273,0.000000,0.000000,0.181818,0.090909,0.0,Argument,0.727273
...,...,...,...,...,...,...,...,...,...,...,...
16357,힘겹/VA;게/EC;버티/VV,1,0.000000,1.000000,0.000000,0.000000,0.000000,0.000000,0.0,Argument,1.000000
16358,힘들/VA;고/EC;외롭/VA,1,0.000000,1.000000,0.000000,0.000000,0.000000,0.000000,0.0,Argument,1.000000
16359,힘들/VA;ㄹ/ETM;것/NNB,1,0.000000,1.000000,0.000000,0.000000,0.000000,0.000000,0.0,Argument,1.000000
16360,힘들/VA;ㄹ/ETM;때/NNG,1,0.000000,1.000000,0.000000,0.000000,0.000000,0.000000,0.0,Argument,1.000000


In [3]:
emotion_type = pre_type[['ngram', 'max.value']]

In [4]:
emotion_type = emotion_type[emotion_type['max.value']=='Emotion']
emotion_type

Unnamed: 0,ngram,max.value
8,가깝/VA,Emotion
21,가슴/NNG,Emotion
36,각별/XR,Emotion
37,각별히/MAG,Emotion
55,감/NNG,Emotion
...,...,...
16207,함성/NNG;을/JKO;지르/VV,Emotion
16214,해방감/NNG;을/JKO;느끼/VV,Emotion
16223,행복/NNG;을/JKO;찾/VV,Emotion
16227,행복/NNG;하/XSA;지/EC,Emotion


In [5]:
emotion_type['ngram'].iloc[0]

'가깝/VA'

### NNG 태그

In [6]:
targets = ['NNG']
emotion_type_NNG = emotion_type[emotion_type['ngram'].map(lambda x: all(string in x for string in targets))]
emotion_type_NNG = emotion_type_NNG[:87]

In [7]:
emotion_type_NNG

Unnamed: 0,ngram,max.value
21,가슴/NNG,Emotion
55,감/NNG,Emotion
59,감동/NNG,Emotion
60,감사/NNG,Emotion
62,감탄사/NNG,Emotion
...,...,...
3370,호감/NNG,Emotion
3371,호기심/NNG,Emotion
3409,환영/NNG,Emotion
3413,환희/NNG,Emotion


### XR 태그

In [8]:
targets = ['XR']
emotion_type_XR = emotion_type[emotion_type['ngram'].map(lambda x: all(string in x for string in targets))]
emotion_type_XR = emotion_type_XR
emotion_type_XR = emotion_type_XR[:8]
emotion_type_XR

Unnamed: 0,ngram,max.value
36,각별/XR,Emotion
1968,쓸쓸/XR,Emotion
1969,씁쓸/XR,Emotion
2964,착잡/XR,Emotion
3030,촉촉/XR,Emotion
3088,캄캄/XR,Emotion
3385,홀가분/XR,Emotion
3424,황홀/XR,Emotion


### VA 태그

In [9]:
targets = ['VA']
emotion_type_VA = emotion_type[emotion_type['ngram'].map(lambda x: all(string in x for string in targets))]
emotion_type_VA = emotion_type_VA[:14]

In [10]:
emotion_type_VA

Unnamed: 0,ngram,max.value
8,가깝/VA,Emotion
121,거칠/VA,Emotion
706,놀랍/VA,Emotion
996,딱하/VA,Emotion
1480,부끄럽/VA,Emotion
1869,슬프/VA,Emotion
1963,쑥스럽/VA,Emotion
2025,안스럽/VA,Emotion
2030,안타깝/VA,Emotion
2055,애닯/VA,Emotion


In [11]:
# VA 태그와 NNG 태그, XR 태그 합치기
emotion_words = pd.concat([emotion_type_NNG, emotion_type_VA, emotion_type_XR])

In [12]:
emotion_words

Unnamed: 0,ngram,max.value
21,가슴/NNG,Emotion
55,감/NNG,Emotion
59,감동/NNG,Emotion
60,감사/NNG,Emotion
62,감탄사/NNG,Emotion
...,...,...
2964,착잡/XR,Emotion
3030,촉촉/XR,Emotion
3088,캄캄/XR,Emotion
3385,홀가분/XR,Emotion


In [13]:
# 감정단어 ngram 으로 polarity.csv 에 접근해주기
emo = emotion_words['ngram']
emo = emo.tolist()

In [14]:
pol = pd.read_csv('./polarity.csv')
pol

Unnamed: 0,ngram,freq,COMP,NEG,NEUT,None,POS,max.value,max.prop
0,가*/JKS,1,0.0,0.0,0.0,0.0,1.0,POS,1.0
1,가*/JKS;있/VV,1,0.0,0.0,0.0,0.0,1.0,POS,1.0
2,가*/JKS;있/VV;었/EP,1,0.0,0.0,0.0,0.0,1.0,POS,1.0
3,가*/VV,3,0.0,0.0,0.0,0.0,1.0,POS,1.0
4,가*/VV;ㄴ다*/EF,1,0.0,0.0,0.0,0.0,1.0,POS,1.0
...,...,...,...,...,...,...,...,...,...
16357,힘들/VA;ㄹ/ETM;것/NNB,1,0.0,1.0,0.0,0.0,0.0,NEG,1.0
16358,힘들/VA;ㄹ/ETM;때/NNG,1,0.0,1.0,0.0,0.0,0.0,NEG,1.0
16359,힘차/VA,1,0.0,1.0,0.0,0.0,0.0,NEG,1.0
16360,힘차/VA;ㄴ/ETM,1,0.0,1.0,0.0,0.0,0.0,NEG,1.0


In [15]:
real_emotional_words = pol[pol['ngram'].isin(emo)]
real_emotional_words = real_emotional_words[['ngram', 'max.value', 'max.prop']]
real_emotional_words.iloc[:, 2]

177      0.5
221      0.4
330      1.0
373      0.5
395      1.0
        ... 
16170    1.0
16182    1.0
16207    1.0
16308    1.0
16312    1.0
Name: max.prop, Length: 109, dtype: float64

In [16]:
real_emotional_words['max.prop'][real_emotional_words['max.value'] == 'NEG'] = -(real_emotional_words['max.prop'][real_emotional_words['max.value'] == 'NEG'])

In [17]:
real_emotional_words

Unnamed: 0,ngram,max.value,max.prop
177,가깝/VA,NEG,-0.5
221,가슴/NNG,POS,0.4
330,각별/XR,POS,1.0
373,감/NNG,NEG,-0.5
395,감동/NNG,POS,1.0
...,...,...,...
16170,환영/NNG,POS,1.0
16182,환희/NNG,POS,1.0
16207,황홀/XR,POS,1.0
16308,흠모/NNG,POS,1.0


In [18]:
# ngram 에서 태그 삭제하고 문자열만 남기기

real_emotional_words['ngram'] = real_emotional_words['ngram'].apply(lambda x: x.replace('/VA', '').replace('/NNG', '').replace('/XR', ''))

In [19]:
# 난수 리스트를 만들어서 리스트를 열로 추가하기
random_numbers_1 = []
random_numbers_2 = []

for i in range(len(real_emotional_words)):
    random_numbers_1.append(round(random.random()*random.choice((-1,1)), 1))
    random_numbers_2.append(round(random.random()*random.choice((-1,1)), 1))

In [20]:
real_emotional_words['max.arousal'] = random_numbers_1
real_emotional_words['max.dominance'] = random_numbers_2

real_emotional_words

Unnamed: 0,ngram,max.value,max.prop,max.arousal,max.dominance
177,가깝,NEG,-0.5,-0.1,-0.7
221,가슴,POS,0.4,-0.5,-0.6
330,각별,POS,1.0,-0.6,-0.0
373,감,NEG,-0.5,0.7,0.8
395,감동,POS,1.0,0.3,0.2
...,...,...,...,...,...
16170,환영,POS,1.0,-0.7,0.2
16182,환희,POS,1.0,-0.9,-0.1
16207,황홀,POS,1.0,0.5,-0.7
16308,흠모,POS,1.0,0.2,-0.8


In [21]:
# ㄹㅇ 전처리 다하고 나중에 저장
real_emotional_words.to_excel('emotional_dict.xlsx')

## 이제 해야하는 것
- 1. 비감정단어를 기준으로 감정 단어(real_emotional_words)가 드러난 댓글을 모은다.
- 2. 댓글 하나에 비감정단어 --- 감정단어 쌍이 있다면, 감정단어를 기준으로 비감정단어에 드러난 감정이 매핑된다.
- 3. 비감정단어를 기준으로, 댓글에 드러난 긍정 또는 부정 감정의 총 점수를 sum 해서 더한다.

   ex. 크리스마스 느낌 나니까 황홀하다. >> POS, 1.0
   
   ex. 크리스마스 노래 들으니까 흥겹네. >> POS, 1.0
   
   ex. 노래 들으니까 이번 크리스마스는 혼자 보낼 것 같아서 슬프네... >> NEG, -1.0 (총 댓글이 3개라고 가정했을 때, 총 점수는 1.0점)
   
   
- 4. 만약에 '너무', '정말', '진짜', '매우', '굉장히' 와 같은 정도를 표현하는 부사가 있을 경우, 강조하는 의미로 해당 점수를 한번 더 더한다.
- 5. 여기서 감정 단어의 경우 ngram 과 유사한 단어를 뽑기 위해 **텍스트 유사도를 활용**하며, 텍스트 유사도가 0.7 이상인 경우, 동의어로 간주해 해당 감정 단어를 선택한다.

## Word2Vec 을 이용한 댓글 토크나이징 및 연관도가 높은 단어 추출

### 1. 크롤링

In [22]:
# 댓글에 위의 비감정단어가 있으면, 댓글을 따로 저장한다.
# 필요한 라이브러리 불러오기
import os
import warnings
warnings.filterwarnings(action='ignore')
from selenium import webdriver as wd
from webdriver_manager.chrome import ChromeDriverManager
from bs4 import BeautifulSoup
import requests
import time
import random
import re
import pandas as pd
from konlpy.tag import Okt
from collections import Counter
from soynlp.utils import DoublespaceLineCorpus
from soynlp.noun import LRNounExtractor_v2
# from soykeyword.lasso import LassoKeywordExtractor
okt = Okt()

In [23]:
# 불용어 사전 불러오기
with open('stop_words.txt', 'r') as file:
    stop_word = file.readline()
    stop_word = str(stop_word)
    
stop_word = stop_word.replace("\ufeff", '').replace("'", '').replace(",", '').replace('\n', '').replace("’", '').replace("‘", '')
stop_words = stop_word.split()

In [24]:
# 불용어 사전 불러오기
with open('stop_words-nouns.txt', 'r') as file:
    stop_words_nouns = file.readline()
    stop_words_nouns = str(stop_words_nouns)
    
stop_words_nouns = stop_words_nouns.replace("\ufeff", '').replace("'", '').replace(",", '').replace('\n', '').replace("’", '').replace("‘", '')
stop_words_nouns = stop_words_nouns.split()

In [25]:
# 곡 제목, 유튜브 링크가 있는 데이터 프레임(엑셀 파일)을 불러옴
data = pd.read_excel('./melon_music_list_sample.xlsx')
data

Unnamed: 0,Genre,Song Title,Site Link,Comments
0,,자각몽,https://www.melon.com/song/detail.htm?songId=3...,
1,,약속(Forever),https://www.melon.com/song/detail.htm?songId=3...,
2,,YEPPI YEPPI,https://www.melon.com/song/detail.htm?songId=3...,
3,,눈,https://www.melon.com/song/detail.htm?songId=3...,
4,,크리스마스라서,https://www.melon.com/song/detail.htm?songId=5...,


In [26]:
data_lst = data['Site Link'].tolist() # Series to List
data_lst

['https://www.melon.com/song/detail.htm?songId=34041588',
 'https://www.melon.com/song/detail.htm?songId=33265226',
 'https://www.melon.com/song/detail.htm?songId=34041586',
 'https://www.melon.com/song/detail.htm?songId=30753021',
 'https://www.melon.com/song/detail.htm?songId=5515286']

In [27]:
all_melon_comments = []

In [28]:
for k in range(len(data)):
    
    # 댓글 페이지 개수 구하기
    driver = wd.Chrome(executable_path='/Users/lifeofpy/Desktop/chromedriver')
    page_url = data_lst[k] # url 에 페이지 링크를 하나씩 담아준다
    driver.get(page_url)
    comments = []
    
    html = driver.page_source
    soup = BeautifulSoup(html, 'html.parser')
    pages = soup.find_all('span', {'class': 'd_cmtpgn_srch_cnt'})[0]

    # 일단 추출된 태그를 문자열로 바꾸기
    pages = str(pages)

    # 정규식을 통해 태그에서 숫자(=댓글 개수)만 추출하기
    cmt_num = re.findall('\d+', pages)
    page_num = "".join(cmt_num)
    page_num = int(page_num)
    page_num = page_num//10
        
    # 댓글 페이지마다 바뀔 url 생성
    url = page_url + '#cmtpgn=&pageNo={}&sortType=0&srchType=2&srchWord='
    
    
    for i in range(0, page_num+1):
        link = url.format(i) # {} 안에 들어갈 문자열 포매팅
        driver.get(link)
        driver.implicitly_wait(2)

        html = driver.page_source
        soup = BeautifulSoup(html, 'html.parser')
        comment = soup.find_all('div', {'class': 'd_cmtpgn_cmt_full_contents'})

        for r in comment:
            comments.append(r.get_text().strip())
    
    # 가수 이름을 멜론으로부터 추출해서 나올 수 있는 불용어 추가하기
    singer = soup.find(attrs={'class':'artist_name'}).find('span').text
    singer = str(singer)
    singer = singer.split()
    for i in range(len(singer)):
        if "'" in singer[i]:
            real_singer = str(singer[0:-1]).replace('[', '').replace(']', '').replace(',', '').replace("'", '')
        elif "(" in singer[i] and ")" in singer[i]:
            real_singer = singer[0]
        else:
            real_singer = "".join(singer[i])
            
    print(real_singer)
    
    # 가수 이름을 네이버에 검색해서 영문 가수 이름을 한글로 바꿔 singer 에 저장하기 (예시: aespa >> 에스파)
    # 드라이버 url 멜론 >> 네이버로 바꿔주기
    naver_url = 'https://search.naver.com/search.naver?where=nexearch&sm=top_hty&fbm=1&ie=utf8&query=%s'%(real_singer)
    driver.get(naver_url)
    driver.implicitly_wait(2)

    html = driver.page_source
    soup = BeautifulSoup(html, 'html.parser')
    name_of_singer = soup.find(attrs={'class':'title _title_ellipsis'}).find('span').text
    
    
    if len(name_of_singer) != 0:
        real_singer = soup.find(attrs={'class':'title _title_ellipsis'}).find('span').text
    else:
        real_singer = singer[0]
    
    print(real_singer)
    
    stop_words_nouns.append(real_singer) # 에스파
    stop_words_nouns.append(real_singer + '님') # 에스파님
    stop_words_nouns.append(real_singer + '님들') # 에스파님들
    stop_words_nouns.append('갓' + real_singer) # 갓에스파
    stop_words_nouns.append(real_singer + '짱') # 에스파짱
    stop_words_nouns.append(real_singer + '분들') # 에스파분들
    stop_words_nouns.append(real_singer + '멤버') # 에스파멤버
    stop_words_nouns.append(real_singer + '멤버들') #에스파멤버들
    stop_words_nouns.append(real_singer + '님들목소리') #에스파님들목소리
    stop_words_nouns.append(real_singer + '멤버들목소리') #에스파멤버들목소리
    
    
    # 가수가 2명 이상의 그룹이라면, 멤버 이름도 불용어에 추가하기 (예시: 카리나, 윈터, 지젤, 닝닝)
    member_lst = []

    is_member_more_than_2 = soup.find(attrs={'class':'info_group'}).find('dt').text
    
    if is_member_more_than_2 == '멤버':
        soup = soup.find(attrs={'class':'info_group'}).find('dd')
        soup.find_all('a')
        
    member_lst.append(soup.text)
        
    member_lst = str(member_lst).replace("'", "").replace("[", "").replace("]", "")
    member_lst = member_lst.split(',')
    
    for m in range(len(member_lst)):
        member_lst[m] = member_lst[m].replace(' ', '')
    
    for i in range(len(member_lst)):
        stop_words_nouns.append(member_lst[i]) # 카리나
        stop_words_nouns.append(member_lst[i] + '님') # 카리나님
        stop_words_nouns.append('갓' + member_lst[i]) # 갓카리나
        stop_words_nouns.append(member_lst[i] + '짱') # 카리나짱
        stop_words_nouns.append(member_lst[i] + '목소리') # 카리나목소리
        stop_words_nouns.append(member_lst[i] + '님목소리') # 카리나님목소리
        stop_words_nouns.append(member_lst[i] + '부분') # 카리나부분
        stop_words_nouns.append(member_lst[i] + '파트') # 카리나파트
        stop_words_nouns.append(member_lst[i] + '보컬') # 카리나보컬
        stop_words_nouns.append(member_lst[i] + '랩') # 카리나랩
        
        
    # 멜론 댓글 모을 리스트 melon_comments 정의하기
    melon_comments = []
    melon_comments = comments
    
    # 댓글이 모인 comments 리스트에서 필요없는 문자 처리하기
    for j in range(len(melon_comments)):
        if '내용' in melon_comments[j]:
            melon_comments[j] = melon_comments[j].replace('내용', '').replace(' \t\t\t\t\t\t\t\t\t\t', '')
            all_melon_comments.append(melon_comments[j])

aespa
에스파
aespa
에스파
aespa
에스파
Zion.T
자이언티
비투비
비투비


### 2. Word2Vec 을 이용해 토크나이징

In [29]:
import nltk

nltk.download('punkt')

import numpy as np
from numpy import dot
from sklearn.manifold import TSNE
import gensim 
import gensim.models as g
import nltk
from gensim.models import Word2Vec
from nltk.tokenize import RegexpTokenizer
from nltk.tokenize import word_tokenize, WordPunctTokenizer, TreebankWordTokenizer, RegexpTokenizer, sent_tokenize

[nltk_data] Downloading package punkt to /Users/lifeofpy/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [30]:
# Word2Vec 모델 학습을 위해 댓글 문장 nltk 를 사용해서 토크나이징
tokens = []
# real_tokens = []

for i in range(len(all_melon_comments)):
    tokens.append(word_tokenize(all_melon_comments[i]))

In [39]:
# stop_words_nouns,stop_words 에 있는 불용어 제거
cnt = 0

for k in range(len(tokens)):
    for m in range(len(tokens[k])-cnt-2):     
        if tokens[k][m] in stop_words_nouns: 
#             print(tokens[k][m-cnt-2])
            tokens[k].remove(tokens[k][m])
#             print(tokens[k])
            cnt += 1
        elif tokens[k][m] in stop_words:
            tokens[k].remove(tokens[k][m])
            cnt += 1

### Word2Vec 모델 불러와서 학습하기

In [80]:
from gensim.models import Word2Vec

model = Word2Vec(tokens, alpha=0.025, window=3, min_count=5, sg=1)

model.train(tokens, total_examples=len(tokens), epochs=10)

(36264, 124160)

### Word2Vec 텍스트 유사도 계산 결과

In [81]:
model.wv.most_similar('크리스마스')

[('눈오는날', 0.9970307350158691),
 ('곡', 0.9969525337219238),
 ('너무좋다', 0.9968875646591187),
 ('듣게', 0.99685138463974),
 ('뮤비도', 0.9968032836914062),
 ('ㅠㅠㅠ', 0.9967662692070007),
 ('ㅎㅎ', 0.9967372417449951),
 ('혼자', 0.9967198371887207),
 ('보면서', 0.9967017769813538),
 ('그저', 0.9966915249824524)]

In [82]:
model.wv.most_similar('눈')

[('올까요', 0.996615469455719),
 ('새벽에', 0.9965831637382507),
 ('첫눈', 0.9962307214736938),
 ('항상', 0.9962219595909119),
 ('들을때마다', 0.9962148070335388),
 ('오늘', 0.9961804747581482),
 ('겨울은', 0.9961144328117371),
 ('겨울이', 0.9961076974868774),
 ('감사합니다', 0.9960626363754272),
 ('올', 0.99603271484375)]

In [83]:
model.wv.most_similar('새벽에')

[('난다', 0.9976624250411987),
 ('영화', 0.9976577758789062),
 ('창', 0.9976208209991455),
 ('갑자기', 0.9975693821907043),
 ('보면서', 0.9974923133850098),
 ('요즘', 0.9974326491355896),
 ('음악', 0.9974220395088196),
 ('그래서', 0.9974207282066345),
 ('밤에', 0.9973835945129395),
 ('왤케', 0.997367262840271)]

In [84]:
model.wv.most_similar('감성')

[('들을때마다', 0.9975342750549316),
 ('밤에', 0.997362494468689),
 ('장면이', 0.9973504543304443),
 ('나는', 0.9972647428512573),
 ('없는', 0.9972612261772156),
 ('레벨', 0.9971966743469238),
 ('느낌을', 0.997166633605957),
 ('설렌다', 0.9971655011177063),
 ('항상', 0.9971495270729065),
 ('벌써', 0.9971386790275574)]

In [100]:
model.wv.most_similar('밤에')

[('겨울은', 0.9978041052818298),
 ('그저', 0.9977619051933289),
 ('처음', 0.9977128505706787),
 ('아침에', 0.9976990818977356),
 ('뮤비도', 0.9976669549942017),
 ('노래랑', 0.9976514577865601),
 ('창', 0.997617781162262),
 ('갑자기', 0.9975770115852356),
 ('좋네', 0.9974988698959351),
 ('두', 0.9974835515022278)]

In [118]:
model.wv.most_similar('ㅠㅠ')

[('보컬', 0.9971093535423279),
 ('미쳤다', 0.9969885945320129),
 ('갑자기', 0.9969856142997742),
 ('곡', 0.9968917965888977),
 ('내가', 0.996845006942749),
 ('좋네', 0.996776819229126),
 ('좋아서', 0.9967634081840515),
 ('좋은데', 0.9967330694198608),
 ('좋네요', 0.9967125058174133),
 ('처음', 0.9967052936553955)]

In [125]:
model.wv.most_similar('감사합니다')

[('듣다가', 0.9978721141815186),
 ('기분', 0.997539758682251),
 ('난다', 0.9974873661994934),
 ('겨울이', 0.9974864721298218),
 ('마음이', 0.9974837303161621),
 ('아니라', 0.9974615573883057),
 ('느낌을', 0.9974393844604492),
 ('밤', 0.9974386692047119),
 ('노래만', 0.9973770976066589),
 ('없는', 0.9973725080490112)]

In [126]:
model.wv.most_similar('창')

[('겨울은', 0.9978320002555847),
 ('지금', 0.9977182745933533),
 ('그리고', 0.997697114944458),
 ('마음이', 0.9976503252983093),
 ('이문세가', 0.9976502656936646),
 ('영화', 0.9976339340209961),
 ('새벽에', 0.9976208209991455),
 ('노래만', 0.997619092464447),
 ('밤에', 0.997617781162262),
 ('올까요', 0.9976043701171875)]

In [129]:
model.wv.most_similar('뮤비')

[('너무좋다', 0.9976949095726013),
 ('알았는데', 0.9976745843887329),
 ('처음', 0.9976170659065247),
 ('같아', 0.9975126385688782),
 ('그저', 0.9974970817565918),
 ('듣다가', 0.9974378347396851),
 ('갓문세', 0.9973730444908142),
 ('보면서', 0.9973698258399963),
 ('괜히', 0.9973659515380859),
 ('감사합니다', 0.9973589181900024)]