### 비지도학습 기반 감성 분석 소개
#### SentiWordNet을 이용한 Sentiment Analysis
- WordNet Synset과 SentiWordNet SentiSynset 클래스의 이해

In [43]:
from nltk.corpus import wordnet as wn

In [44]:
# Fetch all WordNet synsets for the word 'function'. A synset is the object
# that stores a word's meaning together with its part of speech and lemmas.
term = 'function'
synsets = wn.synsets(term)
for label, value in (
    ('synsets() 반환 type : ', type(synsets)),
    ('synsets() 반환 값 갯수 : ', len(synsets)),
    ('synsets() 반환 값 : ', synsets),
):
    print(label, value)

synsets() 반환 type :  <class 'list'>
synsets() 반환 값 갯수 :  10
synsets() 반환 값 :  [Synset('function.n.01'), Synset('function.n.02'), Synset('function.n.03'), Synset('function.n.04'), Synset('function.n.05'), Synset('affair.n.03'), Synset('routine.n.03'), Synset('function.v.01'), Synset('serve.v.01'), Synset('officiate.v.02')]


In [45]:
# Dump the name, lexname, definition and lemma names of every synset found.
for synset in synsets:
    print('##### Synset 이름 : ', synset.name(), '#####')
    details = (
        ('POS : ', synset.lexname()),
        ('Definition : ', synset.definition()),
        ('Lemmas: ', synset.lemma_names()),
    )
    for label, value in details:
        print(label, value)

##### Synset 이름 :  function.n.01 #####
POS :  noun.relation
Definition :  (mathematics) a mathematical relation such that each element of a given set (the domain of the function) is associated with an element of another set (the range of the function)
Lemmas:  ['function', 'mathematical_function', 'single-valued_function', 'map', 'mapping']
##### Synset 이름 :  function.n.02 #####
POS :  noun.attribute
Definition :  what something is used for
Lemmas:  ['function', 'purpose', 'role', 'use']
##### Synset 이름 :  function.n.03 #####
POS :  noun.act
Definition :  the actions and activities assigned to or required or expected of a person or group
Lemmas:  ['function', 'office', 'part', 'role']
##### Synset 이름 :  function.n.04 #####
POS :  noun.relation
Definition :  a relation such that one thing is dependent on another
Lemmas:  ['function']
##### Synset 이름 :  function.n.05 #####
POS :  noun.group
Definition :  a formal or official social gathering or ceremony
Lemmas:  ['function']
##### Synset

In [46]:
# Repeat the synset lookup for 'tiger' and print the same four fields
# (name, lexname, definition, lemma names) for each sense.
term = 'tiger'
synsets = wn.synsets(term)
accessors = (
    ('POS : ', 'lexname'),
    ('Definition : ', 'definition'),
    ('Lemmas: ', 'lemma_names'),
)
for synset in synsets:
    print('##### Synset 이름 : ', synset.name(), '#####')
    for label, method_name in accessors:
        print(label, getattr(synset, method_name)())

##### Synset 이름 :  tiger.n.01 #####
POS :  noun.person
Definition :  a fierce or audacious person
Lemmas:  ['tiger']
##### Synset 이름 :  tiger.n.02 #####
POS :  noun.animal
Definition :  large feline of forests in most of Asia having a tawny coat with black stripes; endangered
Lemmas:  ['tiger', 'Panthera_tigris']


In [47]:
# Create one Synset object per word, selecting the sense by its synset name.
tree, lion, tiger, cat, dog = (
    wn.synset(synset_name)
    for synset_name in ('tree.n.01', 'lion.n.01', 'tiger.n.02', 'cat.n.01', 'dog.n.01')
)
# Show the tree and tiger definitions separated by a blank line.
print(tree.definition(), '', tiger.definition(), sep='\n')

a tall perennial woody plant having a main trunk and branches forming a distinct elevated crown; includes both gymnosperms and angiosperms

large feline of forests in most of Asia having a tawny coat with black stripes; endangered


In [48]:
# Show the full synset name of the tree synset (the string used to look it up).
print(tree.name())

tree.n.01


In [49]:
# Keep only the text before the first '.' of the synset name, i.e. the word.
print(tree.name().partition('.')[0])

tree


In [50]:
# Gather the synsets in a fixed order and derive a short label for each one
# from the part of its synset name that precedes the first '.'.
entities = [tree, lion, tiger, cat, dog]
entity_names = [item.name().partition('.')[0] for item in entities]
print(entity_names)

['tree', 'lion', 'tiger', 'cat', 'dog']


In [51]:
# Path similarity between entities[0] (tree) and entities[1] (lion).
# The second positional parameter of path_similarity is `verbose`, not a
# rounding precision, so the stray `2` is dropped; the printed value is the
# same unrounded float as before.
print(entities[0].path_similarity(entities[1]))

0.07142857142857142


In [52]:
# Path similarity between entities[2] (tiger) and entities[3] (cat).
# The second positional parameter of path_similarity is `verbose`, not a
# rounding precision, so the stray `2` is dropped; the result is unchanged.
print(entities[2].path_similarity(entities[3]))

0.25


In [53]:
# Path similarity between entities[3] (cat) and entities[4] (dog).
# The second positional parameter of path_similarity is `verbose`, not a
# rounding precision, so the stray `2` is dropped; the result is unchanged.
print(entities[3].path_similarity(entities[4]))

0.2


In [54]:
# Build a square similarity matrix: row i holds the path similarity of
# entities[i] against every synset in `entities` (itself included), each
# value rounded to two decimals. The result is a list of lists (2-D).
similarities = [
    [round(row_entity.path_similarity(col_entity), 2) for col_entity in entities]
    for row_entity in entities
]
print(similarities)

[[1.0, 0.07, 0.07, 0.08, 0.12], [0.07, 1.0, 0.33, 0.25, 0.17], [0.07, 0.33, 1.0, 0.25, 0.17], [0.08, 0.25, 0.25, 1.0, 0.2], [0.12, 0.17, 0.17, 0.2, 1.0]]


In [55]:
import pandas as pd
# Wrap the similarity matrix in a DataFrame labelled on both axes with the
# entity names; the bare expression on the last line displays it in the notebook.
similarity_df = pd.DataFrame(
    data=similarities,
    index=entity_names,
    columns=entity_names,
)
similarity_df

Unnamed: 0,tree,lion,tiger,cat,dog
tree,1.0,0.07,0.07,0.08,0.12
lion,0.07,1.0,0.33,0.25,0.17
tiger,0.07,0.33,1.0,0.25,0.17
cat,0.08,0.25,0.25,1.0,0.2
dog,0.12,0.17,0.17,0.2,1.0


In [57]:
def vader_polarity(review, threshold=0.1, analyzer=None):
    """Classify a review as positive (1) or negative (0) from its VADER compound score.

    Args:
        review: Text handed to the analyzer's ``polarity_scores``.
        threshold: Cut-off applied to the compound score. A score greater
            than or equal to this value yields 1, anything lower yields 0.
        analyzer: Optional object exposing ``polarity_scores(text)`` that
            returns a mapping with a ``'compound'`` entry. Pass one shared
            ``SentimentIntensityAnalyzer`` when scoring many reviews so it is
            not rebuilt for every call. When omitted, a new
            ``SentimentIntensityAnalyzer`` is created for this call, exactly
            as before.

    Returns:
        1 if the compound score is >= ``threshold``, otherwise 0.
    """
    if analyzer is None:
        # Backward-compatible default: one fresh analyzer per call.
        analyzer = SentimentIntensityAnalyzer()

    compound_score = analyzer.polarity_scores(review)['compound']
    return 1 if compound_score >= threshold else 0

In [58]:
# Score every review with vader_polarity at a 0.1 threshold and store the
# 0/1 result in a new column, then pull the ground-truth labels and the
# predictions out as arrays for metric computation.
review_df['vader_preds'] = review_df['review'].apply(vader_polarity, args=(0.1,))
y_target, vader_preds = (
    review_df['sentiment'].values,
    review_df['vader_preds'].values,
)

NameError: name 'review_df' is not defined

In [None]:
print('#### VADER 예측 성능 평가 ####')
from sklearn.metrics import accuracy_score, confusion_matrix, precision_score
from sklearn.metrics import recall_score, f1_score, roc_auc_score
print(confusion_matrix(y_target, vader_preds))
print("정확도 : ", accuracy_score(y_target, vader_preds))
print("정밀도 : ", precision_score(y_target, vader_preds))
print("재현율 : ", recall_score(y_target, vader_preds))