In [1]:
import nltk
from nltk.tokenize import word_tokenize
from nltk.tokenize import sent_tokenize
from nltk.corpus import stopwords
from nltk.corpus import wordnet as wn
from nltk.corpus import sentiwordnet as swn
from collections import Counter
import pandas as pd
from text import TEXT
from preprocess import clean_by_freq
from preprocess import clean_by_len
from preprocess import clean_by_stopwords
from preprocess import stemming_by_porter
from preprocess import pos_tagger
from preprocess import words_lemmatizer
from preprocess import swn_polarity
from nltk.sentiment.vader import SentimentIntensityAnalyzer
nltk.download('stopwords')
nltk.download('punkt')
nltk.download('wordnet')
nltk.download('omw-1.4')
nltk.download('vader_lexicon')

# 데이터 불러오기
df = pd.read_csv('imdb.tsv', delimiter = "\\t")

# 대소문자 통합
df['review'] = df['review'].str.lower()

# 문장 토큰화
df['sent_tokens'] = df['review'].apply(sent_tokenize)

# 품사 태깅
df['pos_tagged_tokens'] = df['sent_tokens'].apply(pos_tagger)

# 표제어 추출
df['lemmatized_tokens'] = df['pos_tagged_tokens'].apply(words_lemmatizer)

# 추가 전처리
stopwords_set = set(stopwords.words('english'))

df['cleaned_tokens'] = df['lemmatized_tokens'].apply(lambda x: clean_by_freq(x, 1))
df['cleaned_tokens'] = df['cleaned_tokens'].apply(lambda x: clean_by_len(x, 2))
df['cleaned_tokens'] = df['cleaned_tokens'].apply(lambda x: clean_by_stopwords(x, stopwords_set))

# SentiWordnet 감성 분석
df['swn_sentiment'] = df['pos_tagged_tokens'].apply(swn_polarity)

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\parkf\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package omw-1.4 to
[nltk_data]     C:\Users\parkf\AppData\Roaming\nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\parkf\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\parkf\AppData\Roaming\nltk_data...
[nltk_d

In [2]:
senti_analyzer = SentimentIntensityAnalyzer()

text1 = "This is a great movie!"
text2 = "This is a terrible movie!"
text3 = "This movie was just okay."

# VADER 감성 분석
senti_scores_text1 = senti_analyzer.polarity_scores(text1)
senti_scores_text2 = senti_analyzer.polarity_scores(text2)
senti_scores_text3 = senti_analyzer.polarity_scores(text3)

In [3]:
# 결과 확인
print(senti_scores_text1)
print(senti_scores_text2)
print(senti_scores_text3)

{'neg': 0.0, 'neu': 0.406, 'pos': 0.594, 'compound': 0.6588}
{'neg': 0.531, 'neu': 0.469, 'pos': 0.0, 'compound': -0.5255}
{'neg': 0.0, 'neu': 0.678, 'pos': 0.322, 'compound': 0.2263}


In [4]:
# SentiWordNet 감성 분석 레슨에서 진행했던 데이터로 VADER 감성 분석
df[['review']]

Unnamed: 0,review
0,"""watching time chasers, it obvious that it was..."
1,i saw this film about 20 years ago and remembe...
2,"minor spoilers in new york, joan barnard (elvi..."
3,i went to see this film with a great deal of e...
4,"""yes, i agree with everyone on this site this ..."
5,"""jennifer ehle was sparkling in \""""pride and p..."
6,amy poehler is a terrific comedian on saturday...
7,"""a plane carrying employees of a large biotech..."
8,"a well made, gritty science fiction movie, it ..."
9,"""incredibly dumb and utterly predictable story..."


In [5]:
# 감성 분석 함수
def vader_sentiment(text):
    analyzer = SentimentIntensityAnalyzer()
    
    # VADER 감성 분석
    senti_score = analyzer.polarity_scores(text)['compound']
    
    return senti_score

In [6]:
df['vader_sentiment'] = df['review'].apply(vader_sentiment)

df[['review', 'swn_sentiment', 'vader_sentiment']]

Unnamed: 0,review,swn_sentiment,vader_sentiment
0,"""watching time chasers, it obvious that it was...",-0.375,-0.9095
1,i saw this film about 20 years ago and remembe...,-1.5,-0.9694
2,"minor spoilers in new york, joan barnard (elvi...",-2.25,-0.2794
3,i went to see this film with a great deal of e...,-0.5,-0.9707
4,"""yes, i agree with everyone on this site this ...",3.0,0.8049
5,"""jennifer ehle was sparkling in \""""pride and p...",6.75,0.9494
6,amy poehler is a terrific comedian on saturday...,0.75,0.8473
7,"""a plane carrying employees of a large biotech...",8.75,0.9885
8,"a well made, gritty science fiction movie, it ...",4.5,0.9887
9,"""incredibly dumb and utterly predictable story...",-1.125,-0.7375
