In [1]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
import pandas as pd
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer

#Through Vader module, we will divide our dataset in two types : Negative and Positive
#By sentiment analysis scores from user reivews and rating.
#The Vader module, need to be downloaded
nltk.download('vader_lexicon')

text_reviews = pd.read_csv('/content/drive/MyDrive/Team#2_LDA/tripadvisor_hotel_reviews.csv')
text_reviews.head(10)



[nltk_data] Downloading package vader_lexicon to /root/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


Unnamed: 0,Review,Rating
0,nice hotel expensive parking got good deal sta...,4
1,ok nothing special charge diamond member hilto...,2
2,nice rooms not 4* experience hotel monaco seat...,3
3,"unique, great stay, wonderful time hotel monac...",5
4,"great stay great stay, went seahawk game aweso...",5
5,love monaco staff husband stayed hotel crazy w...,5
6,"cozy stay rainy city, husband spent 7 nights m...",5
7,"excellent staff, housekeeping quality hotel ch...",4
8,"hotel stayed hotel monaco cruise, rooms genero...",5
9,excellent stayed hotel monaco past w/e delight...,5


In [3]:
#Drop NaN values for better result.
text_reviews.dropna()

Unnamed: 0,Review,Rating
0,nice hotel expensive parking got good deal sta...,4
1,ok nothing special charge diamond member hilto...,2
2,nice rooms not 4* experience hotel monaco seat...,3
3,"unique, great stay, wonderful time hotel monac...",5
4,"great stay great stay, went seahawk game aweso...",5
...,...,...
19596,"best kept secret 3rd time staying charm, not 5...",5
19597,great location price view hotel great quick pl...,4
19598,"ok just looks nice modern outside, desk staff ...",2
19599,hotel theft ruined vacation hotel opened sept ...,1


In [4]:
#Bring sentiment scoring model to build our own data set.
Sentiment_analysis = SentimentIntensityAnalyzer()

#Each sentiment dataset to be separated.
pos_reviews = pd.DataFrame(columns = ['review'])
neg_reviews = pd.DataFrame(columns = ['review'])
idx_pos = 0
idx_neg = 0

#Function will return scores from by sentiment analysis as, 1 = Positive, 0.5 = Neutral, 0 = Negative
def scoring_unit(review):
  analyzed_scores = Sentiment_analysis.polarity_scores(review)
  scoring = 0.5

  if analyzed_scores['compound'] >= 0.7:
    scoring = 1
  elif analyzed_scores['compound'] <= 0:
    scoring = 0

  return scoring

#Save the reviews with the score data and ratings from Users
for idx, row in text_reviews.iterrows():
  
  # Break it, since we don't want to read false datas.
  if idx == len(text_reviews['Review']):
    break
  
  temp_score = scoring_unit(row['Review'])

  if temp_score == 1 and row['Rating'] >= 4:
    pos_reviews.at[idx_pos, 'review'] = row['Review']
    idx_pos += 1
  elif temp_score == 0 and row['Rating'] <= 2:
    neg_reviews.at[idx_neg, 'review'] = row['Review']
    idx_neg += 1

In [5]:
pos_reviews.head(10)

Unnamed: 0,review
0,nice hotel expensive parking got good deal sta...
1,"unique, great stay, wonderful time hotel monac..."
2,"great stay great stay, went seahawk game aweso..."
3,love monaco staff husband stayed hotel crazy w...
4,"cozy stay rainy city, husband spent 7 nights m..."
5,"excellent staff, housekeeping quality hotel ch..."
6,"hotel stayed hotel monaco cruise, rooms genero..."
7,excellent stayed hotel monaco past w/e delight...
8,nice value seattle stayed 4 nights late 2007. ...
9,nice hotel good location hotel kimpton design ...


In [6]:
neg_reviews.head(10)

Unnamed: 0,review
0,"bad choice, booked hotel hot wire called immed..."
1,warwick bad good reviews warwick shocks staff ...
2,"austin powers decor familiar, hotel seattlewhe..."
3,"hated inn terrible, room-service horrible staf..."
4,disappointed arranging anticipated girl weeken...
5,"stay clear, internet reservation friday rang h..."
6,single rooms like hospital rooms single rooms ...
7,seattle crown plaza not worth money got late h...
8,worst hotel experience booked nonsmoking room ...
9,old dumpy place problems head starts spinning ...


In [7]:
# 이 뒤로 바로 코딩 하실거면 이 셀은 실행시키지 않으셔도 좋습니다.
#Save our processed reviews for further use.
pos_reviews.to_csv("/content/drive/MyDrive/Team#2_LDA/pos_review.csv")
neg_reviews.to_csv("/content/drive/MyDrive/Team#2_LDA/neg_review.csv")

In [9]:
# 이 뒤로 바로 코딩 하실거면 이 셀은 실행시키지 않으셔도 좋습니다.
#Free all memories.
del [[text_reviews, pos_reviews, neg_reviews]]

NameError: ignored