# 감성 분석 점수 계산
- 최종 모델로 감성 분석을 진행한 데이터셋을 이용해 각 관광지의 감성 분석 점수(긍정으로 예측된 리뷰의 비율, %)를 계산한다.

In [2]:
import random
import numpy as np 

import time
import datetime
import pandas as pd
import os
from tqdm import tqdm

In [3]:
# Project root under the current user's home directory.
# os.path.expanduser('~') is used rather than os.getenv('HOME'): getenv returns
# None when HOME is not set, and None + str raises TypeError.
pj_path = os.path.join(os.path.expanduser('~'), 'Projects', 'JeongCheck')
# Directory that is listed below to find the per-place prediction CSV files.
data_path = os.path.join(pj_path, 'prediction_data_nsmc')

In [4]:
# os.listdir returns every entry of the directory, so keep only the
# '<place>_pred.csv' files. Any other entry would otherwise be treated as a
# prediction file by the cells that split the name and read it as CSV.
# The listing order is left as returned.
file_list = [
    entry for entry in os.listdir(data_path) if entry.endswith('_pred.csv')
]
print(len(file_list))
file_list

15


['water_pred.csv',
 'art_pred.csv',
 'oworld_pred.csv',
 'observatory_pred.csv',
 'dongchundang_pred.csv',
 'hanbat_pred.csv',
 'yuseong_pred.csv',
 'expo_pred.csv',
 'gyejok_pred.csv',
 'daecheong_pred.csv',
 'jangtae_pred.csv',
 'uineungjeongi_pred.csv',
 'science_pred.csv',
 'ppuri_pred.csv',
 'sungsimdang_pred.csv']

In [5]:
# Place key = the part of each file name before the first underscore
# (e.g. 'water_pred.csv' -> 'water'), in the same order as file_list.
places = [csv_name.split('_')[0] for csv_name in file_list]

In [6]:
# Display the place keys derived from the file names.
places

['water',
 'art',
 'oworld',
 'observatory',
 'dongchundang',
 'hanbat',
 'yuseong',
 'expo',
 'gyejok',
 'daecheong',
 'jangtae',
 'uineungjeongi',
 'science',
 'ppuri',
 'sungsimdang']

In [7]:
# Load the first listed prediction file as a sample and preview its first rows.
sample_csv = os.path.join(data_path, file_list[0])
test = pd.read_csv(sample_csv)
test.head()

Unnamed: 0.1,Unnamed: 0,name,ratings,date,comment,search,keyword,label,pred
0,0,김성주,5,9시간 전,가편하게 산책과 등산을 할 수 있는 곳,계룡산국립공원수통골지구,water_barrel,1,1
1,1,서알콩,5,1일 전,커피 한잔하기 딱 좋은 거리에요,계룡산국립공원수통골지구,water_barrel,1,1
2,2,윤설희,5,2일 전,비 오고 난 수통골,계룡산국립공원수통골지구,water_barrel,1,0
3,3,이상훈,4,2일 전,가족 나들이하기 정말 좋은 곳 국립공원이라 관리도 잘 돼있네요,계룡산국립공원수통골지구,water_barrel,1,1
4,4,송순이공주,5,2일 전,동네라서 자주 가는데 갈 때마다 좋은 곳이에요 이번에는 뱀이 나와서 좀 놀랐어요,계룡산국립공원수통골지구,water_barrel,1,1


In [8]:
def sentimentScorePred(df):
    """Return the sentiment score of a prediction table as a percentage string.

    The score is ``sum(df.pred) / len(df) * 100``. In the sample data shown
    in this notebook ``pred`` holds 0/1 values, so this is the percentage of
    rows predicted as 1.

    Args:
        df: DataFrame with a ``pred`` column.

    Returns:
        str: the score formatted with two decimal places, e.g. ``'80.88'``.
    """
    positive_total = df.pred.sum()
    row_count = len(df)
    percentage = (positive_total / row_count) * 100
    return f'{percentage:.2f}'

- 함수 테스트

In [10]:
# Try the scoring function on the sample DataFrame loaded into `test`.
score = sentimentScorePred(test)
score

'80.88'

In [11]:
# Print every prediction file name, one per line. Only the file name is
# printed, so iterate file_list directly instead of zipping it with places
# and leaving the place variable unused.
for file_name in file_list:
    print(file_name)

water_pred.csv
art_pred.csv
oworld_pred.csv
observatory_pred.csv
dongchundang_pred.csv
hanbat_pred.csv
yuseong_pred.csv
expo_pred.csv
gyejok_pred.csv
daecheong_pred.csv
jangtae_pred.csv
uineungjeongi_pred.csv
science_pred.csv
ppuri_pred.csv
sungsimdang_pred.csv


In [12]:
# Score every place: read its prediction CSV and store the formatted score.
# Each value is a one-element list holding the score string.
score_dict = {
    place: [sentimentScorePred(pd.read_csv(os.path.join(data_path, file_name)))]
    for place, file_name in zip(places, file_list)
}

In [13]:
# Display the per-place scores; each value is a one-element list with the score string.
score_dict

{'water': ['80.88'],
 'art': ['81.29'],
 'oworld': ['79.28'],
 'observatory': ['80.77'],
 'dongchundang': ['82.87'],
 'hanbat': ['81.34'],
 'yuseong': ['77.39'],
 'expo': ['76.79'],
 'gyejok': ['82.38'],
 'daecheong': ['74.85'],
 'jangtae': ['88.63'],
 'uineungjeongi': ['69.07'],
 'science': ['77.43'],
 'ppuri': ['82.86'],
 'sungsimdang': ['77.73']}

In [14]:
# Rank place names from highest to lowest score.
# Each score_dict value is a one-element list holding the score as a formatted
# string, so the key converts it to float. Comparing the raw values would
# order the strings lexicographically (e.g. '100.00' would rank below '69.07').
sort_score = sorted(
    score_dict,
    key=lambda name: float(score_dict[name][0]),
    reverse=True,
)

In [15]:
# Display the place names ranked by score, highest first.
sort_score

['jangtae',
 'dongchundang',
 'ppuri',
 'gyejok',
 'hanbat',
 'art',
 'water',
 'observatory',
 'oworld',
 'sungsimdang',
 'science',
 'yuseong',
 'expo',
 'daecheong',
 'uineungjeongi']

In [16]:
# Show the scores as a one-row table: one column per place, built from the
# one-element list stored under each key.
pd.DataFrame(score_dict)

Unnamed: 0,water,art,oworld,observatory,dongchundang,hanbat,yuseong,expo,gyejok,daecheong,jangtae,uineungjeongi,science,ppuri,sungsimdang
0,80.88,81.29,79.28,80.77,82.87,81.34,77.39,76.79,82.38,74.85,88.63,69.07,77.43,82.86,77.73
