In [7]:
import requests
import json

def translate(query):
    url = "https://asia-northeast3-skilled-chalice-402604.cloudfunctions.net/translate"

    headers = {'Content-Type' : 'application/json'}
    payload = json.dumps({'queries' : [query]})
    translations = requests.post(
        url = url,
        data = payload,
        headers = headers
    )
    result = translations.json()
    return result['translations'][0]

In [9]:
pinecone_api_key = getpass()

In [10]:
from pinecone import Pinecone

pc = Pinecone(api_key = pinecone_api_key)

In [11]:
index = pc.Index('books')

In [12]:
index.describe_index_stats()

{'dimension': 512,
 'index_fullness': 0.18718,
 'namespaces': {'': {'vector_count': 18718}},
 'total_vector_count': 18718}

In [16]:
from getpass import getpass

openai_api_key = getpass("OPENAI_TOKEN")

In [17]:
from openai import OpenAI

openai_client = OpenAI(api_key=openai_api_key)

In [18]:
def get_embedding(text_list):
    response = openai_client.embeddings.create(
        input=text_list,
        model = 'text-embedding-3-small',
        dimensions=512
    )
    return [x.embedding for x in response.data]

In [56]:
def recommend(query):
    eng_query = translate(query)
    query_embedding = get_embedding([eng_query])[0]
    results = index.query(
        vector = query_embedding,
        filter={
            'publisher' : {'$in' : ['북박스']}   
        },
        top_k = 2,
        include_metadata = True
    )
    return results['matches']

In [57]:
query = '맛있는 음식 이야기'

In [59]:
recommend(query)

[{'id': '295010',
  'metadata': {'authors': '최은영',
               'img_url': 'https://image.yes24.com/goods/295010/L',
               'published_at': '2002-08',
               'publisher': '북박스',
               'rating': 9.0,
               'review_cnt': 1.0,
               'summary': "천리안 동호회 '천일야화'의 비공개 사이트에 연재된 작품으로 파격적인 내용과 독특한 소재로 "
                          '인해 로맨스 애호가들에게 비상한 관심을 끌었던 작품이다. 개성 강한 인물들의 예측 불허한 삶을 '
                          '그리는 이 특별한 소설은 곳곳에 허를 찌르는 대사를 폭탄처럼 숨겨놓았다가 적시에 한번씩 '
                          '터뜨림으로써 독자들의 가슴을 단번에 사로잡는 작품이다. 또한 거친 폭풍 같은 남자를 사랑하는 '
                          '지고지순한 여인의 따뜻하면서도 꿋꿋한 행보를 강한 반전으로 풀어내 사랑이라는 그릇안에 세상을 '
                          '폭넓게 껴안고 있다. 결국 사랑하는 사람들이 서로 합쳐져 하나의 가정을 이룬다는 의미의 '
                          '플러스는 가정의 소중함을 끝까지 완성시켜 독자들에게 깊은 여운을 남길 것으로 믿는다.',
               'title': '플러스 1',
               'translation': 'It was serialized on the private site of the '
                              "clairvoyant club 'Thousand and One Nights

=========================

## 부산 description

In [1]:
import requests
from bs4 import BeautifulSoup
import time

In [2]:
url = 'https://korean.visitkorea.or.kr/call'
params = {
    'cmd': 'TOUR_CONTENT_LIST_VIEW',
    'month': 'All',
    'areaCode': '6',
    'sigunguCode': 'All',
    'tagId': 'All',
    'sortkind': '1',
    'locationx': '0',
    'locationy': '0',
    'page': '1',
    'cnt': '100',
    'typeList': 'Tour',
    'stampId': '1589345b-b030-11ea-b8bd-020027310001'
}

all_data = []  # 모든 페이지의 데이터를 저장할 리스트

while True:
    response = requests.get(url, params=params)
    data = response.json()

    # 받은 데이터에서 필요한 정보 추출
    result_list = data.get('body', {}).get('result', [])
    for item in result_list:
        img_path = item.get('imgPath', '')
        img_url = f'https://cdn.visitkorea.or.kr/img/call?cmd=VIEW&id={img_path}'
        
        extracted_data = {
            '관광지명': item.get('TITLE', ''),
            'catch': item.get('catchPhrase', ''),
            'addr1': item.get('addr1', ''),
            'img': img_url,
            'cotid' : item.get('cotId', ''),
            'areaCode' : item.get('areaCode', ''),
            'cat1' : item.get('cat1', ''),
            'cat2' : item.get('cat2', '')
        }
        
        cotid = extracted_data['cotid']
        cat1 = extracted_data['cat1']
        cat2 = extracted_data['cat2']
        areaCode = extracted_data['areaCode']
        
        new_url = f'https://korean.visitkorea.or.kr/detail/ms_detail.do?cotid={cotid}&big_category={cat1}&mid_category={cat2}&big_area={areaCode}'
        response = requests.get(new_url)
        html = response.text
        soup = BeautifulSoup(html,'html.parser')
        description = soup.find('div',class_='blind').text
        description = description.replace('\n','').replace('\t','')
        extracted_data['description'] = description

        all_data.append(extracted_data)
    
    # 다음 페이지로 이동
    # if params['page'] in range(0,1000,100):
    #     print('time sleep')
    #     time.sleep(200)
    # else:
    
    print(params['page'])
    params['page'] = str(int(params['page']) + 1)
    

    # 만약 더 이상 페이지가 없으면 반복문 종료
    if not result_list or len(all_data) == 1351:
        break

1
2
3
4
5
6
7
8
9
10
11
12
13
14


In [3]:
import pandas as pd
df = pd.DataFrame(all_data)

In [9]:
from tqdm import tqdm

tqdm.pandas()
df['관광지명'] = df['관광지명'].progress_apply(lambda x: x.replace(' ',''))

100%|██████████| 1351/1351 [00:00<00:00, 269974.97it/s]


In [24]:
import re

pattern = re.compile(r'\([^)]*\)')

df['관광지명'] = df['관광지명'].apply(lambda x: re.sub(pattern, '', x))

In [6]:
b = pd.read_csv('./data/busan.csv')

In [30]:
b = pd.merge(b,df[['관광지명','catch','description']],on='관광지명',how='left')

In [None]:
b[b.description.isna()]

Unnamed: 0,관광지명,주소,평균 체류시간,stars,reviews,광역지자체별 거주 방문자 비율,search_number,catch,description


In [None]:
df[df['관광지명'].str.contains('')]

In [72]:
b.loc[7,['catch','description']] = [df.iloc[1044]['catch'],df.iloc[1044]['description']]
b.loc[10,['catch','description']] = [df.iloc[129]['catch'],df.iloc[129]['description']]
b.loc[39,['catch','description']] = [df.iloc[1012]['catch'],df.iloc[1012]['description']]
b.loc[62,['catch','description']] = [df.iloc[427]['catch'],df.iloc[427]['description']]
b.loc[131,['catch','description']] = [df.iloc[797]['catch'],df.iloc[797]['description']]
b.loc[196,['catch','description']] = [df.iloc[981]['catch'],df.iloc[981]['description']]
b.loc[200,['catch','description']] = [df.iloc[1156]['catch'],df.iloc[1156]['description']]
b.loc[204,['catch','description']] = [df.iloc[214]['catch'],df.iloc[214]['description']]
b.loc[229,['catch','description']] = [df.iloc[1162]['catch'],df.iloc[1162]['description']]

In [76]:
b.to_csv('./부산/basic_info.csv',encoding='utf-8',index=False)

## 울산 description

In [80]:
url = 'https://korean.visitkorea.or.kr/call'
params = {
    'cmd': 'TOUR_CONTENT_LIST_VIEW',
    'month': 'All',
    'areaCode': '7',
    'sigunguCode': 'All',
    'tagId': 'All',
    'sortkind': '1',
    'locationx': '0',
    'locationy': '0',
    'page': '1',
    'cnt': '100',
    'typeList': 'Tour',
    'stampId': '1589345b-b030-11ea-b8bd-020027310001'
}

all_data = []  # 모든 페이지의 데이터를 저장할 리스트

while True:
    response = requests.get(url, params=params)
    data = response.json()

    # 받은 데이터에서 필요한 정보 추출
    result_list = data.get('body', {}).get('result', [])
    for item in result_list:
        img_path = item.get('imgPath', '')
        img_url = f'https://cdn.visitkorea.or.kr/img/call?cmd=VIEW&id={img_path}'
        
        extracted_data = {
            '관광지명': item.get('TITLE', ''),
            'catch': item.get('catchPhrase', ''),
            'addr1': item.get('addr1', ''),
            'img': img_url,
            'cotid' : item.get('cotId', ''),
            'areaCode' : item.get('areaCode', ''),
            'cat1' : item.get('cat1', ''),
            'cat2' : item.get('cat2', '')
        }
        
        cotid = extracted_data['cotid']
        cat1 = extracted_data['cat1']
        cat2 = extracted_data['cat2']
        areaCode = extracted_data['areaCode']
        
        new_url = f'https://korean.visitkorea.or.kr/detail/ms_detail.do?cotid={cotid}&big_category={cat1}&mid_category={cat2}&big_area={areaCode}'
        response = requests.get(new_url)
        html = response.text
        soup = BeautifulSoup(html,'html.parser')
        description = soup.find('div',class_='blind').text
        description = description.replace('\n','').replace('\t','')
        extracted_data['description'] = description

        all_data.append(extracted_data)
    
    # 다음 페이지로 이동
    # if params['page'] in range(0,1000,100):
    #     print('time sleep')
    #     time.sleep(200)
    # else:
    
    print(params['page'])
    params['page'] = str(int(params['page']) + 1)
    

    # 만약 더 이상 페이지가 없으면 반복문 종료
    if not result_list or len(all_data) == 563:
        break

1
2
3
4
5
6


In [82]:
df = pd.DataFrame(all_data)

In [85]:
tqdm.pandas()
df['관광지명'] = df['관광지명'].progress_apply(lambda x: x.replace(' ',''))

pattern = re.compile(r'\([^)]*\)')
df['관광지명'] = df['관광지명'].apply(lambda x: re.sub(pattern, '', x))

100%|██████████| 563/563 [00:00<00:00, 281486.85it/s]


In [95]:
u = pd.read_csv('./울산/basic_info.csv')

In [96]:
u = pd.merge(u,df[['관광지명','catch','description']],on='관광지명',how='left')

In [None]:
u[u.description.isna()]

Unnamed: 0,관광지명,주소,평균 체류시간,stars,reviews,광역지자체별 거주 방문자 비율,search_number,catch,description


In [None]:
df[df['관광지명'].str.contains('')]

In [115]:
u.loc[73,['catch','description']] = [df.iloc[111]['catch'],df.iloc[111]['description']]
u.loc[79,['catch','description']] = [df.iloc[1]['catch'],df.iloc[1]['description']]
u.loc[82,['catch','description']] = [df.iloc[166]['catch'],df.iloc[166]['description']]
u.loc[111,['catch','description']] = [df.iloc[120]['catch'],df.iloc[120]['description']]

In [118]:
u.to_csv('./울산/basic_info.csv',encoding='utf-8',index=False)

## 경남 description

In [119]:
url = 'https://korean.visitkorea.or.kr/call'
params = {
    'cmd': 'TOUR_CONTENT_LIST_VIEW',
    'month': 'All',
    'areaCode': '36',
    'sigunguCode': '16',
    'tagId': 'All',
    'sortkind': '1',
    'locationx': '0',
    'locationy': '0',
    'page': '1',
    'cnt': '100',
    'typeList': 'Tour',
    'stampId': '1589345b-b030-11ea-b8bd-020027310001'
}

all_data = []  # 모든 페이지의 데이터를 저장할 리스트

while True:
    response = requests.get(url, params=params)
    data = response.json()

    # 받은 데이터에서 필요한 정보 추출
    result_list = data.get('body', {}).get('result', [])
    for item in result_list:
        img_path = item.get('imgPath', '')
        img_url = f'https://cdn.visitkorea.or.kr/img/call?cmd=VIEW&id={img_path}'
        
        extracted_data = {
            '관광지명': item.get('TITLE', ''),
            'catch': item.get('catchPhrase', ''),
            'addr1': item.get('addr1', ''),
            'img': img_url,
            'cotid' : item.get('cotId', ''),
            'areaCode' : item.get('areaCode', ''),
            'cat1' : item.get('cat1', ''),
            'cat2' : item.get('cat2', '')
        }
        
        cotid = extracted_data['cotid']
        cat1 = extracted_data['cat1']
        cat2 = extracted_data['cat2']
        areaCode = extracted_data['areaCode']
        
        new_url = f'https://korean.visitkorea.or.kr/detail/ms_detail.do?cotid={cotid}&big_category={cat1}&mid_category={cat2}&big_area={areaCode}'
        response = requests.get(new_url)
        html = response.text
        soup = BeautifulSoup(html,'html.parser')
        description = soup.find('div',class_='blind').text
        description = description.replace('\n','').replace('\t','')
        extracted_data['description'] = description

        all_data.append(extracted_data)
    
    # 다음 페이지로 이동
    # if params['page'] in range(0,1000,100):
    #     print('time sleep')
    #     time.sleep(200)
    # else:
    
    print(params['page'])
    params['page'] = str(int(params['page']) + 1)
    

    # 만약 더 이상 페이지가 없으면 반복문 종료
    if not result_list or len(all_data) == 430:
        break

1
2
3
4
5


In [122]:
df = pd.DataFrame(all_data)

In [124]:
tqdm.pandas()
df['관광지명'] = df['관광지명'].progress_apply(lambda x: x.replace(' ',''))

pattern = re.compile(r'\([^)]*\)')
df['관광지명'] = df['관광지명'].apply(lambda x: re.sub(pattern, '', x))

100%|██████████| 430/430 [00:00<00:00, 429928.66it/s]


In [125]:
gy = pd.read_csv('./경남/basic_info.csv')

In [128]:
gy = pd.merge(gy,df[['관광지명','catch','description']],on='관광지명',how='left')

In [None]:
gy[gy.description.isna()]

Unnamed: 0,관광지명,주소,평균 체류시간,stars,reviews,광역지자체별 거주 방문자 비율,search_number,catch,description


In [None]:
df[df['관광지명'].str.contains('')]

In [160]:
gy.loc[7,['catch','description']] = [df.iloc[27]['catch'],df.iloc[27]['description']]
gy.loc[26,['catch','description']] = [df.iloc[133]['catch'],df.iloc[133]['description']]
gy.loc[29,['catch','description']] = [df.iloc[36]['catch'],df.iloc[36]['description']]
gy.loc[33,['catch','description']] = [df.iloc[351]['catch'],df.iloc[351]['description']]
gy.loc[39,['catch','description']] = [df.iloc[256]['catch'],df.iloc[256]['description']]
gy.loc[41,['catch','description']] = [df.iloc[20]['catch'],df.iloc[20]['description']]
gy.loc[42,['catch','description']] = [df.iloc[376]['catch'],df.iloc[376]['description']]
gy.loc[52,['catch','description']] = [df.iloc[44]['catch'],df.iloc[44]['description']]
gy.loc[55,['catch','description']] = [df.iloc[324]['catch'],df.iloc[324]['description']]
gy.loc[60,['catch','description']] = [df.iloc[7]['catch'],df.iloc[7]['description']]
gy.loc[64,['catch','description']] = [df.iloc[84]['catch'],df.iloc[84]['description']]
gy.loc[65,['catch','description']] = [df.iloc[382]['catch'],df.iloc[382]['description']]
gy.loc[69,['catch','description']] = [df.iloc[352]['catch'],df.iloc[352]['description']]
gy.loc[73,['catch','description']] = [df.iloc[342]['catch'],df.iloc[342]['description']]
gy.loc[92,['catch','description']] = [df.iloc[109]['catch'],df.iloc[109]['description']]
gy.loc[100,['catch','description']] = [df.iloc[119]['catch'],df.iloc[119]['description']]


In [163]:
gy.to_csv('./경남/basic_info.csv',encoding='utf-8',index=False)

## 전남 description

In [164]:
url = 'https://korean.visitkorea.or.kr/call'
params = {
    'cmd': 'TOUR_CONTENT_LIST_VIEW',
    'month': 'All',
    'areaCode': '38',
    'sigunguCode': '13',
    'tagId': 'All',
    'sortkind': '1',
    'locationx': '0',
    'locationy': '0',
    'page': '1',
    'cnt': '100',
    'typeList': 'Tour',
    'stampId': '1589345b-b030-11ea-b8bd-020027310001'
}

all_data = []  # 모든 페이지의 데이터를 저장할 리스트

while True:
    response = requests.get(url, params=params)
    data = response.json()

    # 받은 데이터에서 필요한 정보 추출
    result_list = data.get('body', {}).get('result', [])
    for item in result_list:
        img_path = item.get('imgPath', '')
        img_url = f'https://cdn.visitkorea.or.kr/img/call?cmd=VIEW&id={img_path}'
        
        extracted_data = {
            '관광지명': item.get('TITLE', ''),
            'catch': item.get('catchPhrase', ''),
            'addr1': item.get('addr1', ''),
            'img': img_url,
            'cotid' : item.get('cotId', ''),
            'areaCode' : item.get('areaCode', ''),
            'cat1' : item.get('cat1', ''),
            'cat2' : item.get('cat2', '')
        }
        
        cotid = extracted_data['cotid']
        cat1 = extracted_data['cat1']
        cat2 = extracted_data['cat2']
        areaCode = extracted_data['areaCode']
        
        new_url = f'https://korean.visitkorea.or.kr/detail/ms_detail.do?cotid={cotid}&big_category={cat1}&mid_category={cat2}&big_area={areaCode}'
        response = requests.get(new_url)
        html = response.text
        soup = BeautifulSoup(html,'html.parser')
        description = soup.find('div',class_='blind').text
        description = description.replace('\n','').replace('\t','')
        extracted_data['description'] = description

        all_data.append(extracted_data)
    
    # 다음 페이지로 이동
    # if params['page'] in range(0,1000,100):
    #     print('time sleep')
    #     time.sleep(200)
    # else:
    
    print(params['page'])
    params['page'] = str(int(params['page']) + 1)
    

    # 만약 더 이상 페이지가 없으면 반복문 종료
    if not result_list or len(all_data) == 474:
        break

1
2
3
4
5


In [165]:
df = pd.DataFrame(all_data)

In [167]:
tqdm.pandas()
df['관광지명'] = df['관광지명'].progress_apply(lambda x: x.replace(' ',''))

pattern = re.compile(r'\([^)]*\)')
df['관광지명'] = df['관광지명'].apply(lambda x: re.sub(pattern, '', x))

100%|██████████| 474/474 [00:00<00:00, 474034.36it/s]


In [168]:
j = pd.read_csv('./전남/basic_info.csv')

In [170]:
j = pd.merge(j,df[['관광지명','catch','description']],on='관광지명',how='left')

In [None]:
j[j.description.isna()]

Unnamed: 0,관광지명,주소,평균 체류시간,stars,reviews,광역지자체별 거주 방문자 비율,search_number,catch,description


In [None]:
df[df['관광지명'].str.contains('')]

In [200]:
j.loc[9,['catch','description']] = [df.iloc[97]['catch'],df.iloc[97]['description']]
j.loc[15,['catch','description']] = [df.iloc[163]['catch'],df.iloc[163]['description']]
j.loc[25,['catch','description']] = [df.iloc[98]['catch'],df.iloc[98]['description']]
j.loc[28,['catch','description']] = [df.iloc[348]['catch'],df.iloc[348]['description']]
j.loc[30,['catch','description']] = [df.iloc[232]['catch'],df.iloc[232]['description']]

j.loc[36,['catch','description']] = [df.iloc[341]['catch'],df.iloc[341]['description']]
j.loc[37,['catch','description']] = [df.iloc[49]['catch'],df.iloc[49]['description']]
j.loc[44,['catch','description']] = [df.iloc[86]['catch'],df.iloc[86]['description']]
j.loc[46,['catch','description']] = [df.iloc[48]['catch'],df.iloc[48]['description']]
j.loc[59,['catch','description']] = [df.iloc[167]['catch'],df.iloc[167]['description']]

j.loc[62,['catch','description']] = [df.iloc[10]['catch'],df.iloc[10]['description']]
j.loc[66,['catch','description']] = [df.iloc[220]['catch'],df.iloc[220]['description']]
j.loc[67,['catch','description']] = [df.iloc[13]['catch'],df.iloc[13]['description']]
j.loc[69,['catch','description']] = [df.iloc[376]['catch'],df.iloc[376]['description']]
j.loc[72,['catch','description']] = [df.iloc[418]['catch'],df.iloc[418]['description']]

j.loc[73,['catch','description']] = [df.iloc[2]['catch'],df.iloc[2]['description']]
j.loc[79,['catch','description']] = [df.iloc[7]['catch'],df.iloc[7]['description']]
j.loc[80,['catch','description']] = [df.iloc[55]['catch'],df.iloc[55]['description']]
j.loc[82,['catch','description']] = [df.iloc[394]['catch'],df.iloc[394]['description']]
j.loc[60,['catch','description']] = [df.iloc[353]['catch'],df.iloc[353]['description']]


In [202]:
j.to_csv('./전남/basic_info.csv',encoding='utf-8',index=False)

## 광주 description

In [203]:
url = 'https://korean.visitkorea.or.kr/call'
params = {
    'cmd': 'TOUR_CONTENT_LIST_VIEW',
    'month': 'All',
    'areaCode': '5',
    'sigunguCode': 'All',
    'tagId': 'All',
    'sortkind': '1',
    'locationx': '0',
    'locationy': '0',
    'page': '1',
    'cnt': '100',
    'typeList': 'Tour',
    'stampId': '1589345b-b030-11ea-b8bd-020027310001'
}

all_data = []  # 모든 페이지의 데이터를 저장할 리스트

while True:
    response = requests.get(url, params=params)
    data = response.json()

    # 받은 데이터에서 필요한 정보 추출
    result_list = data.get('body', {}).get('result', [])
    for item in result_list:
        img_path = item.get('imgPath', '')
        img_url = f'https://cdn.visitkorea.or.kr/img/call?cmd=VIEW&id={img_path}'
        
        extracted_data = {
            '관광지명': item.get('TITLE', ''),
            'catch': item.get('catchPhrase', ''),
            'addr1': item.get('addr1', ''),
            'img': img_url,
            'cotid' : item.get('cotId', ''),
            'areaCode' : item.get('areaCode', ''),
            'cat1' : item.get('cat1', ''),
            'cat2' : item.get('cat2', '')
        }
        
        cotid = extracted_data['cotid']
        cat1 = extracted_data['cat1']
        cat2 = extracted_data['cat2']
        areaCode = extracted_data['areaCode']
        
        new_url = f'https://korean.visitkorea.or.kr/detail/ms_detail.do?cotid={cotid}&big_category={cat1}&mid_category={cat2}&big_area={areaCode}'
        response = requests.get(new_url)
        html = response.text
        soup = BeautifulSoup(html,'html.parser')
        description = soup.find('div',class_='blind').text
        description = description.replace('\n','').replace('\t','')
        extracted_data['description'] = description

        all_data.append(extracted_data)
    
    # 다음 페이지로 이동
    # if params['page'] in range(0,1000,100):
    #     print('time sleep')
    #     time.sleep(200)
    # else:
    
    print(params['page'])
    params['page'] = str(int(params['page']) + 1)
    

    # 만약 더 이상 페이지가 없으면 반복문 종료
    if not result_list or len(all_data) == 493:
        break

1
2
3
4
5


In [205]:
df = pd.DataFrame(all_data)

In [207]:
tqdm.pandas()
df['관광지명'] = df['관광지명'].progress_apply(lambda x: x.replace(' ',''))

pattern = re.compile(r'\([^)]*\)')
df['관광지명'] = df['관광지명'].apply(lambda x: re.sub(pattern, '', x))

100%|██████████| 493/493 [00:00<00:00, 246165.70it/s]


In [208]:
gw = pd.read_csv('./광주/basic_info.csv')

In [210]:
gw = pd.merge(gw,df[['관광지명','catch','description']],on='관광지명',how='left')

In [None]:
gw[gw.description.isna()]

Unnamed: 0,관광지명,주소,평균 체류시간,stars,reviews,광역지자체별 거주 방문자 비율,search_number,catch,description


In [None]:
df[df['관광지명'].str.contains('하정웅미술관')]

Unnamed: 0,관광지명,catch,addr1,img,cotid,areaCode,cat1,cat2,description
138,하정웅미술관,시간과 공간을 넘나들며 한국 근현대사의 폭을 한층 확장시킨 '하정웅미술관'\t\t,광주광역시 서구 상무대로 1165,https://cdn.visitkorea.or.kr/img/call?cmd=VIEW...,97c6b0af-48df-4b62-94db-1c7ae5c6b5f1,5,A02,A0206,광주시립미술관 분관 ''하정웅미술관''은 1981년 3월에 건립되어 ''전남도지사 ...


In [231]:
gw.loc[3,['catch','description']] = [df.iloc[41]['catch'],df.iloc[41]['description']]
gw.loc[5,['catch','description']] = [df.iloc[85]['catch'],df.iloc[85]['description']]
gw.loc[7,['catch','description']] = [df.iloc[149]['catch'],df.iloc[149]['description']]
gw.loc[8,['catch','description']] = [df.iloc[38]['catch'],df.iloc[38]['description']]
gw.loc[9,['catch','description']] = [df.iloc[351]['catch'],df.iloc[351]['description']]
gw.loc[17,['catch','description']] = [df.iloc[40]['catch'],df.iloc[40]['description']]
gw.loc[26,['catch','description']] = [df.iloc[11]['catch'],df.iloc[11]['description']]
gw.loc[32,['catch','description']] = [df.iloc[27]['catch'],df.iloc[27]['description']]
gw.loc[33,['catch','description']] = [df.iloc[449]['catch'],df.iloc[449]['description']]
gw.loc[36,['catch','description']] = [df.iloc[83]['catch'],df.iloc[83]['description']]
gw.loc[42,['catch','description']] = [df.iloc[356]['catch'],df.iloc[356]['description']]
gw.loc[47,['catch','description']] = [df.iloc[138]['catch'],df.iloc[138]['description']]

In [233]:
gw.to_csv('./광주/basic_info.csv',encoding='utf-8',index=False)

========================================================================================================================================================================================================================================

========================================================================================================================================================================================================================================

========================================================================================================================================================================================================================================


In [19]:
import pandas as pd

busan = pd.read_csv('./data/busan.csv')
ulsan = pd.read_csv('./data/ulsan.csv')
gyeongnam = pd.read_csv('./data/gyeongnam.csv')
jeonnam = pd.read_csv('./data/jeonnam.csv')
gwangju = pd.read_csv('./data/gwangju.csv')

In [20]:
busan_avg = busan['scores'].sum() / len(busan)
ulsan_avg = ulsan['scores'].sum() / len(ulsan)
gyeongnam_avg = gyeongnam['scores'].sum() / len(gyeongnam)
jeonnam_avg = jeonnam['scores'].sum() / len(jeonnam)
gwangju_avg = gwangju['scores'].sum() / len(gwangju)

In [25]:
for i in range(len(busan)):
    if busan.iloc[i]['scores'] >= busan_avg:
        busan.at[i,'구분'] = '숨은 명소'
    else:
        busan.at[i,'구분'] = '일반 명소'
        
for i in range(len(ulsan)):
    if ulsan.iloc[i]['scores'] >= ulsan_avg:
        ulsan.at[i,'구분'] = '숨은 명소'
    else:
        ulsan.at[i,'구분'] = '일반 명소'
        
for i in range(len(gyeongnam)):
    if gyeongnam.iloc[i]['scores'] >= gyeongnam_avg:
        gyeongnam.at[i,'구분'] = '숨은 명소'
    else:
        gyeongnam.at[i,'구분'] = '일반 명소'
        
for i in range(len(jeonnam)):
    if jeonnam.iloc[i]['scores'] >= jeonnam_avg:
        jeonnam.at[i,'구분'] = '숨은 명소'
    else:
        jeonnam.at[i,'구분'] = '일반 명소'
        
for i in range(len(gwangju)):
    if gwangju.iloc[i]['scores'] >= gwangju_avg:
        gwangju.at[i,'구분'] = '숨은 명소'
    else:
        gwangju.at[i,'구분'] = '일반 명소'

In [40]:
all_df = pd.concat([busan,ulsan,gyeongnam,jeonnam,gwangju],ignore_index=True)

In [54]:
all_df.head()

Unnamed: 0,관광지명,평균 체류시간,stars,reviews,광역지자체별 거주 방문자 비율,search_number,avg,주소,대분류,중분류,소분류,longitude,latitude,scores,catch,description,img,구분
0,명지동근린공원,164,4.1,8.0,62.5,23200,553.416667,부산광역시 강서구 명지오션시티10로 95,인문(문화/예술/역사),휴양관광지,공원,128.906657,35.087033,24.909113,도심 속 공원,"명지동 근린공원은 명지오션시티에 있는 도심 공원이다. 인근에는 주차장, 축구장, 잔...",https://cdn.visitkorea.or.kr/img/call?cmd=VIEW...,숨은 명소
1,칠암항,73,4.0,2.0,53.6,18000,234.166667,부산광역시 기장군 일광면 문오성길 510,자연,자연관광지,항구/포구,129.26029,35.298622,24.586122,베이징 올림픽 금메달 신화를 기념하다,"칠암항은 방파제가 있는 항으로 야구 방망이와 글러브, 야구공을 형상화 한 야구 등대...",https://cdn.visitkorea.or.kr/img/call?cmd=VIEW...,숨은 명소
2,학리항,88,4.4,37.0,56.6,9160,681.5,부산광역시 기장군 일광면 일광읍 학리 251-22,자연,자연관광지,항구/포구,129.247753,35.259973,24.220685,,부산광역시 기장군 일광면 학리에 있는 어항으로 기장 해안가의 오래된 포구이다. 멸치...,https://cdn.visitkorea.or.kr/img/call?cmd=VIEW...,숨은 명소
3,아미르공원,154,4.0,3.0,53.0,63000,262.0,부산광역시 영도구 해양로301번길 55,인문(문화/예술/역사),휴양관광지,공원,129.077442,35.076293,23.708141,푸른 하늘과 에메랄드빛 바다 사이로 초록의 싱그러움이 반짝이는 아름다운 공원,아미르 공원은 1990년 말 부산항 북항 개발과정의 준설토를 매립하여 조성된 동삼혁...,https://cdn.visitkorea.or.kr/img/call?cmd=VIEW...,숨은 명소
4,부산복천동고분군,183,4.0,1.0,74.3,20100,20.916667,부산광역시 동래구 복천동,인문(문화/예술/역사),역사관광지,유적지/사적지,129.090682,35.206873,23.631031,"가야 시대를 만날 수 있는, 부산 복천동 고분군",부산 복천동 일대의 구릉 위에 있는 가야 때 무덤들이다. 여러 차례에 걸친 발굴조사...,https://cdn.visitkorea.or.kr/img/call?cmd=VIEW...,숨은 명소


In [57]:
pd.isna(all_df.iloc[2]['catch'])

True

In [30]:
from googletrans import Translator

translator = Translator()

In [64]:
answer = []

for i in range(len(all_df)):
    if pd.isna(all_df.iloc[i]['catch']):
        text = all_df.iloc[i]['description']
    else:
        text = f"{all_df.iloc[i]['catch']}. {all_df.iloc[i]['description']}"
    text = translator.translate(text,src='ko',dest='en').text
    
    answer.append(text)

ReadTimeout: The read operation timed out

In [68]:
answer

['Park in the city center.Myeongji -dong Neighborhood Park is a city park in Myeongji Ocean City.There are parking lots, soccer fields, grass plazas, and sports equipment, and there is also an infant forest experience center for children to play with their families.At the Infant Forest Experience Center, there are several experience facilities where children can play natural.In summer, you can also run a facility where you can enjoy water.There is a library of Gangseo Miracle and the parking lot is conveniently available.',
 "We commemorate the Beijing Olympic gold medal myth.Chilam Port is a famous port with a breakwater, and it is famous when the baseball lighthouse, which embodies baseball bats, gloves, and baseball balls.If you are a baseball fan, it is a unique lighthouse that commemorates the created gold medal myth at the 2008 Beijing Olympics.There is also a memorial to Lotte Giants' cast iron pitcher Choi Dong -won.Breakwater and exploration roads to baseball are well maintain