In [1]:
'''
Usage: after "Run All", set the continent name (Korean or English), the
       country name (Korean or English; Asian countries only), the start date
       ('yyyy-MM-dd') and the end date ('yyyy-MM-dd'); a CSV file is then
       generated automatically.
       In addition, the user-input values of the optional steps can be changed.
'''

# Target region (only Asia is supported at the moment).
continentName, countryName = '아시아', '대만'

# Date window used to filter the data.
# Alternative presets kept for reference:
#   '2016-06-01' .. '2023-05-01'   (originally labelled "1 year", but the dates
#                                   span about 7 years - confirm the intent)
#   '2016-06-01' .. '2019-05-01'   (3 years)
# Active preset: 4 years.
startDate, endDate = '2015-06-01', '2019-05-01'

In [2]:
#---------------------
# 1) 모듈 자동 리로드
#---------------------
%load_ext autoreload
%autoreload 2

#---------------------
# 2) 항시적용모듈
#---------------------
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

#---------------------
# 3) 파싱을위한모듈
#---------------------
from utils import *
from utils2 import *
from youtubeApi import YoutubeApi
from countryCodeAPI import CountryCodeApi

import csv
import json
import os
from collections import OrderedDict
from urllib import parse

import requests
from bs4 import BeautifulSoup
from tqdm import tqdm


In [3]:
# 4) Korean font setup
# NOTE(review): the font path is Windows-specific (Malgun Gothic). On other
# platforms point font_location at an installed Korean-capable TTF file.
import matplotlib
import matplotlib.font_manager as fm
font_location = "C:\\Windows\\Fonts\\Malgun.ttf"
font_name = fm.FontProperties(fname=font_location).get_name()
matplotlib.rc('font',family=font_name)

# 5) seaborn theme
# sns.set_theme() overwrites rcParams['font.family'] with its `font` argument
# (default 'sans-serif'). Called without it, the Korean font configured above
# is silently discarded, so the font is passed explicitly here.
sns.set_theme(style='whitegrid', font=font_name)


# 7) Short aliases for the helper classes
# The classes themselves are bound here (they are not instantiated), so the
# helper methods used later are looked up directly on the class objects.
utils = Utils
utils2 = Utils2

In [4]:
# 8) Resolve the user-supplied continent / country names

# English names (the country lookup covers Asia only)
continentNameEng = utils2.translateKorToEngForContinent(continentName)
countryNameEng = utils2.translateKorToEngForCountry(countryName)

# Korean names (the country lookup covers Asia only)
continentNameKor = utils2.translateEngToKorForContinent(continentName)
countryNameKor = utils2.translateEngToKorForCountry(countryName)

# Show all four resolved names, one per line
print(
    f'continentNameEng :   {continentNameEng}',
    f'countryNameEng   :   {countryNameEng}',
    f'continentNameKor :   {continentNameKor}',
    f'countryNameKor   :   {countryNameKor}',
    sep='\n',
)



continentNameEng :   asia
countryNameEng   :   taiwan
continentNameKor :   아시아
countryNameKor   :   대만


In [5]:
#========================================================================================================#
#                                   Build the DataFrame (selected country)
#========================================================================================================#

In [6]:
# 1) Load the Korean-visitor CSV of the selected continent into a DataFrame
#------------------
continentDataFrame = utils2.getContinentalDataFrameFromCSV(
    f'../data/{continentNameEng}_data_kor.csv'
)
continentDataFrame

Unnamed: 0,Year,month,법무부_명수,법무부_전년대비,일본_명수,일본_전년대비,중국_명수,중국_전년대비,베트남_명수,베트남_전년대비,...,사이프러스_명수,사이프러스_전년대비,부탄_명수,부탄_전년대비,요르단_명수,요르단_전년대비,네팔_명수,네팔_전년대비,예멘_명수,예멘_전년대비
0,2004년,1월,793478.0,,164785.0,,186246.0,,,,...,,,,,,,,,,
1,,2월,670447.0,,142718.0,,215373.0,,,,...,,,,,,,,,,
2,,3월,587629.0,,112516.0,,190382.0,,,,...,,,,,,,,,,
3,,4월,642413.0,,120427.0,,223062.0,,,,...,,,,,,,,,,
4,,5월,680185.0,,115659.0,,202592.0,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
228,2023년,1월,1782313.0,11.088887,565251.0,446.192247,,,258946.0,,...,,,,,,,,,,
229,,2월,1724880.0,14.302071,568622.0,621.806134,,,301343.0,,...,,,,,,,,,,
230,,3월,1472193.0,9.117956,466810.0,68.735584,,,250649.0,,...,,,,,,,,,,
231,,4월,1497105.0,5.955321,467000.0,59.657228,,,259357.0,,...,,,,,,,,,,


In [7]:
# 2) Extract the selected country's data (year, month, number of visitors,
#    annual growth rate) into a DataFrame with missing values handled
#------------------
countryDataFrame = utils2.getCountryDataFrame(
    continentDataFrame,
    countryNameKor,
)
countryDataFrame

Unnamed: 0,Year,Month,NumVisitors,annualGrowthRate
0,2004년,1월,1.2193,0.000000
1,2004년,2월,1.3916,0.000000
2,2004년,3월,1.2077,0.000000
3,2004년,4월,1.2802,0.000000
4,2004년,5월,1.1188,0.000000
...,...,...,...,...
228,2023년,1월,3.6536,123.272109
229,2023년,2월,5.9195,163.888579
230,2023년,3월,5.2300,127.817734
231,2023년,4월,5.8950,132.371041


In [8]:
# 3) Print info() to inspect the column dtypes before converting them
#------------------
countryDataFrame.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 233 entries, 0 to 232
Data columns (total 4 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   Year              233 non-null    object 
 1   Month             233 non-null    object 
 2   NumVisitors       233 non-null    float64
 3   annualGrowthRate  233 non-null    float64
dtypes: float64(2), object(2)
memory usage: 7.4+ KB


In [9]:
# 4) Turn Year/Month into continuous date values
#    NOTE(review): the original note says Date acts as the DataFrame index and
#    is not written to the CSV; in the displayed result it shows up as a
#    regular column - confirm.
#------------------
countryDataFrame = utils2.changeYearAndMonthDtypeToInt(
    countryDataFrame
)
countryDataFrame

Unnamed: 0,Year,Month,NumVisitors,annualGrowthRate,Date
0,2004,1,1.2193,0.000000,2004-01-01
1,2004,2,1.3916,0.000000,2004-02-01
2,2004,3,1.2077,0.000000,2004-03-01
3,2004,4,1.2802,0.000000,2004-04-01
4,2004,5,1.1188,0.000000,2004-05-01
...,...,...,...,...,...
228,2023,1,3.6536,123.272109,2023-01-01
229,2023,2,5.9195,163.888579,2023-02-01
230,2023,3,5.2300,127.817734,2023-03-01
231,2023,4,5.8950,132.371041,2023-04-01


In [10]:
# 5) Keep only the rows inside the selected date window
#------------------
countryDataFrame = utils2.extractDataByDateFilter(
    countryDataFrame,
    startDate,
    endDate,
)
display(countryDataFrame)

Unnamed: 0,Year,Month,NumVisitors,annualGrowthRate,Date
137,2015,6,3.6696,0.084718,2015-06-01
138,2015,7,4.5349,0.229437,2015-07-01
139,2015,8,5.432,0.238345,2015-08-01
140,2015,9,4.677,0.242825,2015-09-01
141,2015,10,5.9864,0.247583,2015-10-01
142,2015,11,5.9168,0.226331,2015-11-01
143,2015,12,6.9555,0.384041,2015-12-01
144,2016,1,8.564,0.138815,2016-01-01
145,2016,2,7.6407,0.324395,2016-02-01
146,2016,3,5.6442,0.1349,2016-03-01


In [11]:
# 6-1) Reorder the columns
#------------------
countryDataFrame = utils2.arrangeDataFrameColumns(
    countryDataFrame
)
countryDataFrame

Unnamed: 0,Year,Month,Date,NumVisitors,annualGrowthRate
137,2015,6,2015-06-01,3.6696,0.084718
138,2015,7,2015-07-01,4.5349,0.229437
139,2015,8,2015-08-01,5.432,0.238345
140,2015,9,2015-09-01,4.677,0.242825
141,2015,10,2015-10-01,5.9864,0.247583
142,2015,11,2015-11-01,5.9168,0.226331
143,2015,12,2015-12-01,6.9555,0.384041
144,2016,1,2016-01-01,8.564,0.138815
145,2016,2,2016-02-01,7.6407,0.324395
146,2016,3,2016-03-01,5.6442,0.1349


In [12]:
# 6.xx) Convert Year and Month to object dtype (optional step)
#------------------
countryDataFrame = utils2.changeYearAndMonthDtypeToObject(
    countryDataFrame
)
countryDataFrame

Unnamed: 0,Year,Month,Date,NumVisitors,annualGrowthRate
137,2015년,6월,2015-06-01,3.6696,0.084718
138,2015년,7월,2015-07-01,4.5349,0.229437
139,2015년,8월,2015-08-01,5.432,0.238345
140,2015년,9월,2015-09-01,4.677,0.242825
141,2015년,10월,2015-10-01,5.9864,0.247583
142,2015년,11월,2015-11-01,5.9168,0.226331
143,2015년,12월,2015-12-01,6.9555,0.384041
144,2016년,1월,2016-01-01,8.564,0.138815
145,2016년,2월,2016-02-01,7.6407,0.324395
146,2016년,3월,2016-03-01,5.6442,0.1349


In [13]:
# 6.xx) Reset the index (optional step)
#------------------
countryDataFrame = utils2.resetIndexForDataFrame(
    countryDataFrame
)
countryDataFrame

Unnamed: 0,Year,Month,Date,NumVisitors,annualGrowthRate
0,2015년,6월,2015-06-01,3.6696,0.084718
1,2015년,7월,2015-07-01,4.5349,0.229437
2,2015년,8월,2015-08-01,5.432,0.238345
3,2015년,9월,2015-09-01,4.677,0.242825
4,2015년,10월,2015-10-01,5.9864,0.247583
5,2015년,11월,2015-11-01,5.9168,0.226331
6,2015년,12월,2015-12-01,6.9555,0.384041
7,2016년,1월,2016-01-01,8.564,0.138815
8,2016년,2월,2016-02-01,7.6407,0.324395
9,2016년,3월,2016-03-01,5.6442,0.1349


In [14]:
# 6.xx) Extract the first and last year/month of the date-filtered DataFrame
#       as strings
#------------------
(
    startYear,
    startMonth,
    endYear,
    endMonth,
) = utils2.getStrYearAndMonth(countryDataFrame)

# One value per line: start year, start month, end year, end month
print(startYear, startMonth, endYear, endMonth, sep='\n')



2015년
6월
2019년
5월


In [15]:
# 7) Save the result as CSV
#------------------
# NOTE(review): the input CSV is read from '../data/' while the result is
# written to './data/' - confirm the two locations are meant to differ.
fileRoot = './data/'
fileName = f'{countryNameKor}_{countryNameEng}DataFrame_({startYear}{startMonth}~{endYear}{endMonth})'
fileExtention = '.csv'
filePath = fileRoot + fileName + fileExtention

# to_csv does not create missing directories, so make sure the output folder
# exists; otherwise a fresh checkout fails on the write below.
os.makedirs(fileRoot, exist_ok=True)

# 'utf-8-sig' prepends a BOM (presumably so spreadsheet tools detect UTF-8 and
# render the Korean text); index=False leaves the row index out of the file.
countryDataFrame.to_csv(filePath, encoding='utf-8-sig', index=False)
countryDataFrame

Unnamed: 0,Year,Month,Date,NumVisitors,annualGrowthRate
0,2015년,6월,2015-06-01,3.6696,0.084718
1,2015년,7월,2015-07-01,4.5349,0.229437
2,2015년,8월,2015-08-01,5.432,0.238345
3,2015년,9월,2015-09-01,4.677,0.242825
4,2015년,10월,2015-10-01,5.9864,0.247583
5,2015년,11월,2015-11-01,5.9168,0.226331
6,2015년,12월,2015-12-01,6.9555,0.384041
7,2016년,1월,2016-01-01,8.564,0.138815
8,2016년,2월,2016-02-01,7.6407,0.324395
9,2016년,3월,2016-03-01,5.6442,0.1349
