# Python import statement

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns

# Import Data

In [2]:
df_2024 = pd.read_excel('2024_외래관광객조사_Data.xlsx')
df_2023 = pd.read_excel('2023_외래관광객조사_DATA.xlsx')

# Take columns 

In [5]:
cols_to_rename = ['Q8a04','Q8a05','Q8a06','Q8a07','Q8a08','Q8a09','Q8a10',
                  'Q8a11','Q8a12','Q8a13','Q8a14','Q8a15','Q8a16','Q8a17','Q8a18','Q8a19']

# Create a dictionary mapping from old name to new name
rename_dict = {col: f"Q8a{int(col[3:]):02d}" for col in cols_to_rename}
rename_dict = {old: f"Q8a{int(old[3:]) + 1:02d}" for old in cols_to_rename}

# Rename columns in DataFrame
df_2023.rename(columns=rename_dict, inplace=True)

In [6]:
# Rename from end to beginning to avoid overlap
cols_to_rename = [
    "Q12a04", "Q12a05", "Q12a06", "Q12a07", "Q12a08", "Q12a09", "Q12a10",
    "Q12a11", "Q12a12", "Q12a13", "Q12a14", "Q12a15", "Q12a16", "Q12a17",
    "Q12a18", "Q12a19", "Q12a20", "Q12a21", "Q12a22", "Q12a23", "Q12a24",
    "Q12a25", "Q12a26"
]

# Create a dictionary mapping from old name to new name
rename_dict = {col: f"Q12a{int(col[4:]):02d}" for col in cols_to_rename}
rename_dict = {old: f"Q12a{int(old[4:]) + 1:02d}" for old in cols_to_rename}

# Rename columns in DataFrame
df_2023.rename(columns=rename_dict, inplace=True)

In [10]:
desired_columns = [
    'MVIT', 'TYP', 'Q1', 'Q1_1a1',
    'Q2a1', 'Q2a2','Q2a3',
    'Q3_1a1', 'Q3_1a2','Q3_1a3', 
    'Q3_2a1','Q3_2a2','Q3_2a3',
    'Q4a1','Q4a2','Q4a3',
    'Q7A','Q7a_dk','Q7a2','Q7a3','Q7a4','Q7a5','Q7a6','Q7a7','Q7a8',# Type of companion
    'Q8a01','Q8a02','Q8a03','Q8a04','Q8a05','Q8a06','Q8a07','Q8a08','Q8a09','Q8a10', 'Q8a11', 'Q8a12', 'Q8a13', 'Q8a14', 'Q8a15', 'Q8a16', 'Q8a17', 'Q8a18', 'Q8a19','Q8a20',#Activities_participated
    'Q9_2a01', 'Q9_2a02', 'Q9_2a03', 'Q9_2a04', 'Q9_2a05', 'Q9_2a06', 'Q9_2a07', 'Q9_2a08', 'Q9_2a09', 'Q9_2a10', 'Q9_2a11', 'Q9_2a12', 'Q9_2a13', 'Q9_2a14', 'Q9_2a15', 'Q9_2a16', 'Q9_2a17',
    'M일HAP','R일HAP',
    '총액1인TOT2','MDAY전체TOT_RAW61','MDAY개별대체61','MDAY에어대체61','MDAY단체대체61',
    'Q12a01', 'Q12a02', 'Q12a03', 'Q12a04', 'Q12a05', 'Q12a06', 'Q12a07', 'Q12a08', 'Q12a09', 'Q12a10', 'Q12a11', 'Q12a12', 'Q12a13', 'Q12a14', 'Q12a15', 'Q12a16', 
    'Q12a17', 'Q12a18', 'Q12a19', 'Q12a20', 'Q12a21', 'Q12a22', 'Q12a23', 'Q12a24', 'Q12a25', 'Q12a26', 'Q12a27',
    'Q13', 'Q14', 'D_MON', 'D_BUN', 'D_NAT', 'D_SEX', 'D_AGE', 'D_MOK', 'D_GUB', 'weight1', 'weight2', 'weight3', 'weight4', 'weight'
]

# Data alignment and merge for 2023 & 2024

In [11]:
for col in desired_columns:
    if col not in df_2023.columns:
        df_2023[col] = np.nan
    if col not in df_2024.columns:
        df_2024[col] = np.nan

df_2023['YEAR'] = 2023
df_2024['YEAR'] = 2024

df_combined = pd.concat([df_2023[desired_columns + ['YEAR']], df_2024[desired_columns + ['YEAR']]], ignore_index=True)

In [12]:
df_combined

Unnamed: 0,MVIT,TYP,Q1,Q1_1a1,Q2a1,Q2a2,Q2a3,Q3_1a1,Q3_1a2,Q3_1a3,...,D_SEX,D_AGE,D_MOK,D_GUB,weight1,weight2,weight3,weight4,weight,YEAR
0,4,1,1,2.0,4.0,8.0,2.0,1.0,7.0,2.0,...,2,2,1,1,,,,,866.099893,2023
1,20,1,2,2.0,,,,3.0,13.0,2.0,...,1,3,2,1,,,,,191.333757,2023
2,2,1,5,,,,,,,,...,2,1,5,1,,,,,244.838755,2023
3,1,1,3,,,,,,,,...,1,5,3,1,,,,,555.272342,2023
4,3,1,2,8.0,,,,2.0,1.0,4.0,...,2,4,2,1,,,,,193.434506,2023
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
32407,2,1,1,5.0,1.0,,,3.0,2.0,1.0,...,2,4,1,1,,,,1215.218286,1215.218286,2024
32408,2,1,1,9.0,5.0,7.0,1.0,12.0,3.0,1.0,...,1,4,1,1,,,,1131.834751,1131.834751,2024
32409,1,1,1,5.0,,,,13.0,,,...,2,5,1,1,,,,1212.382353,1212.382353,2024
32410,2,1,1,2.0,,,,3.0,2.0,1.0,...,1,5,1,1,,,,1287.852779,1287.852779,2024


# Handling categorical label mapping of variable values

In [14]:
df_combined.loc[:, "TYP"] = df_combined["TYP"].astype("category")
TYP_map = {
    1: "별여행",             
    2: "Air-tel package",      
    3: "단체여행"                 
}

df_combined.loc[:, "TYP"] = df_combined["TYP"].map(TYP_map)

In [15]:
df_combined.loc[:, "Q1"] = df_combined["Q1"].astype("category")
Q1_map = {
    1: "여가, 위락, 휴식",      
    2: "친구, 친지 방문",          
    3: "사업 또는 전문 활동",       
    4: "교육",                    
    5: "기타"                      
}
df_combined.loc[:, "Q1"] = df_combined["Q1"].map(Q1_map)

In [16]:
def group_q1_1a1(reason):
    if reason in [7,8]:
        return "콘텐츠 영향"
    elif reason == 9:
        return "관광 정보"
    elif reason in [4,5,6,10]:
        return "가격/거리"
    elif reason in [1,2]:
        return "경험/호감"
    elif reason in [3,11]:
        return "기타"
    else:
        return "미상"
df_combined.loc[:, "Q1_1a1"] = df_combined["Q1_1a1"].apply(group_q1_1a1)

In [17]:
df_combined.loc[:, "Q2a1"] = df_combined["Q2a1"].astype("category")
df_combined.loc[:, "Q2a2"] = df_combined["Q2a2"].astype("category")
df_combined.loc[:, "Q2a3"] = df_combined["Q2a3"].astype("category")
Q2a_map = {
    1: "일본",
    2: "중국",
    3: "대만",
    4: "홍콩",
    5: "싱가포르",
    6: "태국",
    7: "말레이시아",
    8: "마카오",
    9: "인도",
    10: "인도네시아",
    11: "필리핀",
    12: "베트남",
    13: "캄보디아",
    14: "기타"
}
for col in ["Q2a1", "Q2a2", "Q2a3"]:
    df_combined.loc[:, col] = df_combined[col].map(Q2a_map)

In [18]:
def group_tour_activity(code_int):
    if code_int in [1]:
        return '음식 및 미식 관광'
    elif code_int in [2, 11, 12]:
        return '쇼핑 및 여가'
    elif code_int in [5,6,7,8]:
        return '문화 및 역사 탐방'
    elif code_int in [3,4]:
        return '휴양 및 자연 감상'
    elif code_int in [13,14]:
        return '건강 및 미용'
    elif code_int in [15,16]:
        return '스포츠 및 레포츠'
    elif code_int in [9,10]:
        return '공연 및 행사 참여'
    elif code_int == 17:
        return '기타'
    else:
        return '미상'
cols = [
    'Q3_1a1', 'Q3_1a2','Q3_1a3'
]

for col in cols:
    df_combined.loc[:, col] = df_combined[col].apply(group_tour_activity)

In [19]:
def group_q3_2(code):
    if code in [1, 6]:
        return "이동/절차 관련"
    elif code in [2, 8, 9]:
        return "정보/소통 관련"
    elif code in [3, 7]:
        return "안전/보건 관련"
    elif code == 4:
        return "비용 관련"
    elif code == 5:
        return "숙박 시설"
    elif code in [10, 11]:
        return "디지털/정보 관련"
    elif code == 12:
        return "기타"
    else:
        return "미상"
cols = [
    'Q3_2a1','Q3_2a2','Q3_2a3'
]

for col in cols:
    df_combined.loc[:, col] = df_combined[col].apply(group_q3_2)

In [20]:
def group_q4_source(code):
    if code == 1:
        return '디지털 매체'
    elif code == 2:
        return '오프라인 민간 채널'
    elif code in [3, 5]:
        return '전통 매체'
    elif code == 4:
        return '지인 추천'
    elif code == 6:
        return '민간 기업'
    elif code == 7:
        return '공공기관'
    elif code == 8:
        return '기타'
    elif code == 9:
        return '정보 없음'
    else:
        return '미상'
cols = [
    'Q4a1','Q4a2','Q4a3'
]

for col in cols:
    df_combined.loc[:, col] = df_combined[col].apply(group_q4_source)

In [21]:
df_combined.loc[:, "Q7A"] = df_combined["Q7A"].astype("category")
TYP_map = {
    1:"동반자 없음",             
    2: "동반자 있음"               
}

df_combined.loc[:, "Q7A"] = df_combined["Q7A"].map(TYP_map)

In [29]:
def determine_companion_group(row):
    if row['Q7a_dk'] == 1:
        return '단독 여행'
    elif row['Q7a2'] == 2 or row['Q7a3'] == 3 or row['Q7a4'] == 4 or row['Q7a5'] == 5:
        return '가족 여행'
    elif row['Q7a6'] == 6:
        return '친구 여행'
    elif row['Q7a7'] == 7:
        return '직장/업무 여행'
    elif row['Q7a8'] == 8:
        return '기타'
    else:
        return '미상'
df_combined.loc[:, 'Type_of_companion'] = df_combined.apply(determine_companion_group, axis=1)

In [31]:
def Activities_participated_group(row):
    if row['Q8a01'] == 1 and row['Q8a02'] == 2:
        return '음식/쇼핑 관련'
    elif row['Q8a03'] == 3 or row['Q8a04'] == 4:
        return '휴양/자연 관련'
    elif row['Q8a05'] == 5 or  row['Q8a06'] == 6 or row['Q8a07'] == 7:
        return '문화/역사 관련'
    elif row['Q8a08'] == 8 or  row['Q8a09'] == 9 or row['Q8a10'] == 10:
        return '한류/공연 관련'
    elif row['Q8a11'] == 11 or row['Q8a12'] == 12:
        return '여가/오락 관련'
    elif row['Q8a13'] == 13 or row['Q8a14'] == 14:
        return '뷰티/헬스케어 관련'
    elif row['Q8a15'] == 15 or row['Q8a16'] == 16:
        return '스포츠/레포츠'
    elif row['Q8a17'] == 17 or  row['Q8a18'] == 18 or row['Q8a19'] == 19:
        return '업무/학술 목적'
    elif row['Q8a20'] == 20:
        return '기타'
    else:
        return '미상'
df_combined.loc[:, 'Activities_participated'] = df_combined.apply(Activities_participated_group, axis=1)

In [32]:
def group(row):
    if row['Q9_2a01'] == 1:
        return '서울'
    elif row['Q9_2a02'] == 2:
        return '경기'
    elif row['Q9_2a03'] == 3:
        return '인천'
    elif row['Q9_2a04'] == 4:
        return '강원'
    elif row['Q9_2a05'] == 5:
        return '대전'
    elif row['Q9_2a06'] == 6:
        return '충북'
    elif row['Q9_2a07'] == 7:
        return '충남'
    elif row['Q9_2a08'] == 8:
        return '세종'
    elif row['Q9_2a09'] == 9:
        return '경북'
    elif row['Q9_2a10'] == 10:
        return '경남'
    elif row['Q9_2a11'] == 11:
        return '대구'
    elif row['Q9_2a12'] == 12:
        return '울산'
    elif row['Q9_2a13'] == 13:
        return '부산'
    elif row['Q9_2a14'] == 14:
        return '광주'
    elif row['Q9_2a15'] == 15:
        return '전북'
    elif row['Q9_2a16'] == 16:
        return '전남'
    elif row['Q9_2a17'] == 17:
        return '제주'
    else:
        return '미상'
df_combined.loc[:, '대표방문지역'] = df_combined.apply(group, axis=1)

In [33]:
df_combined.대표방문지역.unique()

array(['서울', '인천', '경기', '경북', '충남', '대전', '전북', '대구', '강원', '부산', '충북',
       '경남', '제주', '세종', '울산', '광주', '전남'], dtype=object)

In [34]:
def stay_group(row):
    if row['R일HAP'] >= 1 and row['R일HAP'] <= 10:
        return '10일 이하'
    elif row['R일HAP'] == 11:
        return '2달 이하'
    elif row['R일HAP'] == 12:
        return '2달 이상'
    else:
        return '미상'
df_combined.loc[:, '체재기간_대분류'] = df_combined.apply(stay_group, axis=1)

In [35]:
def agree_level(value):
    if value in [4, 5]:
        return '긍정적'
    elif value in [1, 2]:
        return '부정적'
    elif value == 3:
        return '중립'
    else:
        return '미상'
df_combined.loc[:, '재방문_의사_분류']= df_combined['Q13'].apply(agree_level)
df_combined.loc[:, '추천_의향_분류']= df_combined['Q14'].apply(agree_level)

In [36]:
df_combined['추천_의향_분류'].unique()

array(['긍정적', '중립', '부정적'], dtype=object)

In [38]:
DNAT_map = {
    1: "중국",
    2: "일본",
    3: "대만",
    4: "미국",
    5: "홍콩",
    6: "태국",
    7: "베트남",
    8: "말레이시아",
    9: "필리핀",
    10: "싱가포르",
    11: "러시아",
    12: "중동",
    13: "인도네시아",
    14: "캐나다",
    15:"호주",
    16:"영국",
    17:"몽콜",
    18:"독인",
    19:"프랑스",
    20:"인도",
    21:"기타"
}
df_combined.loc[:, "D_NAT"] = df_combined["D_NAT"].map(DNAT_map)

In [39]:
DSEX_map = {
    1: "남성",
    2: "여성"}
df_combined.loc[:, "D_SEX"] = df_combined["D_SEX"].map(DSEX_map)

In [40]:
DAGE_map = {
    1: "15-19세",
    2: "20대",
    3:"30대",
    4:"40대",
    5:"50대",
    6:"60대 이상"
}
df_combined.loc[:, "D_AGE"] = df_combined["D_AGE"].map(DAGE_map)

In [41]:
df_combined.loc[:, "D_MOK"] = df_combined["D_MOK"].astype("category")
D_MOK_map = {
    1: "여가, 위락, 휴식",      
    2: "친구, 친지 방문",          
    3: "사업 또는 전문 활동",       
    4: "교육",                    
    5: "기타"                      
}
df_combined.loc[:, "D_MOK"] = df_combined["D_MOK"].map(D_MOK_map)

In [43]:
df_combined.loc[:, "D_GUB"] = df_combined["D_GUB"].astype("category")
D_GUB_map = {
    1: "별여행",             
    2: "Air-tel package",      
    3: "단체여행"                 
}

df_combined.loc[:, "D_GUB"] = df_combined["D_GUB"].map(D_GUB_map)

In [45]:
print(df_combined.columns.tolist())

['MVIT', 'TYP', 'Q1', 'Q1_1a1', 'Q2a1', 'Q2a2', 'Q2a3', 'Q3_1a1', 'Q3_1a2', 'Q3_1a3', 'Q3_2a1', 'Q3_2a2', 'Q3_2a3', 'Q4a1', 'Q4a2', 'Q4a3', 'Q7A', 'Q7a_dk', 'Q7a2', 'Q7a3', 'Q7a4', 'Q7a5', 'Q7a6', 'Q7a7', 'Q7a8', 'Q8a01', 'Q8a02', 'Q8a03', 'Q8a04', 'Q8a05', 'Q8a06', 'Q8a07', 'Q8a08', 'Q8a09', 'Q8a10', 'Q8a11', 'Q8a12', 'Q8a13', 'Q8a14', 'Q8a15', 'Q8a16', 'Q8a17', 'Q8a18', 'Q8a19', 'Q8a20', 'Q9_2a01', 'Q9_2a02', 'Q9_2a03', 'Q9_2a04', 'Q9_2a05', 'Q9_2a06', 'Q9_2a07', 'Q9_2a08', 'Q9_2a09', 'Q9_2a10', 'Q9_2a11', 'Q9_2a12', 'Q9_2a13', 'Q9_2a14', 'Q9_2a15', 'Q9_2a16', 'Q9_2a17', 'M일HAP', 'R일HAP', '총액1인TOT2', 'MDAY전체TOT_RAW61', 'MDAY개별대체61', 'MDAY에어대체61', 'MDAY단체대체61', 'Q12a01', 'Q12a02', 'Q12a03', 'Q12a04', 'Q12a05', 'Q12a06', 'Q12a07', 'Q12a08', 'Q12a09', 'Q12a10', 'Q12a11', 'Q12a12', 'Q12a13', 'Q12a14', 'Q12a15', 'Q12a16', 'Q12a17', 'Q12a18', 'Q12a19', 'Q12a20', 'Q12a21', 'Q12a22', 'Q12a23', 'Q12a24', 'Q12a25', 'Q12a26', 'Q12a27', 'Q13', 'Q14', 'D_MON', 'D_BUN', 'D_NAT', 'D_SEX', 'D_AGE', 

# Rename Columns 

In [1]:
column_rename_dict = {
    'MVIT': 'Average_of_visiter',
    'TYP': 'Visit_Type',
    'Q1': 'Main_purpose',
    'Q1_1a1': 'Reason_traveling',
    'Q2a1': 'Asian_Considered_Country_1st',
    'Q2a2': 'Asian_Considered_Country_2nd',
    'Q2a3': 'Asian_Considered_Country_3rd',
    'Q3_1a1': 'Tourism_Activities_1',
    'Q3_1a2': 'Tourism_Activities_2',
    'Q3_1a3': 'Tourism_Activities_3',
    'Q3_2a1': 'Considered_Tourism_Infrastructure_1',
    'Q3_2a2': 'Considered_Tourism_Infrastructure_2',
    'Q3_2a3': 'Considered_Tourism_Infrastructure_3',
    'Q4a1': 'Routes_Collecting_1',
    'Q4a2': 'Routes_Collecting_2',
    'Q4a3': 'Routes_Collecting_3',
    'Q7A': 'Companion',
    'Q7a_dk': 'Companion_Alone',
    'Q7a2': 'Companion_Spouse',
    'Q7a3': 'Companion_Parents',
    'Q7a4': 'Companion_Children',
    'Q7a5': 'Companion_Relatives',
    'Q7a6': 'Companion_Friends',
    'Q7a7': 'Companion_Colleagues',
    'Q7a8': 'Companion_Others',
    'Q8a01': 'Activity_Gourmet_Tourism',         
    'Q8a02': 'Activity_Shopping',            
    'Q8a03': 'Activity_Nature_Scenery',           
    'Q8a04': 'Activity_Wellness',               
    'Q8a05': 'Activity_Historic_Sites',      
    'Q8a06': 'Activity_Traditional_Culture',   
    'Q8a07': 'Activity_Museum_Exhibition',    
    'Q8a08': 'Activity_KPOP_Drama_Sites',      
    'Q8a09': 'Activity_Performances',    
    'Q8a10': 'Activity_Local_Festival',         
    'Q8a11': 'Activity_Nightlife',             
    'Q8a12': 'Activity_Entertainment',         
    'Q8a13': 'Activity_Beauty_Tourism',           
    'Q8a14': 'Activity_Medical_Checkup',      
    'Q8a15': 'Activity_Sports_Watching',        
    'Q8a16': 'Activity_Sports_Participation',
    'Q8a17': 'Activity_Conference',            
    'Q8a18': 'Activity_Business',               
    'Q8a19': 'Activity_Training_Study',      
    'Q8a20': 'Activity_Others', 
    'Q9_2a01': 'Visited_Seoul',
    'Q9_2a02': 'Visited_Gyeonggi',
    'Q9_2a03': 'Visited_Incheon',
    'Q9_2a04': 'Visited_Gangwon',
    'Q9_2a05': 'Visited_Daejeon',
    'Q9_2a06': 'Visited_Chungbuk',
    'Q9_2a07': 'Visited_Chungnam',
    'Q9_2a08': 'Visited_Sejong',
    'Q9_2a09': 'Visited_Gyeongbuk',
    'Q9_2a10': 'Visited_Gyeongnam',
    'Q9_2a11': 'Visited_Daegu',
    'Q9_2a12': 'Visited_Ulsan',
    'Q9_2a13': 'Visited_Busan',
    'Q9_2a14': 'Visited_Gwangju',
    'Q9_2a15': 'Visited_Jeonbuk',
    'Q9_2a16': 'Visited_Jeonnam',
    'Q9_2a17': 'Visited_Jeju',
    'M일HAP': 'Total_period_of_stay',
    'R일HAP': 'Total_Stay_Duration_Category',
    '총액1인TOT2': 'Total_Expenditure_PerPerson',
    'MDAY전체TOT_RAW61': 'Daily_Expenditure_PerPerson',
    'MDAY개별대체61': 'Daily_Expenditure_Individual_Trip',
    'MDAY에어대체61': 'Daily_Expenditure_Airtel_Trip',
    'MDAY단체대체61': 'Daily_Expenditure_Group_Trip',
    'Q12a01': 'Satisfaction_FoodTour',
    'Q12a02': 'Satisfaction_Shopping',
    'Q12a03': 'Satisfaction_Nature_Scenery',
    'Q12a04': 'Satisfaction_Wellness',
    'Q12a05': 'Satisfaction_Historic_Sites',
    'Q12a06': 'Satisfaction_Traditional_Culture',
    'Q12a07': 'Satisfaction_Museum_Exhibition',
    'Q12a08': 'Satisfaction_KPOP_Drama_Sites',
    'Q12a09': 'Satisfaction_Performances',
    'Q12a10': 'Satisfaction_Local_Festival',
    'Q12a11': 'Satisfaction_Nightlife',
    'Q12a12': 'Satisfaction_Entertainment',
    'Q12a13': 'Satisfaction_Beauty_Tourism',
    'Q12a14': 'Satisfaction_Medical_Checkup',
    'Q12a15': 'Satisfaction_Sports_Watching',
    'Q12a16': 'Satisfaction_Sports_Participation',
    'Q12a17': 'Satisfaction_Immigration_Procedure',
    'Q12a18': 'Satisfaction_Language_Communication',
    'Q12a19': 'Satisfaction_Public_Safety',
    'Q12a20': 'Satisfaction_Travel_Cost',
    'Q12a21': 'Satisfaction_Accommodation',
    'Q12a22': 'Satisfaction_Public_Transport',
    'Q12a23': 'Satisfaction_Navigation',
    'Q12a24': 'Satisfaction_Tourist_Info_Service',
    'Q12a25': 'Satisfaction_Mobile_Internet_Use',
    'Q12a26': 'Satisfaction_Digital_Tourism_Info',
    'Q12a27': 'Satisfaction_Korean_Tour_Agency_Guide',
    'Q13': 'Willingness_to_Revisit',
    'Q14': 'Willingness_to_Recommend',
    'D_MON': 'Survey_Month',
    'D_BUN': 'Survey_Round',
    'D_NAT': 'Nationality_1',
    'D_SEX': 'Gender',
    'D_AGE': 'Age',
    'D_MOK': 'Purpose_Group',
    'D_GUB': 'Survey_Category',
    'weight1': 'Weight1',
    'weight2': 'Weight2',
    'weight3': 'Weight3',
    'weight4': 'Weight4',
    'weight': 'Final_Weight',
    'YEAR': 'Survey_Year',
    'Type_of_companion': 'Companion_Type',
    'Activities_participated': 'Participated_Activities',
    '대표방문지역': 'Main_Visited_Region',
    '체재기간_대분류': 'Stay_Duration_Category',
    '재방문_의사_분류': 'Revisit_Intention_Classified',
    '추천_의향_분류': 'Recommend_Intention_Classified'
}


In [50]:
df_combined.rename(columns=column_rename_dict, inplace=True)

In [53]:
df_combined.describe(include="all")

Unnamed: 0,Average_of_visiter,Visit_Type,Main_purpose,Reason_traveling,Asian_Considered_Country_1st,Asian_Considered_Country_2nd,Asian_Considered_Country_3rd,Tourism_Activities_1,Tourism_Activities_1.1,Tourism_Activities_1.2,...,Weight3,Weight4,Final_Weight,Survey_Year,Companion_Type,Participated_Activities,Main_Visited_Region,Stay_Duration_Category,Revisit_Intention_Classified,Recommend_Intention_Classified
count,32412.0,32412,32412,32412,16166,11756,9081,32412,32412,32412,...,4029.0,4133.0,32412.0,32412.0,32412,32412,32412,32412,32412,32412
unique,,3,5,6,14,14,14,9,9,9,...,,,,,5,10,17,4,3,3
top,,별여행,"여가, 위락, 휴식",경험/호감,일본,일본,태국,미상,미상,미상,...,,,,,가족 여행,음식/쇼핑 관련,서울,미상,긍정적,긍정적
freq,,27524,19876,9698,8536,2016,1476,8182,9383,12669,...,,,,,11286,20125,26634,16196,29755,31308
mean,3.332963,,,,,,,,,,...,904.390916,881.427534,721.104992,2023.500309,,,,,,
std,6.056328,,,,,,,,,,...,693.624903,607.247597,585.59486,0.500008,,,,,,
min,1.0,,,,,,,,,,...,89.466756,39.045707,32.474987,2023.0,,,,,,
25%,1.0,,,,,,,,,,...,303.144237,370.740774,269.710568,2023.0,,,,,,
50%,2.0,,,,,,,,,,...,674.336361,733.993394,523.960297,2024.0,,,,,,
75%,3.0,,,,,,,,,,...,1433.140814,1339.977602,1051.332824,2024.0,,,,,,


# Push into csv and excel file 

In [56]:
df_combined.to_csv('/Users/nguyentoan/Desktop/Foreign_Tourist_Survey_Data(2023-2024).csv', index=False, encoding='utf-8-sig')

In [59]:
df_combined.to_excel('/Users/nguyentoan/Desktop/Foreign_Tourist_Survey_Data(2023-2024).xlsx', index=False)