In [1]:
import pandas as pd
import altair as alt
from statsmodels.tsa.statespace.sarimax import SARIMAX
from sklearn.preprocessing import MinMaxScaler
import os
import warnings

# Silence every warning category for the rest of the session.
# NOTE(review): this also hides anything the model-fitting cells emit through
# the `warnings` module — confirm that is intended.
warnings.filterwarnings('ignore')

In [2]:
# Directory holding the input/output CSV and JSON files (relative path).
data_path = '데이터'
base_file = os.path.join(data_path, 'golden_compass_base_index.csv')

# Load the base index table and index it chronologically by the '날짜' column.
try:
    df_base = pd.read_csv(base_file)
    df_base['날짜'] = pd.to_datetime(df_base['날짜'])
    df_base = df_base.set_index('날짜').sort_index()
    print(df_base.tail())
except Exception as e:
    print(f"파일 로드 중 오류 발생: {e}")
    # Re-raise: every later cell needs df_base, so swallowing the error here
    # would only resurface as an unrelated NameError further down.
    raise

               년  월      년월    총 입도객  렌터카 가동률  전세버스 가동률  총 입도객_norm  \
날짜                                                                    
2025-04-01  2025  4  202504  1143910     68.3      39.6    0.629975   
2025-05-01  2025  5  202505  1252408     71.0      39.2    0.846750   
2025-06-01  2025  6  202506  1200051     74.0      27.2    0.742142   
2025-07-01  2025  7  202507  1312159     75.2      19.1    0.966130   
2025-08-01  2025  8  202508  1329111     79.6      14.5    1.000000   

            렌터카 가동률_norm  전세버스 가동률_norm  관광 포화 지수  
날짜                                                 
2025-04-01      0.420513       0.807388  0.619292  
2025-05-01      0.558974       0.796834  0.734186  
2025-06-01      0.712821       0.480211  0.645058  
2025-07-01      0.774359       0.266491  0.668993  
2025-08-01      1.000000       0.145119  0.715040  


2026년도 예측

In [3]:
# Columns of df_base that each get their own SARIMA forecast in the loop below.
metrics_to_forecast = ['총 입도객', '렌터카 가동률', '전세버스 가동률']

In [4]:
# SARIMA model parameters (p,d,q)(P,D,Q,m)

# Non-seasonal order (p, d, q).
sarima_order = (1, 1, 1)
# Seasonal order (P, D, Q, m); m=12 is the seasonal period in rows.
sarima_seasonal_order = (1, 1, 1, 12)

# Collects one forecast Series per metric name; filled by the forecasting loop.
forecast_dict = {}

In [5]:
# Forecast horizon: from the month after the last observation through December 2026.
# Derived from the data instead of a hard-coded 16 so the cell stays correct
# when the base file gains newer months (16 when the data ends in 2025-08).
# Assumes df_base has one row per month — TODO confirm if the input changes.
forecast_end = pd.Timestamp('2026-12-01')
last_observed = df_base.index.max()
forecast_steps = (
    (forecast_end.year - last_observed.year) * 12
    + (forecast_end.month - last_observed.month)
)
if forecast_steps < 1:
    raise ValueError(
        f"예측할 기간이 없습니다: 마지막 관측월 {last_observed:%Y-%m} 이(가) "
        f"{forecast_end:%Y-%m} 이후입니다."
    )

for metric in metrics_to_forecast:
    # 1. Fit one SARIMA model per metric on the observed history.
    # NOTE(review): stationarity/invertibility are not enforced, so the fitted
    # model may produce runaway forecasts; the clamp report below surfaces that.
    model = SARIMAX(df_base[metric],
                    order=sarima_order,
                    seasonal_order=sarima_seasonal_order,
                    enforce_stationarity=False,
                    enforce_invertibility=False)

    model_fit = model.fit(disp=False)

    # 2. Forecast every month up to and including forecast_end.
    forecast = model_fit.forecast(steps=forecast_steps)

    # 3. Clamp negative values to 0, but say so: a clamped value means the
    #    model predicted an impossible negative quantity, which would otherwise
    #    silently distort the normalization and the index built on top of it.
    n_negative = int((forecast < 0).sum())
    if n_negative:
        print(f" - 경고: '{metric}' 예측값 {n_negative}개가 음수여서 0으로 보정되었습니다. "
              f"모델 발산 여부를 확인하세요.")
    forecast = forecast.clip(lower=0)

    forecast_dict[metric] = forecast
    print(f" - '{metric}' 예측 완료.")

# One column per metric, indexed by forecast month.
df_forecast = pd.DataFrame(forecast_dict)
df_forecast.head()

 - '총 입도객' 예측 완료.
 - '렌터카 가동률' 예측 완료.
 - '전세버스 가동률' 예측 완료.


Unnamed: 0,총 입도객,렌터카 가동률,전세버스 가동률
2025-09-01,1184627.0,69.290408,31.472657
2025-10-01,1071035.0,68.244281,42.751371
2025-11-01,1082773.0,65.951114,28.451474
2025-12-01,1167584.0,67.460384,12.689354
2026-01-01,1451755.0,63.567553,7.652776


In [6]:
# Stack the observed history (metric columns only) on top of the forecast rows.
df_full = pd.concat([df_base[metrics_to_forecast], df_forecast])

데이터 정규화 (`MinMaxScaler` 사용)

In [7]:
# Min-max scale each metric column into its own '<metric>_norm' column.
# The scaler is fitted on history and forecast rows together, so the extremes
# of the forecast influence the scaling of the historical rows as well.
scaler_full = MinMaxScaler()

normalized_cols = [f'{col}_norm' for col in metrics_to_forecast]
df_full[normalized_cols] = scaler_full.fit_transform(df_full[metrics_to_forecast])

In [8]:
# Saturation index = unweighted row-wise mean of the three normalized metrics.
df_full['관광 포화 지수'] = df_full[normalized_cols].mean(axis=1)

In [9]:
# Turn the date index into a regular '날짜' column.
# Guarded so that re-running this cell does not reset the index a second time
# (which would add a second, integer-valued '날짜' column and break `.dt` below).
if '날짜' not in df_full.columns:
    df_full = df_full.rename_axis('날짜').reset_index()

# Calendar helper columns derived from the date.
df_full['년'] = df_full['날짜'].dt.year
df_full['월'] = df_full['날짜'].dt.month
df_full['월_str'] = df_full['날짜'].dt.strftime('%m월')  # label for the heatmap

In [10]:
# Inspect the last forecast rows.
# NOTE(review): the saved output shows 총 입도객 = 0.0 for 2026-08 through 2026-12,
# presumably negative forecasts clamped to zero — that model should be re-checked
# before the index for those months is trusted.
df_full.tail()

Unnamed: 0,날짜,총 입도객,렌터카 가동률,전세버스 가동률,총 입도객_norm,렌터카 가동률_norm,전세버스 가동률_norm,관광 포화 지수,년,월,월_str
43,2026-08-01,0.0,81.43806,12.888841,0.0,1.0,0.133412,0.377804,2026,8,08월
44,2026-09-01,0.0,70.558278,32.939958,0.0,0.494038,0.644305,0.379448,2026,9,09월
45,2026-10-01,0.0,69.223118,41.946713,0.0,0.431946,0.873793,0.435246,2026,10,10월
46,2026-11-01,0.0,66.932668,28.645196,0.0,0.325429,0.534877,0.286769,2026,11,11월
47,2026-12-01,0.0,68.528358,12.192004,0.0,0.399637,0.115657,0.171765,2026,12,12월


In [11]:
# Persist history + forecast (including derived columns) as UTF-8-with-BOM CSV, without the row index.
forecast_output_file = os.path.join(data_path, 'golden_compass_forecasted.csv')
df_full.to_csv(forecast_output_file, index=False, encoding='utf-8-sig')

황금시기 캘린더 시각화

In [12]:
# Keep only the 2026 rows; these feed the calendar heatmap.
data_2026 = df_full[df_full['년'] == 2026]

In [13]:
# Calendar heatmap: one rectangle per 2026 month, coloured by the saturation index.
month_axis = alt.X('월_str', title="월", sort=None)
year_axis = alt.Y('년:O', title="연도")
saturation_color = alt.Color(
    '관광 포화 지수',
    title="포화 지수 (낮을수록 쾌적)",
    scale=alt.Scale(range='heatmap', domain=[0, 1]),
)
heatmap_tooltips = [
    alt.Tooltip('년'),
    alt.Tooltip('월_str', title='월'),
    alt.Tooltip('관광 포화 지수', format='.3f'),
]

heatmap = (
    alt.Chart(data_2026)
    .mark_rect()
    .encode(
        x=month_axis,
        y=year_axis,
        color=saturation_color,
        tooltip=heatmap_tooltips,
    )
    .properties(
        title="2026년 '황금시기' 예측 캘린더",
        width=alt.Step(60),
    )
)

JSON 파일로 저장

In [14]:
# Save the heatmap chart to a .json file in the data directory.
heatmap_chart_file = os.path.join(data_path, 'golden_calendar_heatmap_2026.json')
heatmap.save(heatmap_chart_file)

In [15]:
# Render the heatmap as this cell's output.
heatmap

전체 추이 시각화

In [16]:
# Line chart (with point markers) of the saturation index for every row of df_full.
trend_tooltips = [
    alt.Tooltip('날짜', format="%Y년 %m월"),
    alt.Tooltip('관광 포화 지수', format='.3f'),
]
trend_marks = alt.Chart(df_full).mark_line(point=True)
trend_encoded = trend_marks.encode(
    x=alt.X('날짜', title='날짜'),
    y=alt.Y('관광 포화 지수', title='관광 포화 지수 (0: 쾌적 ~ 1: 혼잡)'),
    tooltip=trend_tooltips,
)
line_chart = trend_encoded.properties(
    title="제주 관광 포화 지수 전체 추이 (2023-2026)",
).interactive()

JSON 파일로 저장

In [17]:
# Save the trend line chart to a .json file in the data directory.
line_chart_file = os.path.join(data_path, 'golden_calendar_line_chart.json')
line_chart.save(line_chart_file)

In [18]:
# Render the trend line chart as this cell's output.
line_chart

월별 코스 추천 캘린더(Develop)

In [19]:
# Path of the Hallasan visitor-count CSV inside the data directory.
file_hallasan = os.path.join(data_path, '한라산 탐방객_2021_202508.csv')

In [20]:
# Load the Hallasan visitor counts and turn them into a date-indexed Series.
try:
    df_hallasan_raw = pd.read_csv(file_hallasan,
                                  encoding='utf-8-sig',
                                  thousands=',')
    # Anything that still is not numeric becomes NaN and is filled by interpolation.
    df_hallasan_raw['탐방객'] = pd.to_numeric(df_hallasan_raw['탐방객'], errors='coerce')
    if df_hallasan_raw['탐방객'].isnull().any():
        df_hallasan_raw['탐방객'] = df_hallasan_raw['탐방객'].interpolate()
    # Build a first-of-month timestamp from the separate year / month columns.
    df_hallasan_raw['날짜'] = pd.to_datetime(df_hallasan_raw['년'].astype(str) + '-' + df_hallasan_raw['월'].astype(str) + '-01')
    df_hallasan_ts = df_hallasan_raw.set_index('날짜')['탐방객'].sort_index()
except Exception as e:
    print(f"파일 로드 중 오류 발생: {e}")
    # Re-raise: the model cells below need df_hallasan_ts, so continuing
    # would only fail later with a less helpful NameError.
    raise

# Kept as the last expression of the cell so the preview is actually displayed
# (inside the try block its value was discarded).
df_hallasan_ts.tail()

한라산 탐방객 시계열 예측

In [21]:
# SARIMA orders for the Hallasan model: (p, d, q) and (P, D, Q, m).
# These re-assign the same values already set earlier in the notebook.
sarima_order = (1, 1, 1)
sarima_seasonal_order = (1, 1, 1, 12)

In [22]:
# Build and fit the seasonal ARIMA model on the Hallasan visitor series.
# Stationarity / invertibility constraints are switched off, as in the other models.
hallasan_sarima_options = dict(
    order=sarima_order,
    seasonal_order=sarima_seasonal_order,
    enforce_stationarity=False,
    enforce_invertibility=False,
)
model_hallasan = SARIMAX(df_hallasan_ts, **hallasan_sarima_options)

# disp=False keeps the optimizer from printing its progress.
model_hallasan_fit = model_hallasan.fit(disp=False)

In [23]:
# 16 steps ahead of the last observation; negative predictions are replaced by 0.
hallasan_raw_forecast = model_hallasan_fit.forecast(steps=16)
forecast_hallasan = hallasan_raw_forecast.mask(hallasan_raw_forecast < 0, 0)
forecast_hallasan

2025-09-01    55852.669470
2025-10-01    93540.188896
2025-11-01    82091.772018
2025-12-01    92333.547970
2026-01-01    77916.824073
2026-02-01    26761.640285
2026-03-01    38316.647039
2026-04-01    62411.376600
2026-05-01    80157.905136
2026-06-01    67233.888712
2026-07-01    54020.240685
2026-08-01    51687.514995
2026-09-01    51206.532538
2026-10-01    87310.214333
2026-11-01    77896.917821
2026-12-01    89907.368410
Freq: MS, Name: predicted_mean, dtype: float64

In [24]:
# Reload the saved forecast table from disk and index it by parsed date.
forecast_file = os.path.join(data_path, 'golden_compass_forecasted.csv')
df_full = (
    pd.read_csv(forecast_file)
    .assign(**{'날짜': lambda frame: pd.to_datetime(frame['날짜'])})
    .set_index('날짜')
)

In [25]:
# Append the forecast to the observed visitor series and give the result the column name used in the merge.
df_hallasan_full = pd.concat([df_hallasan_ts, forecast_hallasan]).rename('한라산 탐방객')

In [26]:
# Left-join on the date index: every df_full row is kept, Hallasan dates outside df_full are dropped.
df_merged_wellness = df_full.merge(df_hallasan_full, left_index=True, right_index=True, how='left')

웰니스 쾌적도 생성

In [27]:
# Min-max scale the Hallasan visitor counts over the merged rows only.
scaler_h = MinMaxScaler()
df_merged_wellness['한라산_norm'] = scaler_h.fit_transform(df_merged_wellness[['한라산 탐방객']])

In [28]:
# Wellness index = simple average of the saturation index and the normalized Hallasan visitor level.
df_merged_wellness['웰니스 쾌적도'] = (df_merged_wellness['관광 포화 지수'] + df_merged_wellness['한라산_norm']) / 2

In [29]:
# Preview the two inputs and the resulting wellness index for the last rows.
df_merged_wellness[['관광 포화 지수', '한라산_norm', '웰니스 쾌적도']].tail()

Unnamed: 0_level_0,관광 포화 지수,한라산_norm,웰니스 쾌적도
날짜,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2026-08-01,0.377804,0.2856,0.331702
2026-09-01,0.379448,0.280089,0.329768
2026-10-01,0.435246,0.693765,0.564506
2026-11-01,0.286769,0.585907,0.436338
2026-12-01,0.171765,0.723523,0.447644


웰니스 쾌적도 시각화

In [30]:
# Move the date index back into a regular column so the chart can reference it by name.
df_wellness_chart = df_merged_wellness.reset_index()

In [31]:
# Line chart of the wellness index over time; the tooltip also exposes both inputs.
wellness_tooltips = [
    alt.Tooltip('날짜', format="%Y년 %m월"),
    alt.Tooltip('웰니스 쾌적도', format='.3f'),
    alt.Tooltip('관광 포화 지수', format='.3f'),
    alt.Tooltip('한라산_norm', title='한라산 혼잡도', format='.3f'),
]
wellness_marks = alt.Chart(df_wellness_chart).mark_line()
wellness_encoded = wellness_marks.encode(
    x=alt.X('날짜', title='날짜'),
    y=alt.Y('웰니스 쾌적도', title='쾌적도 지수 (0: 쾌적)'),
    tooltip=wellness_tooltips,
)
wellness_line_chart = wellness_encoded.properties(
    title="월별 '웰니스 쾌적도 지수' (관광 포화 + 한라산)",
).interactive()

In [32]:
# Render the wellness chart as this cell's output.
wellness_line_chart

골프장/미식 테마 분석

In [33]:
# Path of the golf-course visitor CSV inside the data directory.
file_golf = os.path.join(data_path, '골프장 내장객_2023_202506.csv')

In [34]:
# Load the golf-course visitor counts and build a date-indexed Series of the
# '도외 및 외국인' column.
try:
    df_golf_raw = pd.read_csv(file_golf,
                              encoding='utf-8-sig', thousands=',')
    # Rows without a year or month cannot be dated, so drop them first.
    df_golf_raw = df_golf_raw.dropna(subset=['년', '월'])
    df_golf_raw['년'] = df_golf_raw['년'].astype(int)
    df_golf_raw['월'] = df_golf_raw['월'].astype(int)
    # Build a first-of-month timestamp from the separate year / month columns.
    df_golf_raw['날짜'] = pd.to_datetime(df_golf_raw['년'].astype(str) + '-' + df_golf_raw['월'].astype(str) + '-01')
    df_golf_ts = df_golf_raw.set_index('날짜')['도외 및 외국인'].sort_index()

    # Fill interior gaps by interpolation; whatever is still missing becomes 0.
    if df_golf_ts.isnull().any():
        df_golf_ts = df_golf_ts.interpolate()
    df_golf_ts = df_golf_ts.fillna(0)
except Exception as e:
    print(f"파일 로드 중 오류 발생: {e}")
    # Re-raise: the cells below need df_golf_ts, so continuing would only
    # fail later with a less helpful NameError.
    raise

In [35]:
# Preview the first rows of the cleaned golf visitor series.
df_golf_ts.head()

날짜
2023-01-01     56430.0
2023-02-01     88037.0
2023-03-01    117808.0
2023-04-01    145938.0
2023-05-01    150877.0
Name: 도외 및 외국인, dtype: float64

골프장 내장객 시계열 예측

In [36]:
# SARIMA orders for the golf model: (p, d, q) and (P, D, Q, m).
# These re-assign the same values already set earlier in the notebook.
sarima_order = (1, 1, 1)
sarima_seasonal_order = (1, 1, 1, 12)

In [37]:
# Build and fit the seasonal ARIMA model on the golf visitor series.
# Stationarity / invertibility constraints are switched off, as in the other models.
golf_sarima_options = dict(
    order=sarima_order,
    seasonal_order=sarima_seasonal_order,
    enforce_stationarity=False,
    enforce_invertibility=False,
)
model_golf = SARIMAX(df_golf_ts, **golf_sarima_options)

# disp=False keeps the optimizer from printing its progress.
model_golf_fit = model_golf.fit(disp=False)

In [38]:
# 18 steps ahead of the last observation; negative predictions are replaced by 0.
golf_raw_forecast = model_golf_fit.forecast(steps=18)
forecast_golf = golf_raw_forecast.mask(golf_raw_forecast < 0, 0)
forecast_golf.head()


2025-07-01     80463.897437
2025-08-01    104098.172281
2025-09-01    114136.089577
2025-10-01    154520.166746
2025-11-01    138668.083925
Freq: MS, Name: predicted_mean, dtype: float64

골프쾌적도 지수 생성

In [39]:
# Append the golf forecast to the observed series and name it for the merge.
df_golf_full = pd.concat([df_golf_ts, forecast_golf]).rename('골프장 내장객(도외)')
# Left-join on the date index so every row of the wellness table is kept.
df_merged_themes = df_merged_wellness.merge(df_golf_full, left_index=True, right_index=True, how='left')

In [40]:
# Min-max scale the golf visitor counts over the merged rows.
scaler_g = MinMaxScaler()
df_merged_themes['골프_norm'] = scaler_g.fit_transform(df_merged_themes[['골프장 내장객(도외)']])
# Golf index = saturation index minus normalized golf visitors; it can be
# negative, as the preview output shows.
# NOTE(review): the wellness index averages its two inputs whereas this one
# subtracts them — confirm the intended sign and direction of this index.
df_merged_themes['골프 쾌적도'] = df_merged_themes['관광 포화 지수'] - df_merged_themes['골프_norm']
df_merged_themes[['관광 포화 지수', '골프_norm', '골프 쾌적도']].head()

Unnamed: 0_level_0,관광 포화 지수,골프_norm,골프 쾌적도
날짜,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2023-01-01,0.448781,0.140028,0.308753
2023-02-01,0.493013,0.387501,0.105512
2023-03-01,0.579607,0.620598,-0.04099
2023-04-01,0.712633,0.840846,-0.128213
2023-05-01,0.700981,0.879517,-0.178536


시니어 관심 미식 키워드 분석

In [41]:
# Path of the VISIT JEJU search-keyword trend CSV inside the data directory.
file_search = os.path.join(data_path, '제주관광공사_제주관광정보시스템(VISIT JEJU)_검색어트랜드내역.CSV')

In [42]:
# Load the search-trend CSV, parsing '통계일' as dates.
# First attempt uses the default encoding; on a decode failure retry as cp949.
try:
    df_search_raw = pd.read_csv(file_search, delimiter=',', parse_dates=['통계일'])
except UnicodeDecodeError:
    df_search_raw = pd.read_csv(file_search, delimiter=',', parse_dates=['통계일'], encoding='cp949')
except Exception as e:
    print(f"검색어 파일 로드 오류: {e}")
    # Re-raise: df_search_raw is required by the next cells, so swallowing the
    # error here would only resurface as an unrelated NameError.
    raise

In [43]:
# Age-group labels to keep from the '연령대' column.
target_ages = ['50대', '60대이상']
# Copy the filtered rows so later column assignments do not touch df_search_raw.
df_senior_search = df_search_raw[df_search_raw['연령대'].isin(target_ages)].copy()

In [44]:
# Calendar month (1-12) of each record; records are pooled by month regardless of year.
df_senior_search['월'] = df_senior_search['통계일'].dt.month
# Food-related keywords; matching against '키워드' is exact (isin), not substring.
foodie_keywords = [
    # Signature Jeju dishes
    '흑돼지', '고기국수', '갈치', '갈치조림', '갈치구이', '고등어', '고등어회',

    # Seafood
    '방어', '한치', '물회', '해물탕', '전복', '딱새우', '성게', '보말', '해물라면', '보말칼국수',

    # Generic search terms
    '맛집', '카페', '서귀포 맛집', '애월 맛집', '제주 맛집', '횟집',

    # Local specialties / desserts
    '오메기떡', '감귤', '우도 땅콩'
]
# Keep only the rows whose keyword is in the food list.
df_foodie_search = df_senior_search[df_senior_search['키워드'].isin(foodie_keywords)]

In [45]:
# Filter to food keywords only (same filter as at the end of the previous cell).
df_foodie_search = df_senior_search[df_senior_search['키워드'].isin(foodie_keywords)]

# Sum the search score per (month, keyword) pair.
df_foodie_monthly = df_foodie_search.groupby(['월', '키워드'])['점수'].sum().reset_index()

# Top-1 food keyword for each month (the month's representative food):
# idxmax returns the row label of the highest score within each month.
df_foodie_top_overall = df_foodie_monthly.loc[df_foodie_monthly.groupby('월')['점수'].idxmax()]
print(df_foodie_top_overall.sort_values(by='월'))

# For each keyword, the month in which its summed score peaks.
# The result is stored but not displayed in this cell.
df_foodie_peak_month = df_foodie_monthly.loc[df_foodie_monthly.groupby('키워드')['점수'].idxmax()]

     월 키워드            점수
1    1  방어  172301.61944
2    2  방어   75273.66220
5    3  한치   10138.79268
7    4  한치    6682.27882
8    5  한치   15368.51156
9    6  한치   22122.24828
11   7  한치   55384.36197
13   8  한치   28322.28393
14   9  물회    2466.99350
15  11  방어   94012.46454
17  12  방어  168682.79284


In [47]:
# Save the per-month, per-keyword score table as UTF-8-with-BOM CSV, without the row index.
foodie_ranking_file = os.path.join(data_path, 'golden_compass_foodie_ranking.csv')
df_foodie_monthly.to_csv(foodie_ranking_file, index=False, encoding='utf-8-sig')

In [46]:
# Move the date index into a column; the rename only has an effect if the
# index is unnamed and reset_index therefore produces a column called 'index'.
df_final_themes = df_merged_themes.reset_index().rename(columns={'index': '날짜'})
# Save the combined theme table as UTF-8-with-BOM CSV, without the row index.
final_themes_file = os.path.join(data_path, 'golden_compass_final_themes.csv')
df_final_themes.to_csv(final_themes_file, index=False, encoding='utf-8-sig')
df_final_themes.tail()

Unnamed: 0,날짜,총 입도객,렌터카 가동률,전세버스 가동률,총 입도객_norm,렌터카 가동률_norm,전세버스 가동률_norm,관광 포화 지수,년,월,월_str,한라산 탐방객,한라산_norm,웰니스 쾌적도,골프장 내장객(도외),골프_norm,골프 쾌적도
43,2026-08-01,0.0,81.43806,12.888841,0.0,1.0,0.133412,0.377804,2026,8,08월,51687.514995,0.2856,0.331702,100784.77161,0.487312,-0.109507
44,2026-09-01,0.0,70.558278,32.939958,0.0,0.494038,0.644305,0.379448,2026,9,09월,51206.532538,0.280089,0.329768,110078.947431,0.560082,-0.180634
45,2026-10-01,0.0,69.223118,41.946713,0.0,0.431946,0.873793,0.435246,2026,10,10월,87310.214333,0.693765,0.564506,148413.669874,0.86023,-0.424984
46,2026-11-01,0.0,66.932668,28.645196,0.0,0.325429,0.534877,0.286769,2026,11,11월,77896.917821,0.585907,0.436338,132422.610797,0.735025,-0.448257
47,2026-12-01,0.0,68.528358,12.192004,0.0,0.399637,0.115657,0.171765,2026,12,12월,89907.36841,0.723523,0.447644,77605.990284,0.305829,-0.134065
