In [54]:
import warnings

import joblib
import numpy as np
import pandas as pd
from catboost import CatBoostRegressor
from tqdm import tqdm

warnings.filterwarnings("ignore")
pd.set_option("display.max_columns", None)

In [55]:
"""2022, 2023 Data concat"""
# Files under the train_2022 directory ("_B" suffix).
path2022 = "./data/train_2022"
visit_area_info2022 = pd.read_csv(f"{path2022}/tn_visit_area_info_방문지정보_B.csv")  # visited-place records (activity)
travel2022 = pd.read_csv(f"{path2022}/tn_travel_여행_B.csv")  # trips
traveller_master2022 = pd.read_csv(f"{path2022}/tn_traveller_master_여행객 Master_B.csv")  # traveller master data

# Files under the train_2023 directory ("_F" suffix).
path2023 = "./data/train_2023"
visit_area_info2023 = pd.read_csv(f"{path2023}/tn_visit_area_info_방문지정보_F.csv")  # visited-place records (activity)
travel2023 = pd.read_csv(f"{path2023}/tn_travel_여행_F.csv")  # trips
traveller_master2023 = pd.read_csv(f"{path2023}/tn_traveller_master_여행객 Master_F.csv")  # traveller master data

In [56]:
"""Data concat"""
# Stack the 2022 and 2023 frames of each table and renumber the rows from 0.
visi_area_info, travel, traveller_master = (
    pd.concat(yearly_frames, ignore_index=True)
    for yearly_frames in (
        [visit_area_info2022, visit_area_info2023],
        [travel2022, travel2023],
        [traveller_master2022, traveller_master2023],
    )
)

In [57]:
"""Visit area information"""

'Visit area information'

In [58]:
def visit_area_information(visit_area_info):
    """Keep only the visit records that describe a tourist spot.

    A row is kept when its ``VISIT_AREA_TYPE_CD`` is one of 1..8 (the other
    codes are treated as not meaningful) and its ``LOTNO_ADDR`` is present.
    The returned frame is re-indexed from 0.
    """
    is_spot = visit_area_info["VISIT_AREA_TYPE_CD"].isin(range(1, 9))
    return (
        visit_area_info.loc[is_spot]
        .dropna(subset=["LOTNO_ADDR"])
        .reset_index(drop=True)
    )

# Apply the tourist-spot filter to the combined 2022 + 2023 visit records.
visit_area_spot_info= visit_area_information(visi_area_info)

In [59]:
def address_info(visit_area_info):
    """Split each ``LOTNO_ADDR`` into its province (sido) and district (gungu).

    Args:
        visit_area_info: DataFrame with a string ``LOTNO_ADDR`` column whose
            space-separated tokens start with the sido followed by the gungu.

    Returns:
        tuple[list[str], list[str]]: ``(sido, gungu)`` with one entry per row,
        in row order. ``gungu`` is ``""`` for an address that has no second
        token.
    """
    sido = []
    gungu = []
    # Iterate over the values positionally: a label lookup such as
    # ``series[i]`` only works while the index happens to be 0..n-1.
    for address in visit_area_info['LOTNO_ADDR'].tolist():
        tokens = address.split(' ')
        sido.append(tokens[0])
        gungu.append(tokens[1] if len(tokens) > 1 else "")
    return sido, gungu

# Derive the province/district of every spot, store them as columns and keep
# only the rows whose province is Busan.
sido, gungu = address_info(visit_area_spot_info)
visit_area_spot_info = visit_area_spot_info.assign(SIDO=sido, GUNGU=gungu)

busan_spot_info = visit_area_spot_info.loc[visit_area_spot_info['SIDO'].eq("부산")]

In [60]:
"""
유의미한 변수만 추출
여행ID 방문지명 시도 군구 방문지유형코드 만족도 재방문의향 추천의향 체류시간(분) 재방문여부"
"""
# Columns kept: trip id, place name, province, district, place type code,
# satisfaction, revisit intention, recommendation intention, stay (minutes),
# revisit flag.
busan_spot_info = busan_spot_info.loc[
    :,
    [
        'TRAVEL_ID', 'VISIT_AREA_NM', 'SIDO', 'GUNGU', 'VISIT_AREA_TYPE_CD',
        'DGSTFN', 'REVISIT_INTENTION', 'RCMDTN_INTENTION',
        'RESIDENCE_TIME_MIN', 'REVISIT_YN',
    ],
].reset_index(drop=True)


In [61]:
def mission_check(travel):
    """Return the first mission code of every trip as an integer.

    Args:
        travel: DataFrame with a string ``TRAVEL_MISSION_CHECK`` column
            holding ``;``-separated codes.

    Returns:
        list[int]: the leading code of each row, in row order.
    """
    # Iterate over the values positionally: a label lookup such as
    # ``series[i]`` only works while the index happens to be 0..n-1.
    return [
        int(codes.split(';')[0])
        for codes in travel['TRAVEL_MISSION_CHECK'].tolist()
    ]


# Store the leading mission code, then keep only the two ids and that code.
travel = travel.assign(TRAVEL_MISSION_PRIORITY=mission_check(travel))
travel = travel.loc[:, ['TRAVEL_ID', 'TRAVELER_ID', 'TRAVEL_MISSION_PRIORITY']]

In [62]:
# Traveller columns kept for modelling, plus the TRAVELER_ID join key.
traveller_master = traveller_master.loc[
    :,
    ['TRAVELER_ID', 'GENDER', 'AGE_GRP', 'INCOME']
    + [f'TRAVEL_STYL_{n}' for n in range(1, 9)]
    + ['TRAVEL_MOTIVE_1', 'TRAVEL_NUM', 'TRAVEL_COMPANIONS_NUM'],
]



In [63]:
"""busan spot info와 traveller_master, travel merge Result: 부산에 여행온 사람들의 정보"""
# Attach traveller attributes to each trip (inner join: trips without a
# matching traveller are dropped), then attach the result to every Busan visit
# (left join: visits are kept even when no trip row matches).
df = travel.merge(traveller_master, on='TRAVELER_ID', how='inner')
df = busan_spot_info.merge(df, on='TRAVEL_ID', how='left')

In [64]:
"""Preprocessing"""

# A stay of 0 minutes is replaced by the column median (the median is taken
# over the column as it is, i.e. with the zeros still included).
df["RESIDENCE_TIME_MIN"] = df["RESIDENCE_TIME_MIN"].replace(
    0, df["RESIDENCE_TIME_MIN"].median()
)

# Encode the revisit flag: "N" -> 0, "Y" -> 1.
df['REVISIT_YN'] = df['REVISIT_YN'].replace("N", 0).replace("Y", 1)

# Discard rows that lack either traveller attribute, then renumber the rows.
df = df.dropna(subset=['TRAVEL_STYL_1', 'TRAVEL_MOTIVE_1']).reset_index(drop=True)

In [65]:
"""특정 관광지에 방문한 사람들 중 랜덤으로 한 명을 선택하여 그 사람의 모든 여행 데이터를 새로운 데이터프레임 train_df에 추가합니다"""
# Build the training set so that places are represented in it: for each place
# that still has rows in the pool (df_copy), pick one visit and move ALL rows
# of that visit's trip (TRAVEL_ID) from the pool into train_df.
df_copy = df.copy()
train_df = pd.DataFrame(columns=df.columns)

for area in tqdm(df['VISIT_AREA_NM'].unique()):
    area_visitors_df = df_copy[df_copy['VISIT_AREA_NM'] == area]  # pool rows for this place

    # NOTE(review): re-seeding on every iteration makes the draw a fixed
    # function of len(area_visitors_df); kept as-is so the split is unchanged.
    np.random.seed(42)
    if area_visitors_df.empty:  # no rows for this place remain in the pool
        continue

    random_idx = np.random.randint(len(area_visitors_df))  # position of the sampled visit
    random_visitor_df = area_visitors_df.iloc[[random_idx]]
    # Look the id up by column name instead of relying on it being column 0.
    travel_id = random_visitor_df['TRAVEL_ID'].iloc[0]

    visitor_trips_df = df_copy[df_copy['TRAVEL_ID'] == travel_id]  # every row of the sampled trip

    # Anti-join: the outer merge marks pool rows that are not part of the
    # sampled trip as "right_only"; only those stay in the pool.
    df_copy = pd.merge(visitor_trips_df, df_copy, how='outer', indicator=True)
    df_copy = df_copy.query('_merge == "right_only"').drop(columns=['_merge'])

    # DataFrame.append was removed in pandas 2.0; pd.concat is its replacement.
    train_df = pd.concat([train_df, visitor_trips_df], ignore_index=True)

100%|██████████| 789/789 [00:05<00:00, 132.00it/s]


In [66]:
# Keep moving whole trips from the pool (df_copy) into train_df until at most
# 20% of the rows of df are left in the pool.
while len(df_copy) / len(df) > 0.2:
    # NOTE(review): re-seeding on every iteration makes the draw a fixed
    # function of len(df_copy); kept as-is so the split is unchanged.
    np.random.seed(42)
    random_idx = np.random.randint(len(df_copy))  # position of the sampled row
    random_visitor = df_copy.iloc[[random_idx]]
    # Look the id up by column name instead of relying on it being column 0.
    travel_id = random_visitor['TRAVEL_ID'].iloc[0]
    visitor_trips = df_copy[df_copy['TRAVEL_ID'] == travel_id]  # every row of the sampled trip

    # Anti-join: the outer merge marks pool rows that are not part of the
    # sampled trip as "right_only"; only those stay in the pool.
    df_copy = pd.merge(visitor_trips, df_copy, how='outer', indicator=True)
    df_copy = df_copy.query('_merge == "right_only"').drop(columns=['_merge'])

    # DataFrame.append was removed in pandas 2.0; pd.concat is its replacement.
    train_df = pd.concat([train_df, visitor_trips], ignore_index=True)

In [67]:
# One value per line: train-set rows, test-set rows (what is left in df_copy),
# total rows, and the share of rows left in the test set.
print(len(train_df), len(df_copy), len(df), len(df_copy) / len(df), sep="\n")

2141
535
2676
0.19992526158445442


In [68]:
# Per-place aggregate features: every training row receives, for each column
# listed below, the mean of that column over all training rows of its place.
mean_source_columns = ['RESIDENCE_TIME_MIN', 'RCMDTN_INTENTION', 'REVISIT_YN',
                       'TRAVEL_COMPANIONS_NUM', 'REVISIT_INTENTION']
new_train = pd.DataFrame(
    columns=list(train_df.columns) + [f'{name}_mean' for name in mean_source_columns]
)

per_place_frames = []
for place in tqdm(list(train_df['VISIT_AREA_NM'].unique())):
    # .copy() so the new columns are written to an independent frame rather
    # than to a filtered slice of train_df.
    place_rows = train_df[train_df['VISIT_AREA_NM'] == place].copy()
    for name in mean_source_columns:
        # Assigning the scalar broadcasts the place mean to every row; no
        # temporaries are pushed into globals().
        place_rows[f'{name}_mean'] = np.mean(place_rows[name])
    per_place_frames.append(place_rows)

# Concatenate once instead of growing new_train inside the loop.
new_train = pd.concat([new_train, *per_place_frames], axis=0)

new_train.sort_values(by=['TRAVEL_ID'], axis=0, inplace=True)

100%|██████████| 789/789 [00:01<00:00, 755.90it/s]


In [69]:
# Write the training rows (with per-place means) and the held-out rows to CSV.
path = r"C:\workspace\Ko-Swipe-ML\data\all"
new_train.to_csv(f"{path}/관광지 추천시스템 Trainset_B.csv", index=False)
df_copy.to_csv(f"{path}/관광지 추천시스템 Testset_B.csv", index=False)


In [70]:
path = r"C:\workspace\Ko-Swipe-ML\data\all"

# Reload both splits from disk, discard every row that has a missing value
# and renumber the remaining rows from 0.
Train = pd.read_csv(f"{path}/관광지 추천시스템 Trainset_B.csv").dropna().reset_index(drop=True)
test = pd.read_csv(f"{path}/관광지 추천시스템 Testset_B.csv").dropna().reset_index(drop=True)

print(Train.shape, test.shape, sep="\n")

(2140, 31)
(535, 26)


In [71]:
# For each distinct visit count (ascending), print how many places have
# exactly that many rows in Train.
visits_per_place = Train['VISIT_AREA_NM'].value_counts()
count = visits_per_place.to_frame()
# Counting the counts on the Series avoids referring to the column name that
# value_counts() produces, which became "count" in pandas 2.0.
print(list(visits_per_place.value_counts().sort_index()))

[549, 101, 38, 29, 15, 10, 6, 4, 2, 4, 4, 5, 2, 2, 1, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]


In [72]:
"""5번 이상 방문한 곳으로만 필터링"""
# Keep only the rows whose place occurs at least five times in Train.
Train.reset_index(drop=True, inplace=True)

visit_counts = Train['VISIT_AREA_NM'].value_counts()
count = pd.DataFrame(visit_counts)
count['places'] = count.index
# Filter on the Series itself so the result does not depend on the column name
# value_counts() produces (it became "count" in pandas 2.0).
five_places = list(visit_counts[visit_counts >= 5].index)
# One vectorised membership test replaces the row-by-row drop loop.
Train = Train[Train['VISIT_AREA_NM'].isin(five_places)].reset_index(drop=True)

In [73]:
# Visit counts per place after the filter; the trailing expression displays
# them as the cell output. (The former groupby statement was removed: its
# result was discarded, and it raises KeyError on pandas >= 2.0 where the
# counts column is named "count".)
count_visit5 = pd.DataFrame(Train['VISIT_AREA_NM'].value_counts())
count_visit5

Unnamed: 0,VISIT_AREA_NM
광안리해수욕장,173
해운대해수욕장,135
해동용궁사,46
흰여울문화마을,38
신세계백화점 센텀시티점,37
...,...
부산 시민공원,5
신세계 센텀시티몰,5
태종대 다누비열차,5
BIFF 거리,5


In [74]:
"""Drop Features"""
# Remove the same five columns from both splits.
Train = Train.drop(
    columns=['TRAVELER_ID', 'REVISIT_INTENTION', 'RCMDTN_INTENTION', 'RESIDENCE_TIME_MIN', 'REVISIT_YN']
)
test = test.drop(
    columns=['TRAVELER_ID', 'REVISIT_INTENTION', 'RCMDTN_INTENTION', 'RESIDENCE_TIME_MIN', 'REVISIT_YN']
)

In [75]:
"""Change dtype"""
# Store the place type code as a pandas string column in both splits.
Train = Train.astype({'VISIT_AREA_TYPE_CD': 'string'})
test = test.astype({'VISIT_AREA_TYPE_CD': 'string'})

In [76]:
# Target: the DGSTFN column. Features: everything else except TRAVEL_ID.
y_train = Train['DGSTFN']
x_train = Train.drop(columns=['DGSTFN', 'TRAVEL_ID'])

In [78]:
# Columns handed to CatBoost as categorical features.
category_features = [
    "VISIT_AREA_NM",
    "SIDO",
    "GUNGU",
    "VISIT_AREA_TYPE_CD",
    "TRAVEL_MISSION_PRIORITY",
    "AGE_GRP",
    "GENDER",
]

In [79]:
# Cast the two numeric-coded categorical columns to integers.
x_train = x_train.astype({'TRAVEL_MISSION_PRIORITY': 'int', 'AGE_GRP': 'int'})

In [98]:
x_train

Unnamed: 0,VISIT_AREA_NM,SIDO,GUNGU,VISIT_AREA_TYPE_CD,TRAVEL_MISSION_PRIORITY,GENDER,AGE_GRP,INCOME,TRAVEL_STYL_1,TRAVEL_STYL_2,TRAVEL_STYL_3,TRAVEL_STYL_4,TRAVEL_STYL_5,TRAVEL_STYL_6,TRAVEL_STYL_7,TRAVEL_STYL_8,TRAVEL_MOTIVE_1,TRAVEL_NUM,TRAVEL_COMPANIONS_NUM,RESIDENCE_TIME_MIN_mean,RCMDTN_INTENTION_mean,REVISIT_YN_mean,TRAVEL_COMPANIONS_NUM_mean,REVISIT_INTENTION_mean
0,부평깡통시장,부산,중구,4,3,여,30,3.0,1.0,3.0,7.0,3.0,1.0,3.0,7.0,1.0,1.0,1.0,0.0,58.928571,3.964286,0.428571,1.214286,3.928571
1,용두산공원,부산,중구,2,3,여,30,3.0,1.0,3.0,7.0,3.0,1.0,3.0,7.0,1.0,1.0,1.0,0.0,52.941176,3.823529,0.705882,0.764706,3.705882
2,부산시립미술관,부산,해운대구,3,6,남,40,6.0,6.0,3.0,2.0,2.0,2.0,2.0,2.0,4.0,2.0,2.0,1.0,60.000000,4.000000,0.285714,0.571429,4.000000
3,신세계백화점 센텀시티점,부산,해운대구,4,2,남,30,4.0,3.0,2.0,1.0,5.0,6.0,6.0,5.0,6.0,3.0,3.0,1.0,94.054054,4.405405,0.567568,0.918919,4.351351
4,센텀시티 스파랜드,부산,해운대구,6,10,남,20,2.0,5.0,1.0,4.0,5.0,6.0,5.0,3.0,3.0,10.0,4.0,0.0,185.454545,4.545455,0.272727,1.363636,4.363636
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1154,부산 현대미술관,부산,사하구,3,6,남,60,1.0,3.0,5.0,5.0,3.0,3.0,5.0,3.0,3.0,3.0,5.0,1.0,114.000000,4.800000,0.400000,0.800000,4.800000
1155,광안리해수욕장,부산,수영구,1,21,여,20,3.0,5.0,3.0,4.0,5.0,5.0,5.0,5.0,3.0,3.0,2.0,1.0,59.132948,4.641618,0.774566,1.196532,4.612717
1156,BIFF 광장,부산,중구,4,1,남,40,2.0,2.0,2.0,1.0,6.0,2.0,5.0,3.0,7.0,2.0,2.0,2.0,38.571429,3.571429,0.571429,1.000000,3.571429
1157,용두산공원,부산,중구,1,1,남,40,2.0,2.0,2.0,1.0,6.0,2.0,5.0,3.0,7.0,2.0,2.0,2.0,52.941176,3.823529,0.705882,0.764706,3.705882


In [60]:
# Fit a CatBoost regressor on the GPU with a fixed seed; verbose=100 sets the
# logging period.
model = CatBoostRegressor(
    cat_features=category_features,
    random_state=42,
    task_type="GPU",
    verbose=100,
)
model.fit(x_train, y_train)

Learning rate set to 0.041182
0:	learn: 0.8018685	total: 43.8ms	remaining: 43.8s
100:	learn: 0.6760623	total: 1.5s	remaining: 13.3s
200:	learn: 0.6327541	total: 2.96s	remaining: 11.8s
300:	learn: 0.6016175	total: 4.48s	remaining: 10.4s
400:	learn: 0.5772931	total: 5.92s	remaining: 8.85s
500:	learn: 0.5552101	total: 7.38s	remaining: 7.35s
600:	learn: 0.5378839	total: 8.81s	remaining: 5.85s
700:	learn: 0.5248035	total: 10.3s	remaining: 4.4s
800:	learn: 0.5101444	total: 11.8s	remaining: 2.93s
900:	learn: 0.4974854	total: 13.3s	remaining: 1.46s
999:	learn: 0.4887996	total: 14.6s	remaining: 0us


<catboost.core.CatBoostRegressor at 0x2972bc9c950>

In [61]:
# Serialise the fitted model into the data directory.
joblib.dump(model, f"{path}/catboost_model_B.pkl")

['C:\\workspace\\Ko-Swipe-ML\\data\\all/catboost_model_B.pkl']

In [3]:
path = r"C:\workspace\Ko-Swipe-ML\data\all"

# Reload the saved model and both CSV splits from the data directory.
modelb = joblib.load(f"{path}/catboost_model_B.pkl")
testb = pd.read_csv(f"{path}/관광지 추천시스템 Testset_B.csv")
trainb = pd.read_csv(f"{path}/관광지 추천시스템 Trainset_B.csv")

In [4]:
# Split the test frame into the DGSTFN target and the remaining columns.
y_testb = testb['DGSTFN']
X_testb = testb.drop(columns=['DGSTFN'])

In [5]:
# User information: the trip id, the visited province/district and the
# traveller attributes of every test row.
user_info = testb.loc[
    :,
    ["TRAVEL_ID", "SIDO", "GUNGU", "TRAVEL_MISSION_PRIORITY", "GENDER", "AGE_GRP", "INCOME"]
    + [f"TRAVEL_STYL_{n}" for n in range(1, 9)]
    + ["TRAVEL_MOTIVE_1", "TRAVEL_NUM", "TRAVEL_COMPANIONS_NUM"],
]

In [6]:
# Collapse the per-visit rows into one row per trip: the attributes of the
# trip's first row plus the distinct "SIDO+GUNGU" areas visited on that trip
# (stored as the string form of a list).
new_user_info = pd.DataFrame(columns=['TRAVEL_ID', 'TRAVEL_MISSION_PRIORITY', 'GENDER', 'AGE_GRP', 'INCOME',
                            'TRAVEL_STYL_1', 'TRAVEL_STYL_2', 'TRAVEL_STYL_3', 'TRAVEL_STYL_4',
                            'TRAVEL_STYL_5', 'TRAVEL_STYL_6', 'TRAVEL_STYL_7', 'TRAVEL_STYL_8',
                            'TRAVEL_MOTIVE_1', 'TRAVEL_NUM', 'TRAVEL_COMPANIONS_NUM', 'sido_gungu_list'])

per_trip_rows = []
for trip_id in tqdm(list(user_info['TRAVEL_ID'].unique())):
    user_info_filtered = user_info[user_info['TRAVEL_ID'] == trip_id]
    sido_gungu_visit = [
        province + '+' + district
        for province, district in zip(user_info_filtered['SIDO'], user_info_filtered['GUNGU'])
    ]
    # dict.fromkeys de-duplicates while keeping first-seen order, so the stored
    # string is the same on every run (the order of a set of strings is not).
    sido_gungu_list = list(dict.fromkeys(sido_gungu_visit))
    # .copy() so the new column is written to an independent one-row frame
    # rather than to a slice of user_info.
    new = user_info_filtered.drop(['SIDO', 'GUNGU'], axis=1).head(1).copy()
    new['sido_gungu_list'] = str(sido_gungu_list)
    per_trip_rows.append(new)

# Concatenate once instead of growing the frame inside the loop.
new_user_info = pd.concat([new_user_info, *per_trip_rows], axis=0)
    

100%|██████████| 237/237 [00:00<00:00, 1194.94it/s]


In [7]:
# Renumber the per-trip rows and write them to CSV.
new_user_info = new_user_info.reset_index(drop=True)
new_user_info.to_csv(f"{path}/관광지 추천시스템 Testset_B- 유저 정보.csv", index=False)

In [8]:
# Place information: location, type code and the per-place means, keeping the
# first training row of every place name.
info = trainb.loc[
    :,
    ['SIDO', 'VISIT_AREA_NM', 'GUNGU', 'VISIT_AREA_TYPE_CD', 'RESIDENCE_TIME_MIN_mean',
     'RCMDTN_INTENTION_mean', 'REVISIT_YN_mean', 'TRAVEL_COMPANIONS_NUM_mean',
     'REVISIT_INTENTION_mean'],
].drop_duplicates(['VISIT_AREA_NM'])

In [9]:
# Build the list of places that occur at least five times in the training set.
visiting_list = trainb[['VISIT_AREA_NM']].reset_index(drop=True)

# Count on the column itself so the index holds the plain place names.
# Counting the one-column DataFrame yields tuple keys, and recovering a name
# by stripping "(", ")" and "," from the tuple's string form corrupts every
# place name that itself contains one of those characters.
visit_count_df = (
    visiting_list['VISIT_AREA_NM']
    .value_counts()
    .rename('count')
    .rename_axis('VISIT_AREA_NM')
    .reset_index()[['count', 'VISIT_AREA_NM']]
)

# Apply the five-visit threshold.
visit_count_df = visit_count_df[visit_count_df['count'] >= 5]
visit_list = list(visit_count_df['VISIT_AREA_NM'])  # places with >= 5 visits

In [10]:
# Keep only the places that are on the >= 5 visits list. A single vectorised
# membership test replaces the row-by-row drop loop, which copied the frame on
# every removal and looked rows up by index label.
info = info[info['VISIT_AREA_NM'].isin(visit_list)].reset_index(drop=True)

100%|██████████| 789/789 [00:00<00:00, 9654.09it/s]


In [11]:
# Save the place information.
info = info.reset_index(drop=True)
info.to_csv(f"{path}/관광지 추천시스템 Testset_B- 여행지 정보.csv", index=False)

In [12]:
# Reload the per-trip user rows and the place information from CSV.
user_info = pd.read_csv(f"{path}/관광지 추천시스템 Testset_B- 유저 정보.csv")
info = pd.read_csv(f"{path}/관광지 추천시스템 Testset_B- 여행지 정보.csv")

In [13]:
# NOTE(review): this casts new_user_info, while the cells that follow read the
# user_info frame reloaded from CSV; confirm which frame was meant.
new_user_info = new_user_info.astype({'TRAVEL_MISSION_PRIORITY': 'int', 'AGE_GRP': 'int'})

In [114]:
# Take the user at position 4 as a one-row frame (row -> Series -> transposed
# frame) and remove the trip id.
test_data = user_info.iloc[4].to_frame().T

test_data = test_data.drop(columns=['TRAVEL_ID'])

test_data

Unnamed: 0,TRAVEL_MISSION_PRIORITY,GENDER,AGE_GRP,INCOME,TRAVEL_STYL_1,TRAVEL_STYL_2,TRAVEL_STYL_3,TRAVEL_STYL_4,TRAVEL_STYL_5,TRAVEL_STYL_6,TRAVEL_STYL_7,TRAVEL_STYL_8,TRAVEL_MOTIVE_1,TRAVEL_NUM,TRAVEL_COMPANIONS_NUM,sido_gungu_list
4,22.0,여,20.0,4.0,7.0,7.0,3.0,6.0,4.0,5.0,7.0,3.0,7.0,4.0,0.0,"['부산+해운대구', '부산+기장군']"


In [120]:
# Write the single-user example frame to a pickle file (relative path).
test_data.to_pickle("test_data.pkl")

In [125]:
import pandas as pd

# Define column lists
# Columns of the frame built by generate_final_df, in the order in which that
# frame is later passed to model.predict.
final_columns = [
    "VISIT_AREA_NM", "SIDO", "GUNGU", "VISIT_AREA_TYPE_CD", "TRAVEL_MISSION_PRIORITY", "GENDER", "AGE_GRP", "INCOME",
    "TRAVEL_STYL_1", "TRAVEL_STYL_2", "TRAVEL_STYL_3", "TRAVEL_STYL_4", "TRAVEL_STYL_5", "TRAVEL_STYL_6", 
    "TRAVEL_STYL_7", "TRAVEL_STYL_8", "TRAVEL_MOTIVE_1", "TRAVEL_NUM", "TRAVEL_COMPANIONS_NUM", 
    "RESIDENCE_TIME_MIN_mean", "RCMDTN_INTENTION_mean", "REVISIT_YN_mean", "TRAVEL_COMPANIONS_NUM_mean", 
    "REVISIT_INTENTION_mean"
]

# Column names given to the per-user frame in generate_final_df: SIDO first,
# followed by the user attributes in the order they appear in the user row.
user_columns = [
    "SIDO", "TRAVEL_MISSION_PRIORITY", "GENDER", "AGE_GRP", "INCOME", "TRAVEL_STYL_1", "TRAVEL_STYL_2",
    "TRAVEL_STYL_3", "TRAVEL_STYL_4", "TRAVEL_STYL_5", "TRAVEL_STYL_6", "TRAVEL_STYL_7", "TRAVEL_STYL_8",
    "TRAVEL_MOTIVE_1", "TRAVEL_NUM", "TRAVEL_COMPANIONS_NUM"
]

# Alias used by generate_final_df when it selects and orders the columns.
features = final_columns

def convert_float_to_int(df):
    """Cast every float column of ``df`` to ``int``, in place.

    Parameters:
    df (DataFrame): Frame to convert; it is modified and also returned.

    Returns:
    DataFrame: The same ``df`` object with its float columns cast to int.
    """
    for name in df.select_dtypes(include=['float']).columns:
        df[name] = df[name].astype(int)
    return df

def preprocess_places_list(places_list_str):
    """
    Preprocesses the places list string into a list of places.

    The input is the ``str()`` form of a list of strings, for example
    ``"['부산+해운대구', '부산+기장군']"``. Brackets and single quotes are removed
    and the remainder is split on commas.

    Parameters:
    places_list_str (str): String representation of the places list.

    Returns:
    list: A list of places. Empty entries are dropped, so ``"[]"`` yields
    ``[]`` instead of ``[""]``.
    """
    unwrapped = places_list_str.replace("[", "").replace("]", "").replace("'", "")
    tokens = (token.strip() for token in unwrapped.split(","))
    # An empty token cannot be split into "sido+gungu" by the caller, so skip it.
    return [token for token in tokens if token]

def generate_final_df(info, new_user_info, places_list):
    """
    Generates the final DataFrame based on user information and places list.

    For every ``"sido+gungu"`` entry of ``places_list`` the places of ``info``
    located in that area are paired with the user's attributes. The result has
    one row per distinct ``VISIT_AREA_NM`` and a gap-free 0..n-1 index.

    Parameters:
    info (DataFrame): DataFrame containing area information.
    new_user_info (DataFrame): DataFrame containing new user information; only
        its first row is used.
    places_list (list): List of places, each formatted as ``"sido+gungu"``.

    Returns:
    DataFrame: The final DataFrame containing combined user and area
    information (empty, with ``final_columns``, when no place matches).
    """
    # The user's attribute values do not depend on the place: compute them once.
    user_rows = new_user_info.drop(["sido_gungu_list"], axis=1).values.tolist()

    candidate_frames = []
    for place in places_list:
        sido, gungu = map(str, place.split("+"))
        in_area = (info["SIDO"] == sido) & (info["GUNGU"] == gungu)
        info_df = info[in_area].drop(["SIDO"], axis=1).reset_index(drop=True)
        if info_df.empty:
            # Nothing to pair; skipping also keeps empty frames out of the
            # concat, where they can affect the resulting dtypes.
            continue
        user_data = [sido] + user_rows[0]
        user_df = pd.DataFrame([user_data] * len(info_df), columns=user_columns)
        df = pd.concat([user_df, info_df], axis=1)[features]
        df = df.astype({"VISIT_AREA_TYPE_CD": "string"})
        candidate_frames.append(df)

    if not candidate_frames:
        return pd.DataFrame(columns=final_columns)

    final_df = pd.concat(candidate_frames, axis=0)
    # De-duplicate BEFORE renumbering: resetting first leaves gaps in the index
    # wherever a duplicate is removed.
    final_df = final_df.drop_duplicates(["VISIT_AREA_NM"]).reset_index(drop=True)
    return final_df

def recommend_places(model, final_df, top_n=10):
    """
    Recommends places based on the model's predictions.

    Parameters:
    model: The predictive model; must provide ``predict(DataFrame)``.
    final_df (DataFrame): The final DataFrame containing combined user and area
        information. Its float columns are cast to int in place.
    top_n (int): Maximum number of places to return (default 10).

    Returns:
    list: Names of the places with the highest predicted value, best first;
    empty when ``final_df`` has no rows.
    """
    if len(final_df) == 0:
        # No candidate place: there is nothing to score.
        return []

    final_df = convert_float_to_int(final_df)
    y_pred = model.predict(final_df)
    # Reuse final_df's index so each prediction stays attached to its own row
    # even when that index is not 0..n-1 (concat(axis=1) aligns on labels).
    y_pred_df = pd.DataFrame(y_pred, columns=["y_pred"], index=final_df.index)
    sorted_df = pd.concat([final_df, y_pred_df], axis=1).sort_values(by="y_pred", ascending=False).iloc[:top_n]
    return list(sorted_df["VISIT_AREA_NM"])


def generate_user_info_df(final_df):
    """
    Selects the user-related columns of the combined frame.

    Parameters:
    final_df (DataFrame): The final DataFrame containing combined user and area information.

    Returns:
    DataFrame: The ``user_columns`` columns of ``final_df``, in that order.
    """
    user_part = final_df.loc[:, user_columns]
    return user_part

def main(info, new_user_info, model):
    """
    Builds the recommendation record for one user.

    Parameters:
    info (DataFrame): DataFrame containing area information.
    new_user_info (DataFrame): DataFrame containing new user information; the
        first value of its ``sido_gungu_list`` column lists the visited areas.
    model: The predictive model.

    Returns:
    list: A one-element list. The element is the user's attribute values
    followed by the list of recommended places, or ``[]`` when no candidate
    row could be built.
    """
    raw_places = new_user_info["sido_gungu_list"].values[0]
    final_df = generate_final_df(info, new_user_info, preprocess_places_list(raw_places))

    visiting_candidates = recommend_places(model, final_df)
    user_info_df = generate_user_info_df(final_df)

    # Guard clause: no candidate rows means there are no user values to report.
    if not len(user_info_df):
        return [[]]

    return [[*user_info_df.iloc[0].to_list(), visiting_candidates]]

# Example usage: build the recommendation record for the single-user frame
# `test_data` with the reloaded model and print it.
result = main(info, test_data, modelb)
print(result)


[['부산', 22, '여', 20, 4, 7, 7, 3, 6, 4, 5, 7, 3, 7, 4, 0, ['부산 시립미술관', '부산시립미술관', '해운대 모래축제', '이케아 동부산점', '해동용궁사', '해운대블루라인파크 송정정거장', '센텀시티 스파랜드', '해운대블루라인파크 미포정거장', '죽성 드림세트장', '청사포다릿돌전망대']]]
