In [4]:
!pip install pycaret

In [90]:
import pandas as pd
import numpy as np
import joblib
import json
from sklearn.preprocessing import MinMaxScaler
from pycaret.regression import predict_model


similarity_df_path = '/content/similarity_df.csv'
master_visit_all_path = '/content/master_visit_all.csv'
model_path = '/content/bayesian_regression.pkl'
trip_path = '/content/trip_df (1).csv'

def combined_recommendation(input_order, similarity_df_path, trip_path, model_path, master_visit_all_path, user_prefer, user_features):

    similarity_df = pd.read_csv(similarity_df_path)
    trip_df = pd.read_csv(trip_path)

    # 5개 장소의 tourist_id 정의 (입력받는 순서를 기준으로 사용할 장소들)
    place_ids = ['CNTS_200000000010956', 'CONT_000000000500103', 'CNTS_000000000022353', 'CNTS_000000000022082', 'CNTS_000000000022063']

    # 가중치를 순서대로 정의
    weights = [2.0, 1.5, 0.8, 0.5, 0.3]

    # 입력 순서를 기반으로 place_ids의 순서를 재정렬
    ordered_places = [place_ids[i - 1] for i in input_order]
    ordered_weights = [weights[i - 1] for i in input_order]

    # 선택된 장소들에 가중치를 곱한 유사도 합계를 계산
    weighted_sim = np.zeros(similarity_df.shape[0])

    for place, weight in zip(ordered_places, ordered_weights):
        weighted_sim += similarity_df[place].values * weight

    # 선호 카테고리에 해당하는 관광지 가중치 추가
    trip_df['category_match'] = trip_df['category'].apply(lambda x: 2 if x in user_prefer else 0)

    # weighted_similarity에 선호 카테고리 가중치 추가
    similarity_df['weighted_similarity'] = weighted_sim + trip_df['category_match']

    # tourist_id와 weighted_similarity를 함께 반환
    sorted_df = similarity_df[['tourist_id', 'weighted_similarity']].sort_values(by='weighted_similarity', ascending=False)
    sorted_df = pd.merge(sorted_df, trip_df, on='tourist_id', how='left')
    sorted_df = sorted_df[~sorted_df['tourist_id'].isin(ordered_places)]
    sorted_df.rename(columns={'VISIT_AREA_NM':'Place'}, inplace = True)

    # weighted_similarity 열에 대해 Min-Max Scaling 적용
    scaler = MinMaxScaler()
    sorted_df['weighted_similarity'] = scaler.fit_transform(sorted_df[['weighted_similarity']])

    # 모델과 데이터 로드
    model = joblib.load(model_path)
    master_visit_all = pd.read_csv(master_visit_all_path)

    # 필요한 컬럼만 선택
    master_visit_all = master_visit_all[['VISIT_AREA_NM', 'VISIT_AREA_NM_encoded', 'GENDER', 'AGE_GRP', 'TRAVEL_STYL_1', 'TRAVEL_STYL_2', 'TRAVEL_STYL_3', 'TRAVEL_STYL_4', 'total_score']]

    # 유저 입력 데이터 생성
    user_prefer = pd.DataFrame(user_features)

    # 'TRAVEL_STYL_3' 수정
    user_prefer['TRAVEL_STYL_3'] = user_prefer['TRAVEL_STYL_3'].replace({1: '1', 2: '1', 3: '2', 4: '3', 5: '3'}).astype(int)
    user_prefer['TRAVEL_STYL_3'] = user_prefer['TRAVEL_STYL_3'].astype(int)

    # 방문 지역 데이터 준비
    visit_areas = master_visit_all['VISIT_AREA_NM_encoded'].drop_duplicates().dropna(axis=0).tolist()
    repeated_visits = np.tile(visit_areas, len(user_prefer))

    # 오버샘플링된 테스트 데이터프레임 생성
    user_prefer_dict = {
        'GENDER': np.repeat(user_prefer['GENDER'], len(visit_areas)),
        'AGE_GRP': np.repeat(user_prefer['AGE_GRP'], len(visit_areas)),
        'TRAVEL_STYL_1': np.repeat(user_prefer['TRAVEL_STYL_1'], len(visit_areas)),
        'TRAVEL_STYL_2': np.repeat(user_prefer['TRAVEL_STYL_2'], len(visit_areas)),
        'TRAVEL_STYL_3': np.repeat(user_prefer['TRAVEL_STYL_3'], len(visit_areas)),
        'TRAVEL_STYL_4': np.repeat(user_prefer['TRAVEL_STYL_4'], len(visit_areas)),
        'VISIT_AREA_NM_encoded': repeated_visits
    }

    user_prefer = pd.DataFrame(user_prefer_dict).reset_index(drop=True).drop_duplicates()
    user_prefer = user_prefer[['VISIT_AREA_NM_encoded', 'GENDER', 'AGE_GRP', 'TRAVEL_STYL_1', 'TRAVEL_STYL_2', 'TRAVEL_STYL_3', 'TRAVEL_STYL_4']]

    # 예측
    predictions = model.predict(user_prefer)
    user_prefer['output'] = predictions

    # tourist_id와 VISIT_AREA_NM의 매핑
    label_map = dict(zip(master_visit_all['VISIT_AREA_NM_encoded'], master_visit_all['VISIT_AREA_NM']))
    user_prefer['VISIT_AREA_NM'] = user_prefer['VISIT_AREA_NM_encoded'].map(label_map)

    result = user_prefer[['VISIT_AREA_NM', 'output']].sort_values(by='output', ascending=False)

    #### 병합하는 부분은 따로 보자
    # 결과 병합
    combined_df = pd.merge(result, sorted_df, left_on='VISIT_AREA_NM', right_on='tourist_x', how='outer')

    # output 열과 weighted_similarity 열을 결합
    combined_df['Combined_weighted_similarity'] = combined_df.apply(
        lambda row: (row['output'] / 2 + row['weighted_similarity'] / 2) if pd.notna(row['output']) and pd.notna(row['weighted_similarity'])
        else (row['output'] if pd.notna(row['output']) else row['weighted_similarity']),
        axis=1)

    # 필요한 열만 선택하고 정렬
    final_recommendations_df = combined_df[['tourist_x', 'Combined_weighted_similarity']].rename(columns={'Combined_weighted_similarity': 'weighted_similarity'}).sort_values(by='weighted_similarity', ascending=False)

    final_recommendations = final_recommendations_df.set_index('tourist_x')['weighted_similarity'].to_dict()

    return final_recommendations

In [91]:
user_features = {
    'GENDER': [1],
    'AGE_GRP': [20],
    'TRAVEL_STYL_1': [2],
    'TRAVEL_STYL_2': [2],
    'TRAVEL_STYL_3': [2],
    'TRAVEL_STYL_4': [2]
}

input_order = [2, 4, 3, 1, 5]
user_prefer = ['역사유적지']

In [92]:
combined_recommendation(input_order, similarity_df_path, trip_path, model_path, master_visit_all_path, user_prefer, user_features)

{'백제사 템플스테이': 1.0,
 '제주4.3평화공원': 0.9997430565949681,
 '정의향교': 0.9981164722841314,
 '금룡사 템플스테이': 0.9974583593353663,
 '오름터 민속마을': 0.9971179144474647,
 '칠머리길': 0.9971109703288292,
 '송당본향당': 0.9965028966759901,
 '법화사지': 0.9964085984368793,
 '백제사': 0.9962645846959259,
 '약천사 템플스테이': 0.9957917057006227,
 '불탑사': 0.9952494148419542,
 '방주교회': 0.9951825716105238,
 '추사적거지': 0.9950208581089399,
 '효명사': 0.9943594808441618,
 '삼사석': 0.9942027556642719,
 '순례자의교회': 0.9938028581798057,
 '천왕사': 0.9936633384587015,
 '마라도성당': 0.9931421188840128,
 '연북정': 0.9931365880094154,
 '제주 삼양동 유적': 0.9930203535356701,
 '대장금촬영지': 0.9918478397711876,
 '송죽사터': 0.9913138204782936,
 '항파두리항몽유적지': 0.9913093075303765,
 '서귀포층패류화석산지': 0.9905187984854935,
 '대정향교': 0.9904685754942175,
 '성읍민속마을': 0.9902440946569242,
 '금붕사': 0.9899863752010989,
 '제주고산리유적': 0.9899816036212998,
 '참굴비조형물': 0.9898971456503973,
 '금룡사': 0.9898867044936,
 '가마오름 진지동굴': 0.9891393394976589,
 '송악산 진지동굴': 0.9890318130212921,
 '성클라라수도원 금악성당': 0.9889371791968422