In [None]:
# Install the PyCaret package
!pip install pycaret

In [None]:
import joblib
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from pycaret.regression import predict_model

# Example inputs in the shape recommendation() expects.

# Locations of the pickled model and the two CSV files.
model_path = "/content/gbr_model.pkl"
master_visit_all_path = "/content/master_visit_all.csv"
similarity_df_path = "/content/extracted_sim_final.csv"

# Numbered reference places in the user's order of preference
# (earlier positions receive larger weights inside recommendation()).
user_preference_numbers = [1, 2, 3, 4, 5]

# One user's profile; every value is a one-element list so that
# pd.DataFrame(travel_style) yields a single-row frame.
travel_style = dict(
    GENDER=[0],
    AGE_GRP=[20],
    TRAVEL_STYL_1=[1],
    TRAVEL_STYL_2=[1],
    TRAVEL_STYL_3=[1],
    TRAVEL_STYL_4=[1],
)


# Survey option number -> name of the reference place it stands for.
_NUMBER_TO_PLACE = {
    1: '안흥지 애련정',
    2: 'KT&G상상마당 홍대',
    3: '명동난타극장',
    4: '백운계곡관광지',
    5: '소래역사관',
}

# Weights applied to the user's preferences by rank, most preferred first.
_PREFERENCE_WEIGHTS = (1.0, 0.8, 0.5, 0.1, 0.05)

# TRAVEL_STYL_3 answers 1-5 are collapsed into three buckets; any other
# value is passed through unchanged.
_TRAVEL_STYL_3_BUCKETS = {1: 1, 2: 1, 3: 2, 4: 3, 5: 3}

# Profile columns handed to the model, in the order the input frame is built.
_PROFILE_COLUMNS = (
    'GENDER',
    'AGE_GRP',
    'TRAVEL_STYL_1',
    'TRAVEL_STYL_2',
    'TRAVEL_STYL_3',
    'TRAVEL_STYL_4',
)


def _rank_weights(user_preference_numbers):
    """Map each preferred reference place to its rank weight, counting a place once."""
    unknown = [num for num in user_preference_numbers if num not in _NUMBER_TO_PLACE]
    if unknown:
        raise KeyError(
            f"unknown preference number(s) {unknown}; "
            f"expected values from {sorted(_NUMBER_TO_PLACE)}"
        )
    # dict.fromkeys keeps the first occurrence of each place, so a repeated
    # number neither overwrites an earlier (higher) weight nor is summed twice.
    ranked_places = dict.fromkeys(_NUMBER_TO_PLACE[num] for num in user_preference_numbers)
    return dict(zip(ranked_places, _PREFERENCE_WEIGHTS))


def _model_scores(model, user_prefer, visit_areas):
    """Predict a score for every (profile row, visit area) pair and min-max scale it."""
    n_areas = len(visit_areas)
    # Each profile row is repeated once per area while the area list is tiled
    # once per profile row, giving the full cross product.
    model_input = {
        column: np.repeat(user_prefer[column].to_numpy(), n_areas)
        for column in _PROFILE_COLUMNS
    }
    model_input['VISIT_AREA_NM'] = np.tile(visit_areas, len(user_prefer))
    testing = pd.DataFrame(model_input).drop_duplicates()

    result = predict_model(model, data=testing)
    result['output'] = MinMaxScaler().fit_transform(result[['prediction_label']]).ravel()
    return result[['VISIT_AREA_NM', 'output']].sort_values(by='output', ascending=False)


def _similarity_scores(similarity_df, place_weights):
    """Return min-max scaled weighted similarity per place as a Place/Score frame."""
    weights = pd.Series(place_weights, dtype=float)
    # skipna=False keeps a missing similarity visible as NaN instead of
    # silently treating it as 0.
    totals = (
        similarity_df[list(place_weights)]
        .mul(weights, axis=1)
        .sum(axis=1, skipna=False)
    )
    scaled = MinMaxScaler().fit_transform(totals.to_numpy().reshape(-1, 1)).ravel()
    scores = pd.DataFrame({'Place': totals.index, 'Score': scaled})
    # A repeated index label keeps its last value, as a dict built from the
    # (label, score) pairs would.
    scores = scores.drop_duplicates(subset='Place', keep='last')
    return scores.sort_values(by='Score', ascending=False, kind='mergesort')


def recommendation(similarity_df_path, master_visit_all_path, model_path, user_preference_numbers, travel_style):
    """Rank visit areas by blending model predictions with similarity scores.

    Two scores are computed per place, each min-max scaled to [0, 1]:

    * the prediction of the loaded model for the user's profile paired with
      every distinct ``VISIT_AREA_NM`` in the master visit file;
    * a weighted sum of the similarity columns of the user's preferred
      reference places (weights 1.0, 0.8, 0.5, 0.1, 0.05 by rank).

    Where both scores exist for a place they are averaged; otherwise the one
    that exists is used.

    Args:
        similarity_df_path: CSV whose ``'Unnamed: 0'`` column holds place
            names and whose other columns include the reference places.
        master_visit_all_path: CSV with a ``VISIT_AREA_NM`` column.
        model_path: File loaded with ``joblib.load`` and passed to
            ``predict_model``.
        user_preference_numbers: Reference place numbers (1-5) in order of
            preference. A repeated number is counted once, at its first
            position.
        travel_style: Mapping of the columns ``GENDER``, ``AGE_GRP`` and
            ``TRAVEL_STYL_1`` .. ``TRAVEL_STYL_4`` to equal-length lists.

    Returns:
        dict mapping place name to blended score, ordered from highest to
        lowest score.

    Raises:
        KeyError: If ``user_preference_numbers`` contains a number that is
            not a known reference place, or a required column is missing.
    """
    # Validate the cheap input first so a bad preference list fails before
    # any file is read.
    place_weights = _rank_weights(user_preference_numbers)

    # NOTE(security): joblib.load unpickles the file and can execute arbitrary
    # code; only point model_path at trusted model files.
    model = joblib.load(model_path)
    master_visit_all = pd.read_csv(master_visit_all_path)
    similarity_df = pd.read_csv(similarity_df_path).set_index('Unnamed: 0')

    user_prefer = pd.DataFrame(travel_style)
    user_prefer['TRAVEL_STYL_3'] = (
        user_prefer['TRAVEL_STYL_3']
        .map(lambda answer: _TRAVEL_STYL_3_BUCKETS.get(answer, answer))
        .astype(int)
    )

    visit_areas = master_visit_all['VISIT_AREA_NM'].dropna().drop_duplicates().tolist()

    model_scores = _model_scores(model, user_prefer, visit_areas)
    similarity_scores = _similarity_scores(similarity_df, place_weights)

    # Outer merge keeps places known to only one of the two score sources.
    combined = model_scores.merge(
        similarity_scores, left_on='VISIT_AREA_NM', right_on='Place', how='outer'
    )

    # Cast to object first so the fill works whatever dtype the name column
    # came back with.
    names = combined['VISIT_AREA_NM'].astype(object).fillna(combined['Place'])

    # Row-wise mean skips NaN: the average when both scores exist, otherwise
    # whichever one is present.
    blended = combined[['output', 'Score']].mean(axis=1)

    ranked = pd.Series(blended.to_numpy(), index=names).sort_values(ascending=False)
    return ranked.to_dict()