In [None]:
import sys
import pandas as pd
import numpy as np
from surprise import Dataset, Reader, SVD
from surprise.model_selection import train_test_split
from surprise import accuracy

def filter_camp_data(camp, selected_do, selected_gu, selected_faclt, selected_lct, selected_induty, selected_bottom, selected_sbrs):
    # 선택된 값을 포함하는 행만 필터링, 빈 문자열 무시
    if selected_do:
        camp = camp[camp['DO_NM'].str.contains(selected_do, na=False)]
    if selected_gu:
        camp = camp[camp['SIGUNGU_NM'].str.contains(selected_gu, na=False)]
    if selected_faclt:
        camp = camp[camp['FACLT_DIV_NM'].str.contains(selected_faclt, na=False)]
    if selected_lct:
        camp = camp[camp['LCT_CL'].str.contains(selected_lct, na=False)]
    if selected_induty:
        camp = camp[camp['INDUTY'].str.contains(selected_induty, na=False)]
    if selected_bottom:
        bottom_mask = None
        if selected_bottom == '잔디':
            bottom_mask = camp['SITE_BOTTOM_CL1'].fillna(0) > 0
        elif selected_bottom == '파쇄석':
            bottom_mask = camp['SITE_BOTTOM_CL2'].fillna(0) > 0
        elif selected_bottom == '테크':
            bottom_mask = camp['SITE_BOTTOM_CL3'].fillna(0) > 0
        elif selected_bottom == '자갈':
            bottom_mask = camp['SITE_BOTTOM_CL4'].fillna(0) > 0
        elif selected_bottom == '맨흙':
            bottom_mask = camp['SITE_BOTTOM_CL5'].fillna(0) > 0
        if bottom_mask is not None:
            camp = camp[bottom_mask]
    if selected_sbrs:
        camp = camp[camp['SBRS_CL'].str.contains(selected_sbrs, na=False)]

    return camp

if __name__ == "__main__":
    # 데이터 로드
    file_path = 'E:/Student/API/GoCamping.pkl'
    camp = pd.read_pickle(file_path)
    
    selected_do = sys.argv[1]
    selected_gu = sys.argv[2]
    selected_faclt = sys.argv[3]
    selected_lct = sys.argv[4]
    selected_induty = sys.argv[5]
    selected_bottom = sys.argv[6]
    selected_sbrs = sys.argv[7]

    filtered_camp = filter_camp_data(camp, selected_do, selected_gu, selected_faclt, selected_lct, selected_induty, selected_bottom, selected_sbrs)
    
    # 결과 확인
    sys.stdout.reconfigure(encoding='utf-8')
    print(filtered_camp.to_json(orient='records', force_ascii=False))

    # member_rate 데이터 로드
    member_rate = pd.read_pickle('E:/Student/API/memberRate.pkl')

    # surprise 라이브러리를 사용한 추천 시스템 구현
    # 필터링된 캠핑장의 content_id 추출
    content_ids = filtered_camp['CONTENT_ID'].tolist()

    # 필터링된 content_id에 해당하는 rating 데이터 필터링
    filtered_ratings = member_rate[member_rate['contentId'].isin(content_ids)]

    # surprise용 데이터 준비
    reader = Reader(rating_scale=(1, 5))
    data = Dataset.load_from_df(filtered_ratings[['no_data', 'contentId', 'rating']], reader)

    # train-test split
    trainset, testset = train_test_split(data, test_size=0.25)

    # SVD 모델 학습
    svd = SVD()
    svd.fit(trainset)

    # 예측 및 평가
    predictions = svd.test(testset)
    accuracy.rmse(predictions)

    # 추천 결과 출력
    user_id = int(sys.argv[8])  # 사용자 ID 입력
    user_ratings = filtered_ratings[filtered_ratings['no_data'] == user_id]
    user_unrated_items = [item for item in content_ids if item not in user_ratings['contentId'].tolist()]

    # 추천 항목 생성
    recommendations = []
    for item_id in user_unrated_items:
        est_rating = svd.predict(user_id, item_id).est
        recommendations.append((item_id, est_rating))

    recommendations.sort(key=lambda x: x[1], reverse=True)

    # 상위 10개 추천 항목 출력
    top_10_recommendations = recommendations[:10]
    for item_id, rating in top_10_recommendations:
        print(f"CONTENT_ID: {item_id}, Predicted Rating: {rating:.2f}")


In [34]:
import sys
import pandas as pd
import numpy as np
from surprise import Dataset, Reader, SVD
from surprise.model_selection import train_test_split
from surprise import accuracy

def filter_camp_data(camp, selected_do, selected_gu, selected_faclt, selected_lct, selected_induty, selected_bottom, selected_sbrs):
    # 선택된 값을 포함하는 행만 필터링, 빈 문자열 무시
    if selected_do:
        camp = camp[camp['DO_NM'].str.contains(selected_do, na=False)]
    if selected_gu:
        camp = camp[camp['SIGUNGU_NM'].str.contains(selected_gu, na=False)]
    if selected_faclt:
        camp = camp[camp['FACLT_DIV_NM'].str.contains(selected_faclt, na=False)]
    if selected_lct:
        camp = camp[camp['LCT_CL'].str.contains(selected_lct, na=False)]
    if selected_induty:
        camp = camp[camp['INDUTY'].str.contains(selected_induty, na=False)]
    if selected_bottom:
        bottom_mask = None
        if selected_bottom == '잔디':
            bottom_mask = camp['SITE_BOTTOM_CL1'].fillna(0) > 0
        elif selected_bottom == '파쇄석':
            bottom_mask = camp['SITE_BOTTOM_CL2'].fillna(0) > 0
        elif selected_bottom == '테크':
            bottom_mask = camp['SITE_BOTTOM_CL3'].fillna(0) > 0
        elif selected_bottom == '자갈':
            bottom_mask = camp['SITE_BOTTOM_CL4'].fillna(0) > 0
        elif selected_bottom == '맨흙':
            bottom_mask = camp['SITE_BOTTOM_CL5'].fillna(0) > 0
        if bottom_mask is not None:
            camp = camp[bottom_mask]
    if selected_sbrs:
        camp = camp[camp['SBRS_CL'].str.contains(selected_sbrs, na=False)]

    return camp

if __name__ == "__main__":
    # 데이터 로드
    file_path = 'E:/Student/API/GoCamping.pkl'
    camp = pd.read_pickle(file_path)
    
    selected_do = "경기도"
    selected_gu = ""
    selected_faclt = ""
    selected_lct = ""
    selected_induty = ""
    selected_bottom = ""
    selected_sbrs = ""

    filtered_camp = filter_camp_data(camp, selected_do, selected_gu, selected_faclt, selected_lct, selected_induty, selected_bottom, selected_sbrs)

    # member_rate 데이터 로드
    member_rate = pd.read_pickle('E:/Student/API/memberRate.pkl')

    # surprise 라이브러리를 사용한 추천 시스템 구현
    # 필터링된 캠핑장의 content_id 추출
    content_ids = filtered_camp['CONTENT_ID'].tolist()

    # 필터링된 content_id에 해당하는 rating 데이터 필터링
    filtered_ratings = member_rate[member_rate['CONTENT_ID'].isin(content_ids)]

    # surprise용 데이터 준비
    reader = Reader(rating_scale=(1, 5))
    data = Dataset.load_from_df(filtered_ratings[['NO_DATA', 'CONTENT_ID', 'RATING']], reader)

    # train-test split
    trainset, testset = train_test_split(data, test_size=0.25)

    # 최적의 하이퍼파라미터를 사용하여 SVD 모델 학습
    svd = SVD(n_factors=50, n_epochs=20, lr_all=0.005, reg_all=0.2)
    svd.fit(trainset)

    # 예측 및 평가
    predictions = svd.test(testset)
    accuracy.rmse(predictions)

    # 추천 결과 출력
    user_id = int(1)  # 사용자 ID 입력
    user_ratings = filtered_ratings[filtered_ratings['NO_DATA'] == user_id]
    user_unrated_items = [item for item in content_ids if item not in user_ratings['CONTENT_ID'].tolist()]

    # 추천 항목 생성
    recommendations = []
    for item_id in user_unrated_items:
        est_rating = svd.predict(user_id, item_id).est
        recommendations.append((item_id, est_rating))

    recommendations.sort(key=lambda x: x[1], reverse=True)

    # 상위 10개 추천 항목 출력
    top_10_recommendations = recommendations[:10]
    for item_id, rating in top_10_recommendations:
        print(f"CONTENT_ID: {item_id}, Predicted Rating: {rating:.2f}")


RMSE: 1.1872
CONTENT_ID: 7230, Predicted Rating: 3.59
CONTENT_ID: 100631, Predicted Rating: 3.58
CONTENT_ID: 1865, Predicted Rating: 3.58
CONTENT_ID: 101111, Predicted Rating: 3.56
CONTENT_ID: 652, Predicted Rating: 3.55
CONTENT_ID: 1231, Predicted Rating: 3.55
CONTENT_ID: 1830, Predicted Rating: 3.53
CONTENT_ID: 7202, Predicted Rating: 3.50
CONTENT_ID: 3213, Predicted Rating: 3.47
CONTENT_ID: 1951, Predicted Rating: 3.47


In [23]:
from collections import defaultdict

def precision_recall_at_k(predictions, k=10, threshold=3.5):
    """Return precision and recall at k metrics for each user"""

    # First map the predictions to each user.
    user_est_true = defaultdict(list)
    for uid, iid, true_r, est, _ in predictions:
        user_est_true[uid].append((est, true_r))

    precisions = dict()
    recalls = dict()

    for uid, user_ratings in user_est_true.items():
        # Sort user ratings by estimated value
        user_ratings.sort(key=lambda x: x[0], reverse=True)

        # Number of relevant items
        n_rel = sum((true_r >= threshold) for (_, true_r) in user_ratings)

        # Number of recommended items in top k
        n_rec_k = sum((est >= threshold) for (est, _) in user_ratings[:k])

        # Number of relevant and recommended items in top k
        n_rel_and_rec_k = sum(((true_r >= threshold) and (est >= threshold))
                              for (est, true_r) in user_ratings[:k])

        # Precision@K: Proportion of recommended items that are relevant
        precisions[uid] = n_rel_and_rec_k / n_rec_k if n_rec_k != 0 else 1

        # Recall@K: Proportion of relevant items that are recommended
        recalls[uid] = n_rel_and_rec_k / n_rel if n_rel != 0 else 1

    return precisions, recalls

# 임계값(threshold)을 3.5로 설정하고, 상위 10개의 추천 항목에 대해 Precision과 Recall 계산
precisions, recalls = precision_recall_at_k(predictions, k=10, threshold=3.5)

# Precision과 Recall의 평균 계산
avg_precision = sum(precisions.values()) / len(precisions)
avg_recall = sum(recalls.values()) / len(recalls)

print(f'Average Precision: {avg_precision}')
print(f'Average Recall: {avg_recall}')


Average Precision: 0.35361111111111104
Average Recall: 0.005182707283869257


In [40]:
import sys
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer
from surprise import Dataset, Reader, SVD, accuracy
from surprise.model_selection import train_test_split

def filter_camp_data(camp, selected_do, selected_gu, selected_faclt, selected_lct, selected_induty, selected_bottom, selected_sbrs):
    if selected_do:
        camp = camp[camp['DO_NM'].str.contains(selected_do, na=False)]
    if selected_gu:
        camp = camp[camp['SIGUNGU_NM'].str.contains(selected_gu, na=False)]
    if selected_faclt:
        camp = camp[camp['FACLT_DIV_NM'].str.contains(selected_faclt, na=False)]
    if selected_lct:
        camp = camp[camp['LCT_CL'].str.contains(selected_lct, na=False)]
    if selected_induty:
        camp = camp[camp['INDUTY'].str.contains(selected_induty, na=False)]
    if selected_bottom:
        bottom_mask = None
        if selected_bottom == '잔디':
            bottom_mask = camp['SITE_BOTTOM_CL1'].fillna(0) > 0
        elif selected_bottom == '파쇄석':
            bottom_mask = camp['SITE_BOTTOM_CL2'].fillna(0) > 0
        elif selected_bottom == '테크':
            bottom_mask = camp['SITE_BOTTOM_CL3'].fillna(0) > 0
        elif selected_bottom == '자갈':
            bottom_mask = camp['SITE_BOTTOM_CL4'].fillna(0) > 0
        elif selected_bottom == '맨흙':
            bottom_mask = camp['SITE_BOTTOM_CL5'].fillna(0) > 0
        if bottom_mask is not None:
            camp = camp[bottom_mask]
    if selected_sbrs:
        camp = camp[camp['SBRS_CL'].str.contains(selected_sbrs, na=False)]

    return camp

def get_cosine_similar_items(user_rated_camps, camp_data):
    vectorizer = TfidfVectorizer()
    tfidf_matrix = vectorizer.fit_transform(camp_data['FACLT_NM'])
    
    cosine_similarities = np.zeros(tfidf_matrix.shape[0])
    for camp in user_rated_camps:
        user_query_vector = vectorizer.transform([camp])
        cosine_similarities += cosine_similarity(user_query_vector, tfidf_matrix).flatten()
    
    cosine_similarities /= len(user_rated_camps)
    camp_data = camp_data.copy()
    camp_data.loc[:, 'cosine_similarity'] = cosine_similarities
    return camp_data.sort_values(by='cosine_similarity', ascending=False)

if __name__ == "__main__":
    # 데이터 로드
    file_path = 'E:/Student/API/GoCamping.pkl'
    camp = pd.read_pickle(file_path)
    
    selected_do = "경기도"
    selected_gu = ""
    selected_faclt = ""
    selected_lct = ""
    selected_induty = ""
    selected_bottom = ""
    selected_sbrs = ""

    filtered_camp = filter_camp_data(camp, selected_do, selected_gu, selected_faclt, selected_lct, selected_induty, selected_bottom, selected_sbrs)

    # member_rate 데이터 로드
    member_rate = pd.read_pickle('E:/Student/API/memberRate.pkl')

    # 사용자 ID 설정
    user_id = int(1)

    # 사용자가 RATING을 부여한 모든 CONTENT_ID 추출
    user_rated_camps = member_rate[member_rate['NO_DATA'] == user_id]['CONTENT_ID'].tolist()
    user_rated_camps_names = camp[camp['CONTENT_ID'].isin(user_rated_camps)]['FACLT_NM'].tolist()

    # 코사인 유사도 기반 추천
    filtered_camp = get_cosine_similar_items(user_rated_camps_names, filtered_camp)

    # 필터링된 캠핑장의 content_id 추출
    content_ids = filtered_camp['CONTENT_ID'].tolist()

    # 필터링된 content_id에 해당하는 rating 데이터 필터링
    filtered_ratings = member_rate[member_rate['CONTENT_ID'].isin(content_ids)]

    # surprise용 데이터 준비
    reader = Reader(rating_scale=(1, 5))
    data = Dataset.load_from_df(filtered_ratings[['NO_DATA', 'CONTENT_ID', 'RATING']], reader)

    # train-test split
    trainset, testset = train_test_split(data, test_size=0.25)

    # 최적의 하이퍼파라미터를 사용하여 SVD 모델 학습
    svd = SVD(n_factors=50, n_epochs=20, lr_all=0.005, reg_all=0.2)
    svd.fit(trainset)

    # 예측 및 평가
    predictions = svd.test(testset)
    print(f"RMSE: {accuracy.rmse(predictions)}")

    # 추천 결과 출력
    user_ratings = filtered_ratings[filtered_ratings['NO_DATA'] == user_id]
    user_unrated_items = [item for item in content_ids if item not in user_ratings['CONTENT_ID'].tolist()]

    # 추천 항목 생성
    recommendations = []
    for item_id in user_unrated_items:
        est_rating = svd.predict(user_id, item_id).est
        recommendations.append((item_id, est_rating))

    recommendations.sort(key=lambda x: x[1], reverse=True)

    # 상위 10개 추천 항목 출력
    top_10_recommendations = recommendations[:10]
    for item_id, rating in top_10_recommendations:
        print(f"CONTENT_ID: {item_id}, Predicted Rating: {rating:.2f}")


RMSE: 1.1777
RMSE: 1.1776808554653673
CONTENT_ID: 101111, Predicted Rating: 3.63
CONTENT_ID: 1951, Predicted Rating: 3.60
CONTENT_ID: 3472, Predicted Rating: 3.57
CONTENT_ID: 7230, Predicted Rating: 3.55
CONTENT_ID: 2560, Predicted Rating: 3.51
CONTENT_ID: 101244, Predicted Rating: 3.49
CONTENT_ID: 7028, Predicted Rating: 3.47
CONTENT_ID: 101037, Predicted Rating: 3.40
CONTENT_ID: 70, Predicted Rating: 3.38
CONTENT_ID: 6785, Predicted Rating: 3.37
