In [1]:
import pandas as pd

# Load data
data = pd.read_csv('국민여행조사데이터.csv', encoding='utf-8-sig')

# Convert string to numeric
data['만족도'] = data['만족도'].map({'매우 만족': 5, '만족': 4, '보통': 3, '불만족': 2, '매우 불만족': 1})
data['재방문의도'] = data['재방문의도'].map({'매우 높음': 5, '높음': 4, '보통': 3, '낮음': 2, '매우 낮음': 1})
data['타인추천의도'] = data['타인추천의도'].map({'매우 높음': 5, '높음': 4, '보통': 3, '낮음': 2, '매우 낮음': 1})

# Create a new column '평점' which is the average of '만족도', '재방문의도', '타인추천의도'
data['평점'] = data[['만족도', '재방문의도', '타인추천의도']].mean(axis=1)

# Define a function to categorize '동반자유형' into '가족' and '가족이 아닌 경우'
def categorize_companion_type(row):
    if '가족' in row:
        return '가족'
    else:
        return '가족이 아닌 경우'

# Apply the function to the '동반자유형' column
data['동반자유형'] = data['동반자유형'].apply(categorize_companion_type)

# Group by '시군구', '성별', '연령대', '동반자유형', and calculate the average '평점'
grouped_data = data.groupby(['시군구', '성별', '연령대', '동반자유형'])['평점'].mean().reset_index()

# Check the first few rows of the grouped data
grouped_data.head()

C:\Users\art\anaconda3\envs\tensorflow\lib\site-packages\numpy\.libs\libopenblas.NOIJJG62EMASZI6NYURL6JBKM4EVBGM7.gfortran-win_amd64.dll
C:\Users\art\anaconda3\envs\tensorflow\lib\site-packages\numpy\.libs\libopenblas64__v0.3.21-gcc_10_3_0.dll


Unnamed: 0,시군구,성별,연령대,동반자유형,평점
0,강원 강릉시,남,15~19세,가족,4.095238
1,강원 강릉시,남,15~19세,가족이 아닌 경우,4.0
2,강원 강릉시,남,20대,가족,4.148148
3,강원 강릉시,남,20대,가족이 아닌 경우,4.085546
4,강원 강릉시,남,30대,가족,4.059361


In [2]:
grouped_data = pd.read_csv('국민여행조사데이터.csv', encoding='utf-8-sig')

# Pivot the data to create a user-item matrix
user_item_matrix = grouped_data.pivot_table(index=['성별', '연령대', '동반자유형'], columns='시군구', values='평점')

# Fill NaN with 0
user_item_matrix.fillna(0, inplace=True)

# Calculate the Pearson correlation matrix
correlation_matrix = user_item_matrix.corr(method='pearson')

# Check the first few rows of the correlation matrix
correlation_matrix.head()


시군구,강원 강릉시,강원 고성군,강원 동해시,강원 삼척시,강원 속초시,강원 양구군,강원 양양군,강원 영월군,강원 원주시,강원 인제군,...,충북 단양군,충북 보은군,충북 영동군,충북 옥천군,충북 음성군,충북 제천시,충북 증평군,충북 진천군,충북 청주시,충북 충주시
시군구,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
강원 강릉시,1.0,-0.142962,-0.024984,0.098373,0.097257,0.018824,0.010426,-0.087033,0.307536,-0.022138,...,0.234743,0.177014,-0.214589,-0.005872,0.16859,0.166788,0.080624,0.108704,0.181508,-0.011653
강원 고성군,-0.142962,1.0,0.614155,0.687541,-0.859527,0.201772,-0.520689,0.679336,-0.021692,0.341519,...,0.001962,-0.021846,0.293589,0.409848,-0.044456,-0.028449,0.141951,-0.072359,-0.400374,0.520349
강원 동해시,-0.024984,0.614155,1.0,0.553901,-0.479203,0.024289,-0.418157,0.456902,0.146436,0.70104,...,0.142342,0.450549,0.483975,0.281069,-0.170022,0.040241,0.33574,-0.058128,-0.279324,0.509199
강원 삼척시,0.098373,0.687541,0.553901,1.0,-0.57206,0.262817,-0.053008,0.4936,0.665858,0.606386,...,0.683788,0.31423,0.429117,0.606429,-0.181146,0.639675,0.31373,0.199548,-0.657432,0.801185
강원 속초시,0.097257,-0.859527,-0.479203,-0.57206,1.0,-0.054011,0.417607,-0.538306,-0.02684,-0.117819,...,-0.037734,0.124341,-0.26659,-0.422526,-0.080373,-0.032835,-0.102651,-0.002425,0.232899,-0.389288


In [3]:
def recommend_places(input_places, num_recommendations=5):
    # Initialize an empty dataframe to store recommendations
    recommendations = pd.DataFrame()

    for place in input_places:
        # Get the row for the input place
        place_row = correlation_matrix.loc[place]
        
        # Get the places with the highest correlation with the input place
        similar_places = place_row.sort_values(ascending=False)
        
        # Remove the input place itself from the recommendations
        similar_places = similar_places.drop(input_places, errors='ignore')
        
        # Get the top num_recommendations places
        top_recommendations = similar_places.head(num_recommendations)
        
        # Add the top recommendations to the recommendations dataframe
        recommendations = pd.concat([recommendations, top_recommendations])
    
    return recommendations


In [6]:
places=['강원 강릉시', '광주 서구', '인천 중구']
recommend_places(places)

Unnamed: 0,0
경기 안산시,0.538435
충남 서산시,0.480635
경기 시흥시,0.480146
부산 수영구,0.468338
대구 동구,0.436997
광주 동구,0.746767
광주 광산구,0.725138
광주 북구,0.67899
부산 동래구,0.604433
대구 달서구,0.603588


In [20]:
def recommend_places(input_places, num_recommendations=5):
    # Initialize an empty dataframe to store recommendations
    recommendations = pd.DataFrame(columns=['Place', 'Score'])

    for place in input_places:
        # Get the row for the input place
        place_row = correlation_matrix.loc[place]
        
        # Get the places with the highest correlation with the input place
        similar_places = place_row.sort_values(ascending=False)
        
        # Remove the input place itself from the recommendations
        similar_places = similar_places.drop(place)
        
        # Get the top num_recommendations places
        top_recommendations = pd.DataFrame({'Place': similar_places.index, 'Score': similar_places.values}).head(num_recommendations)
        
        # Add the top recommendations to the recommendations dataframe
        recommendations = recommendations.append(top_recommendations, ignore_index=True)
    
    # Sort the recommendations by the score in descending order
    recommendations = recommendations.sort_values(by='Score', ascending=False)
    
    return recommendations


In [21]:
places=['강원 강릉시', '광주 서구', '인천 중구']
recommend_places(places)

  recommendations = recommendations.append(top_recommendations, ignore_index=True)
  recommendations = recommendations.append(top_recommendations, ignore_index=True)
  recommendations = recommendations.append(top_recommendations, ignore_index=True)


Unnamed: 0,Place,Score
5,광주 동구,0.746767
6,광주 광산구,0.725138
10,경북 경주시,0.691174
7,광주 북구,0.67899
11,경남 남해군,0.625067
8,부산 동래구,0.604433
9,대구 달서구,0.603588
12,경기 파주시,0.547535
0,경기 안산시,0.538435
13,서울 영등포구,0.483504


In [29]:
def recommend_places(input_places, num_recommendations=5):
    # Initialize an empty dataframe to store recommendations
    recommendations = pd.DataFrame(columns=['Place', 'Score'])

    for place in input_places:
        # Get the row for the input place
        place_row = correlation_matrix.loc[place]
        
        # Get the places with the highest correlation with the input place
        similar_places = place_row.sort_values(ascending=False)
        
        # Remove the input place itself from the recommendations
        similar_places = similar_places.drop(place)
        
        # Get the top num_recommendations places
        top_recommendations = pd.DataFrame({'Place': similar_places.index, 'Score': similar_places.values}).head(num_recommendations)
        
        # Add the top recommendations to the recommendations dataframe
        recommendations = recommendations.append(top_recommendations, ignore_index=True)
    
    # Sort the recommendations by the score in descending order
    recommendations = recommendations.sort_values(by='Score', ascending=False)
    print(recommendations)
    # Get the top num_recommendations places
    top_places = recommendations['Place'].head(num_recommendations).tolist()
    
    return top_places


In [30]:
places=['강원 강릉시', '광주 서구', '인천 중구']
recommend_places(places)

      Place     Score
5     광주 동구  0.746767
6    광주 광산구  0.725138
10   경북 경주시  0.691174
7     광주 북구  0.678990
11   경남 남해군  0.625067
8    부산 동래구  0.604433
9    대구 달서구  0.603588
12   경기 파주시  0.547535
0    경기 안산시  0.538435
13  서울 영등포구  0.483504
1    충남 서산시  0.480635
2    경기 시흥시  0.480146
3    부산 수영구  0.468338
14   전남 순천시  0.438973
4     대구 동구  0.436997


  recommendations = recommendations.append(top_recommendations, ignore_index=True)
  recommendations = recommendations.append(top_recommendations, ignore_index=True)
  recommendations = recommendations.append(top_recommendations, ignore_index=True)


['광주 동구', '광주 광산구', '경북 경주시', '광주 북구', '경남 남해군']