In [1]:
#Build recommendation model: Content based filtering

#Import refined restaurant public data
import pandas as pd

#Load the refined public restaurant table from the CSV that sits next to the notebook
rest_df = pd.read_csv('rest_public_data.csv')

#Row count of the loaded table. A notebook only renders the last expression of a
#cell, so this value is computed but not shown.
rest_df.shape[0]

#Last expression of the cell: render the loaded frame
rest_df

Unnamed: 0.1,Unnamed: 0,rest_name,rest_category,rest_number,rest_address,rest_location,rest_rating,rest_density
0,0,로비라운지,경양식,02 5597034,서울특별시 강남구 삼성동 159번지,"[37.5101935, 127.0585824]",80.29,24.35
1,1,로비바,경양식,02 5597034,서울특별시 강남구 삼성동 159번지,"[37.5101935, 127.0585824]",70.12,83.01
2,2,노블발렌티,한식,02 5400711,서울특별시 강남구 삼성동 109-6번지,"[37.5152844, 127.0648056]",75.53,71.62
3,3,취화선,중국식,02 5080409,서울특별시 강남구 대치동 974-1번지,"[37.5007989, 127.0601254]",65.53,26.55
4,4,고향집,중국식,02 5474301,서울특별시 강남구 논현동 237-0번지,"[37.5092943, 127.0367839]",98.96,96.35
...,...,...,...,...,...,...,...,...
3351,3419,LG시그니처키친스위트 청담쇼룸 다이닝,기타,02 69669402,서울특별시 강남구 청담동 82-1,"[37.526298, 127.0421124]",68.37,49.29
3352,3420,삼호,한식,02 29713585,서울특별시 강남구 역삼동 673-31 태선빌딩,"[37.5034325, 127.0399653]",83.39,73.63
3353,3422,인정국물떡볶이,분식,02 553 5591,서울특별시 강남구 논현동 108 논현웰스톤,"[37.5184174, 127.0380027]",72.91,97.08
3354,3423,농민백암,한식,02 565 9603,서울특별시 강남구 대치동 896-37,"[37.5037637, 127.0531732]",99.27,29.40


In [2]:
#Add a column that is a weighted combination of rating and density

#Weights of the blended score: 30% rating, 70% density
RATING_WEIGHT = 0.3
DENSITY_WEIGHT = 0.7

#Drop the leftover index column by name instead of by position.
#A positional drop (columns[0]) removes one more column every time the cell is
#re-run; dropping by name and ignoring a missing column keeps the cell idempotent.
rest_df = rest_df.drop(columns=['Unnamed: 0.1'], errors='ignore')

#NOTE(review): the score grows with the rating, so ranking it in ascending order
#favours low ratings as well as low density - confirm that this is intended.
rest_df['recommend'] = (rest_df['rest_rating']*RATING_WEIGHT + rest_df['rest_density']*DENSITY_WEIGHT)

#Export the enriched table as CSV and JSON.
#NOTE(review): to_csv writes the index as an extra unnamed column by default, which
#is presumably where the 'Unnamed: 0*' columns in the input came from; consider
#index=False once consumers of data_0211.csv are confirmed not to need that column.
rest_df.to_csv('data_0211.csv')
rest_df.to_json('data_0211.json')

In [7]:
#Content based filtering

#Vectorize texts into numbers
from sklearn.feature_extraction.text import CountVectorizer

#Count vectorizer over 1- to 3-grams of the category text
count_vector = CountVectorizer(ngram_range=(1, 3))

#Learn the vocabulary from the category column and encode every restaurant as a
#sparse count vector (one row per restaurant, one column per n-gram)
category_texts = rest_df['rest_category']
c_vector_genres = count_vector.fit_transform(category_texts)

#Shape is evaluated but not rendered; only the last expression of a cell is shown
c_vector_genres.shape
c_vector_genres

<3356x27 sparse matrix of type '<class 'numpy.int64'>'
	with 3546 stored elements in Compressed Sparse Row format>

In [8]:
#Get cosine_similarity with the vectors
from sklearn.metrics.pairwise import cosine_similarity

#Pairwise cosine similarity between all category vectors, turned into a ranking:
#row i holds the row positions of all restaurants ordered from most to least
#similar to restaurant i. Despite its name the variable stores positions (the
#argsort result), not similarity scores. Restaurants with equal similarity appear
#in whatever order the default argsort leaves them.
gerne_c_sim = cosine_similarity(X=c_vector_genres, Y=c_vector_genres).argsort(axis=1)[:, ::-1]

#Square matrix: one ranking row per restaurant
gerne_c_sim.shape

(3356, 3356)

In [3]:
#Shared candidate selection used by the three recommenders below
def _recommend_similar_restaurants(rest_df, rest_title, top, sort_by, ascending, limit):
    """Return up to `limit` restaurants similar to `rest_title`, ordered by `sort_by`.

    Reads the notebook-level `gerne_c_sim` matrix, whose row i lists row
    positions ordered from most to least similar to row i of `rest_df`.

    Parameters
    ----------
    rest_df : pandas.DataFrame
        Restaurant table; needs a 'rest_name' column and the `sort_by` column.
        Its rows must be in the same order as the rows of `gerne_c_sim`.
    rest_title : str
        Name of the restaurant to find neighbours for. Every row carrying this
        name is used as a query row.
    top : int
        Number of most-similar candidates taken per query row before sorting.
    sort_by : str
        Column used to order the candidates.
    ascending : bool
        Sort direction for `sort_by`.
    limit : int
        Maximum number of rows returned.

    Returns
    -------
    pandas.DataFrame
        The selected rows of `rest_df`. Empty when `rest_title` is not present.
    """
    import numpy as np  #local import so the function does not rely on a notebook-level numpy import

    #Row positions (not index labels) of the queried restaurant, so that the lookup
    #into gerne_c_sim and iloc stays correct even if rest_df has a non-default index
    target_positions = np.flatnonzero((rest_df['rest_name'] == rest_title).to_numpy())

    #The `top` most similar row positions for every query row, flattened
    candidates = np.asarray(gerne_c_sim)[target_positions, :top].reshape(-1)

    #Exclude the queried restaurant itself. np.isin also works when several rows
    #share the name, where an element-wise `!=` between the arrays cannot broadcast.
    candidates = candidates[~np.isin(candidates, target_positions)]

    #Several query rows can share neighbours: keep each candidate once, in order
    #of first appearance, so no restaurant is listed twice
    _, first_seen = np.unique(candidates, return_index=True)
    candidates = candidates[np.sort(first_seen)]

    #Make into a dataframe, sort, and return the best `limit` rows
    return rest_df.iloc[candidates].sort_values(sort_by, ascending=ascending)[:limit]


#Make recommendation by population density
def get_recommend_rest_list(rest_df, rest_title, top=30, limit=10):
    """Recommend similar restaurants, least crowded first.

    Takes the `top` most similar restaurants to `rest_title` (by category
    similarity) and returns at most `limit` of them sorted by 'rest_density'
    in ascending order. Returns an empty frame for an unknown name.
    """
    return _recommend_similar_restaurants(rest_df, rest_title, top, 'rest_density', True, limit)


#Make recommendation by rating
def get_recommend_rest_list2(rest_df, rest_title, top=30, limit=10):
    """Recommend similar restaurants, best rated first.

    Takes the `top` most similar restaurants to `rest_title` (by category
    similarity) and returns at most `limit` of them sorted by 'rest_rating'
    in descending order. Returns an empty frame for an unknown name.
    """
    return _recommend_similar_restaurants(rest_df, rest_title, top, 'rest_rating', False, limit)


#Get recommendation by ABC recommender (a combination of density and rating)
def get_recommend_rest_list3(rest_df, rest_title, top=30, limit=10):
    """Recommend similar restaurants by the combined 'recommend' score.

    Takes the `top` most similar restaurants to `rest_title` (by category
    similarity) and returns at most `limit` of them sorted by the 'recommend'
    column in ascending order. Returns an empty frame for an unknown name.

    NOTE(review): if 'recommend' is a positively weighted sum of rating and
    density, sorting it ascending favours low ratings as well as low density,
    which is at odds with the rating-based recommender - confirm the intent.
    """
    return _recommend_similar_restaurants(rest_df, rest_title, top, 'recommend', True, limit)

In [4]:
#Similar restaurants to the queried one, least crowded (lowest density) first
get_recommend_rest_list(rest_df, "강남역점 홍콩반점0410")

Unnamed: 0,rest_name,rest_category,rest_number,rest_address,rest_location,rest_rating,rest_density,recommend
1980,양자강,중국식,02 566 8116,서울특별시 강남구 역삼동 719-9번지 외1필지 지상2층,"[37.5010846, 127.0403049]",87.96,5.02,29.902
2543,니뽕내뽕(코엑스점),중국식,02 60025151,서울특별시 강남구 삼성동 159번지,"[37.5101935, 127.0585824]",89.53,6.67,31.528
153,난랑,중국식,02 5492888,서울특별시 강남구 논현동 86-5번지,"[37.5152321, 127.031597]",92.25,12.43,36.376
145,강성환차이나,중국식,02 5619974,서울특별시 강남구 삼성동 143-30번지,"[37.5067431, 127.0539115]",65.25,14.94,30.033
186,남산희래등,중국식,02 5625296,서울특별시 강남구 대치동 903-3번지,"[37.5026182, 127.0567492]",86.51,15.9,37.083
1477,홍운장,중국식,02 5583888,서울특별시 강남구 대치동 913-14번지 지상1층,"[37.5018551, 127.058911]",86.1,16.76,37.562
236,미래향,중국식,02 564 6133,서울특별시 강남구 대치동 941-27번지,"[37.4985196, 127.0588217]",54.1,19.58,29.936
152,대범천,중국식,02 5453357,서울특별시 강남구 삼성동 1-4번지,"[37.516637, 127.04178]",95.72,26.76,47.448
1330,친친,중국식,02 5577722,서울특별시 강남구 삼성동 142-3번지,"[37.5060621, 127.0509138]",95.48,27.36,47.796
370,예향,중국식,02 5396178,서울특별시 강남구 역삼동 830-70 2층,"[37.4942764, 127.0330313]",80.9,29.01,44.577


In [5]:
#Similar restaurants to the queried one, highest rating first
get_recommend_rest_list2(rest_df, "강남역점 홍콩반점0410")

Unnamed: 0,rest_name,rest_category,rest_number,rest_address,rest_location,rest_rating,rest_density,recommend
1329,만리향,중국식,02 5439840,서울특별시 강남구 논현동 268-3번지 지상2층,"[37.5122037, 127.0428751]",96.77,45.3,60.741
152,대범천,중국식,02 5453357,서울특별시 강남구 삼성동 1-4번지,"[37.516637, 127.04178]",95.72,26.76,47.448
1330,친친,중국식,02 5577722,서울특별시 강남구 삼성동 142-3번지,"[37.5060621, 127.0509138]",95.48,27.36,47.796
783,믿음식당,중국식,02 5717495,서울특별시 강남구 개포동 660-4번지 종합상가B-143144,"[37.480557, 127.0583551]",93.04,29.54,48.59
785,자금성,중국식,02 5446655,서울특별시 강남구 청담동 44-7번지,"[37.5187469, 127.0464114]",92.51,71.89,78.076
153,난랑,중국식,02 5492888,서울특별시 강남구 논현동 86-5번지,"[37.5152321, 127.031597]",92.25,12.43,36.376
1495,쌍용반점,중국식,02 5550426,서울특별시 강남구 대치동 66-0번지 지상1층117호,"[37.4979951, 127.0709857]",91.64,65.27,73.181
189,도곡원,중국식,02 5721776,서울특별시 강남구 도곡동 419-9번지,"[37.4855381, 127.0454452]",89.75,29.14,47.323
2543,니뽕내뽕(코엑스점),중국식,02 60025151,서울특별시 강남구 삼성동 159번지,"[37.5101935, 127.0585824]",89.53,6.67,31.528
1980,양자강,중국식,02 566 8116,서울특별시 강남구 역삼동 719-9번지 외1필지 지상2층,"[37.5010846, 127.0403049]",87.96,5.02,29.902


In [6]:
#Similar restaurants to the queried one, ordered by the combined 'recommend' score (ABC recommender)
get_recommend_rest_list3(rest_df, "강남역점 홍콩반점0410")

Unnamed: 0,rest_name,rest_category,rest_number,rest_address,rest_location,rest_rating,rest_density,recommend
1980,양자강,중국식,02 566 8116,서울특별시 강남구 역삼동 719-9번지 외1필지 지상2층,"[37.5010846, 127.0403049]",87.96,5.02,29.902
236,미래향,중국식,02 564 6133,서울특별시 강남구 대치동 941-27번지,"[37.4985196, 127.0588217]",54.1,19.58,29.936
145,강성환차이나,중국식,02 5619974,서울특별시 강남구 삼성동 143-30번지,"[37.5067431, 127.0539115]",65.25,14.94,30.033
2543,니뽕내뽕(코엑스점),중국식,02 60025151,서울특별시 강남구 삼성동 159번지,"[37.5101935, 127.0585824]",89.53,6.67,31.528
153,난랑,중국식,02 5492888,서울특별시 강남구 논현동 86-5번지,"[37.5152321, 127.031597]",92.25,12.43,36.376
186,남산희래등,중국식,02 5625296,서울특별시 강남구 대치동 903-3번지,"[37.5026182, 127.0567492]",86.51,15.9,37.083
1477,홍운장,중국식,02 5583888,서울특별시 강남구 대치동 913-14번지 지상1층,"[37.5018551, 127.058911]",86.1,16.76,37.562
370,예향,중국식,02 5396178,서울특별시 강남구 역삼동 830-70 2층,"[37.4942764, 127.0330313]",80.9,29.01,44.577
189,도곡원,중국식,02 5721776,서울특별시 강남구 도곡동 419-9번지,"[37.4855381, 127.0454452]",89.75,29.14,47.323
152,대범천,중국식,02 5453357,서울특별시 강남구 삼성동 1-4번지,"[37.516637, 127.04178]",95.72,26.76,47.448
