In [1]:
import numpy as np
import pandas as pd

# 유클리드 거리 측정
def euclidean_distance(x, y):
	euclidean = []
	x = x.iloc[0].tolist()
	for idx, row in y.iterrows():
		e = np.sqrt(np.sum((x - row.values) ** 2))
		euclidean.append(e)
	euclidean = np.array(euclidean)
	return euclidean.reshape(1, -1)

# user_info와 성향이 비슷한 사람을 top_n만큼 추천
def find_similiar_user(user_info, df, top_n=10, first_del=False):
	start_n = 0
	if first_del == True:
		top_n = top_n + 1
		start_n = 1
	sim = euclidean_distance(user_info, df.iloc[:,6:])
	sim_sorted_idx = sim.argsort()
	sim_sorted_idx = sim_sorted_idx.reshape(-1).tolist()

	sim_sorted_id = df.iloc[sim_sorted_idx, 0]
	sim_sorted_id = sim_sorted_id.tolist()
	sim_sorted_id = sim_sorted_id[start_n:top_n]
	print("나의 BIG-FIVE 성향과 유사한 사람들 id : \n", sim_sorted_id)
	print("나의 BIG-FIVE 성향과 유사한 사람들 index : \n", sim_sorted_idx[start_n:top_n])
	return sim_sorted_id
		

In [2]:
# 가중치를 통한 영화 추천
def recommend_movie(user_info, df, movie, top_n=10, quantile=0.6):

	sim_sorted_id = find_similiar_user(user_info, df, top_n)

	reviewed_movie = set() # 중복을 제하기 위해 set()으로 객체생성

	for id in sim_sorted_id:
		reviewed_movie.update(movie[movie["userid"] == id].loc[:, "movie_id"].tolist())

	reviewed_movie = list(reviewed_movie)

	rating_df = pd.pivot_table(movie, index=["movie_id", "expected_name"], values=["rating"], aggfunc=["count", "mean"])
	rating_df.columns = ["count", "mean"]
	rating_df = rating_df.loc[reviewed_movie, :]

	C = np.mean(rating_df["mean"])
	m = rating_df["count"].quantile(quantile)
	print('C : {0}, m : {1}'.format(C, m))
	
	weight = []
	for i in range(len(rating_df)):
		v = rating_df.iloc[i]["count"]
		R = rating_df.iloc[i]["mean"]
		weight.append((v / (v + m)) * R + (m / (v + m)) * C)
	rating_df["weight"] = weight
	
	return rating_df.sort_values('weight', ascending=False)[:10] # 상위 10개의 영화만 리턴

In [3]:
from sklearn.preprocessing import MinMaxScaler

# start ~ end 사이의 값을 MinMaxScaler를 적용
def change_range(df, start, end):
	# MinMaxScaler
	pd.set_option("mode.chained_assignment", None)
	scaler = MinMaxScaler()
	train_array = np.arange(start, end).reshape(-1, 1)
	scaler.fit(train_array)
	tmp = scaler.transform(df["openness"].values.reshape(-1, 1))
	df["openness_scaler"] = tmp.reshape(-1)
	tmp = scaler.transform(df["agreeableness"].values.reshape(-1, 1))
	df["agreeableness_scaler"] = tmp.reshape(-1)
	tmp = scaler.transform(df["emotional_stability"].values.reshape(-1, 1))
	df["emotional_stability_scaler"] = tmp.reshape(-1)
	tmp = scaler.transform(df["conscientiousness"].values.reshape(-1, 1))
	df["conscientiousness_scaler"] = tmp.reshape(-1)
	tmp = scaler.transform(df["extraversion"].values.reshape(-1, 1))
	df["extraversion_scaler"] = tmp.reshape(-1)
	return df


#### 아이템 기반의 협업 필터링에서 개인화된 예측 평점
$\hat{R_{u,i}} = \sum N(S_{i,N}*R_{u,N}) / \sum N(|S_{i,N}|)$
- $\hat{R_{u,i}}$ : 사용자 u, 아이템 i의 개인화된 예측 평점 값
- $S_{i,N}$ : 아이템 i와 가장 유사도가 높은 Top-N개 아이템의 유사도 벡터
- $R_{u,N}$ : 사용자 u의 아이템 i와 가장 유사도가 높은 Top-N개 아이템에 대한 실제 평점 벡터

In [4]:
# 평점 예측
def predict_rating(ratings_arr, sim_arr):
	ratings_pred = ratings_arr.dot(sim_arr)/np.array([np.abs(sim_arr).sum(axis=1)])
	return ratings_pred

In [5]:
from sklearn.metrics import mean_squared_error
# mean_squared_error 
def get_mse(pred, real):
	pred = pred[real.nonzero()].flatten()
	real = real[real.nonzero()].flatten()
	return mean_squared_error(pred, real)

In [6]:
# 조금 더 정확성울 위해 평점 유사도에서 유사도가 높은 n개의 영화만 사용해서 평점을 예측
def predict_rating_topsim(ratings_arr, sim_arr, n=10):
	# 사용자-영화 평점 행렬 크기만큼 0으로 채운 예측 행렬
	pred = np.zeros(ratings_arr.shape)

	# 사용자-영화 평점 행렬의 열 크기만큼 루프 => 영화 개수만큼 루프
	for col in range(ratings_arr.shape[1]):
		# 유사도 행렬 (sim_arr)에서 유사도가 큰 순으로 n개 데이터
		top_n = [np.argsort(sim_arr[:, col])[:-n-1:-1]]
		# 평점 예측
		for row in range(ratings_arr.shape[0]):
			pred[row, col] = sim_arr[col, :][top_n].dot(ratings_arr[row, :][top_n].T)
			pred[row, col] /= np.sum(np.abs(sim_arr[col, :][top_n]))
	return pred

In [7]:
from sklearn.metrics.pairwise import cosine_similarity

# 영화 평점 간 유사도 측정
def search_ratings_sim(ratings):
	# 영화 간의 평점 유사도를 볼 것이기 때문에 전치행렬을 구해준다
	ratings_T = ratings.transpose()
	# 성격유형이 유사한 10명의 영화 평점을 기준으로 영화의 유사도를 생성
	rating_sim = cosine_similarity(ratings_T, ratings_T)
	rating_sim_df = pd.DataFrame(data=rating_sim, index=ratings_T.index, columns=ratings_T.index)
	return rating_sim_df

---

In [8]:
import pandas as pd

personality = pd.read_csv("./data/2018-personality-data.csv")
personality.columns = personality.columns.str.strip()
personality_big5 = personality[['userid', 'openness', 'agreeableness', 'emotional_stability',
		'conscientiousness', 'extraversion']]
personality_big5.head()

Unnamed: 0,userid,openness,agreeableness,emotional_stability,conscientiousness,extraversion
0,8e7cebf9a234c064b75016249f2ac65e,5.0,2.0,3.0,2.5,6.5
1,77c7d756a093150d4377720abeaeef76,7.0,4.0,6.0,5.5,4.0
2,b7e8a92987a530cc368719a0e60e26a3,4.0,3.0,4.5,2.0,2.5
3,92561f21446e017dd6b68b94b23ad5b7,5.5,5.5,4.0,4.5,4.0
4,030001ac2145a938b07e686a35a2d638,5.5,5.5,3.5,4.5,2.5


In [9]:
ratings = pd.read_csv("./data/new_ratings.csv", index_col=0)
ratings.head()

Unnamed: 0,userid,movie_id,rating,tstamp,expected_name
0,8e7cebf9a234c064b75016249f2ac65e,1,5.0,2001-09-10 17:19:56,Toy Story (1995)
1,8e7cebf9a234c064b75016249f2ac65e,2,4.0,2001-09-28 11:34:55,Jumanji (1995)
2,8e7cebf9a234c064b75016249f2ac65e,3,4.0,2001-09-28 11:42:50,Grumpier Old Men (1995)
3,8e7cebf9a234c064b75016249f2ac65e,5,5.0,2001-09-28 11:27:30,Father of the Bride Part II (1995)
4,8e7cebf9a234c064b75016249f2ac65e,6,4.0,2002-01-07 18:12:02,Heat (1995)


In [10]:

personality_big5 = change_range(personality_big5, 1, 8) # scaler
personality_big5.head()

Unnamed: 0,userid,openness,agreeableness,emotional_stability,conscientiousness,extraversion,openness_scaler,agreeableness_scaler,emotional_stability_scaler,conscientiousness_scaler,extraversion_scaler
0,8e7cebf9a234c064b75016249f2ac65e,5.0,2.0,3.0,2.5,6.5,0.666667,0.166667,0.333333,0.25,0.916667
1,77c7d756a093150d4377720abeaeef76,7.0,4.0,6.0,5.5,4.0,1.0,0.5,0.833333,0.75,0.5
2,b7e8a92987a530cc368719a0e60e26a3,4.0,3.0,4.5,2.0,2.5,0.5,0.333333,0.583333,0.166667,0.25
3,92561f21446e017dd6b68b94b23ad5b7,5.5,5.5,4.0,4.5,4.0,0.75,0.75,0.5,0.583333,0.5
4,030001ac2145a938b07e686a35a2d638,5.5,5.5,3.5,4.5,2.5,0.75,0.75,0.416667,0.583333,0.25


In [11]:
test_info = personality_big5.iloc[12:13, :] #12번째 데이터를 테스트용으로 사용
test_info

Unnamed: 0,userid,openness,agreeableness,emotional_stability,conscientiousness,extraversion,openness_scaler,agreeableness_scaler,emotional_stability_scaler,conscientiousness_scaler,extraversion_scaler
12,2507e0e5af7f4c6bef436c8cd0e90bc3,6.5,4.0,2.5,6.0,4.0,0.916667,0.5,0.25,0.833333,0.5


In [12]:
similiar_user = find_similiar_user(test_info.iloc[:, 6:], personality_big5, top_n=10, first_del=True)
similiar_user # 성격유형이 유사한 top_n명 찾기

나의 BIG-FIVE 성향과 유사한 사람들 id : 
 ['3f77830c1b96c0ce003e248a069cf61d', '73b13da703becd9ab9903197cf246dee', '35afcbf0c50ede54759a40183469178c', 'ea4734eb95ee46f675d5768919930cc2', '1c1286bced458836d6c082753a70facc', '08419a2991116300be4f440f45f1c71e', '655e9ec8da090890d4301410f5b8dee5', '29269716223ac43f3921533adce28fce', '15ba0b093a9a409d176282ab8847047e', '192ff60bd8841ae6727ed1861e60156b']
나의 BIG-FIVE 성향과 유사한 사람들 index : 
 [97, 1726, 1625, 856, 718, 1518, 1505, 1525, 488, 1257]


['3f77830c1b96c0ce003e248a069cf61d',
 '73b13da703becd9ab9903197cf246dee',
 '35afcbf0c50ede54759a40183469178c',
 'ea4734eb95ee46f675d5768919930cc2',
 '1c1286bced458836d6c082753a70facc',
 '08419a2991116300be4f440f45f1c71e',
 '655e9ec8da090890d4301410f5b8dee5',
 '29269716223ac43f3921533adce28fce',
 '15ba0b093a9a409d176282ab8847047e',
 '192ff60bd8841ae6727ed1861e60156b']

In [13]:
ratings = ratings.pivot_table('rating', index='userid', columns='expected_name')
ratings

expected_name,'71 (2014),'Hellboy': The Seeds of Creation (2004),'Round Midnight (1986),'Salem's Lot (2004),'Til There Was You (1997),'Tis the Season for Love (2015),"'burbs, The (1989)",'night Mother (1986),(500) Days of Summer (2009),*batteries not included (1987),...,Zulu (1964),Zulu (2013),[REC] (2007),[REC]² (2009),[REC]³ 3 Génesis (2012),eXistenZ (1999),xXx (2002),xXx: State of the Union (2005),¡Three Amigos! (1986),À nous la liberté (Freedom for Us) (1931)
userid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
005fe8678214011d7f92e51f9a546d40,,,,,,,,,,,...,,,,,,,,,,
0066fac81b62656f032c085d96e378f4,,,,,,,,,,,...,,,,,,,,,,
00fa91e202f5e48aa34c05d97867fa74,,,,,,,,,3.0,3.0,...,,,,,,4.5,1.5,,,
011aedbea90fb3b6d1e7a47526b3bee6,,,,,,,2.5,,,,...,,,3.5,,,2.0,2.5,,,
013325441cd1755b18c58b8007646bb1,,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
feb85d3dbf8878d745c16a431bf6396e,,,,,,,,,4.5,,...,,,,,,,,,,
fefc161ac0b429fdbf6b56cc04dd4aa6,,,,,,,,,3.0,,...,,,,,,,,,,
ff14f409ad4f18cfcc72d2478355464d,,,,,,,,,,,...,,,,,,,,,,
ff8b293ae258fe516c39acecb966dba4,,,,,,,,,5.0,,...,,,,,,,,,,


In [14]:
# 특정 사용자들의 영화 평점 리스트 유사도 찾기
similiar_ratings = ratings.loc[similiar_user] # 아까 구했던 성격 유형이 유사한 top_n명의 평점 데이터만 꺼내기
similiar_ratings = similiar_ratings.fillna(0)  # nan을 0으로 만들기
similiar_ratings

expected_name,'71 (2014),'Hellboy': The Seeds of Creation (2004),'Round Midnight (1986),'Salem's Lot (2004),'Til There Was You (1997),'Tis the Season for Love (2015),"'burbs, The (1989)",'night Mother (1986),(500) Days of Summer (2009),*batteries not included (1987),...,Zulu (1964),Zulu (2013),[REC] (2007),[REC]² (2009),[REC]³ 3 Génesis (2012),eXistenZ (1999),xXx (2002),xXx: State of the Union (2005),¡Three Amigos! (1986),À nous la liberté (Freedom for Us) (1931)
userid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
3f77830c1b96c0ce003e248a069cf61d,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
73b13da703becd9ab9903197cf246dee,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
35afcbf0c50ede54759a40183469178c,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
ea4734eb95ee46f675d5768919930cc2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.5,0.0,0.0,0.0
1c1286bced458836d6c082753a70facc,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
08419a2991116300be4f440f45f1c71e,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
655e9ec8da090890d4301410f5b8dee5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
29269716223ac43f3921533adce28fce,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
15ba0b093a9a409d176282ab8847047e,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0
192ff60bd8841ae6727ed1861e60156b,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [15]:
# 평점 유사도를 구하기 위해 정치행렬을 취한다
similiar_ratings_T = similiar_ratings.transpose()
similiar_ratings_T.head()

userid,3f77830c1b96c0ce003e248a069cf61d,73b13da703becd9ab9903197cf246dee,35afcbf0c50ede54759a40183469178c,ea4734eb95ee46f675d5768919930cc2,1c1286bced458836d6c082753a70facc,08419a2991116300be4f440f45f1c71e,655e9ec8da090890d4301410f5b8dee5,29269716223ac43f3921533adce28fce,15ba0b093a9a409d176282ab8847047e,192ff60bd8841ae6727ed1861e60156b
expected_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
'71 (2014),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
'Hellboy': The Seeds of Creation (2004),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
'Round Midnight (1986),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
'Salem's Lot (2004),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
'Til There Was You (1997),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [16]:
from sklearn.metrics.pairwise import cosine_similarity

rating_sim = cosine_similarity(similiar_ratings_T, similiar_ratings_T)
rating_sim_df = pd.DataFrame(data=rating_sim, index=similiar_ratings.columns, columns=similiar_ratings.columns)
rating_sim_df # 성격유형이 유사한 10명의 영화 평점들간 유사도

expected_name,'71 (2014),'Hellboy': The Seeds of Creation (2004),'Round Midnight (1986),'Salem's Lot (2004),'Til There Was You (1997),'Tis the Season for Love (2015),"'burbs, The (1989)",'night Mother (1986),(500) Days of Summer (2009),*batteries not included (1987),...,Zulu (1964),Zulu (2013),[REC] (2007),[REC]² (2009),[REC]³ 3 Génesis (2012),eXistenZ (1999),xXx (2002),xXx: State of the Union (2005),¡Three Amigos! (1986),À nous la liberté (Freedom for Us) (1931)
expected_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
'71 (2014),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
'Hellboy': The Seeds of Creation (2004),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
'Round Midnight (1986),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
'Salem's Lot (2004),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
'Til There Was You (1997),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
eXistenZ (1999),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
xXx (2002),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.64,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
xXx: State of the Union (2005),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
¡Three Amigos! (1986),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [17]:
ratings_pred = predict_rating(similiar_ratings.values, rating_sim_df.values) # 평점 예측
ratings_pred_df = pd.DataFrame(data=ratings_pred, index=similiar_ratings.index, columns=similiar_ratings.columns)
ratings_pred_df.head()

  ratings_pred = ratings_arr.dot(sim_arr)/np.array([np.abs(sim_arr).sum(axis=1)])


expected_name,'71 (2014),'Hellboy': The Seeds of Creation (2004),'Round Midnight (1986),'Salem's Lot (2004),'Til There Was You (1997),'Tis the Season for Love (2015),"'burbs, The (1989)",'night Mother (1986),(500) Days of Summer (2009),*batteries not included (1987),...,Zulu (1964),Zulu (2013),[REC] (2007),[REC]² (2009),[REC]³ 3 Génesis (2012),eXistenZ (1999),xXx (2002),xXx: State of the Union (2005),¡Three Amigos! (1986),À nous la liberté (Freedom for Us) (1931)
userid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
3f77830c1b96c0ce003e248a069cf61d,,,,,,,,,0.359634,,...,,,,,,,0.311399,,,
73b13da703becd9ab9903197cf246dee,,,,,,,,,0.193712,,...,,,,,,,0.186948,,,
35afcbf0c50ede54759a40183469178c,,,,,,,,,0.288722,,...,,,,,,,0.267646,,,
ea4734eb95ee46f675d5768919930cc2,,,,,,,,,0.972428,,...,,,,,,,1.861385,,,
1c1286bced458836d6c082753a70facc,,,,,,,,,0.366417,,...,,,,,,,0.320818,,,


In [18]:
print("아이템 기반 모든 최근접 이웃 MSE : ", get_mse(ratings_pred, similiar_ratings.values))

아이템 기반 모든 최근접 이웃 MSE :  4.1649630648576625


---

In [19]:
# 조금 더 확실하게 하기 위해서 유사도가 높은 10개의 영화만 가지고 평점 예측
ratings_pred_2 = predict_rating_topsim(similiar_ratings.values, rating_sim_df.values)
ratings_pred_2_df = pd.DataFrame(data=ratings_pred_2, index=similiar_ratings.index, columns=similiar_ratings.columns)
ratings_pred_2_df.head()

  pred[row, col] = sim_arr[col, :][top_n].dot(ratings_arr[row, :][top_n].T)
  pred[row, col] /= np.sum(np.abs(sim_arr[col, :][top_n]))
  pred[row, col] /= np.sum(np.abs(sim_arr[col, :][top_n]))


expected_name,'71 (2014),'Hellboy': The Seeds of Creation (2004),'Round Midnight (1986),'Salem's Lot (2004),'Til There Was You (1997),'Tis the Season for Love (2015),"'burbs, The (1989)",'night Mother (1986),(500) Days of Summer (2009),*batteries not included (1987),...,Zulu (1964),Zulu (2013),[REC] (2007),[REC]² (2009),[REC]³ 3 Génesis (2012),eXistenZ (1999),xXx (2002),xXx: State of the Union (2005),¡Three Amigos! (1986),À nous la liberté (Freedom for Us) (1931)
userid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
3f77830c1b96c0ce003e248a069cf61d,,,,,,,,,0.0,,...,,,,,,,0.0,,,
73b13da703becd9ab9903197cf246dee,,,,,,,,,0.0,,...,,,,,,,0.0,,,
35afcbf0c50ede54759a40183469178c,,,,,,,,,0.0,,...,,,,,,,0.0,,,
ea4734eb95ee46f675d5768919930cc2,,,,,,,,,0.0,,...,,,,,,,2.14982,,,
1c1286bced458836d6c082753a70facc,,,,,,,,,0.0,,...,,,,,,,0.0,,,


In [20]:
print("아이템 기반 모든 최근접 이웃 MSE : ", get_mse(ratings_pred_2, similiar_ratings.values))

아이템 기반 모든 최근접 이웃 MSE :  0.6857309498965477


In [21]:
ratings_pred_2_df

expected_name,'71 (2014),'Hellboy': The Seeds of Creation (2004),'Round Midnight (1986),'Salem's Lot (2004),'Til There Was You (1997),'Tis the Season for Love (2015),"'burbs, The (1989)",'night Mother (1986),(500) Days of Summer (2009),*batteries not included (1987),...,Zulu (1964),Zulu (2013),[REC] (2007),[REC]² (2009),[REC]³ 3 Génesis (2012),eXistenZ (1999),xXx (2002),xXx: State of the Union (2005),¡Three Amigos! (1986),À nous la liberté (Freedom for Us) (1931)
userid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
3f77830c1b96c0ce003e248a069cf61d,,,,,,,,,0.0,,...,,,,,,,0.0,,,
73b13da703becd9ab9903197cf246dee,,,,,,,,,0.0,,...,,,,,,,0.0,,,
35afcbf0c50ede54759a40183469178c,,,,,,,,,0.0,,...,,,,,,,0.0,,,
ea4734eb95ee46f675d5768919930cc2,,,,,,,,,0.0,,...,,,,,,,2.14982,,,
1c1286bced458836d6c082753a70facc,,,,,,,,,0.0,,...,,,,,,,0.0,,,
08419a2991116300be4f440f45f1c71e,,,,,,,,,0.0,,...,,,,,,,0.0,,,
655e9ec8da090890d4301410f5b8dee5,,,,,,,,,2.600069,,...,,,,,,,0.0,,,
29269716223ac43f3921533adce28fce,,,,,,,,,0.0,,...,,,,,,,0.0,,,
15ba0b093a9a409d176282ab8847047e,,,,,,,,,3.500098,,...,,,,,,,2.799826,,,
192ff60bd8841ae6727ed1861e60156b,,,,,,,,,0.0,,...,,,,,,,0.0,,,


In [22]:
test = ratings_pred_2_df.iloc[-1][~ratings_pred_2_df.iloc[0].isna()]
len(test.values[test.values!=0])

128

In [23]:
test = similiar_ratings.iloc[-1][~similiar_ratings.iloc[0].isna()]
len(test.values[test.values!=0])

76

마지막 사용자에 대해서 50개 정도가 예상 점수로 추가되었다.

---

In [26]:
# 영화 추천해주기 
ratings_pred_2_df = ratings_pred_2_df.dropna(axis=1)
ratings_pred_2_df

expected_name,(500) Days of Summer (2009),10 Things I Hate About You (1999),101 Dalmatians (1996),102 Dalmatians (2000),12 Angry Men (1957),12 Years a Slave (2013),127 Hours (2010),13 Assassins (Jûsan-nin no shikaku) (2010),13 Going on 30 (2004),"13th Warrior, The (1999)",...,"Yours, Mine and Ours (2005)",Youth (2015),Zack and Miri Make a Porno (2008),Zathura (2005),Zodiac (2007),Zombieland (2009),Zoolander (2001),Zoom (2006),Zootopia (2016),xXx (2002)
userid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
3f77830c1b96c0ce003e248a069cf61d,0.0,0.0,0.0,0.0,4.793738,3.774425,0.0,0.0,0.0,0.0,...,0.0,4.55,0.0,0.0,3.42558,0.0,0.0,0.0,4.542562,0.0
73b13da703becd9ab9903197cf246dee,0.0,0.0,0.0,0.0,1.655398,0.0,3.845985,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.799741,0.0
35afcbf0c50ede54759a40183469178c,0.0,0.0,4.392041,0.0,0.0,4.184717,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
ea4734eb95ee46f675d5768919930cc2,0.0,0.0,0.234928,0.0,0.488468,0.796946,0.236356,3.45,0.0,0.0,...,0.0,0.0,0.0,0.0,0.539823,3.850095,0.0,0.0,4.542833,2.14982
1c1286bced458836d6c082753a70facc,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.339617,0.0,0.0,0.0,0.377491,0.0
08419a2991116300be4f440f45f1c71e,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
655e9ec8da090890d4301410f5b8dee5,2.600069,0.0,0.263431,0.0,0.296064,0.342516,0.139384,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,4.203524,2.899603,2.70202,0.0,0.952559,0.0
29269716223ac43f3921533adce28fce,0.0,0.0,3.046777,0.0,0.625782,0.293399,0.0,0.0,3.65,0.0,...,3.65,0.0,0.0,3.65,0.0,4.20059,0.0,3.65,0.234225,0.0
15ba0b093a9a409d176282ab8847047e,3.500098,3.25,0.813761,3.25,4.147193,2.984927,3.161438,0.0,0.0,3.25,...,0.0,0.0,3.25,0.0,4.113627,3.498095,0.303494,0.0,3.763822,2.799826
192ff60bd8841ae6727ed1861e60156b,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.237178,0.0


In [87]:
# np.std(ratings_pred_2_df.iloc[:,2][ratings_pred_2_df.iloc[:,2] >= 1])

0.6726319101377147

In [91]:
# ratings_mean = np.mean(ratings_pred_2_df[ratings_pred_2_df.iloc[:,:] >= 1]).sort_values()[::-1]

In [92]:
ratings_mean = np.mean(ratings_pred_2_df).sort_values()[::-1]
ratings_mean

expected_name
Matrix, The (1999)         3.614722
Toy Story (1995)           3.598449
Inception (2010)           3.497116
Interstellar (2014)        3.430345
Dark Knight, The (2008)    3.411243
                             ...   
Smokin' Aces (2006)        0.305000
Pay It Forward (2000)      0.305000
Secret Window (2004)       0.305000
Mist, The (2007)           0.300551
Zoolander (2001)           0.300551
Length: 1836, dtype: float64

In [93]:
print("추천 영화 10개 : \n")
print(ratings_mean[:10])

추천 영화 10개 : 

expected_name
Matrix, The (1999)                                       3.614722
Toy Story (1995)                                         3.598449
Inception (2010)                                         3.497116
Interstellar (2014)                                      3.430345
Dark Knight, The (2008)                                  3.411243
Forrest Gump (1994)                                      3.272809
Lord of the Rings: The Return of the King, The (2003)    3.253468
American History X (1998)                                3.243913
Memento (2000)                                           3.186190
Shawshank Redemption, The (1994)                         3.132047
dtype: float64
