In [1]:
# 콘텐츠 기반 필터링

# 협업 필터링
# - 최근접 이웃 기반 협업필터링 - 사용자 평점 매트릭스
#   -- 사용자 기반
#   -- 아이템 기반
# - 잠재요인 협업 필터링 - 행렬 변환

In [2]:
# 사용자 평점
# 명시적 : 영화평점, 상품 평점, 좋아요, 리뷰
# 암시적 : 조회, 구매, 방문수, 머문 시간

In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [4]:
movies = pd.read_csv('movie.csv')
movies.head()

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy


In [5]:
movies.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 27278 entries, 0 to 27277
Data columns (total 3 columns):
 #   Column   Non-Null Count  Dtype 
---  ------   --------------  ----- 
 0   movieId  27278 non-null  int64 
 1   title    27278 non-null  object
 2   genres   27278 non-null  object
dtypes: int64(1), object(2)
memory usage: 639.5+ KB


In [6]:
ratings = pd.read_csv('rating.csv')
ratings.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,2,3.5,2005-04-02 23:53:47
1,1,29,3.5,2005-04-02 23:31:16
2,1,32,3.5,2005-04-02 23:33:39
3,1,47,3.5,2005-04-02 23:32:07
4,1,50,3.5,2005-04-02 23:29:40


In [7]:
ratings.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20000263 entries, 0 to 20000262
Data columns (total 4 columns):
 #   Column     Dtype  
---  ------     -----  
 0   userId     int64  
 1   movieId    int64  
 2   rating     float64
 3   timestamp  object 
dtypes: float64(1), int64(2), object(1)
memory usage: 610.4+ MB


###  사용자 기반

In [8]:
merge_df = pd.merge(movies, ratings, on='movieId')
merge_df

Unnamed: 0,movieId,title,genres,userId,rating,timestamp
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,3,4.0,1999-12-11 13:36:47
1,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,6,5.0,1997-03-13 17:50:52
2,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,8,4.0,1996-06-05 13:37:51
3,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,10,4.0,1999-11-25 02:44:47
4,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,11,4.5,2009-01-02 01:13:41
...,...,...,...,...,...,...
20000258,131254,Kein Bund für's Leben (2007),Comedy,79570,4.0,2015-03-30 19:32:59
20000259,131256,"Feuer, Eis & Dosenbier (2002)",Comedy,79570,4.0,2015-03-30 19:48:08
20000260,131258,The Pirates (2014),Adventure,28906,2.5,2015-03-30 19:56:32
20000261,131260,Rentun Ruusu (2001),(no genres listed),65409,3.0,2015-03-30 19:57:46


In [9]:
merge_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 20000263 entries, 0 to 20000262
Data columns (total 6 columns):
 #   Column     Dtype  
---  ------     -----  
 0   movieId    int64  
 1   title      object 
 2   genres     object 
 3   userId     int64  
 4   rating     float64
 5   timestamp  object 
dtypes: float64(1), int64(2), object(3)
memory usage: 1.0+ GB


In [10]:
ratings = merge_df[['movieId', 'userId', 'rating']]

ratings['movieId'] = ratings['movieId'].astype('int16')
ratings['userId'] = ratings['userId'].astype('int16')
ratings['rating'] = ratings['rating'].astype('int16')


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ratings['movieId'] = ratings['movieId'].astype('int16')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ratings['userId'] = ratings['userId'].astype('int16')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ratings['rating'] = ratings['rating'].astype('int16')


In [11]:
ratings_matrix = ratings.pivot_table(index='userId', columns='movieId', values='rating')

In [12]:
import pickle

In [13]:
with open('ratings_matrix.pickle', 'rb') as f:
    ratings_matrix = pickle.load(f)

In [15]:
# 사용자 평점 매트릭스 - 사용자 기반 추천 시스템
# sparse matrix - 사용자가 직접 본 영화의 수는 제한적이다

ratings_matrix

title,'Round Midnight (1986),'Til There Was You (1997),"'burbs, The (1989)",'night Mother (1986),*batteries not included (1987),...And Justice for All (1979),1-900 (06) (1994),10 (1979),10 Things I Hate About You (1999),100 Girls (2000),...,Zero de conduite (Zero for Conduct) (Zéro de conduite: Jeunes diables au collège) (1933),Zeus and Roxanne (1997),Ziggy Stardust and the Spiders from Mars (1973),Zoolander (2001),"Zorro, the Gay Blade (1981)",Zulu (1964),Zus & Zo (2001),eXistenZ (1999),xXx (2002),¡Three Amigos! (1986)
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,,,,,,,,,,,...,,,,,,,,,,
2,,,,,,,,,,,...,,,,,,,,,,
3,,,,,,,,,,,...,,,,,,,,,,
4,,,,,,,,,,,...,,,,,,,,,,
5,,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
702,,,3.0,,,,,2.0,,,...,,,,2.0,,,,,,
703,,,,,,,,,,,...,,,,,,,,,,
704,,,,,,,,,,,...,,,,,,,,,,
705,,,,,,,,,,,...,,,,,,,,,,


In [16]:
ratings_matrix.fillna(0, inplace=True)
ratings_matrix.head()

title,'Round Midnight (1986),'Til There Was You (1997),"'burbs, The (1989)",'night Mother (1986),*batteries not included (1987),...And Justice for All (1979),1-900 (06) (1994),10 (1979),10 Things I Hate About You (1999),100 Girls (2000),...,Zero de conduite (Zero for Conduct) (Zéro de conduite: Jeunes diables au collège) (1933),Zeus and Roxanne (1997),Ziggy Stardust and the Spiders from Mars (1973),Zoolander (2001),"Zorro, the Gay Blade (1981)",Zulu (1964),Zus & Zo (2001),eXistenZ (1999),xXx (2002),¡Three Amigos! (1986)
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [25]:
ratings_matrix.shape

(706, 6033)

In [21]:
# 고객님과 유사한 사용자가 다음의 콘텐츠도 구매하였습니다

In [17]:
from sklearn.metrics.pairwise import cosine_similarity
user_sim = cosine_similarity(ratings_matrix)    # row간에 cosine 유사도를 계산
user_sim

array([[1.        , 0.10325045, 0.26204582, ..., 0.05736163, 0.09347382,
        0.21888057],
       [0.10325045, 1.        , 0.18012391, ..., 0.01682863, 0.05519785,
        0.04520944],
       [0.26204582, 0.18012391, 1.        , ..., 0.05965044, 0.10166481,
        0.17261861],
       ...,
       [0.05736163, 0.01682863, 0.05965044, ..., 1.        , 0.53484607,
        0.0294181 ],
       [0.09347382, 0.05519785, 0.10166481, ..., 0.53484607, 1.        ,
        0.03368423],
       [0.21888057, 0.04520944, 0.17261861, ..., 0.0294181 , 0.03368423,
        1.        ]])

In [28]:
user_sim.shape

(706, 706)

In [31]:
user_sim_sort = user_sim.argsort(axis=1)[:, ::-1]
user_sim_sort

array([[  0, 146, 387, ..., 512, 433, 209],
       [  1, 664,  77, ..., 531, 304, 191],
       [  2, 474, 133, ..., 512, 383, 463],
       ...,
       [703,  25, 704, ...,  91, 322, 352],
       [704, 514,  25, ..., 512, 526, 112],
       [705, 573, 669, ..., 124, 286, 457]], dtype=int64)

In [35]:
user_sim_sort[10][1:11]         # 유사도가 높은 사용자 상위 10

array([421, 387, 658, 491, 231, 293, 366, 484, 570, 270], dtype=int64)

In [37]:
# 유사도가 높은 사용자 상위 10명의 영화 평점 데이터

df = ratings_matrix.loc[user_sim_sort[10][1:11]]
df

title,'Round Midnight (1986),'Til There Was You (1997),"'burbs, The (1989)",'night Mother (1986),*batteries not included (1987),...And Justice for All (1979),1-900 (06) (1994),10 (1979),10 Things I Hate About You (1999),100 Girls (2000),...,Zero de conduite (Zero for Conduct) (Zéro de conduite: Jeunes diables au collège) (1933),Zeus and Roxanne (1997),Ziggy Stardust and the Spiders from Mars (1973),Zoolander (2001),"Zorro, the Gay Blade (1981)",Zulu (1964),Zus & Zo (2001),eXistenZ (1999),xXx (2002),¡Three Amigos! (1986)
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
421,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0
387,0.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,3.5,0.0,0.0,0.0,0.0,0.0,0.0
658,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
491,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
231,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
293,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
366,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
484,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
570,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
270,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [39]:
# 10번 사용자와 유사도가 높은 사용자들의 영화평점 순위

df.sum().sort_values(ascending=False)[:5]

title
Silence of the Lambs, The (1991)    23.0
Schindler's List (1993)             22.0
Babe (1995)                         21.5
Usual Suspects, The (1995)          21.5
Crimson Tide (1995)                 21.0
dtype: float64

In [60]:
def seen_movies(user_id):
    movie_list = ratings_matrix.iloc[user_id]
    seen_movies = movie_list[ratings_matrix.iloc[user_id] > 0]
    return seen_movies.index



In [64]:
seen_movies(5)

Index(['Birdcage, The (1996)', 'Down Periscope (1996)', 'Dragonheart (1996)',
       'Executive Decision (1996)', 'Grumpier Old Men (1995)',
       'Independence Day (a.k.a. ID4) (1996)', 'Mighty Aphrodite (1995)',
       'Mission: Impossible (1996)', 'Mr. Holland's Opus (1995)',
       'Multiplicity (1996)', 'Nutty Professor, The (1996)',
       'Phenomenon (1996)', 'Primal Fear (1996)', 'Rock, The (1996)',
       'Sabrina (1995)', 'Sense and Sensibility (1995)', 'Spy Hard (1996)',
       'Star Wars: Episode IV - A New Hope (1977)', 'Striptease (1996)',
       'Toy Story (1995)', 'Truth About Cats & Dogs, The (1996)',
       'Twister (1996)', 'Up Close and Personal (1996)',
       'Willy Wonka & the Chocolate Factory (1971)'],
      dtype='object', name='title')

In [65]:
def sim_user_based_recomm(user_sim, ratings_matrix, user_id, n_top = 10):
    
    seen_movie_list = seen_movies(user_id)
    
    user_sim_sort = user_sim[user_id].argsort()[::-1][:n_top * 5]
    df = ratings_matrix.loc[user_sim_sort]
    
    unseen_movie_list = set(df.index).difference(set(seen_movie_list))
    
    df = df.loc[unseen_movie_list]
    
    return df.sum().sort_values(ascending=False)

In [68]:
sim_user_based_recomm(user_sim, ratings_matrix, 30, n_top = 10)

title
Forrest Gump (1994)                                  91.5
Star Wars: Episode VI - Return of the Jedi (1983)    85.5
Usual Suspects, The (1995)                           85.0
Star Wars: Episode IV - A New Hope (1977)            84.5
Matrix, The (1999)                                   84.0
                                                     ... 
Hear My Song (1991)                                   0.0
Head-On (Gegen die Wand) (2004)                       0.0
Head of State (2003)                                  0.0
Head in the Clouds (2004)                             0.0
¡Three Amigos! (1986)                                 0.0
Length: 6033, dtype: float64

### 아이템 기반 이웃 협업 필터링

In [69]:
ratings_matrix

title,'Round Midnight (1986),'Til There Was You (1997),"'burbs, The (1989)",'night Mother (1986),*batteries not included (1987),...And Justice for All (1979),1-900 (06) (1994),10 (1979),10 Things I Hate About You (1999),100 Girls (2000),...,Zero de conduite (Zero for Conduct) (Zéro de conduite: Jeunes diables au collège) (1933),Zeus and Roxanne (1997),Ziggy Stardust and the Spiders from Mars (1973),Zoolander (2001),"Zorro, the Gay Blade (1981)",Zulu (1964),Zus & Zo (2001),eXistenZ (1999),xXx (2002),¡Three Amigos! (1986)
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
702,0.0,0.0,3.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,...,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0
703,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
704,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
705,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [70]:
# 사용자 평점 기반 영화들간의 유사도 계산

In [72]:
item_sim = cosine_similarity(ratings_matrix.T, ratings_matrix.T)
item_sim

array([[1.        , 0.        , 0.15260952, ..., 0.        , 0.        ,
        0.        ],
       [0.        , 1.        , 0.1665596 , ..., 0.24029835, 0.11793055,
        0.        ],
       [0.15260952, 0.1665596 , 1.        , ..., 0.08100095, 0.        ,
        0.15974555],
       ...,
       [0.        , 0.24029835, 0.08100095, ..., 1.        , 0.11634214,
        0.06914028],
       [0.        , 0.11793055, 0.        , ..., 0.11634214, 1.        ,
        0.03116184],
       [0.        , 0.        , 0.15974555, ..., 0.06914028, 0.03116184,
        1.        ]])

In [74]:
item_sim.shape

(6033, 6033)

In [75]:
item_sim_df = pd.DataFrame(item_sim, index=ratings_matrix.columns, columns=ratings_matrix.columns)
item_sim_df

title,'Round Midnight (1986),'Til There Was You (1997),"'burbs, The (1989)",'night Mother (1986),*batteries not included (1987),...And Justice for All (1979),1-900 (06) (1994),10 (1979),10 Things I Hate About You (1999),100 Girls (2000),...,Zero de conduite (Zero for Conduct) (Zéro de conduite: Jeunes diables au collège) (1933),Zeus and Roxanne (1997),Ziggy Stardust and the Spiders from Mars (1973),Zoolander (2001),"Zorro, the Gay Blade (1981)",Zulu (1964),Zus & Zo (2001),eXistenZ (1999),xXx (2002),¡Three Amigos! (1986)
title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
'Round Midnight (1986),1.000000,0.000000,0.152610,0.492366,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.0,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
'Til There Was You (1997),0.000000,1.000000,0.166560,0.000000,0.195713,0.272587,0.000000,0.000000,0.084340,0.000000,...,0.0,0.727607,0.0,0.000000,0.727607,0.260432,0.000000,0.240298,0.117931,0.000000
"'burbs, The (1989)",0.152610,0.166560,1.000000,0.075140,0.061574,0.271571,0.000000,0.079698,0.160866,0.000000,...,0.0,0.228914,0.0,0.084723,0.228914,0.081935,0.000000,0.081001,0.000000,0.159746
'night Mother (1986),0.492366,0.000000,0.075140,1.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.0,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
*batteries not included (1987),0.000000,0.195713,0.061574,0.000000,1.000000,0.000000,0.000000,0.000000,0.358280,0.388821,...,0.0,0.000000,0.0,0.061685,0.000000,0.206307,0.000000,0.193077,0.098092,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Zulu (1964),0.000000,0.260432,0.081935,0.000000,0.206307,0.134093,0.000000,0.000000,0.090758,0.000000,...,0.0,0.357930,0.0,0.000000,0.357930,1.000000,0.000000,0.084435,0.000000,0.000000
Zus & Zo (2001),0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,1.000000,0.000000,0.000000,0.000000,...,0.0,0.000000,0.0,0.000000,0.000000,0.000000,1.000000,0.212309,0.000000,0.000000
eXistenZ (1999),0.000000,0.240298,0.081001,0.000000,0.193077,0.088376,0.212309,0.000000,0.170217,0.318265,...,0.0,0.235899,0.0,0.071950,0.235899,0.084435,0.212309,1.000000,0.116342,0.069140
xXx (2002),0.000000,0.117931,0.000000,0.000000,0.098092,0.000000,0.000000,0.000000,0.154745,0.062478,...,0.0,0.000000,0.0,0.264831,0.000000,0.000000,0.000000,0.116342,1.000000,0.031162


In [79]:
# 사용자 평점 기준으로 유사한 영화를 추천

item_sim_df['Godfather, The (1972)'].sort_values(ascending=False)[:10]

title
Godfather, The (1972)                                                             1.000000
Godfather: Part II, The (1974)                                                    0.790856
Raiders of the Lost Ark (Indiana Jones and the Raiders of the Lost Ark) (1981)    0.594407
Goodfellas (1990)                                                                 0.579041
One Flew Over the Cuckoo's Nest (1975)                                            0.572015
Star Wars: Episode V - The Empire Strikes Back (1980)                             0.553038
American Beauty (1999)                                                            0.541356
Fargo (1996)                                                                      0.536309
Jaws (1975)                                                                       0.534448
Star Wars: Episode IV - A New Hope (1977)                                         0.532203
Name: Godfather, The (1972), dtype: float64

In [80]:
item_sim_df["Schindler's List (1993)"].sort_values(ascending=False)[1:10]

title
Shawshank Redemption, The (1994)    0.621876
Silence of the Lambs, The (1991)    0.579509
Forrest Gump (1994)                 0.569361
Pulp Fiction (1994)                 0.547534
Jurassic Park (1993)                0.535388
Fugitive, The (1993)                0.532249
Braveheart (1995)                   0.519951
American Beauty (1999)              0.516586
Usual Suspects, The (1995)          0.507931
Name: Schindler's List (1993), dtype: float64

In [81]:
# 영화의 유사도와 사용자 평점을 함께 고려한 영화 추천

In [86]:
# 영화의 유사도 점수에 해당 영화의 평점을 곱한 값
# 유사도가 높고, 평점도 높은 영화가 높은 값을 가진다

item_co_pro = item_sim_df * (ratings_matrix.mean())
item_co_pro.head()

title,'Round Midnight (1986),'Til There Was You (1997),"'burbs, The (1989)",'night Mother (1986),*batteries not included (1987),...And Justice for All (1979),1-900 (06) (1994),10 (1979),10 Things I Hate About You (1999),100 Girls (2000),...,Zero de conduite (Zero for Conduct) (Zéro de conduite: Jeunes diables au collège) (1933),Zeus and Roxanne (1997),Ziggy Stardust and the Spiders from Mars (1973),Zoolander (2001),"Zorro, the Gay Blade (1981)",Zulu (1964),Zus & Zo (2001),eXistenZ (1999),xXx (2002),¡Three Amigos! (1986)
title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
'Round Midnight (1986),0.005666,0.0,0.010484,0.009764,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
'Til There Was You (1997),0.0,0.009915,0.011442,0.0,0.005683,0.010811,0.0,0.0,0.01786,0.0,...,0.0,0.003092,0.0,0.0,0.002061,0.006087,0.0,0.041014,0.010774,0.0
"'burbs, The (1989)",0.000865,0.001651,0.068697,0.00149,0.001788,0.010771,0.0,0.001242,0.034064,0.0,...,0.0,0.000973,0.0,0.01206,0.000648,0.001915,0.0,0.013825,0.0,0.01878
'night Mother (1986),0.00279,0.0,0.005162,0.01983,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
*batteries not included (1987),0.0,0.001941,0.00423,0.0,0.029037,0.0,0.0,0.0,0.075868,0.006058,...,0.0,0.0,0.0,0.008781,0.0,0.004822,0.0,0.032954,0.008962,0.0


In [87]:
item_co_pro['Godfather, The (1972)'].sort_values(ascending=False)[:10]

title
Godfather, The (1972)                                                             1.226629
Godfather: Part II, The (1974)                                                    0.970087
Raiders of the Lost Ark (Indiana Jones and the Raiders of the Lost Ark) (1981)    0.729117
Goodfellas (1990)                                                                 0.710268
One Flew Over the Cuckoo's Nest (1975)                                            0.701651
Star Wars: Episode V - The Empire Strikes Back (1980)                             0.678372
American Beauty (1999)                                                            0.664043
Fargo (1996)                                                                      0.657852
Jaws (1975)                                                                       0.655570
Star Wars: Episode IV - A New Hope (1977)                                         0.652815
Name: Godfather, The (1972), dtype: float64

### 개인화된 아이템 기반 협업 필터링

In [89]:
ratings_matrix.dot(item_sim_df)

# user의 평점점수와 영화의 유사도 점수를 곱하고 더해서 나온 값
# user가 높은 평점을 준 영화에 대해서 특정 영화가 높은 유사도를 가지면 높은 점수가 부여
# 큰 값(유저의 특정 영화에 대한 평점)과 큰 값(해당 영화가 높은 평점의 영화와 유사도)이 곱해져서 큰 값이 나온다

title,'Round Midnight (1986),'Til There Was You (1997),"'burbs, The (1989)",'night Mother (1986),*batteries not included (1987),...And Justice for All (1979),1-900 (06) (1994),10 (1979),10 Things I Hate About You (1999),100 Girls (2000),...,Zero de conduite (Zero for Conduct) (Zéro de conduite: Jeunes diables au collège) (1933),Zeus and Roxanne (1997),Ziggy Stardust and the Spiders from Mars (1973),Zoolander (2001),"Zorro, the Gay Blade (1981)",Zulu (1964),Zus & Zo (2001),eXistenZ (1999),xXx (2002),¡Three Amigos! (1986)
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,0.000000,61.627322,82.917222,12.003437,67.492651,65.713528,20.708418,21.471593,102.202892,34.035989,...,11.128175,60.913769,53.124646,98.474456,60.913769,51.361531,20.708418,112.886068,86.992591,85.619318
2,0.695790,32.634416,32.262569,3.872422,22.019339,27.561104,7.002709,7.042433,34.509237,11.215892,...,2.480639,33.784732,11.823643,28.282579,33.784732,18.647497,7.002709,42.340220,27.641211,30.713436
3,1.961977,92.841600,114.435961,23.063319,81.691875,87.664629,20.991625,31.371245,128.348472,35.695748,...,12.791102,88.103594,45.257612,106.188645,88.103594,62.622795,20.991625,143.204075,93.214240,129.122996
4,0.000000,9.232312,12.815158,0.044005,10.634545,10.223605,1.593772,5.065291,16.415071,4.516712,...,2.041163,9.164781,4.124406,15.958069,9.164781,7.268055,1.593772,14.122179,14.652875,12.151721
5,0.000000,25.872919,37.888849,6.723122,31.859171,26.677719,7.434255,14.600556,50.090643,12.876935,...,7.204042,23.272222,14.680662,42.562613,23.272222,20.757025,7.434255,40.441877,37.023753,37.399801
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
702,3.545639,204.600227,359.581626,41.640627,156.744140,197.682481,68.410042,203.879359,288.898841,59.380598,...,37.305935,207.690948,115.630927,323.901236,207.690948,147.575254,68.410042,301.376523,230.350370,283.665584
703,0.000000,16.365276,21.166549,7.063501,13.365672,16.324583,8.442380,6.707890,23.358950,4.741128,...,5.408261,16.669556,10.123941,22.271674,16.669556,12.236958,8.442380,26.529963,18.535402,24.307785
704,0.000000,17.828304,22.826097,1.295353,19.026225,19.181357,5.262921,7.171617,31.616520,8.284441,...,1.714525,16.891008,7.856742,27.125880,16.891008,13.219116,5.262921,24.711116,23.282558,22.197480
705,0.000000,25.307519,32.201940,3.670441,27.511919,28.265842,7.462521,11.896235,41.724355,13.326578,...,3.684788,23.831050,10.063806,38.528716,23.831050,18.602966,7.462521,37.815199,34.244895,31.561732
