# 추천 시스템

#### 추천시스템은 머신러닝에서 크게 콘텐츠 기반 필터링 , 협업 필터링 방식으로 나눌 수 있다.

#### 콘텐츠기반 추천시스템
- 코사인 유사도 기반

#### 협업필터링(CF: Collaborative filtering) 추천시스템
- 아이템 기반 협업 필터링
- 행렬 분해 기반 협업 필터링
ex) 넥플릭스



# Part1. 콘텐츠 기반 추천시스템
## 코사인 유사도 기반

# Data & Library

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from scipy import stats
from ast import literal_eval
from sklearn.feature_extraction.text import TfidfVectorizer , CountVectorizer
from sklearn.metrics.pairwise import linear_kernel, cosine_similarity

from nltk.stem.snowball import SnowballStemmer
from nltk.stem.wordnet import WordNetLemmatizer
from nltk.corpus import wordnet

import warnings
warnings.filterwarnings(action='ignore')

In [3]:
df=pd.read_csv('the-movies-dataset/movies_metadata.csv')

In [4]:
data=df.copy()

In [5]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 45466 entries, 0 to 45465
Data columns (total 24 columns):
adult                    45466 non-null object
belongs_to_collection    4494 non-null object
budget                   45466 non-null object
genres                   45466 non-null object
homepage                 7782 non-null object
id                       45466 non-null object
imdb_id                  45449 non-null object
original_language        45455 non-null object
original_title           45466 non-null object
overview                 44512 non-null object
popularity               45461 non-null object
poster_path              45080 non-null object
production_companies     45463 non-null object
production_countries     45463 non-null object
release_date             45379 non-null object
revenue                  45460 non-null float64
runtime                  45203 non-null float64
spoken_languages         45460 non-null object
status                   45379 non-null objec

In [6]:
data.isnull()

Unnamed: 0,adult,belongs_to_collection,budget,genres,homepage,id,imdb_id,original_language,original_title,overview,...,release_date,revenue,runtime,spoken_languages,status,tagline,title,video,vote_average,vote_count
0,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,True,False,False,False,False
1,False,True,False,False,True,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
2,False,False,False,False,True,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
3,False,True,False,False,True,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
4,False,False,False,False,True,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
45461,False,True,False,False,False,False,False,False,False,False,...,True,False,False,False,False,False,False,False,False,False
45462,False,True,False,False,True,False,False,False,False,False,...,False,False,False,False,False,True,False,False,False,False
45463,False,True,False,False,True,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
45464,False,True,False,False,True,False,False,False,False,False,...,False,False,False,False,False,True,False,False,False,False


In [7]:
data.columns

Index(['adult', 'belongs_to_collection', 'budget', 'genres', 'homepage', 'id',
       'imdb_id', 'original_language', 'original_title', 'overview',
       'popularity', 'poster_path', 'production_companies',
       'production_countries', 'release_date', 'revenue', 'runtime',
       'spoken_languages', 'status', 'tagline', 'title', 'video',
       'vote_average', 'vote_count'],
      dtype='object')

## Data Cleansing / Preprocessing 

In [6]:
data=data[['id','genres','vote_average','vote_count', 'popularity', 'title','overview']] 

# 일반적으로 vote수가 많을 수록 평점이 떨어질 수 도 있고 높아 질 수도 있는 경우를 위해 불공정 처리 공식을 도입
#Weight rating(WR) = (v / (v+m)) * R + (m / v+m) * C
#r: 개별 영화 평점
#v: 개별 영화에 평점을 투표한 횟수
#m: 최소 투표수 (정하기 나름)
#c: 전체 영화에 대한 평균 평점.

In [7]:
#m=500위이라 가정후, 500위 이내의 data로 분석 진행

m=data['vote_count'].quantile(0.9)
data=data.loc[data['vote_count']>m]

In [8]:
data

Unnamed: 0,id,genres,vote_average,vote_count,popularity,title,overview
0,862,"[{'id': 16, 'name': 'Animation'}, {'id': 35, '...",7.7,5415.0,21.9469,Toy Story,"Led by Woody, Andy's toys live happily in his ..."
1,8844,"[{'id': 12, 'name': 'Adventure'}, {'id': 14, '...",6.9,2413.0,17.0155,Jumanji,When siblings Judy and Peter discover an encha...
4,11862,"[{'id': 35, 'name': 'Comedy'}]",5.7,173.0,8.38752,Father of the Bride Part II,Just when George Banks has recovered from his ...
5,949,"[{'id': 28, 'name': 'Action'}, {'id': 80, 'nam...",7.7,1886.0,17.9249,Heat,"Obsessive master thief, Neil McCauley leads a ..."
8,9091,"[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...",5.5,174.0,5.23158,Sudden Death,International action superstar Jean Claude Van...
...,...,...,...,...,...,...,...
45174,339692,"[{'id': 18, 'name': 'Drama'}, {'id': 80, 'name...",6.9,324.0,15.786854,Shot Caller,A newly-released prison gangster is forced by ...
45204,417870,"[{'id': 35, 'name': 'Comedy'}]",7.1,393.0,37.964872,Girls Trip,Four girlfriends take a trip to New Orleans fo...
45258,417320,"[{'id': 10770, 'name': 'TV Movie'}, {'id': 107...",7.5,171.0,15.842073,Descendants 2,When the pressure to be royal becomes too much...
45265,265189,"[{'id': 35, 'name': 'Comedy'}, {'id': 18, 'nam...",6.8,255.0,12.165685,Force Majeure,"While holidaying in the French Alps, a Swedish..."


In [9]:
c=data['vote_average'].mean()

In [10]:
print(c)

6.474371970030853


In [11]:
print(m)

160.0


In [12]:
def weight_rating(x, m=m, c=c):
    v = x['vote_count']
    R = x['vote_average']
    
    return (v / (v+m) *R) + (m / (m+v) *c)

In [13]:
data['score'] = data.apply(weight_rating, axis=1)

In [14]:
data

Unnamed: 0,id,genres,vote_average,vote_count,popularity,title,overview,score
0,862,"[{'id': 16, 'name': 'Animation'}, {'id': 35, '...",7.7,5415.0,21.9469,Toy Story,"Led by Woody, Andy's toys live happily in his ...",7.664825
1,8844,"[{'id': 12, 'name': 'Adventure'}, {'id': 14, '...",6.9,2413.0,17.0155,Jumanji,When siblings Judy and Peter discover an encha...,6.873533
4,11862,"[{'id': 35, 'name': 'Comedy'}]",5.7,173.0,8.38752,Father of the Bride Part II,Just when George Banks has recovered from his ...,6.072071
5,949,"[{'id': 28, 'name': 'Action'}, {'id': 80, 'nam...",7.7,1886.0,17.9249,Heat,"Obsessive master thief, Neil McCauley leads a ...",7.604154
8,9091,"[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...",5.5,174.0,5.23158,Sudden Death,International action superstar Jean Claude Van...,5.966765
...,...,...,...,...,...,...,...,...
45174,339692,"[{'id': 18, 'name': 'Drama'}, {'id': 80, 'name...",6.9,324.0,15.786854,Shot Caller,A newly-released prison gangster is forced by ...,6.759297
45204,417870,"[{'id': 35, 'name': 'Comedy'}]",7.1,393.0,37.964872,Girls Trip,Four girlfriends take a trip to New Orleans fo...,6.918986
45258,417320,"[{'id': 10770, 'name': 'TV Movie'}, {'id': 107...",7.5,171.0,15.842073,Descendants 2,When the pressure to be royal becomes too much...,7.004228
45265,265189,"[{'id': 35, 'name': 'Comedy'}, {'id': 18, 'nam...",6.8,255.0,12.165685,Force Majeure,"While holidaying in the French Alps, a Swedish...",6.674457


In [22]:
from ast import literal_eval 

data['genres']=data['genres'].apply(literal_eval)

In [23]:
data['genres'][0] #name안에 있는 단어만 뽑아와야함

[{'id': 16, 'name': 'Animation'},
 {'id': 35, 'name': 'Comedy'},
 {'id': 10751, 'name': 'Family'}]

In [24]:
data['genres']=data['genres'].apply(lambda x : [d['name'] for d in x]).apply(lambda x : " ".join(x))

In [48]:
#인덱스 리셋 필요 **
data=data.reset_index()

In [50]:
data=data.drop('index', axis=1, inplace=False)

In [51]:
# 현재 장르는 전처리 후 한글자 씩 구분되어 있습니다. 
# 이 문자열을 숫자로 바꾸어 백터화 시킵니다.

from sklearn.feature_extraction.text import CountVectorizer

count_vector = CountVectorizer(ngram_range=(1,3))

In [52]:
c_vector_genres= count_vector.fit_transform(data['genres'])
c_vector_genres.shape

(4538, 1049)

In [53]:
#코사인 유사도 기반
#영화제목을 입력하면 그와 가장 비슷한 영화 추천
#argsort : 작은 순서대로 인덱스를 반환

from sklearn.metrics.pairwise import cosine_similarity
gerne_csim=cosine_similarity(c_vector_genres, c_vector_genres).argsort()[:,:]

In [54]:
gerne_csim.shape

(4538, 4538)

In [55]:
def recommend_movie(df, movie_title, top=30):
    #검색하려는 target영화 뽑기
    target_movie = df[df['title'] == movie_title].index.values
    
    #코사인 유사도 중 비슷한 코사인 유사도를 가진 정보 뽑기.
    sim_index = gerne_csim[target_movie, :top]
    
    #target(자기자신)을 제외한 top 30 movies 뽑기
    sim_index = sim_index[sim_index != target_movie]
    
    #정렬
    result = df.iloc[sim_index].sort_values('score',ascending=False)
    return result

In [56]:
recommend_movie(data, movie_title = 'The Dark Knight Rises')

Unnamed: 0,id,genres,vote_average,vote_count,popularity,title,overview,score
0,862,Animation Comedy Family,7.7,5415.0,21.9469,Toy Story,"Led by Woody, Andy's toys live happily in his ...",7.664825
2135,15097,Comedy,7.7,193.0,7.9854,Fear City: A Family-Style Comedy,A second-class horror movie has to be shown at...,7.144475
2094,2295,Comedy,6.9,407.0,8.46673,Clerks II,A calamity at Dante and Randall's shops sends ...,6.779893
2098,1781,Documentary,6.7,257.0,6.68492,An Inconvenient Truth,A documentary on Al Gore's campaign to make th...,6.613428
2086,920,Animation Adventure Comedy Family,6.6,3991.0,18.9079,Cars,"Lightning McQueen, a hotshot rookie race car d...",6.595158
2064,12763,Music,6.6,171.0,5.36446,Take the Lead,A former professional dancer volunteers to tea...,6.539273
2142,496,Comedy,6.5,1617.0,8.27834,Borat: Cultural Learnings of America for Make ...,Kazakh journalist Borat Sagdiyev travels to Am...,6.497692
2109,512,Comedy Mystery,6.4,435.0,8.16339,Scoop,An American journalism student in London scoop...,6.419999
2069,9035,Comedy Horror Science Fiction,6.3,324.0,10.375,Slither,"A small town is taken over by an alien plague,...",6.357644
2133,7512,Adventure Comedy Science Fiction,6.3,681.0,12.1308,Idiocracy,To test its top-secret Human Hibernation Proje...,6.333174


# Part2. 협업 필터링 추천 시스템

## 아이템 기반 협업 필터링


In [58]:
rating = pd.read_csv('C:/Users/Kim/Downloads/movielens-small/ratings.csv')
movie = pd.read_csv('C:/Users/Kim/Downloads/movielens-small/movies.csv')

In [59]:
rating.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,31,2.5,1260759144
1,1,1029,3.0,1260759179
2,1,1061,3.0,1260759182
3,1,1129,2.0,1260759185
4,1,1172,4.0,1260759205


In [60]:
movie.head()

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy


## 정규표현식

In [98]:
import re

genre_list=list(movie['genres'])
clear_list=[]

for text in genre_list:
    pattern=re.compile('[|]') #(^[A-Za-z]+)
    text=re.sub(pattern=pattern, repl=' ', string=text)
    clear_list.append(text)
    
movie['clear_genres']=clear_list

In [99]:
movie

Unnamed: 0,movieId,title,genres,clear_genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,Adventure Animation Children Comedy Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy,Adventure Children Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance,Comedy Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance,Comedy Drama Romance
4,5,Father of the Bride Part II (1995),Comedy,Comedy
...,...,...,...,...
9120,162672,Mohenjo Daro (2016),Adventure|Drama|Romance,Adventure Drama Romance
9121,163056,Shin Godzilla (2016),Action|Adventure|Fantasy|Sci-Fi,Action Adventure Fantasy Sci-Fi
9122,163949,The Beatles: Eight Days a Week - The Touring Y...,Documentary,Documentary
9123,164977,The Gay Desperado (1936),Comedy,Comedy


In [101]:
rating = rating.drop('timestamp', axis=1, inplace=False)

In [102]:
user_movie_rating = pd.merge(rating, movie, on='movieId') #merge : on (공통키 기준)

In [103]:
user_movie_rating

Unnamed: 0,userId,movieId,rating,title,genres,clear_genres
0,1,31,2.5,Dangerous Minds (1995),Drama,Drama
1,7,31,3.0,Dangerous Minds (1995),Drama,Drama
2,31,31,4.0,Dangerous Minds (1995),Drama,Drama
3,32,31,4.0,Dangerous Minds (1995),Drama,Drama
4,36,31,3.0,Dangerous Minds (1995),Drama,Drama
...,...,...,...,...,...,...
99999,664,64997,2.5,War of the Worlds (2005),Action|Sci-Fi,Action Sci-Fi
100000,664,72380,3.5,"Box, The (2009)",Drama|Horror|Mystery|Sci-Fi|Thriller,Drama Horror Mystery Sci-Fi Thriller
100001,665,129,3.0,Pie in the Sky (1996),Comedy|Romance,Comedy Romance
100002,665,4736,1.0,Summer Catch (2001),Comedy|Drama|Romance,Comedy Drama Romance


In [104]:
movie_user_rating = user_movie_rating.pivot_table('rating', index='title', columns='userId')
user_movie_rating = user_movie_rating.pivot_table('rating', index= 'userId', columns='title')


In [106]:
user_movie_rating

title,"""Great Performances"" Cats (1998)",$9.99 (2008),'Hellboy': The Seeds of Creation (2004),'Neath the Arizona Skies (1934),'Round Midnight (1986),'Salem's Lot (2004),'Til There Was You (1997),"'burbs, The (1989)",'night Mother (1986),(500) Days of Summer (2009),...,Zulu (1964),Zulu (2013),[REC] (2007),eXistenZ (1999),loudQUIETloud: A Film About the Pixies (2006),xXx (2002),xXx: State of the Union (2005),¡Three Amigos! (1986),À nous la liberté (Freedom for Us) (1931),İtirazım Var (2014)
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,,,,,,,,,,,...,,,,,,,,,,
2,,,,,,,,,,,...,,,,,,,,,,
3,,,,,,,,,,,...,,,,,,,,,,
4,,,,,,,,,,,...,,,,,,,,,,
5,,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
667,,,,,,,,,,,...,,,,,,,,,,
668,,,,,,,,,,,...,,,,,,,,,,
669,,,,,,,,,,,...,,,,,,,,,,
670,,,,,,,,,,,...,,,,,,,,,,


In [107]:
movie_user_rating.fillna(0 , inplace=True) 
#아이템기반 필터링이다 보니 col = userid , index = movie

In [110]:
movie_user_rating

userId,1,2,3,4,5,6,7,8,9,10,...,662,663,664,665,666,667,668,669,670,671
title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
"""Great Performances"" Cats (1998)",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
$9.99 (2008),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
'Hellboy': The Seeds of Creation (2004),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
'Neath the Arizona Skies (1934),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
'Round Midnight (1986),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
xXx (2002),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
xXx: State of the Union (2005),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
¡Three Amigos! (1986),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
À nous la liberté (Freedom for Us) (1931),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [111]:
item_based_collabor = cosine_similarity(movie_user_rating)

In [112]:
item_based_collabor

array([[1.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.        , 1.        , 0.        , ..., 0.05821787, 0.        ,
        0.        ],
       [0.        , 0.        , 1.        , ..., 0.        , 0.        ,
        0.        ],
       ...,
       [0.        , 0.05821787, 0.        , ..., 1.        , 0.        ,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.        , 1.        ,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        1.        ]])

In [113]:
item_based_collabor = pd.DataFrame(data = item_based_collabor,index= movie_user_rating.index, columns=movie_user_rating.index)

In [114]:
item_based_collabor

title,"""Great Performances"" Cats (1998)",$9.99 (2008),'Hellboy': The Seeds of Creation (2004),'Neath the Arizona Skies (1934),'Round Midnight (1986),'Salem's Lot (2004),'Til There Was You (1997),"'burbs, The (1989)",'night Mother (1986),(500) Days of Summer (2009),...,Zulu (1964),Zulu (2013),[REC] (2007),eXistenZ (1999),loudQUIETloud: A Film About the Pixies (2006),xXx (2002),xXx: State of the Union (2005),¡Three Amigos! (1986),À nous la liberté (Freedom for Us) (1931),İtirazım Var (2014)
title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
"""Great Performances"" Cats (1998)",1.000000,0.000000,0.0,0.164399,0.020391,0.0,0.014046,0.000000,0.000000,0.003166,...,0.000000,0.000000,0.0,0.000000,0.0,0.000000,0.000000,0.000000,0.0,0.0000
$9.99 (2008),0.000000,1.000000,0.0,0.000000,0.000000,0.0,0.000000,0.079474,0.000000,0.156330,...,0.000000,0.000000,0.0,0.000000,0.0,0.013899,0.000000,0.058218,0.0,0.0000
'Hellboy': The Seeds of Creation (2004),0.000000,0.000000,1.0,0.000000,0.000000,1.0,0.000000,0.217357,0.000000,0.000000,...,0.000000,0.000000,0.0,0.000000,0.0,0.000000,0.000000,0.000000,0.0,0.0000
'Neath the Arizona Skies (1934),0.164399,0.000000,0.0,1.000000,0.124035,0.0,0.085436,0.000000,0.000000,0.019259,...,0.000000,0.000000,0.0,0.000000,0.0,0.000000,0.000000,0.000000,0.0,0.0000
'Round Midnight (1986),0.020391,0.000000,0.0,0.124035,1.000000,0.0,0.010597,0.143786,0.000000,0.136163,...,0.000000,0.000000,0.0,0.121567,0.0,0.000000,0.000000,0.000000,0.0,0.0000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
xXx (2002),0.000000,0.013899,0.0,0.000000,0.000000,0.0,0.000000,0.123940,0.000000,0.144961,...,0.161281,0.076029,0.0,0.017465,0.0,1.000000,0.152057,0.140222,0.0,0.2661
xXx: State of the Union (2005),0.000000,0.000000,0.0,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.134815,...,0.000000,0.000000,0.0,0.000000,0.0,0.152057,1.000000,0.000000,0.0,0.0000
¡Three Amigos! (1986),0.000000,0.058218,0.0,0.000000,0.000000,0.0,0.081620,0.331663,0.214498,0.064908,...,0.112588,0.159223,0.0,0.166622,0.0,0.140222,0.000000,1.000000,0.0,0.0000
À nous la liberté (Freedom for Us) (1931),0.000000,0.000000,0.0,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.0,0.000000,0.0,0.000000,0.000000,0.000000,1.0,0.0000


In [117]:
def get_item_based_collar(title):
    return item_based_collabor[title].sort_values(ascending=False)[:6]

In [119]:
get_item_based_collar('"Great Performances" Cats (1998)')

title
"Great Performances" Cats (1998)    1.000000
Agent Cody Banks (2003)             0.986394
Alice in Wonderland (1933)          0.986394
Air Bud: Golden Receiver (1998)     0.948441
Are We There Yet? (2005)            0.935775
Alvin and the Chipmunks (2007)      0.898923
Name: "Great Performances" Cats (1998), dtype: float64