<h3 style="color:tomato">RECOMMENDER SYSTEM PROJECT</h3>

<h5>Building a Hybrid Movie Recommendation System using Collaborative and Content-Based Filtering</h5>

<h4 style='color:tomato'>Dataset: MovieLens 100K</h4>

In [1]:
import pandas as pd

In [2]:
ratings = pd.read_csv("ml-100k/u.data", sep='\t', names=["userId", "movieId", "rating", "timestamp"])
ratings.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,196,242,3,881250949
1,186,302,3,891717742
2,22,377,1,878887116
3,244,51,2,880606923
4,166,346,1,886397596


In [3]:
movies = pd.read_csv("ml-100k/u.item", sep="|", encoding="latin-1", header=None)
movies = movies[[0,1]]
movies.columns = ["movieId", "title"]
movies.head()

Unnamed: 0,movieId,title
0,1,Toy Story (1995)
1,2,GoldenEye (1995)
2,3,Four Rooms (1995)
3,4,Get Shorty (1995)
4,5,Copycat (1995)


In [4]:
print(ratings.shape)
print(movies.shape)

(100000, 4)
(1682, 2)


In [5]:
data = pd.merge(ratings, movies, on="movieId")
data.head()

Unnamed: 0,userId,movieId,rating,timestamp,title
0,196,242,3,881250949,Kolya (1996)
1,186,302,3,891717742,L.A. Confidential (1997)
2,22,377,1,878887116,Heavyweights (1994)
3,244,51,2,880606923,Legends of the Fall (1994)
4,166,346,1,886397596,Jackie Brown (1997)


<h4 style='color:tomato'>COLLABORATIVE FILTERING (USER BASED)</h4>

In [6]:
user_movie_matrix = data.pivot_table(index="userId", columns="title", values="rating")
user_movie_matrix.head()

title,'Til There Was You (1997),1-900 (1994),101 Dalmatians (1996),12 Angry Men (1957),187 (1997),2 Days in the Valley (1996),"20,000 Leagues Under the Sea (1954)",2001: A Space Odyssey (1968),3 Ninjas: High Noon At Mega Mountain (1998),"39 Steps, The (1935)",...,Yankee Zulu (1994),Year of the Horse (1997),You So Crazy (1994),Young Frankenstein (1974),Young Guns (1988),Young Guns II (1990),"Young Poisoner's Handbook, The (1995)",Zeus and Roxanne (1997),unknown,Á köldum klaka (Cold Fever) (1994)
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,,,2.0,5.0,,,3.0,4.0,,,...,,,,5.0,3.0,,,,4.0,
2,,,,,,,,,1.0,,...,,,,,,,,,,
3,,,,,2.0,,,,,,...,,,,,,,,,,
4,,,,,,,,,,,...,,,,,,,,,,
5,,,2.0,,,,,4.0,,,...,,,,4.0,,,,,4.0,


In [7]:
user_movie_filled = user_movie_matrix.fillna(0)
user_movie_filled.head()

title,'Til There Was You (1997),1-900 (1994),101 Dalmatians (1996),12 Angry Men (1957),187 (1997),2 Days in the Valley (1996),"20,000 Leagues Under the Sea (1954)",2001: A Space Odyssey (1968),3 Ninjas: High Noon At Mega Mountain (1998),"39 Steps, The (1935)",...,Yankee Zulu (1994),Year of the Horse (1997),You So Crazy (1994),Young Frankenstein (1974),Young Guns (1988),Young Guns II (1990),"Young Poisoner's Handbook, The (1995)",Zeus and Roxanne (1997),unknown,Á köldum klaka (Cold Fever) (1994)
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,0.0,0.0,2.0,5.0,0.0,0.0,3.0,4.0,0.0,0.0,...,0.0,0.0,0.0,5.0,3.0,0.0,0.0,0.0,4.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.0,0.0,2.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,...,0.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,4.0,0.0


In [8]:
from sklearn.metrics.pairwise import cosine_similarity

user_similarity = cosine_similarity(user_movie_filled)

user_similarity_df = pd.DataFrame(user_similarity, index=user_movie_filled.index, columns=user_movie_filled.index)
user_similarity_df.head()

userId,1,2,3,4,5,6,7,8,9,10,...,934,935,936,937,938,939,940,941,942,943
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,1.0,0.168937,0.048388,0.064561,0.37967,0.429682,0.443097,0.320079,0.078385,0.377733,...,0.372213,0.11986,0.26986,0.193343,0.197949,0.118722,0.315064,0.149086,0.181612,0.399432
2,0.168937,1.0,0.113393,0.179694,0.073623,0.242106,0.108604,0.104257,0.16247,0.161273,...,0.147095,0.310661,0.363328,0.410725,0.322713,0.231096,0.228793,0.162911,0.175273,0.106732
3,0.048388,0.113393,1.0,0.349781,0.021592,0.074018,0.067423,0.084419,0.062039,0.066217,...,0.033885,0.043453,0.16714,0.071288,0.126278,0.026758,0.164539,0.102899,0.136757,0.02699
4,0.064561,0.179694,0.349781,1.0,0.031804,0.068431,0.091507,0.18806,0.101284,0.060859,...,0.054615,0.036784,0.133619,0.196561,0.146058,0.030202,0.196858,0.152041,0.171538,0.058752
5,0.37967,0.073623,0.021592,0.031804,1.0,0.238636,0.374733,0.24893,0.056847,0.201427,...,0.340183,0.08058,0.095284,0.081053,0.148607,0.071612,0.239955,0.139595,0.153799,0.313941


In [9]:
def get_similar_users(user_id, top_n=5):
    similar_users = user_similarity_df[user_id].sort_values(ascending=False).iloc[1:top_n+1]
    return similar_users

In [10]:
get_similar_users(200)

userId
472    0.587780
109    0.568259
881    0.566536
882    0.560439
301    0.555843
Name: 200, dtype: float64

In [11]:
def recommend_movies(user_id, top_n=5):
    similar_users = get_similar_users(user_id)

    user_rated_movies = user_movie_matrix.loc[user_id]
    unseen_movies = user_rated_movies[user_rated_movies.isna()].index

    scores = {}

    for sim_user, similarity in similar_users.items():
        sim_user_ratings = user_movie_matrix.loc[sim_user, unseen_movies]

        for movie, rating in sim_user_ratings.dropna().items():
            scores[movie] = scores.get(movie,0) + similarity * rating

    ranked_movies = sorted(scores.items(), key = lambda x: x[1], reverse=True)

    return ranked_movies[:top_n]

In [12]:
def show_recommendation(user_id, top_n=5):
    recs = recommend_movies(user_id, top_n)

    print(f"\nTop {top_n} recommendations for User {user_id}: \n")
    for movie, score in recs:
         print(f"{movie} --> score: {round(score, 2)}")

In [13]:
show_recommendation(262)


Top 5 recommendations for User 262: 

Dances with Wolves (1990) --> score: 11.57
Usual Suspects, The (1995) --> score: 11.09
Fugitive, The (1993) --> score: 10.63
Princess Bride, The (1987) --> score: 10.61
American President, The (1995) --> score: 10.18


<h4 style='color:tomato'>COLLABORATIVE FILTERING (ITEM BASED)</h4>

In [14]:
item_user_matrix = user_movie_matrix.T
item_user_filled = item_user_matrix.fillna(0)
item_user_filled

userId,1,2,3,4,5,6,7,8,9,10,...,934,935,936,937,938,939,940,941,942,943
title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
'Til There Was You (1997),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1-900 (1994),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
101 Dalmatians (1996),2.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,...,2.0,0.0,0.0,2.0,4.0,0.0,0.0,0.0,0.0,0.0
12 Angry Men (1957),5.0,0.0,0.0,0.0,0.0,4.0,4.0,0.0,0.0,5.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
187 (1997),0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Young Guns II (1990),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0
"Young Poisoner's Handbook, The (1995)",0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,...,0.0,0.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Zeus and Roxanne (1997),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
unknown,4.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [15]:
from sklearn.metrics.pairwise import cosine_similarity

item_similarity = cosine_similarity(item_user_filled)

item_similarity_df = pd.DataFrame(
    item_similarity,
    index=item_user_filled.index,
    columns=item_user_filled.index
)

item_similarity_df.head()


title,'Til There Was You (1997),1-900 (1994),101 Dalmatians (1996),12 Angry Men (1957),187 (1997),2 Days in the Valley (1996),"20,000 Leagues Under the Sea (1954)",2001: A Space Odyssey (1968),3 Ninjas: High Noon At Mega Mountain (1998),"39 Steps, The (1935)",...,Yankee Zulu (1994),Year of the Horse (1997),You So Crazy (1994),Young Frankenstein (1974),Young Guns (1988),Young Guns II (1990),"Young Poisoner's Handbook, The (1995)",Zeus and Roxanne (1997),unknown,Á köldum klaka (Cold Fever) (1994)
title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
'Til There Was You (1997),1.0,0.0,0.024561,0.099561,0.185236,0.159265,0.0,0.052203,0.0,0.033326,...,0.0,0.0,0.0,0.027774,0.11884,0.142315,0.02907,0.0,0.110208,0.0
1-900 (1994),0.0,1.0,0.014139,0.009294,0.007354,0.004702,0.010055,0.067038,0.0,0.0,...,0.152499,0.015484,0.0,0.069284,0.018243,0.023408,0.006694,0.07964,0.042295,0.0
101 Dalmatians (1996),0.024561,0.014139,1.0,0.167006,0.061105,0.143878,0.203781,0.225803,0.027642,0.092337,...,0.0,0.021965,0.030905,0.274877,0.204267,0.101199,0.056976,0.172155,0.045714,0.0
12 Angry Men (1957),0.099561,0.009294,0.167006,1.0,0.056822,0.167235,0.304078,0.422506,0.072682,0.394854,...,0.060946,0.016502,0.0,0.40327,0.259436,0.145519,0.105226,0.038901,0.060101,0.081261
187 (1997),0.185236,0.007354,0.061105,0.056822,1.0,0.132327,0.042928,0.06506,0.043133,0.0273,...,0.0,0.141997,0.0,0.068257,0.067786,0.091293,0.09949,0.025184,0.142667,0.096449


In [16]:
def get_similar_movies(movie_title, top_n=5):
    similar_movies = item_similarity_df[movie_title].sort_values(ascending=False).iloc[1:top_n+1]
    
    return similar_movies


In [17]:
def item_based_recommend(user_id, top_n=5):
    user_ratings = user_movie_matrix.loc[user_id].dropna()
    
    scores = {}

    for movie, rating in user_ratings.items():
        similar_movies = item_similarity_df[movie]

        for sim_movie, similarity in similar_movies.items():
            if sim_movie not in user_ratings.index:
                scores[sim_movie] = scores.get(sim_movie, 0) + similarity * rating

    ranked_movies = sorted(scores.items(), key=lambda x: x[1], reverse=True)
    return ranked_movies[:top_n]


In [18]:
def show_item_based_recommendations(user_id, top_n=5):
    recs = item_based_recommend(user_id, top_n)
    
    print(f"\nItem-based recommendations for User {user_id}:\n")
    for movie, score in recs:
        print(f"{movie} --> score: {round(score, 2)}")


In [19]:
show_item_based_recommendations(user_id=125)


Item-based recommendations for User 125:

E.T. the Extra-Terrestrial (1982) --> score: 261.95
Terminator 2: Judgment Day (1991) --> score: 251.35
Dead Poets Society (1989) --> score: 248.16
True Lies (1994) --> score: 244.85
Star Trek: The Wrath of Khan (1982) --> score: 244.76


<h4 style='color:tomato'>CONTENT BASED FILTERING</h4>

In [20]:
movies_full = pd.read_csv("ml-100k/u.item", sep="|", encoding="latin-1", header=None)

genre_cols = [
    "unknown","Action","Adventure","Animation","Children","Comedy","Crime",
    "Documentary","Drama","Fantasy","Film-Noir","Horror","Musical","Mystery",
    "Romance","Sci-Fi","Thriller","War","Western"
]

movies_full.columns = (
    ["movieId","title","release_date","video_release","imdb_url"]
    + genre_cols
)
movies_full


Unnamed: 0,movieId,title,release_date,video_release,imdb_url,unknown,Action,Adventure,Animation,Children,...,Fantasy,Film-Noir,Horror,Musical,Mystery,Romance,Sci-Fi,Thriller,War,Western
0,1,Toy Story (1995),01-Jan-1995,,http://us.imdb.com/M/title-exact?Toy%20Story%2...,0,0,0,1,1,...,0,0,0,0,0,0,0,0,0,0
1,2,GoldenEye (1995),01-Jan-1995,,http://us.imdb.com/M/title-exact?GoldenEye%20(...,0,1,1,0,0,...,0,0,0,0,0,0,0,1,0,0
2,3,Four Rooms (1995),01-Jan-1995,,http://us.imdb.com/M/title-exact?Four%20Rooms%...,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
3,4,Get Shorty (1995),01-Jan-1995,,http://us.imdb.com/M/title-exact?Get%20Shorty%...,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,5,Copycat (1995),01-Jan-1995,,http://us.imdb.com/M/title-exact?Copycat%20(1995),0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1677,1678,Mat' i syn (1997),06-Feb-1998,,http://us.imdb.com/M/title-exact?Mat%27+i+syn+...,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1678,1679,B. Monkey (1998),06-Feb-1998,,http://us.imdb.com/M/title-exact?B%2E+Monkey+(...,0,0,0,0,0,...,0,0,0,0,0,1,0,1,0,0
1679,1680,Sliding Doors (1998),01-Jan-1998,,http://us.imdb.com/Title?Sliding+Doors+(1998),0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
1680,1681,You So Crazy (1994),01-Jan-1994,,http://us.imdb.com/M/title-exact?You%20So%20Cr...,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [21]:
def combine_genres(row):
    return " ".join([genre for genre in genre_cols if row[genre] == 1])

movies_full["genres_text"] = movies_full.apply(combine_genres, axis=1)
movies_full[["title","genres_text"]].head()

Unnamed: 0,title,genres_text
0,Toy Story (1995),Animation Children Comedy
1,GoldenEye (1995),Action Adventure Thriller
2,Four Rooms (1995),Thriller
3,Get Shorty (1995),Action Comedy Drama
4,Copycat (1995),Crime Drama Thriller


In [22]:
from sklearn.feature_extraction.text import TfidfVectorizer

tfidf = TfidfVectorizer()
tfidf_matrix = tfidf.fit_transform(movies_full["genres_text"])
tfidf_matrix

<Compressed Sparse Row sparse matrix of dtype 'float64'
	with 3018 stored elements and shape (1682, 21)>

In [23]:
from sklearn.metrics.pairwise import cosine_similarity

content_similarity = cosine_similarity(tfidf_matrix)

content_similarity_df = pd.DataFrame(
    content_similarity,
    index=movies_full["title"],
    columns=movies_full["title"]
)
content_similarity_df

title,Toy Story (1995),GoldenEye (1995),Four Rooms (1995),Get Shorty (1995),Copycat (1995),Shanghai Triad (Yao a yao yao dao waipo qiao) (1995),Twelve Monkeys (1995),Babe (1995),Dead Man Walking (1995),Richard III (1995),...,Mirage (1995),Mamma Roma (1962),"Sunchaser, The (1996)","War at Home, The (1996)",Sweet Nothing (1995),Mat' i syn (1997),B. Monkey (1998),Sliding Doors (1998),You So Crazy (1994),Scream of Stone (Schrei aus Stein) (1991)
title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Toy Story (1995),1.000000,0.000000,0.000000,0.188602,0.000000,0.000000,0.000000,0.616159,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.349419,0.000000
GoldenEye (1995),0.000000,1.000000,0.536767,0.381454,0.307005,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.759103,0.000000,0.000000,0.000000,0.000000,0.000000,0.378506,0.000000,0.000000,0.000000
Four Rooms (1995),0.000000,0.536767,1.000000,0.000000,0.571953,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.707107,0.000000,0.000000,0.000000,0.000000,0.000000,0.705158,0.000000,0.000000,0.000000
Get Shorty (1995),0.188602,0.381454,0.000000,1.000000,0.163891,0.451259,0.146115,0.437359,0.451259,0.182908,...,0.502507,0.451259,0.451259,0.451259,0.451259,0.451259,0.000000,0.240949,0.539759,0.451259
Copycat (1995),0.000000,0.307005,0.571953,0.163891,1.000000,0.363186,0.117598,0.144814,0.363186,0.147210,...,0.404432,0.363186,0.363186,0.363186,0.363186,0.363186,0.403317,0.193923,0.000000,0.363186
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Mat' i syn (1997),0.000000,0.000000,0.000000,0.451259,0.363186,1.000000,0.323795,0.398732,1.000000,0.405329,...,0.000000,1.000000,1.000000,1.000000,1.000000,1.000000,0.000000,0.533950,0.000000,1.000000
B. Monkey (1998),0.000000,0.378506,0.705158,0.000000,0.403317,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.498622,0.000000,0.000000,0.000000,0.000000,0.000000,1.000000,0.599513,0.000000,0.000000
Sliding Doors (1998),0.000000,0.000000,0.000000,0.240949,0.193923,0.533950,0.172890,0.212903,0.533950,0.216425,...,0.000000,0.533950,0.533950,0.533950,0.533950,0.533950,0.599513,1.000000,0.000000,0.533950
You So Crazy (1994),0.349419,0.000000,0.000000,0.539759,0.000000,0.000000,0.000000,0.476931,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,1.000000,0.000000


In [24]:
def content_based_recommend(movie_title, top_n=5):
    similar_movies = content_similarity_df[movie_title].sort_values(ascending=False).iloc[1:top_n+1]
    
    return similar_movies


In [25]:
def content_based_user_recommend(user_id, top_n=5):
    user_ratings = data[data["userId"] == user_id]
    liked_movies = user_ratings[user_ratings["rating"] >= 4]["title"]

    scores = {}

    for movie in liked_movies:
        similar_movies = content_similarity_df[movie]

        for sim_movie, similarity in similar_movies.items():
            if sim_movie not in liked_movies.values:
                scores[sim_movie] = scores.get(sim_movie, 0) + similarity

    ranked_movies = sorted(scores.items(), key=lambda x: x[1], reverse=True)
    return ranked_movies[:top_n]


In [26]:
content_based_user_recommend(user_id=262)

[('Chasing Amy (1997)', 43.893269155690675),
 ('Sliding Doors (1998)', 43.893269155690675),
 ("Ulee's Gold (1997)", 40.739880734342044),
 ('Ice Storm, The (1997)', 40.739880734342044),
 ('Substance of Fire, The (1996)', 40.739880734342044)]

<h4 style='color:tomato'>HYBRID RECOMMENDER SYSTEM</h4>

In [27]:
def normalize_scores(recommendations):
    scores = dict(recommendations)
    max_score = max(scores.values()) if scores else 1
    return {k: v / max_score for k, v in scores.items()}


In [28]:
def hybrid_recommend(user_id, top_n=5, alpha=0.6, beta=0.4):
    
    # Collaborative recommendations
    collab_recs = recommend_movies(user_id, top_n=20)
    collab_scores = normalize_scores(collab_recs)

    # Content-based recommendations
    content_recs = content_based_user_recommend(user_id, top_n=20)
    content_scores = normalize_scores(content_recs)

    final_scores = {}

    # Combine scores
    for movie in set(collab_scores) | set(content_scores):
        final_scores[movie] = (
            alpha * collab_scores.get(movie, 0) +
            beta * content_scores.get(movie, 0)
        )

    ranked_movies = sorted(
        final_scores.items(),
        key=lambda x: x[1],
        reverse=True
    )

    return ranked_movies[:top_n]


In [29]:
def show_hybrid_recommendations(user_id, top_n=5):
    recs = hybrid_recommend(user_id, top_n)
    
    print(f"\nHybrid Recommendations for User {user_id}:\n")
    for movie, score in recs:
        print(f"{movie} --> score: {round(score, 3)}")


In [30]:
show_hybrid_recommendations(user_id=262)


Hybrid Recommendations for User 262:

American President, The (1995) --> score: 0.763
Dances with Wolves (1990) --> score: 0.6
Usual Suspects, The (1995) --> score: 0.575
Fugitive, The (1993) --> score: 0.551
Princess Bride, The (1987) --> score: 0.55
