# Movie Recommendation System
This data set consists of:
* 100,000 ratings (1-5) from 943 users on 1682 movies.
* Each user has rated at least 20 movies.

## Import Libraries

In [1]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

## Load ratings data


In [2]:
# u.data is tab-separated; column names are supplied here, so the first
# line of the file is read as data rather than as a header.
ratings = pd.read_csv(
    "/content/u.data",
    sep='\t',
    names=["user_id", "movie_id", "rating", "timestamp"],
)
ratings.head()

Unnamed: 0,user_id,movie_id,rating,timestamp
0,196,242,3,881250949
1,186,302,3,891717742
2,22,377,1,878887116
3,244,51,2,880606923
4,166,346,1,886397596


## Load movie data

In [3]:
# u.item is pipe-delimited and read without a header row; it is decoded as
# latin-1 because some titles contain non-ASCII characters.
movies = pd.read_csv(
    "/content/u.item",
    sep='|',
    header=None,
    encoding='latin-1',
)

In [4]:
# Keep only the first two columns (movie id and title).
# Positional selection keeps this cell idempotent: selecting by the labels 0 and 1
# raised a KeyError when the cell was re-run after the columns had been renamed.
# The explicit copy makes the renamed frame independent of the raw one.
movies = movies.iloc[:, :2].copy()
movies.columns = ["movie_id", "title"]
movies.head()

Unnamed: 0,movie_id,title
0,1,Toy Story (1995)
1,2,GoldenEye (1995)
2,3,Four Rooms (1995)
3,4,Get Shorty (1995)
4,5,Copycat (1995)


## Merge both

In [5]:
# Attach the movie title to every rating row via an inner join on movie_id
# (inner is the default join type of merge, spelled out here for readers).
df = ratings.merge(movies, on="movie_id", how="inner")
df.head()

Unnamed: 0,user_id,movie_id,rating,timestamp,title
0,196,242,3,881250949,Kolya (1996)
1,186,302,3,891717742,L.A. Confidential (1997)
2,22,377,1,878887116,Heavyweights (1994)
3,244,51,2,880606923,Legends of the Fall (1994)
4,166,346,1,886397596,Jackie Brown (1997)


In [6]:
# Sanity check on the merged frame: (number of rating rows, number of columns).
df.shape

(100000, 5)

## Build User-Item Matrix

In [7]:
# User x title rating matrix: one row per user_id, one column per distinct title.
# Cells with no rating are NaN. Rows sharing the same (user_id, title) pair are
# averaged, which is pivot_table's default aggregation (made explicit below).
matrix = pd.pivot_table(
    df,
    index='user_id',
    columns='title',
    values='rating',
    aggfunc='mean',
)
matrix.head()

title,'Til There Was You (1997),1-900 (1994),101 Dalmatians (1996),12 Angry Men (1957),187 (1997),2 Days in the Valley (1996),"20,000 Leagues Under the Sea (1954)",2001: A Space Odyssey (1968),3 Ninjas: High Noon At Mega Mountain (1998),"39 Steps, The (1935)",...,Yankee Zulu (1994),Year of the Horse (1997),You So Crazy (1994),Young Frankenstein (1974),Young Guns (1988),Young Guns II (1990),"Young Poisoner's Handbook, The (1995)",Zeus and Roxanne (1997),unknown,Á köldum klaka (Cold Fever) (1994)
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,,,2.0,5.0,,,3.0,4.0,,,...,,,,5.0,3.0,,,,4.0,
2,,,,,,,,,1.0,,...,,,,,,,,,,
3,,,,,2.0,,,,,,...,,,,,,,,,,
4,,,,,,,,,,,...,,,,,,,,,,
5,,,2.0,,,,,4.0,,,...,,,,4.0,,,,,4.0,


In [8]:
# (number of users, number of distinct movie titles) in the user-item matrix.
matrix.shape

(943, 1664)

## Compute user similarity matrix

In [11]:
# Unrated movies are encoded as 0 so the matrix contains no NaN before it is
# handed to cosine_similarity.
matrix_filled = matrix.fillna(value=0)

# Pairwise cosine similarity between the rows (users) of the filled matrix.
user_similarity = cosine_similarity(X=matrix_filled)

In [12]:
# Label both axes with the user ids so similarities can be looked up by id
# instead of by array position.
user_similarity_df = pd.DataFrame(
    data=user_similarity,
    index=matrix.index,
    columns=matrix.index,
)
user_similarity_df.head()

user_id,1,2,3,4,5,6,7,8,9,10,...,934,935,936,937,938,939,940,941,942,943
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,1.0,0.168937,0.048388,0.064561,0.37967,0.429682,0.443097,0.320079,0.078385,0.377733,...,0.372213,0.11986,0.26986,0.193343,0.197949,0.118722,0.315064,0.149086,0.181612,0.399432
2,0.168937,1.0,0.113393,0.179694,0.073623,0.242106,0.108604,0.104257,0.16247,0.161273,...,0.147095,0.310661,0.363328,0.410725,0.322713,0.231096,0.228793,0.162911,0.175273,0.106732
3,0.048388,0.113393,1.0,0.349781,0.021592,0.074018,0.067423,0.084419,0.062039,0.066217,...,0.033885,0.043453,0.16714,0.071288,0.126278,0.026758,0.164539,0.102899,0.136757,0.02699
4,0.064561,0.179694,0.349781,1.0,0.031804,0.068431,0.091507,0.18806,0.101284,0.060859,...,0.054615,0.036784,0.133619,0.196561,0.146058,0.030202,0.196858,0.152041,0.171538,0.058752
5,0.37967,0.073623,0.021592,0.031804,1.0,0.238636,0.374733,0.24893,0.056847,0.201427,...,0.340183,0.08058,0.095284,0.081053,0.148607,0.071612,0.239955,0.139595,0.153799,0.313941


In [13]:
# Both axes are indexed by user id, so this should be a square matrix.
user_similarity_df.shape

(943, 943)

## Recommendation Function

In [14]:
def recommend_movies(user_id, num_recommendations=5, num_similar_users=5):
    """Recommend unrated movies for a user from the ratings of similar users.

    Uses the notebook-level ``user_similarity_df`` (user x user similarity)
    and ``matrix`` (user x title ratings, NaN where unrated).

    Parameters
    ----------
    user_id
        Label of the target user in ``user_similarity_df`` and ``matrix``.
    num_recommendations : int, default 5
        Maximum number of titles to return.
    num_similar_users : int, default 5
        Number of most-similar users whose ratings are averaged.

    Returns
    -------
    pandas.Series
        Mean rating given by the selected neighbours, indexed by title and
        sorted from highest to lowest. Titles the target user has already
        rated, and titles none of the neighbours rated, are excluded.

    Raises
    ------
    KeyError
        If ``user_id`` is not present in the similarity matrix.
    """
    # Rank all users by similarity to the target user.
    similarity_ranking = user_similarity_df[user_id].sort_values(ascending=False)

    # Remove the target user by label rather than assuming they sort first:
    # another user tied at the same similarity could otherwise take the first
    # slot, making the target their own neighbour and dropping a real one.
    similar_users = similarity_ranking.drop(user_id).head(num_similar_users).index

    # Ratings given by the selected neighbours.
    sim_users_ratings = matrix.loc[similar_users]

    # Column-wise mean skips NaN, so each title is averaged only over the
    # neighbours who rated it; titles nobody rated stay NaN and are dropped.
    avg_ratings = sim_users_ratings.mean().sort_values(ascending=False).dropna()

    # Exclude everything the target user has already rated.
    movies_watched = matrix.loc[user_id].dropna().index
    avg_ratings = avg_ratings.drop(movies_watched, errors='ignore')

    return avg_ratings.head(num_recommendations)

## Try recommendation for one user

In [19]:
# Top 10 recommendations for user 1.
recommend_movies(user_id=1, num_recommendations=10)

Unnamed: 0_level_0,0
title,Unnamed: 1_level_1
Wings of Desire (1987),5.0
"People vs. Larry Flynt, The (1996)",5.0
Shadowlands (1993),5.0
Casablanca (1942),5.0
Chinatown (1974),5.0
Titanic (1997),5.0
It's a Wonderful Life (1946),5.0
Secrets & Lies (1996),5.0
L.A. Confidential (1997),5.0
Hamlet (1996),5.0


## Evaluate recommendation using Precision@K

In [26]:
def precision_at_k(user_id, k=10, threshold=4.0, relevant_ratings=None):
    """Compute Precision@K for the recommendations of one user.

    Precision@K is the share of the top ``k`` recommended titles that the
    user rated at or above ``threshold``.

    Parameters
    ----------
    user_id
        Label of the user to evaluate.
    k : int, default 10
        Number of recommendations to request; also the denominator.
    threshold : float, default 4.0
        Minimum rating for a title to count as liked.
    relevant_ratings : pandas.Series, optional
        Ratings to score against, indexed by title (for example held-out
        test ratings that were not used to build ``matrix``). When omitted,
        the user's row of the notebook-level ``matrix`` is used. Because
        ``recommend_movies`` as defined in this notebook excludes every title
        the user has rated in ``matrix``, that default cannot produce a hit.

    Returns
    -------
    float
        ``hits / k``. The value is also printed.

    Raises
    ------
    ValueError
        If ``k`` is not positive.
    """
    if k <= 0:
        # k == 0 would divide by zero and a negative k has no meaning here.
        raise ValueError(f"k must be a positive integer, got {k}")

    recs = recommend_movies(user_id, k)

    if relevant_ratings is None:
        relevant_ratings = matrix.loc[user_id]

    # NaN compares False against the threshold, so unrated titles are never liked.
    liked_titles = set(relevant_ratings[relevant_ratings >= threshold].index)
    hits = sum(1 for movie in recs.index if movie in liked_titles)

    precision = hits / k
    print(f"\nPrecision@{k} for user {user_id}: {precision:.2f}")
    return precision

**Precision @ K**

---



Precision@K measures how many of the top K recommended movies were actually "liked" by the user (rating >= 4).

In our case, Precision@10 is 0.00. The reason lies in how this evaluation interacts with the MovieLens-100K dataset:

- The dataset does not contain information about **future** user ratings after the recommendation.
- The recommended movies for the target user are, by construction, movies that the user has **not rated yet** in the dataset, because `recommend_movies` removes every movie the user has already rated.
- Therefore, those items count as "non hits" during evaluation, resulting in a Precision@K value of 0.

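*This is a known limitation of evaluating collaborative filtering recommenders on static datasets. It is normal to observe zero Precision@K values when no ground-truth future preferences are available. To obtain a meaningful score, hold out part of each user's ratings as a test set before building the similarity matrix, and count hits against those held-out ratings.*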




---



#  Implement item-based collaborative filtering

Instead of finding similar users, we find similar movies based on how users rated them.

## Compute item (movie) similarity matrix

In [29]:
# Transpose the zero-filled user x title matrix so that each row is a movie;
# cosine similarity is then computed between movies instead of between users.
item_similarity = cosine_similarity(X=matrix.fillna(value=0).transpose())

# Label both axes with the movie titles for lookup by name.
item_similarity_df = pd.DataFrame(
    data=item_similarity,
    index=matrix.columns,
    columns=matrix.columns,
)
item_similarity_df.head()

title,'Til There Was You (1997),1-900 (1994),101 Dalmatians (1996),12 Angry Men (1957),187 (1997),2 Days in the Valley (1996),"20,000 Leagues Under the Sea (1954)",2001: A Space Odyssey (1968),3 Ninjas: High Noon At Mega Mountain (1998),"39 Steps, The (1935)",...,Yankee Zulu (1994),Year of the Horse (1997),You So Crazy (1994),Young Frankenstein (1974),Young Guns (1988),Young Guns II (1990),"Young Poisoner's Handbook, The (1995)",Zeus and Roxanne (1997),unknown,Á köldum klaka (Cold Fever) (1994)
title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
'Til There Was You (1997),1.0,0.0,0.024561,0.099561,0.185236,0.159265,0.0,0.052203,0.0,0.033326,...,0.0,0.0,0.0,0.027774,0.11884,0.142315,0.02907,0.0,0.110208,0.0
1-900 (1994),0.0,1.0,0.014139,0.009294,0.007354,0.004702,0.010055,0.067038,0.0,0.0,...,0.152499,0.015484,0.0,0.069284,0.018243,0.023408,0.006694,0.07964,0.042295,0.0
101 Dalmatians (1996),0.024561,0.014139,1.0,0.167006,0.061105,0.143878,0.203781,0.225803,0.027642,0.092337,...,0.0,0.021965,0.030905,0.274877,0.204267,0.101199,0.056976,0.172155,0.045714,0.0
12 Angry Men (1957),0.099561,0.009294,0.167006,1.0,0.056822,0.167235,0.304078,0.422506,0.072682,0.394854,...,0.060946,0.016502,0.0,0.40327,0.259436,0.145519,0.105226,0.038901,0.060101,0.081261
187 (1997),0.185236,0.007354,0.061105,0.056822,1.0,0.132327,0.042928,0.06506,0.043133,0.0273,...,0.0,0.141997,0.0,0.068257,0.067786,0.091293,0.09949,0.025184,0.142667,0.096449


## Define a function to get similar movies

In [46]:
def get_similar_movies(movie_name, num_movies=10):
    """Return the movies most similar to ``movie_name``.

    Looks up the notebook-level ``item_similarity_df`` (title x title
    similarity).

    Parameters
    ----------
    movie_name : str
        Title exactly as it appears in ``item_similarity_df``.
    num_movies : int, default 10
        Maximum number of similar titles to return.

    Returns
    -------
    pandas.Series
        Similarity scores indexed by title, sorted from most to least
        similar. ``movie_name`` itself is never included.

    Raises
    ------
    KeyError
        If ``movie_name`` is not a column of ``item_similarity_df``.
    """
    if movie_name not in item_similarity_df.columns:
        raise KeyError(f"{movie_name!r} is not a title in the item similarity matrix")

    ranked_scores = item_similarity_df[movie_name].sort_values(ascending=False)

    # Drop the query title by label instead of skipping the first row: a movie
    # with an identical rating vector also scores 1.0 and may sort ahead of it,
    # in which case skipping position 0 would return the query title itself.
    similar_scores = ranked_scores.drop(movie_name).head(num_movies)

    return similar_scores

In [48]:
# Ten titles whose rating patterns are closest to "Quiz Show (1994)".
get_similar_movies(movie_name="Quiz Show (1994)", num_movies=10)

Unnamed: 0_level_0,Quiz Show (1994)
title,Unnamed: 1_level_1
E.T. the Extra-Terrestrial (1982),0.604541
Forrest Gump (1994),0.599801
"Shawshank Redemption, The (1994)",0.592903
Philadelphia (1993),0.579961
Schindler's List (1993),0.578898
Dead Poets Society (1989),0.578578
Field of Dreams (1989),0.570801
Apollo 13 (1995),0.56871
Back to the Future (1985),0.566076
Raiders of the Lost Ark (1981),0.563397
