In [1]:
!pip install pandas scikit-surprise




In [2]:
import pandas as pd

# Load ratings data
ratings = pd.read_csv('u.data', sep='\t', names=['user_id', 'movie_id', 'rating', 'timestamp'])

# Load movie titles
movies = pd.read_csv('u.item', sep='|', encoding='latin-1', usecols=[0, 1], names=['movie_id', 'title'])

# Merge ratings and titles
data = pd.merge(ratings, movies, on='movie_id')

# Show the first few rows
data.head()

Unnamed: 0,user_id,movie_id,rating,timestamp,title
0,196,242,3,881250949,Kolya (1996)
1,186,302,3,891717742,L.A. Confidential (1997)
2,22,377,1,878887116,Heavyweights (1994)
3,244,51,2,880606923,Legends of the Fall (1994)
4,166,346,1,886397596,Jackie Brown (1997)


In [3]:
from surprise import Dataset, Reader, KNNBasic
from surprise.model_selection import train_test_split

# Prepare data for Surprise library
reader = Reader(rating_scale=(1, 5))
surprise_data = Dataset.load_from_df(data[['user_id', 'movie_id', 'rating']], reader)

# Split into train and test sets
trainset, testset = train_test_split(surprise_data, test_size=0.2)

# User-based Collaborative Filtering model
algo = KNNBasic(sim_options={'name': 'cosine', 'user_based': True})
algo.fit(trainset)

Computing the cosine similarity matrix...
Done computing similarity matrix.


<surprise.prediction_algorithms.knns.KNNBasic at 0x11d6489e0>

In [4]:
def get_top_n_recommendations(user_id, n=10):
    all_movie_ids = data['movie_id'].unique()
    rated_movie_ids = data[data['user_id'] == user_id]['movie_id'].tolist()
    predictions = []

    for movie_id in all_movie_ids:
        if movie_id not in rated_movie_ids:
            pred = algo.predict(user_id, movie_id)
            predictions.append((movie_id, pred.est))

    # Sort and get top N
    predictions.sort(key=lambda x: x[1], reverse=True)
    top_movie_ids = [movie_id for movie_id, _ in predictions[:n]]
    top_movies = movies[movies['movie_id'].isin(top_movie_ids)]

    return top_movies['title'].tolist()

In [5]:
user_id = 150  # Change this to test different users (valid range: 1–943)
recommendations = get_top_n_recommendations(user_id)

print(f"Top 10 recommendations for User {user_id}:")
for i, movie in enumerate(recommendations, 1):
    print(f"{i}. {movie}")

Top 10 recommendations for User 150:
1. Great Day in Harlem, A (1994)
2. Prefontaine (1997)
3. Letter From Death Row, A (1998)
4. Star Kid (1997)
5. Anna (1996)
6. Santa with Muscles (1996)
7. Aiqing wansui (1994)
8. Someone Else's America (1995)
9. Entertaining Angels: The Dorothy Day Story (1996)
10. Little City (1998)
