# Movie Recommendation System

## Import libraries :

In [None]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.decomposition import TruncatedSVD
from sklearn.model_selection import train_test_split

## Loading Dataset :

In [4]:
# Ratings file: tab-separated; column names are supplied here.
columns = ['user_id', 'movie_id', 'rating', 'timestamp']
ratings = pd.read_csv('ml-100k/u.data', sep='\t', names=columns)

# Movie file: pipe-separated, latin-1 encoded; only the first two columns
# (movie id and title) are read.
movies = pd.read_csv(
    'ml-100k/u.item',
    sep='|',
    encoding='latin-1',
    usecols=[0, 1],
    names=['movie_id', 'title'],
)

# Attach the movie title to every rating row.
ratings = pd.merge(ratings, movies, on='movie_id')
ratings.head()

Unnamed: 0,user_id,movie_id,rating,timestamp,title
0,196,242,3,881250949,Kolya (1996)
1,186,302,3,891717742,L.A. Confidential (1997)
2,22,377,1,878887116,Heavyweights (1994)
3,244,51,2,880606923,Legends of the Fall (1994)
4,166,346,1,886397596,Jackie Brown (1997)


## Build the User-Item Rating Matrix :

In [5]:
# One row per user, one column per movie title; cells hold the rating and are
# NaN where the user has not rated the title. Multiple ratings that fall in the
# same (user, title) cell are averaged (pivot_table's default aggregation).
user_movie_matrix = pd.pivot_table(
    ratings,
    index='user_id',
    columns='title',
    values='rating',
)
user_movie_matrix.head()

title,'Til There Was You (1997),1-900 (1994),101 Dalmatians (1996),12 Angry Men (1957),187 (1997),2 Days in the Valley (1996),"20,000 Leagues Under the Sea (1954)",2001: A Space Odyssey (1968),3 Ninjas: High Noon At Mega Mountain (1998),"39 Steps, The (1935)",...,Yankee Zulu (1994),Year of the Horse (1997),You So Crazy (1994),Young Frankenstein (1974),Young Guns (1988),Young Guns II (1990),"Young Poisoner's Handbook, The (1995)",Zeus and Roxanne (1997),unknown,Á köldum klaka (Cold Fever) (1994)
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,,,2.0,5.0,,,3.0,4.0,,,...,,,,5.0,3.0,,,,4.0,
2,,,,,,,,,1.0,,...,,,,,,,,,,
3,,,,,2.0,,,,,,...,,,,,,,,,,
4,,,,,,,,,,,...,,,,,,,,,,
5,,,2.0,,,,,4.0,,,...,,,,4.0,,,,,4.0,


## Compute User Similarity (Cosine) :

In [6]:
# Unrated cells are replaced with 0 before computing similarities.
user_movie_matrix_filled = user_movie_matrix.fillna(0)

# Pairwise cosine similarity between user rows, labelled by user id on both axes.
user_similarity = cosine_similarity(user_movie_matrix_filled)
user_similarity_df = pd.DataFrame(
    data=user_similarity,
    index=user_movie_matrix.index,
    columns=user_movie_matrix.index,
)
user_similarity_df.head()

user_id,1,2,3,4,5,6,7,8,9,10,...,934,935,936,937,938,939,940,941,942,943
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,1.0,0.168937,0.048388,0.064561,0.37967,0.429682,0.443097,0.320079,0.078385,0.377733,...,0.372213,0.11986,0.26986,0.193343,0.197949,0.118722,0.315064,0.149086,0.181612,0.399432
2,0.168937,1.0,0.113393,0.179694,0.073623,0.242106,0.108604,0.104257,0.16247,0.161273,...,0.147095,0.310661,0.363328,0.410725,0.322713,0.231096,0.228793,0.162911,0.175273,0.106732
3,0.048388,0.113393,1.0,0.349781,0.021592,0.074018,0.067423,0.084419,0.062039,0.066217,...,0.033885,0.043453,0.16714,0.071288,0.126278,0.026758,0.164539,0.102899,0.136757,0.02699
4,0.064561,0.179694,0.349781,1.0,0.031804,0.068431,0.091507,0.18806,0.101284,0.060859,...,0.054615,0.036784,0.133619,0.196561,0.146058,0.030202,0.196858,0.152041,0.171538,0.058752
5,0.37967,0.073623,0.021592,0.031804,1.0,0.238636,0.374733,0.24893,0.056847,0.201427,...,0.340183,0.08058,0.095284,0.081053,0.148607,0.071612,0.239955,0.139595,0.153799,0.313941


## Generate Recommendations for a Given User :

In [7]:
def get_user_recommendations(target_user, n_recommendations=5, n_neighbors=20):
    """Recommend movies the target user has not rated, using similar users.

    Reads the notebook-level ``user_similarity_df`` (user x user similarity)
    and ``user_movie_matrix`` (user x title ratings, NaN where unrated).

    Scoring: for each title, the ratings of the selected neighbours are
    multiplied by their similarity to the target user, summed, and divided by
    the total similarity of all selected neighbours. Neighbours who did not
    rate a title contribute 0 for it, so titles rated by few neighbours score
    lower than a plain average of their ratings would.

    Parameters
    ----------
    target_user : label
        User id to recommend for.
    n_recommendations : int, default 5
        Maximum number of titles to return.
    n_neighbors : int, default 20
        Number of most-similar users (with similarity > 0) to use.

    Returns
    -------
    pandas.Series
        Scores indexed by title, sorted from highest to lowest. An empty
        float Series is returned (after printing a message) when the user is
        unknown, has no positively-similar users, or the similarity sum is 0.
    """
    # Unknown users yield an empty result instead of a KeyError.
    if (target_user not in user_similarity_df.columns
            or target_user not in user_movie_matrix.index):
        print(f"User {target_user} not found.")
        return pd.Series(dtype=float)

    sim_scores = user_similarity_df[target_user]

    # Keep positively-similar users only and exclude the target user;
    # errors='ignore' covers the case where the target's own entry was
    # already removed by the > 0 filter.
    similar_users = (
        sim_scores[sim_scores > 0]
        .drop(target_user, errors='ignore')
        .sort_values(ascending=False)
    )

    if similar_users.empty:
        print("No similar users found.")
        return pd.Series(dtype=float)

    top_users = similar_users.head(n_neighbors)

    # Unrated cells count as 0 in the weighted sum.
    similar_users_ratings = user_movie_matrix.loc[top_users.index].fillna(0)

    numerator = similar_users_ratings.T.dot(top_users)
    denominator = top_users.sum()

    if denominator == 0:
        print("No valid denominator (sum of similarities = 0).")
        return pd.Series(dtype=float)

    weighted_ratings = numerator / denominator

    # Never recommend a title the target user has already rated.
    seen_movies = user_movie_matrix.loc[target_user].dropna().index
    recommendations = weighted_ratings.drop(seen_movies, errors='ignore')

    return (
        recommendations
        .dropna()
        .sort_values(ascending=False)
        .head(n_recommendations)
    )

# Example run for a single user; the last expression displays the result.
user_id = 222
recommendations = get_user_recommendations(target_user=user_id, n_recommendations=5)
print(f'Top 5 recommendations for User {user_id}:')
recommendations

Top 5 recommendations for User 222:


title
Sound of Music, The (1965)    3.347768
Saint, The (1997)             2.986072
Clockwork Orange, A (1971)    2.908893
Sting, The (1973)             2.850563
In the Line of Fire (1993)    2.849206
dtype: float64

## Split Train/Test and Build on Training Data :

In [8]:
# Hold out 20% of the rating rows; the fixed random_state makes the split repeatable.
train, test = train_test_split(ratings, test_size=0.2, random_state=42)

# Rebuild the user x title matrix and the user similarities from training rows only.
train_matrix = pd.pivot_table(
    train,
    index='user_id',
    columns='title',
    values='rating',
)
train_matrix_filled = train_matrix.fillna(0)
train_similarity = cosine_similarity(train_matrix_filled)
train_similarity_df = pd.DataFrame(
    data=train_similarity,
    index=train_matrix.index,
    columns=train_matrix.index,
)

##  Define recommendation function using training data :

In [None]:
def recommend_from_train(user_id, top_k=5, n_neighbors=20):
    """Return up to ``top_k`` recommended titles for a user from training data.

    Reads the notebook-level ``train_matrix`` (user x title ratings, NaN where
    unrated) and ``train_similarity_df`` (user x user similarity).

    Each title's score is the similarity-weighted sum of the selected
    neighbours' ratings divided by the total similarity of those neighbours;
    a neighbour who did not rate a title contributes 0 for it.

    Parameters
    ----------
    user_id : label
        User to recommend for.
    top_k : int, default 5
        Maximum number of titles to return.
    n_neighbors : int, default 20
        Number of most-similar users (with similarity > 0) to use.

    Returns
    -------
    list
        Titles ordered from highest to lowest score, excluding titles the
        user rated in ``train_matrix``. Empty when the user is not in the
        training matrix, has no positively-similar users, or the similarity
        sum is 0.
    """
    if user_id not in train_matrix.index:
        return []

    sim_scores = train_similarity_df[user_id]
    # Positively-similar users other than the user itself, most similar first.
    similar_users = (
        sim_scores[sim_scores > 0]
        .drop(user_id, errors='ignore')
        .sort_values(ascending=False)
        .head(n_neighbors)
    )
    if similar_users.empty:
        return []

    # Unrated cells count as 0 in the weighted sum.
    similar_ratings = train_matrix.loc[similar_users.index].fillna(0)
    numerator = similar_ratings.T.dot(similar_users)
    denominator = similar_users.sum()
    if denominator == 0:
        return []

    predicted_ratings = numerator / denominator
    # Exclude titles the user already rated in the training data.
    seen_movies = train_matrix.loc[user_id].dropna().index
    recommendations = (
        predicted_ratings
        .drop(seen_movies, errors='ignore')
        .sort_values(ascending=False)
    )
    return list(recommendations.head(top_k).index)

## Evaluate Using Precision@K :

In [10]:
def precision_at_k(k=5, like_threshold=4):
    """Average Precision@k of ``recommend_from_train`` over users in ``test``.

    Reads the notebook-level ``test`` frame (columns ``user_id``, ``title``,
    ``rating``) and calls ``recommend_from_train(user, top_k=k)``.

    A test title counts as liked when its rating is >= ``like_threshold``.
    For each user, precision is the number of recommended titles that are
    liked in the test set, divided by ``k``. Users with no liked test titles,
    or for whom no recommendations are returned, are left out of the average.

    Parameters
    ----------
    k : int, default 5
        Number of recommendations requested per user; also the divisor.
    like_threshold : number, default 4
        Minimum test rating for a title to count as liked.

    Returns
    -------
    float
        Mean per-user precision, or 0.0 when no user could be evaluated.
    """
    # Collect liked titles per user in one pass rather than re-filtering the
    # whole test frame for every user.
    liked_rows = test[test['rating'] >= like_threshold]
    liked_by_user = {}
    for user, title in zip(liked_rows['user_id'], liked_rows['title']):
        liked_by_user.setdefault(user, set()).add(title)

    precisions = []
    for user in test['user_id'].unique():
        true_likes = liked_by_user.get(user)
        if not true_likes:
            continue

        recs = recommend_from_train(user, top_k=k)
        if not recs:
            continue

        correct = len(set(recs) & true_likes)
        precisions.append(correct / k)

    if not precisions:
        return 0.0
    return np.mean(precisions)
# Evaluate the train-based recommender against the held-out ratings.
k = 5
precision_score_k = precision_at_k(k=k)
print(f"Average Precision@{k}: {precision_score_k:.4f}")

Average Precision@5: 0.2800


## Item-Based Collaborative Filtering :

In [11]:
# Transposing puts titles on the rows, so the similarity is computed between
# movies instead of between users.
item_similarity = cosine_similarity(user_movie_matrix_filled.transpose())
item_similarity_df = pd.DataFrame(
    data=item_similarity,
    index=user_movie_matrix.columns,
    columns=user_movie_matrix.columns,
)
item_similarity_df.head()

title,'Til There Was You (1997),1-900 (1994),101 Dalmatians (1996),12 Angry Men (1957),187 (1997),2 Days in the Valley (1996),"20,000 Leagues Under the Sea (1954)",2001: A Space Odyssey (1968),3 Ninjas: High Noon At Mega Mountain (1998),"39 Steps, The (1935)",...,Yankee Zulu (1994),Year of the Horse (1997),You So Crazy (1994),Young Frankenstein (1974),Young Guns (1988),Young Guns II (1990),"Young Poisoner's Handbook, The (1995)",Zeus and Roxanne (1997),unknown,Á köldum klaka (Cold Fever) (1994)
title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
'Til There Was You (1997),1.0,0.0,0.024561,0.099561,0.185236,0.159265,0.0,0.052203,0.0,0.033326,...,0.0,0.0,0.0,0.027774,0.11884,0.142315,0.02907,0.0,0.110208,0.0
1-900 (1994),0.0,1.0,0.014139,0.009294,0.007354,0.004702,0.010055,0.067038,0.0,0.0,...,0.152499,0.015484,0.0,0.069284,0.018243,0.023408,0.006694,0.07964,0.042295,0.0
101 Dalmatians (1996),0.024561,0.014139,1.0,0.167006,0.061105,0.143878,0.203781,0.225803,0.027642,0.092337,...,0.0,0.021965,0.030905,0.274877,0.204267,0.101199,0.056976,0.172155,0.045714,0.0
12 Angry Men (1957),0.099561,0.009294,0.167006,1.0,0.056822,0.167235,0.304078,0.422506,0.072682,0.394854,...,0.060946,0.016502,0.0,0.40327,0.259436,0.145519,0.105226,0.038901,0.060101,0.081261
187 (1997),0.185236,0.007354,0.061105,0.056822,1.0,0.132327,0.042928,0.06506,0.043133,0.0273,...,0.0,0.141997,0.0,0.068257,0.067786,0.091293,0.09949,0.025184,0.142667,0.096449


## Matrix Factorization using SVD :

In [12]:
# Reduce each user's rating vector to 20 latent components.
# random_state is fixed because TruncatedSVD's default solver is randomized;
# without a seed the reduced matrix can differ between runs. The value matches
# the seed used for the train/test split.
svd = TruncatedSVD(n_components=20, random_state=42)
matrix_reduced = svd.fit_transform(user_movie_matrix_filled)
print('Original shape:', user_movie_matrix_filled.shape)
print('Reduced shape:', matrix_reduced.shape)

Original shape: (943, 1664)
Reduced shape: (943, 20)
