# Task 4: Recommendation System

Building a simple movie recommendation system using the MovieLens 100k dataset.

In [1]:

import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.metrics import mean_squared_error


## Load the MovieLens dataset

In [3]:

# Define column names
columns = ['user_id', 'item_id', 'rating', 'timestamp']

# Load ratings data
df = pd.read_csv('u.data', sep='\t', names=columns)

# Load movie titles
movie_titles = pd.read_csv('u.item', sep='|', encoding='latin-1', header=None, usecols=[0,1], names=['item_id', 'title'])

# Merge datasets
df = pd.merge(df, movie_titles, on='item_id')
df.head()


Unnamed: 0,user_id,item_id,rating,timestamp,title
0,196,242,3,881250949,Kolya (1996)
1,186,302,3,891717742,L.A. Confidential (1997)
2,22,377,1,878887116,Heavyweights (1994)
3,244,51,2,880606923,Legends of the Fall (1994)
4,166,346,1,886397596,Jackie Brown (1997)


## Create User-Item Matrix

In [4]:

user_movie_matrix = df.pivot_table(index='user_id', columns='title', values='rating')
user_movie_matrix.head()


title,'Til There Was You (1997),1-900 (1994),101 Dalmatians (1996),12 Angry Men (1957),187 (1997),2 Days in the Valley (1996),"20,000 Leagues Under the Sea (1954)",2001: A Space Odyssey (1968),3 Ninjas: High Noon At Mega Mountain (1998),"39 Steps, The (1935)",...,Yankee Zulu (1994),Year of the Horse (1997),You So Crazy (1994),Young Frankenstein (1974),Young Guns (1988),Young Guns II (1990),"Young Poisoner's Handbook, The (1995)",Zeus and Roxanne (1997),unknown,Á köldum klaka (Cold Fever) (1994)
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,,,2.0,5.0,,,3.0,4.0,,,...,,,,5.0,3.0,,,,4.0,
2,,,,,,,,,1.0,,...,,,,,,,,,,
3,,,,,2.0,,,,,,...,,,,,,,,,,
4,,,,,,,,,,,...,,,,,,,,,,
5,,,2.0,,,,,4.0,,,...,,,,4.0,,,,,4.0,


## Movie-Based Recommendations

In [5]:

movie_ratings = user_movie_matrix['Star Wars (1977)']
similar_movies = user_movie_matrix.corrwith(movie_ratings)
similar_movies = similar_movies.dropna().sort_values(ascending=False)
similar_movies.head(10)


  c /= stddev[:, None]
  c /= stddev[None, :]
  c = cov(x, y, rowvar, dtype=dtype)
  c *= np.true_divide(1, fact)
  c *= np.true_divide(1, fact)


Unnamed: 0_level_0,0
title,Unnamed: 1_level_1
Hollow Reed (1996),1.0
Stripes (1981),1.0
No Escape (1994),1.0
Man of the Year (1995),1.0
Cosi (1996),1.0
Commandments (1997),1.0
Golden Earrings (1947),1.0
"Scarlet Letter, The (1926)",1.0
Safe Passage (1994),1.0
"Beans of Egypt, Maine, The (1994)",1.0


## User Similarity with Cosine Similarity

In [6]:

user_similarity = cosine_similarity(user_movie_matrix.fillna(0))
user_similarity.shape


(943, 943)

## Predict Ratings and Evaluate with RMSE

In [8]:
# Fill missing ratings with 0
user_ratings_filled = user_movie_matrix.fillna(0).values

# Correct dot product: user similarity (943x943) * ratings (943x1664)
predictions = user_similarity.dot(user_ratings_filled) / user_similarity.sum(axis=1, keepdims=True)

# Evaluate using RMSE
mse = mean_squared_error(user_ratings_filled, predictions)
rmse = np.sqrt(mse)
print("RMSE:", rmse)


RMSE: 0.7980668307672447


### Summary
- Recommended similar movies to 'Star Wars (1977)'.
- Calculated user similarity using Cosine Similarity.
- Predicted ratings and evaluated using RMSE.