In [1]:
!pip install numpy pandas scipy scikit-learn




In [2]:
import numpy as np  
import pandas as pd  
from scipy.sparse.linalg import svds  
from sklearn.metrics import mean_squared_error  


In [3]:
# Load the dataset
column_names = ['user_id', 'item_id', 'rating', 'timestamp']
ratings_df = pd.read_csv('C:/Users/pbpat/OneDrive/Documents/Machine Learning/task4/u.data.zip', sep='\t', names=column_names)

# Display the first 5 rows
ratings_df.head()


Unnamed: 0,user_id,item_id,rating,timestamp
0,196,242,3,881250949
1,186,302,3,891717742
2,22,377,1,878887116
3,244,51,2,880606923
4,166,346,1,886397596


In [4]:
# Drop the timestamp column
ratings_df = ratings_df.drop(columns=['timestamp'])

# Display the first 5 rows again
ratings_df.head()


Unnamed: 0,user_id,item_id,rating
0,196,242,3
1,186,302,3
2,22,377,1
3,244,51,2
4,166,346,1


In [5]:
# Create a User-Item matrix
user_item_matrix = ratings_df.pivot(index='user_id', columns='item_id', values='rating')

# Fill missing values (unrated movies) with 0
user_item_matrix = user_item_matrix.fillna(0)

# Display first 5 rows
user_item_matrix.head()


item_id,1,2,3,4,5,6,7,8,9,10,...,1673,1674,1675,1676,1677,1678,1679,1680,1681,1682
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,5.0,3.0,4.0,3.0,3.0,5.0,4.0,1.0,5.0,3.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,4.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [6]:
from sklearn.decomposition import TruncatedSVD

# Define the number of features (latent factors)
n_components = 20  # You can adjust this number

# Initialize Truncated SVD
svd = TruncatedSVD(n_components=n_components)

# Fit and transform the User-Item matrix
user_matrix = svd.fit_transform(user_item_matrix)

# Display shape of transformed matrix
user_matrix.shape


(943, 20)

In [7]:
from sklearn.metrics.pairwise import cosine_similarity

# Compute cosine similarity between users
user_similarities = cosine_similarity(user_matrix)

# Display similarity matrix shape
user_similarities.shape


(943, 943)

In [8]:
import numpy as np

def recommend_movies(user_id, user_similarities, user_item_matrix, num_recommendations=5):
    # Get similarity scores for the given user
    similar_users = np.argsort(user_similarities[user_id - 1])[::-1]  # Sorting users by similarity (descending)

    # Find movies the target user has already rated
    watched_movies = set(user_item_matrix.columns[user_item_matrix.iloc[user_id - 1] > 0])

    # Store movie scores
    movie_scores = {}

    # Go through similar users and get their highest-rated movies
    for similar_user in similar_users[1:]:  # Skip the user itself
        # Get highly-rated movies by this similar user
        high_rated_movies = user_item_matrix.iloc[similar_user][user_item_matrix.iloc[similar_user] > 4].index

        for movie in high_rated_movies:
            if movie not in watched_movies:  # Recommend only unseen movies
                if movie in movie_scores:
                    movie_scores[movie] += user_similarities[user_id - 1][similar_user]  # Weight by similarity
                else:
                    movie_scores[movie] = user_similarities[user_id - 1][similar_user]

    # Sort movies by score and return the top recommendations
    recommended_movies = sorted(movie_scores, key=movie_scores.get, reverse=True)[:num_recommendations]
    
    return recommended_movies


In [11]:
user_id = 1  # Change this to any user ID
recommended_movies = recommend_movies(user_id, user_similarities, user_item_matrix)

print(f"Top recommendations for User {user_id}: {recommended_movies}")


Top recommendations for User 1: [318, 483, 357, 496, 313]


In [14]:
# Load movie titles dataset
movies_df = pd.read_csv('C:/Users/pbpat/OneDrive/Documents/Machine Learning/task4/u.item', sep='|', encoding='latin-1', header=None, usecols=[0, 1], names=['item_id', 'title'])

# Display the first few rows
movies_df.head()


Unnamed: 0,item_id,title
0,1,Toy Story (1995)
1,2,GoldenEye (1995)
2,3,Four Rooms (1995)
3,4,Get Shorty (1995)
4,5,Copycat (1995)


In [15]:
# Merge recommendations with movie titles
recommended_movies = movies_df[movies_df['item_id'].isin([318, 483, 357, 496, 313])]

# Display the recommended movie titles
print(recommended_movies)


     item_id                                   title
312      313                          Titanic (1997)
317      318                 Schindler's List (1993)
356      357  One Flew Over the Cuckoo's Nest (1975)
482      483                       Casablanca (1942)
495      496            It's a Wonderful Life (1946)
