# Install necessary packages

In [None]:
!pip install pandas



# Import libraries

In [None]:
import numpy as np
import pandas as pd
from numpy.linalg import svd
from sklearn.metrics import mean_squared_error, mean_absolute_error

# Load the dataset
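### 1. Download the MovieLens "ml-latest-small" dataset from https://files.grouplens.org/datasets/movielens/ml-latest-small.zip (the cell below does this automatically).
### 2. Unzip it and load the 'ratings.csv' file from the extracted 'ml-latest-small' folder.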

In [None]:
import requests
import zipfile
from pathlib import Path

url = "https://files.grouplens.org/datasets/movielens/ml-latest-small.zip"
filename = "ml-latest-small.zip"
ratings_path = Path("ml-latest-small") / "ratings.csv"

# Only touch the network when the extracted ratings file is missing, so that
# re-running the notebook does not download the archive again every time.
if not ratings_path.exists():
    # Download the file. The timeout stops the cell from hanging forever, and
    # raise_for_status() fails right here on an HTTP error instead of saving an
    # error page as a ".zip" that only breaks later in ZipFile.
    response = requests.get(url, timeout=60)
    response.raise_for_status()
    with open(filename, "wb") as f:
        f.write(response.content)

    # Unzip the file into the current working directory
    with zipfile.ZipFile(filename, "r") as zip_ref:
        zip_ref.extractall()

# Now read the CSV file (columns: userId, movieId, rating, timestamp)
df = pd.read_csv(ratings_path)
df

Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224
3,1,47,5.0,964983815
4,1,50,5.0,964982931
...,...,...,...,...
100831,610,166534,4.0,1493848402
100832,610,168248,5.0,1493850091
100833,610,168250,5.0,1494273047
100834,610,168252,5.0,1493846352


# Prepare the utility matrix (User-Item matrix)

In [None]:
# Reshape the long ratings table into a wide one: one row per userId,
# one column per movieId, each cell holding that user's rating.
ratings_wide = df.pivot(index='userId', columns='movieId', values='rating')

# User/movie pairs without a rating come out of the pivot as NaN; store them as 0.
utility_matrix = ratings_wide.fillna(0)
print("Utility Matrix Shape:", utility_matrix.shape)

# Plain numpy view of the same values for the linear-algebra steps
R = utility_matrix.to_numpy()

Utility Matrix Shape: (610, 9724)


# Apply Singular Value Decomposition

In [None]:
# Reduced SVD of the user-item matrix: R = U @ diag(singular_values) @ Vt
U, singular_values, Vt = svd(R, full_matrices=False)

# svd returns the singular values as a 1-D vector; expand them into a square
# diagonal matrix so that sigma can be sliced and multiplied as a 2-D array.
sigma = np.diag(singular_values)

# Reconstruct the user-item matrix with reduced dimensions (using k latent features)

In [None]:
# Number of latent features kept in the approximation (try other values of k)
k = 100

# Truncate each factor to its leading k components, then multiply them back
# together in the same left-to-right order: (U_k @ sigma_k) @ Vt_k.
U_k = U[:, :k]
sigma_k = sigma[:k, :k]
Vt_k = Vt[:k, :]
R_pred = (U_k @ sigma_k) @ Vt_k

# Re-attach the userId / movieId labels of the utility matrix
R_pred_df = pd.DataFrame(
    R_pred,
    index=utility_matrix.index,
    columns=utility_matrix.columns,
)

# Evaluate reconstruction error on the observed ratings (MSE, RMSE, MAE)

In [None]:
# Restrict the comparison to the non-zero cells of R, i.e. the actual ratings.
# NOTE(review): R_pred appears to be reconstructed from this same R, so these
# numbers would be in-sample reconstruction error, not held-out accuracy.
non_zero_indices = np.nonzero(R)
R_actual, R_predicted = R[non_zero_indices], R_pred[non_zero_indices]

# Compute the three error metrics up front ...
mse = mean_squared_error(R_actual, R_predicted)
rmse = np.sqrt(mse)  # RMSE is the square root of the MSE
mae = mean_absolute_error(R_actual, R_predicted)

# ... then report them in the order MSE, RMSE, MAE
print(f"Mean Squared Error (MSE) of the predicted ratings: {mse}")
print(f"Root Mean Squared Error (RMSE) of the predicted ratings: {rmse}")
print(f"Mean Absolute Error (MAE) of the predicted ratings: {mae}")

Mean Squared Error (MSE) of the predicted ratings: 2.5032493290526685
Root Mean Squared Error (RMSE) of the predicted ratings: 1.582166024490688
Mean Absolute Error (MAE) of the predicted ratings: 1.0778029549053902


# Recommend movies for a specific user

In [None]:
user_id = 1  # Example user ID

# Predicted scores for this user across all movies, highest first
user_ratings = R_pred_df.loc[user_id].sort_values(ascending=False)

# movieIds this user has a rating for in the raw ratings table
rated_by_user = df['userId'] == user_id
already_rated = df.loc[rated_by_user, 'movieId'].values

# Remove the movies already rated and keep the five best-scoring ones left
unseen_ratings = user_ratings.drop(labels=already_rated)
recommendations = unseen_ratings.iloc[:5]

header = "Top 5 movie recommendations for User {}: ".format(user_id)
print(header)
print(recommendations)

Top 5 movie recommendations for User 1: 
movieId
4571    2.044922
1639    2.018153
750     2.009929
454     1.995224
1200    1.972056
Name: 1, dtype: float64
