# Install necessary packages

In [None]:
!pip install pandas scikit-learn



# Import libraries

In [None]:
import numpy as np
import pandas as pd
from sklearn.decomposition import NMF
from sklearn.metrics import mean_squared_error, mean_absolute_error

# Load the dataset
### 1. Download the MovieLens dataset from https://files.grouplens.org/datasets/movielens/ml-latest-small.zip
### 2. Unzip it and use the 'ratings.csv' file.

In [None]:
import requests
import zipfile
from pathlib import Path

url = "https://files.grouplens.org/datasets/movielens/ml-latest-small.zip"
filename = "ml-latest-small.zip"
ratings_path = Path("ml-latest-small") / "ratings.csv"

# Download and extract only when the ratings file is missing, so that
# re-running the notebook does not hit the network again.
if not ratings_path.exists():
    # Use a timeout and an explicit status check: without them a hung
    # connection blocks forever, and an HTTP error page would be written to
    # disk and only fail later (and confusingly) inside ZipFile.
    response = requests.get(url, timeout=60)
    response.raise_for_status()
    with open(filename, "wb") as f:
        f.write(response.content)

    # Unzip the archive into the current working directory
    with zipfile.ZipFile(filename, "r") as zip_ref:
        zip_ref.extractall()

# Read the ratings table and display it as the cell output
df = pd.read_csv(ratings_path)
df

Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224
3,1,47,5.0,964983815
4,1,50,5.0,964982931
...,...,...,...,...
100831,610,166534,4.0,1493848402
100832,610,168248,5.0,1493850091
100833,610,168250,5.0,1494273047
100834,610,168252,5.0,1493846352


# Prepare the utility matrix (User-Item matrix)

In [None]:
# Reshape the long ratings table into a wide user x movie grid:
# one row per userId, one column per movieId, each cell holding the rating.
# User/movie pairs without a rating come out as NaN and are replaced by 0.
utility_matrix = (
    df
    .pivot(index='userId', columns='movieId', values='rating')
    .fillna(0)
)
print("Utility Matrix Shape:", utility_matrix.shape)

# Plain NumPy view of the same grid, used as input to the factorization
R = utility_matrix.to_numpy()

Utility Matrix Shape: (610, 9724)


# Apply Nonnegative Matrix Factorization

In [None]:
# Rank of the factorization, i.e. how many latent features describe each
# user and each movie
num_features = 50

# Configure NMF with random initialisation; the fixed random_state keeps the
# result repeatable across runs
nmf_model = NMF(
    n_components=num_features,
    init='random',
    random_state=0,
    max_iter=1000,
)

# Fit on the utility matrix: P holds the per-user factors returned by the fit,
# Q holds the per-movie factors stored on the fitted model
P = nmf_model.fit_transform(R)
Q = nmf_model.components_

# Reconstruct the user-item matrix with reduced dimensions

In [None]:
# Multiply the user factors by the movie factors to obtain a predicted score
# for every (user, movie) pair
R_pred = P @ Q

# Re-attach the userId / movieId labels of the utility matrix to the predictions
R_pred_df = pd.DataFrame(
    R_pred,
    index=utility_matrix.index,
    columns=utility_matrix.columns,
)
R_pred_df

movieId,1,2,3,4,5,6,7,8,9,10,...,193565,193567,193571,193573,193579,193581,193583,193585,193587,193609
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,1.910195,1.386239,0.538029,0.049864,0.253092,2.387703,0.335278,0.009879,0.115873,1.868793,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
2,0.364337,0.148400,0.000751,0.000144,0.032169,0.000160,0.002358,0.000443,0.000000,0.002928,...,0.004392,0.003765,0.005020,0.005020,0.004392,0.005020,0.004392,0.004392,0.004392,0.019362
3,0.055454,0.074580,0.052222,0.000000,0.000000,0.085949,0.019011,0.000000,0.000844,0.078274,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000040
4,2.198687,0.293097,0.139636,0.045016,0.119628,0.422201,0.168617,0.007692,0.020018,0.230533,...,0.000228,0.000196,0.000261,0.000261,0.000228,0.000261,0.000228,0.000228,0.000228,0.000000
5,0.827382,0.687950,0.071266,0.199087,0.157268,0.749098,0.365178,0.034863,0.000000,1.184856,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
606,2.438785,0.003061,0.000000,0.000000,0.000015,0.000499,2.404292,0.000000,0.000000,0.025817,...,0.000371,0.000318,0.000424,0.000424,0.000371,0.000424,0.000371,0.000371,0.000371,0.000030
607,1.848579,1.015705,0.298033,0.067483,0.142383,1.522778,0.226807,0.019514,0.086274,1.857530,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.002644
608,2.517007,1.772768,1.182376,0.066093,0.119229,3.418894,0.350277,0.049286,0.004386,3.821133,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000450
609,0.787279,0.591491,0.026547,0.000000,0.055343,0.252775,0.039572,0.000711,0.024493,1.307020,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000746


# Evaluate accuracy using MSE, RMSE, and MAE

In [None]:
# Score only the cells that hold an actual rating: zeros in R are the
# fill value for "not rated", so they are excluded from every metric below.
non_zero_indices = np.nonzero(R)
R_actual = R[non_zero_indices]
R_predicted = R_pred[non_zero_indices]

# Mean squared error over the rated cells
mse = mean_squared_error(y_true=R_actual, y_pred=R_predicted)
print(f"Mean Squared Error (MSE) of the predicted ratings: {mse}")

# Root of the MSE, expressed in the same units as the ratings
rmse = np.sqrt(mse)
print(f"Root Mean Squared Error (RMSE) of the predicted ratings: {rmse}")

# Mean absolute error over the rated cells
mae = mean_absolute_error(y_true=R_actual, y_pred=R_predicted)
print(f"Mean Absolute Error (MAE) of the predicted ratings: {mae}")

Mean Squared Error (MSE) of the predicted ratings: 4.6695146078414576
Root Mean Squared Error (RMSE) of the predicted ratings: 2.1609059692271337
Mean Absolute Error (MAE) of the predicted ratings: 1.618334908708077


# Recommend movies for a specific user

In [None]:
user_id = 1  # Example user ID

# Predicted scores for this user, best first
user_ratings = R_pred_df.loc[user_id].sort_values(ascending=False)

# movieIds this user has rated in the original ratings table
already_rated = df.loc[df['userId'] == user_id, 'movieId'].to_numpy()

# Remove the rated movies and keep the five highest remaining predictions
recommendations = user_ratings.drop(labels=already_rated).iloc[:5]
print("Top 5 movie recommendations for User {}: ".format(user_id))
print(recommendations)

Top 5 movie recommendations for User 1: 
movieId
1200    3.838756
1374    3.592448
589     3.385714
858     3.274947
924     3.251916
Name: 1, dtype: float64
