In [None]:
# Install the third-party packages this notebook relies on.
# NOTE(review): scikit-surprise is not imported by any cell visible here — confirm it is still needed.
%pip install pandas numpy scikit-surprise fastai

In [None]:
import zipfile
import urllib.request
import os

# URL for the MovieLens 100K dataset (fetched over HTTPS rather than plain HTTP)
url = 'https://files.grouplens.org/datasets/movielens/ml-100k.zip'

# Download the archive only when it is not already on disk, so that
# re-running the notebook does not hit the network every time.
dataset_path = 'ml-100k.zip'
if not os.path.exists(dataset_path):
    # Download to a temporary name first and rename afterwards: an interrupted
    # transfer must not leave a truncated file that later runs mistake for a
    # complete archive.
    partial_path = dataset_path + '.part'
    urllib.request.urlretrieve(url, partial_path)
    os.replace(partial_path, dataset_path)

# Extract the zip file into the current working directory
with zipfile.ZipFile(dataset_path, 'r') as zip_ref:
    zip_ref.extractall('.')

# Check the extracted files (the archive is expected to unpack into ./ml-100k/)
os.listdir('./ml-100k/')

In [None]:
import pandas as pd

# Read the raw ratings: a tab-separated file without a header row, so the
# column names are supplied explicitly.
ratings_path = './ml-100k/u.data'
ratings = pd.read_csv(
    ratings_path,
    delimiter='\t',
    names=['user', 'movie', 'rating', 'timestamp'],
    header=None,
)

# Preview the first rows of the ratings table
ratings.head()

In [None]:
import pandas as pd
import numpy as np

# Build the user-item matrix: one row per user, one column per movie, each
# cell holding that user's rating of that movie. Pairs with no rating become
# NaN; pivot_table's default aggregation (mean) would average any duplicates.
user_movie_matrix = ratings.pivot_table(
    values='rating',
    index='user',
    columns='movie',
)
user_movie_matrix

In [None]:
# Impute every missing rating with the mean rating of its movie (column mean).
# fillna returns a new frame rather than modifying in place, so the result is
# bound to `user_movie_matrix_filled`, the name the SVD step reads from.
user_movie_matrix_filled = user_movie_matrix.fillna(user_movie_matrix.mean())
user_movie_matrix_filled.head()

In [None]:
# Thin SVD of the imputed user-item matrix: `sigma` comes back as a 1-D array
# of singular values, and full_matrices=False keeps U and Vt at reduced size.
U, sigma, Vt = np.linalg.svd(
    user_movie_matrix_filled,
    full_matrices=False,
)


In [None]:
# Rank of the truncated decomposition (number of latent factors to keep);
# 20 is an illustrative choice, not a tuned value.
num_features = 20

# Keep only the leading `num_features` components of each factor.
U_reduced = U[:, :num_features]
sigma_reduced = np.diag(sigma[:num_features])  # 1-D singular values -> square diagonal matrix
Vt_reduced = Vt[:num_features]

In [None]:
# Low-rank reconstruction of the rating matrix: (U_k · Σ_k) · Vt_k
approx_ratings = U_reduced.dot(sigma_reduced).dot(Vt_reduced)


In [None]:
# Read movie metadata: a pipe-separated, latin-1 encoded file without a header.
# Only the first two columns are kept and named movie_id / title.
movie_titles_path = './ml-100k/u.item'
movie_titles = pd.read_csv(
    movie_titles_path,
    delimiter='|',
    encoding='latin-1',
    names=['movie_id', 'title'],
    usecols=[0, 1],
    header=None,
)

# Preview the first rows
movie_titles.head()


In [None]:
# Lookup table from movie id to title
movie_id_to_title = movie_titles.set_index('movie_id')['title'].to_dict()


In [None]:
def recommend_movies(user_id, num_recommendations, ratings_matrix=None):
    """Return the column positions of the highest-scored movies for a user.

    Parameters
    ----------
    user_id : int
        1-based user identifier; row ``user_id - 1`` of the score matrix is used.
    num_recommendations : int
        How many top-scoring positions to return.
    ratings_matrix : array-like, optional
        2-D score matrix (rows = users, columns = movies). Defaults to the
        notebook-level ``approx_ratings`` reconstruction.

    Returns
    -------
    numpy.ndarray
        Column positions (not movie ids) sorted by descending score, truncated
        to ``num_recommendations`` entries.

    Raises
    ------
    IndexError
        If ``user_id`` does not fall within ``1..number_of_rows``.

    Notes
    -----
    Movies the user has already rated are not filtered out of the result.
    """
    if ratings_matrix is None:
        ratings_matrix = approx_ratings
    ratings_matrix = np.asarray(ratings_matrix)

    # Reject ids below 1 explicitly: `user_id - 1` would otherwise become a
    # zero-or-negative index and silently return another user's row.
    num_users = ratings_matrix.shape[0]
    if not 1 <= user_id <= num_users:
        raise IndexError(
            f"user_id {user_id} is out of range; expected a value in 1..{num_users}"
        )

    user_ratings = ratings_matrix[user_id - 1]
    # Negate so that the ascending argsort yields highest scores first.
    sorted_movie_indexes = np.argsort(-user_ratings)
    return sorted_movie_indexes[:num_recommendations]

# Example: recommend 5 movies for the user with ID 2
recommended_movies = recommend_movies(2, 5)
recommended_movies

In [None]:
# `recommended_movies` holds column positions within user_movie_matrix;
# index into the column labels to recover the actual movie ids.
recommended_movie_ids = list(user_movie_matrix.columns[recommended_movies])

# Look up the title for each recommended movie id
recommended_movie_titles = [movie_id_to_title[mid] for mid in recommended_movie_ids]

recommended_movie_titles

In [None]:
from fastai.collab import *
from fastai.tabular.all import *

In [None]:
# Fetch the MovieLens 100K files through fastai's dataset helper
path = untar_data(URLs.ML_100k)

# Ratings: tab-separated, no header row.
# NOTE: this rebinds `ratings`, replacing the frame loaded earlier in the notebook.
ratings = pd.read_csv(
    path/'u.data',
    sep='\t',
    names=['user', 'movie', 'rating', 'timestamp'],
    header=None,
)

# Movie metadata: pipe-separated, latin-1 encoded; keep only id and title
movies = pd.read_csv(
    path/'u.item',
    sep='|',
    encoding='latin-1',
    names=['movie', 'title'],
    usecols=[0, 1],
    header=None,
)

# Attach the title to every rating row ('movie' is the only shared column)
ratings = ratings.merge(movies, on='movie')

# Build the collaborative-filtering DataLoaders, using the title as the item
dls = CollabDataLoaders.from_df(ratings, bs=64, item_name='title')

In [None]:
# Collaborative-filtering learner with 50 latent factors; y_range is set
# slightly wider than the rating scale.
learn = collab_learner(dls, y_range=(0, 5.5), n_factors=50)

# Train for 5 epochs with the one-cycle schedule, peak learning rate 5e-3
learn.fit_one_cycle(5, lr_max=5e-3)

In [None]:
# Learned per-item bias, squeezed to drop singleton dimensions
movie_bias = learn.model.i_bias.weight.squeeze()

# Positions of the five largest biases, highest first
idxs = movie_bias.argsort(descending=True)[:5]

# Map those positions back to titles through the DataLoaders' vocabulary
[dls.classes['title'][pos] for pos in idxs]