In [1]:
import numpy as np
import pandas as pd
from surprise import Dataset, SVD, SVDpp, NMF
from surprise.model_selection import cross_validate, GridSearchCV

# Built-in MovieLens 100k ratings, loaded through Surprise
data = Dataset.load_builtin(name='ml-100k')

# Peek at the raw rating tuples as a DataFrame to confirm their layout
df = pd.DataFrame(
    data=data.raw_ratings,
    columns=['user', 'item', 'rating', 'timestamp'],
)
print(df.head())

# Baseline: SVD constructed with no arguments, scored by 5-fold
# cross-validation on RMSE and MAE
svd = SVD()
svd_cv_results = cross_validate(
    algo=svd,
    data=data,
    measures=['RMSE', 'MAE'],
    cv=5,
    verbose=True,
)

# Hyper-parameter candidates for SVD: 3 * 2 * 2 * 2 = 24 combinations
param_grid = dict(
    n_factors=[50, 100, 150],
    n_epochs=[20, 30],
    lr_all=[0.002, 0.005],
    reg_all=[0.02, 0.1],
)

# Exhaustive search over the grid, each combination scored with 3-fold CV
gs = GridSearchCV(
    algo_class=SVD,
    param_grid=param_grid,
    measures=['rmse', 'mae'],
    cv=3,
)
gs.fit(data)

# Report the winning configuration as judged by RMSE
print("Best RMSE score:", gs.best_score['rmse'])
print("Best parameters:", gs.best_params['rmse'])

# SVD++ under the same 5-fold RMSE/MAE protocol used for plain SVD
svdpp = SVDpp()
svdpp_results = cross_validate(
    algo=svdpp,
    data=data,
    measures=['RMSE', 'MAE'],
    cv=5,
    verbose=True,
)

# NMF under the same 5-fold RMSE/MAE protocol
nmf = NMF()
nmf_results = cross_validate(
    algo=nmf,
    data=data,
    measures=['RMSE', 'MAE'],
    cv=5,
    verbose=True,
)

# Example: Custom collaborative filtering function
def collaborative_filtering(X, y, learning_rate=0.01, iterations=1000):
    """Fit a linear weight vector to ``(X, y)`` by batch gradient descent.

    Despite its name, this routine performs no user/item factorisation: it
    minimises the halved mean squared error
    ``(1 / (2 * m)) * sum((X @ theta - y) ** 2)`` over a single weight
    vector ``theta``, starting from a random normal initialisation.

    Args:
        X: Array-like of shape ``(m, n)`` with one row per sample.
        y: Array-like holding ``m`` target values. Row and column vectors
            are flattened to shape ``(m,)`` so they cannot broadcast
            against the predictions into an ``(m, m)`` matrix. A single
            value is applied to every sample.
        learning_rate: Step size applied to the gradient on each update.
        iterations: Number of gradient-descent updates to perform.

    Returns:
        numpy.ndarray of shape ``(n,)`` with the fitted weights.

    Raises:
        ValueError: If ``X`` is not two-dimensional, has no rows, or ``y``
            holds neither one value nor exactly ``m`` values.
    """
    features = np.asarray(X, dtype=float)
    if features.ndim != 2:
        raise ValueError(
            f"X must be 2-dimensional, got {features.ndim} dimension(s)"
        )
    m, n = features.shape
    if m == 0:
        raise ValueError("X must contain at least one sample")

    targets = np.asarray(y, dtype=float)
    if targets.size == m:
        # Flatten (m, 1) / (1, m) inputs; left as-is they would broadcast
        # against the (m,) predictions and corrupt theta's shape.
        targets = targets.reshape(m)
    elif targets.size == 1:
        # A lone value acts as a scalar target shared by every sample.
        targets = targets.reshape(())
    else:
        raise ValueError(
            f"y must hold {m} values to match the rows of X, "
            f"got {targets.size}"
        )

    # Drawn from the global NumPy RNG so np.random.seed() still controls it.
    theta = np.random.randn(n)

    for i in range(iterations):
        # Residual of the current hypothesis, shared by cost and gradient.
        residual = np.dot(features, theta) - targets

        # The cost is only reported, never used in the update, so it is
        # evaluated just on the iterations that print it.
        if i % 100 == 0:
            cost = (1 / (2 * m)) * np.sum(residual ** 2)
            print(f"Iteration {i}: Cost = {cost}")

        gradient = (1 / m) * np.dot(features.T, residual)
        theta = theta - learning_rate * gradient

    return theta




ModuleNotFoundError: No module named 'surprise'