In [1]:
from surprise import Dataset
from surprise.prediction_algorithms import knns
from surprise.model_selection import cross_validate, train_test_split, GridSearchCV

In [6]:
from collections import defaultdict

from surprise import Dataset, Reader
from surprise import KNNBasic, KNNBaseline, KNNWithMeans, KNNWithZScore
from surprise.model_selection import GridSearchCV
from surprise.model_selection import KFold
from surprise.model_selection import train_test_split

# Load the built-in MovieLens-100k ratings dataset.
data = Dataset.load_builtin("ml-100k")

# Hold out 30% of the ratings as a test set. The split is seeded so that a
# Restart & Run All reproduces the same partition.
trainset, testset = train_test_split(data, test_size=0.30, random_state=42)

# Algorithm classes to tune. The classes themselves (not instances) are
# listed because they are handed to GridSearchCV further down.
algos = [
    KNNBasic,
    KNNBaseline,
    KNNWithMeans,
    KNNWithZScore,
]


def _make_base_grid():
    """Return a fresh hyper-parameter grid shared by every KNN variant.

    A new nested structure is built on every call, so adjusting one
    algorithm's grid (including its nested ``sim_options`` or any of the
    value lists) can never leak into another algorithm's grid.
    """
    return {
        'k': [10, 20, 50, 100],
        'min_k': [2, 4, 6, 8],
        'sim_options': {
            'name': ['cosine', 'msd', 'pearson', 'pearson_baseline'],
            'min_support': [1, 5],
            'user_based': [True, False],
        },
        'verbose': [False],
    }


# Kept as a module-level name in case another cell inspects the shared grid.
base_params = _make_base_grid()

# One independent grid per algorithm class.
param_grids = defaultdict(dict)
for algo in algos:
    param_grids[algo] = _make_base_grid()

# KNNBaseline additionally tunes the baseline-estimate options.
# NOTE(review): per the Surprise baseline documentation, 'reg' appears to be
# read only by the SGD solver (ALS uses 'reg_i' / 'reg_u'), so the two 'reg'
# values probably produce identical ALS runs - confirm and adjust the grid.
param_grids[KNNBaseline]['bsl_options'] = {
    'method': ['als', 'sgd'],
    'reg': [1, 2],
}

results = []

# Seeded 5-fold splitter: every algorithm and every parameter combination is
# scored on the same folds, and a re-run reproduces the same numbers.
cv_folds = KFold(n_splits=5, random_state=42, shuffle=True)

# Grid-search each algorithm and keep its best cross-validated scores.
for algo in algos:
    print(f"\nRunning grid search for {algo.__name__}")

    gs = GridSearchCV(algo,
                      param_grids[algo],
                      measures=['rmse', 'mae'],
                      cv=cv_folds,
                      n_jobs=-1)  # the grid is large; use all available cores

    gs.fit(data)

    # Best mean score per measure. Each measure is optimised independently,
    # so best_mae may come from a different parameter combination than the
    # RMSE-optimal one reported as best_params below.
    best_rmse = gs.best_score['rmse']
    best_mae = gs.best_score['mae']

    # Parameter combination that achieved the best RMSE.
    best_params = gs.best_params['rmse']

    results.append({
        'algorithm': algo.__name__,
        'best_rmse': best_rmse,
        'best_mae': best_mae,
        'best_params': best_params
    })

    print(f"Best RMSE: {best_rmse}")
    print(f"Best MAE: {best_mae}")
    print(f"Best parameters: {best_params}")

# Compact summary of the best scores of every algorithm.
print("\nFinal Summary:")
for result in results:
    print(f"\nAlgorithm: {result['algorithm']}")
    print(f"Best RMSE: {result['best_rmse']:.4f}")
    print(f"Best MAE: {result['best_mae']:.4f}")


Running grid search for KNNBasic
Best RMSE: 0.9716130125245481
Best MAE: 0.7659787197420254
Best parameters: {'k': 20, 'min_k': 2, 'sim_options': {'name': 'msd', 'min_support': 5, 'user_based': True}, 'verbose': False}

Running grid search for KNNBaseline
Best RMSE: 0.9146800892000091
Best MAE: 0.7185465589848159
Best parameters: {'k': 50, 'min_k': 8, 'sim_options': {'name': 'pearson_baseline', 'min_support': 1, 'user_based': False}, 'verbose': False, 'bsl_options': {'method': 'als', 'reg': 1}}

Running grid search for KNNWithMeans
Best RMSE: 0.9215293349039587
Best MAE: 0.7204320399171453
Best parameters: {'k': 50, 'min_k': 4, 'sim_options': {'name': 'pearson_baseline', 'min_support': 1, 'user_based': False}, 'verbose': False}

Running grid search for KNNWithZScore
Best RMSE: 0.9231723365795714
Best MAE: 0.7216026031774573
Best parameters: {'k': 50, 'min_k': 4, 'sim_options': {'name': 'pearson_baseline', 'min_support': 1, 'user_based': False}, 'verbose': False}

Final Summary:

Algor

In [2]:
from surprise import AlgoBase
from surprise import Dataset
from surprise.model_selection import train_test_split
from surprise import accuracy
import numpy as np
from collections import defaultdict

class ItemAveragePredictor(AlgoBase):
    """Non-personalised baseline that predicts an item's mean training rating.

    The prediction ignores the user entirely. Items with no rating in the
    training set fall back to that training set's global mean rating.
    """

    def __init__(self):
        AlgoBase.__init__(self)
        # item id (as yielded by trainset.all_ratings()) -> mean rating.
        # Rebuilt from scratch on every call to fit().
        self.item_means = {}

    def fit(self, trainset):
        """Compute the per-item mean ratings of ``trainset``.

        Args:
            trainset: Training set exposing ``all_ratings()`` (yielding
                ``(user id, item id, rating)`` tuples) and ``global_mean``.

        Returns:
            ``self``, so the call can be chained.
        """
        AlgoBase.fit(self, trainset)

        # Group the ratings by item.
        item_ratings = defaultdict(list)
        for _uid, iid, rating in trainset.all_ratings():
            item_ratings[iid].append(rating)

        # Assign a brand-new dict instead of updating the old one: when the
        # same instance is fitted again (e.g. once per cross-validation
        # fold), means learned from the previous trainset must not survive.
        self.item_means = {
            iid: np.mean(ratings) for iid, ratings in item_ratings.items()
        }

        # Fallback prediction for items without any training rating.
        self.global_mean = trainset.global_mean

        return self

    def estimate(self, u, i):
        """Return the mean rating of item ``i``; the user ``u`` is ignored.

        Falls back to the global mean when ``i`` has no stored mean.
        """
        return self.item_means.get(i, self.global_mean)

# Load the same dataset and hold out 30% of the ratings. The split is seeded
# so that re-running the cell reproduces the reported numbers.
data = Dataset.load_builtin("ml-100k")
trainset, testset = train_test_split(data, test_size=0.30, random_state=42)

# Train and evaluate the naive predictor.
naive_algo = ItemAveragePredictor()
naive_algo.fit(trainset)
predictions = naive_algo.test(testset)

# verbose=False: the accuracy helpers print the metric themselves by default,
# which duplicated the formatted report printed below.
naive_rmse = accuracy.rmse(predictions, verbose=False)
naive_mae = accuracy.mae(predictions, verbose=False)

print("\nNaive Item Average Predictor Results:")
print(f"RMSE: {naive_rmse:.4f}")
print(f"MAE: {naive_mae:.4f}")

RMSE: 1.0225
MAE:  0.8167

Naive Item Average Predictor Results:
RMSE: 1.0225
MAE: 0.8167


### **Comparison of Algorithm Performance**

#### **1. Naive Item Average Predictor**
- **RMSE**: 1.0225  
- **MAE**: 0.8167  
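The Naive Item Average Predictor predicts, for every user, the item's mean rating in the training set (falling back to the global mean for items without training ratings), which results in the highest error rates among all the tested algorithms. Its simplicity and lack of personalization make it less effective for capturing user-item preferences. Note that these figures come from a single 70/30 train/test split, whereas the KNN figures below are 5-fold cross-validation means, so the comparison is indicative rather than exact.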

---

#### **2. KNN Algorithms**
**a. KNNBasic**  
- **Best RMSE**: 0.9716  
- **Best MAE**: 0.7660  
KNNBasic, which uses a simple k-nearest neighbors approach, demonstrates a significant improvement over the Naive Item Average Predictor. It provides a better understanding of user or item similarity but does not incorporate advanced baseline adjustments.

**b. KNNBaseline**  
- **Best RMSE**: 0.9147  
- **Best MAE**: 0.7185  
KNNBaseline achieves the best performance among the tested algorithms. By incorporating baseline estimates (e.g., average ratings adjusted for biases), it attains the lowest RMSE and the lowest MAE of all tested models.

**c. KNNWithMeans**  
- **Best RMSE**: 0.9215  
- **Best MAE**: 0.7204  
KNNWithMeans improves upon KNNBasic by considering the mean ratings of users or items. While slightly less accurate than KNNBaseline, it still outperforms the Naive Predictor.

**d. KNNWithZScore**  
- **Best RMSE**: 0.9232  
- **Best MAE**: 0.7216  
KNNWithZScore introduces Z-score normalization, but its performance is comparable to KNNWithMeans and slightly less accurate than KNNBaseline.

---

### **Summary**
- **KNNBaseline** is the top-performing algorithm, achieving the lowest RMSE (0.9147) and MAE (0.7185). Its use of baseline adjustments, combined with item-based `pearson_baseline` similarity in the best configuration found, proves highly effective.
- The Naive Item Average Predictor has the highest error rates, highlighting the need for personalized algorithms in recommendation systems.
- Among the KNN-based models, KNNBasic provides a good baseline, while KNNBaseline's enhancements make it the best overall choice for minimizing prediction errors.
