In [1]:


from rec_engine.Algorithms.ContentBased.MovieLens import MovieLens
from rec_engine.Algorithms.ContentBased.ContentKNNAlgorithm import ContentKNNAlgorithm
from rec_engine.Algorithms.ContentBased.Evaluator import Evaluator
from rec_engine.Algorithms.Hybrid.HybridAlgorithm import HybridAlgorithm
from surprise import NormalPredictor
from surprise import SVD, SVDpp
from surprise import KNNBasic
from surprise.model_selection import GridSearchCV
import random
import numpy as np
import pandas as pd

Current file path (os.path): d:\ITI\Rec_Sys_Intake_45\project_descrption\project\rec_engine\Algorithms\ContentBased\MovieLens.py


# Load the Data

In [2]:
def LoadMovieLensData():
    ml = MovieLens()
    print("Loading movie ratings...")
    data = ml.loadMovieLensLatestSmall()
    print("\nComputing movie popularity ranks so we can measure novelty later...")
    rankings = ml.getPopularityRanks()
    return (ml, data, rankings)

# Using ContentKNNAlgorithm

In [4]:
np.random.seed(0)
random.seed(0)

# Load up common data set for the recommender algorithms
(ml, evaluationData, rankings) = LoadMovieLensData()

# Construct an Evaluator to, you know, evaluate them
evaluator = Evaluator(evaluationData, rankings)

contentKNN = ContentKNNAlgorithm()
evaluator.AddAlgorithm(contentKNN, "ContentKNN")

# Just make random recommendations
Random = NormalPredictor()
evaluator.AddAlgorithm(Random, "Random")

evaluator.Evaluate(False)

evaluator.SampleTopNRecs(ml)

Loading movie ratings...

Computing movie popularity ranks so we can measure novelty later...
Estimating biases using als...
Computing the cosine similarity matrix...
Done computing similarity matrix.
Evaluating  ContentKNN ...
Evaluating accuracy...
Computing content-based similarity matrix...
0  of  8775
100  of  8775
200  of  8775
300  of  8775
400  of  8775
500  of  8775
600  of  8775
700  of  8775
800  of  8775
900  of  8775
1000  of  8775
1100  of  8775
1200  of  8775
1300  of  8775
1400  of  8775
1500  of  8775
1600  of  8775
1700  of  8775
1800  of  8775
1900  of  8775
2000  of  8775
2100  of  8775
2200  of  8775
2300  of  8775
2400  of  8775
2500  of  8775
2600  of  8775
2700  of  8775
2800  of  8775
2900  of  8775
3000  of  8775
3100  of  8775
3200  of  8775
3300  of  8775
3400  of  8775
3500  of  8775
3600  of  8775
3700  of  8775
3800  of  8775
3900  of  8775
4000  of  8775
4100  of  8775
4200  of  8775
4300  of  8775
4400  of  8775
4500  of  8775
4600  of  8775
4700  of  8

# Metrics Result
| Algorithm   | RMSE    | MAE     |
|-------------|---------|---------|
| ContentKNN | 0.9055  | 0.6983  |
| Random     | 1.4283  | 1.1414  |

# Using SVD & SVD++

In [6]:

np.random.seed(0)
random.seed(0)

# Load up common data set for the recommender algorithms
(ml, evaluationData, rankings) = LoadMovieLensData()

# Construct an Evaluator to, you know, evaluate them
evaluator = Evaluator(evaluationData, rankings)

# SVD
SVD = SVD()
evaluator.AddAlgorithm(SVD, "SVD")

# SVD++
SVDPlusPlus = SVDpp()
evaluator.AddAlgorithm(SVDPlusPlus, "SVD++")

# Just make random recommendations
Random = NormalPredictor()
evaluator.AddAlgorithm(Random, "Random")

# Fight!
evaluator.Evaluate(False)

evaluator.SampleTopNRecs(ml)


Loading movie ratings...

Computing movie popularity ranks so we can measure novelty later...
Estimating biases using als...
Computing the cosine similarity matrix...
Done computing similarity matrix.
Evaluating  ContentKNN ...
Evaluating accuracy...
Computing content-based similarity matrix...
0  of  8775
100  of  8775
200  of  8775
300  of  8775
400  of  8775
500  of  8775
600  of  8775
700  of  8775
800  of  8775
900  of  8775
1000  of  8775
1100  of  8775
1200  of  8775
1300  of  8775
1400  of  8775
1500  of  8775
1600  of  8775
1700  of  8775
1800  of  8775
1900  of  8775
2000  of  8775
2100  of  8775
2200  of  8775
2300  of  8775
2400  of  8775
2500  of  8775
2600  of  8775
2700  of  8775
2800  of  8775
2900  of  8775
3000  of  8775
3100  of  8775
3200  of  8775
3300  of  8775
3400  of  8775
3500  of  8775
3600  of  8775
3700  of  8775
3800  of  8775
3900  of  8775
4000  of  8775
4100  of  8775
4200  of  8775
4300  of  8775
4400  of  8775
4500  of  8775
4600  of  8775
4700  of  8

# Metrics Result
| Algorithm   | RMSE    | MAE     |
|-------------|---------|---------|
| ContentKNN  | 0.9055  | 0.6983  |
| Random      | 1.4265  | 1.1400  |
| SVD         | 0.8774  | 0.6741  |
| SVD++       | 0.8721  | 0.6669  |

# Tuning the sdv to get lower error

In [3]:
(ml, evaluationData, rankings) = LoadMovieLensData()

print("Searching for best parameters...")
param_grid = {'n_epochs': [10,20,], 'lr_all': [0.001,0.005],
              'n_factors': [20,50]}
gs = GridSearchCV(SVD, param_grid, measures=['rmse', 'mae'], cv=4)

gs.fit(evaluationData)

# best RMSE score
print("Best RMSE score attained: ", gs.best_score['rmse'])

# combination of parameters that gave the best RMSE score
print(gs.best_params['rmse'])

# Construct an Evaluator to, you know, evaluate them
evaluator = Evaluator(evaluationData, rankings)

params = gs.best_params['rmse']
SVDtuned = SVD(n_epochs = params['n_epochs'], lr_all = params['lr_all'], n_factors = params['n_factors'])
evaluator.AddAlgorithm(SVDtuned, "SVD - Tuned")

SVDUntuned = SVD()
evaluator.AddAlgorithm(SVDUntuned, "SVD - Untuned")

# Just make random recommendations
Random = NormalPredictor()
evaluator.AddAlgorithm(Random, "Random")

# Fight!
evaluator.Evaluate(False)

evaluator.SampleTopNRecs(ml)

Loading movie ratings...

Computing movie popularity ranks so we can measure novelty later...
Searching for best parameters...
Best RMSE score attained:  0.8716561680357514
{'n_epochs': 20, 'lr_all': 0.005, 'n_factors': 20}
Estimating biases using als...
Computing the cosine similarity matrix...
Done computing similarity matrix.
Evaluating  SVD - Tuned ...
Evaluating accuracy...
Analysis complete.
Evaluating  SVD - Untuned ...
Evaluating accuracy...
Analysis complete.
Evaluating  Random ...
Evaluating accuracy...
Analysis complete.


Algorithm  RMSE       MAE       
SVD - Tuned 0.8756     0.6719    
SVD - Untuned 0.8809     0.6758    
Random     1.4279     1.1386    

Legend:

RMSE:      Root Mean Squared Error. Lower values mean better accuracy.
MAE:       Mean Absolute Error. Lower values mean better accuracy.

Using recommender  SVD - Tuned

Building recommendation model...
Computing recommendations...

We recommend:
Dr. Strangelove or: How I Learned to Stop Worrying and Love the Bo

# Using Item-based KNN

In [4]:
np.random.seed(0)
random.seed(0)

# Load up common data set for the recommender algorithms
(ml, evaluationData, rankings) = LoadMovieLensData()

# Construct an Evaluator to, you know, evaluate them
evaluator = Evaluator(evaluationData, rankings)


# Item-based KNN
ItemKNN = KNNBasic(sim_options = {'name': 'cosine', 'user_based': False})
evaluator.AddAlgorithm(ItemKNN, "Item KNN")

# Just make random recommendations
Random = NormalPredictor()
evaluator.AddAlgorithm(Random, "Random")

# Fight!
evaluator.Evaluate(False)

evaluator.SampleTopNRecs(ml)

Loading movie ratings...

Computing movie popularity ranks so we can measure novelty later...
Estimating biases using als...
Computing the cosine similarity matrix...
Done computing similarity matrix.
Evaluating  Item KNN ...
Evaluating accuracy...
Computing the cosine similarity matrix...
Done computing similarity matrix.
Analysis complete.
Evaluating  Random ...
Evaluating accuracy...
Analysis complete.
Evaluating  Item KNN ...
Evaluating accuracy...
Computing the cosine similarity matrix...
Done computing similarity matrix.
Analysis complete.
Evaluating  Random ...
Evaluating accuracy...
Analysis complete.


Algorithm  RMSE       MAE       
Item KNN   0.9788     0.7610    
Random     1.4283     1.1414    

Legend:

RMSE:      Root Mean Squared Error. Lower values mean better accuracy.
MAE:       Mean Absolute Error. Lower values mean better accuracy.

Using recommender  Item KNN

Building recommendation model...
Computing the cosine similarity matrix...
Done computing similarity mat

# Final Metric Result
| Algorithm   | RMSE    | MAE     |
|-------------|---------|---------|
| ContentKNN | 0.9055  | 0.6983  |
| Random     | 1.4265  | 1.1400  |
| SVD        | 0.8774  | 0.6741  |
| SVD++      | 0.8721  | 0.6669  |
| Item KNN   | 0.9788  | 0.7610  |

# NeuMF Algorithm
 

In [23]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset, DataLoader

# Load data
df = pd.read_csv(r"D:\ITI\Rec_Sys_Intake_45\project_descrption\project\data\ml-latest-small\ratings.csv")

# Create mappings
user_ids = df["userId"].unique()
item_ids = df["movieId"].unique()


df["rating"] = (df["rating"] - 0.5) / 4.5  # Normalize ratings to [0, 1]

user_to_idx = {user: idx for idx, user in enumerate(user_ids)}
item_to_idx = {item: idx for idx, item in enumerate(item_ids)}

# Convert to indices
df["user_idx"] = df["userId"].map(user_to_idx)
df["item_idx"] = df["movieId"].map(item_to_idx)

# Split data
train_df, test_df = train_test_split(df, test_size=0.2, random_state=42)

# Create PyTorch Dataset
class RatingDataset(Dataset):
    def __init__(self, df):
        self.users = torch.LongTensor(df["user_idx"].values)
        self.items = torch.LongTensor(df["item_idx"].values)
        self.ratings = torch.FloatTensor(df["rating"].values)
        
    def __len__(self):
        return len(self.users)
    
    def __getitem__(self, idx):
        return self.users[idx], self.items[idx], self.ratings[idx]

train_dataset = RatingDataset(train_df)
test_dataset = RatingDataset(test_df)


# Data loaders
batch_size = 128
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [24]:
class NeuMF(nn.Module):
    def __init__(self, num_users, num_items, mf_dim=8, mlp_dim=32, layers=[64, 32, 16]):
        super(NeuMF, self).__init__()
        
        # MF embeddings
        self.mf_user_embedding = nn.Embedding(num_users, mf_dim)
        self.mf_item_embedding = nn.Embedding(num_items, mf_dim)
        
        # MLP embeddings
        self.mlp_user_embedding = nn.Embedding(num_users, mlp_dim)
        self.mlp_item_embedding = nn.Embedding(num_items, mlp_dim)
        
        # MLP layers
        input_size = mlp_dim * 2  # Concatenated user and item embeddings
        self.mlp = nn.Sequential(
            nn.Linear(input_size, layers[0]),
            nn.ReLU(),
            nn.Dropout(0.2)
        )
        
        # Add remaining layers dynamically
        for i in range(1, len(layers)):
            self.mlp.add_module(f"fc{i}", nn.Linear(layers[i-1], layers[i]))
            self.mlp.add_module(f"relu{i}", nn.ReLU())
            self.mlp.add_module(f"dropout{i}", nn.Dropout(0.2))
        
        # Final layer
        self.predict_layer = nn.Linear(mf_dim + layers[-1], 1)
        
        # Initialize weights
        self._init_weight_()
    
    def _init_weight_(self):
        nn.init.normal_(self.mf_user_embedding.weight, mean=0.0, std=0.01)
        nn.init.normal_(self.mf_item_embedding.weight, mean=0.0, std=0.01)
        nn.init.normal_(self.mlp_user_embedding.weight, mean=0.0, std=0.01)
        nn.init.normal_(self.mlp_item_embedding.weight, mean=0.0, std=0.01)
        
        for layer in self.mlp:
            if isinstance(layer, nn.Linear):
                nn.init.xavier_uniform_(layer.weight)
        
        nn.init.kaiming_uniform_(self.predict_layer.weight, a=1, nonlinearity='sigmoid')
    
    def forward(self, user, item):
        # MF part
        mf_user_embedded = self.mf_user_embedding(user)
        mf_item_embedded = self.mf_item_embedding(item)
        mf_output = mf_user_embedded * mf_item_embedded  # element-wise product
        
        # MLP part
        mlp_user_embedded = self.mlp_user_embedding(user)
        mlp_item_embedded = self.mlp_item_embedding(item)
        mlp_input = torch.cat([mlp_user_embedded, mlp_item_embedded], dim=-1)
        mlp_output = self.mlp(mlp_input)
        
        # Concatenate MF and MLP parts
        output = torch.cat([mf_output, mlp_output], dim=-1)
        output = self.predict_layer(output)
        output = torch.sigmoid(output) * 4.5 + 0.5  # Scale to [0.5, 5.0]
        
        return output.squeeze()

In [25]:
# Initialize model
num_users = len(user_ids)
num_items = len(item_ids)
#model = NeuMF(num_users, num_items)
model = NeuMF(num_users, num_items, mf_dim=16, mlp_dim=64, layers=[128, 64, 32])

# Loss and optimizer
criterion = nn.MSELoss()  
optimizer = optim.Adam(model.parameters(), lr=0.01)

# Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)

In [46]:
def train(model, dataloader, criterion, optimizer, device):
    model.train()
    total_loss = 0.0
    
    for batch_idx, (users, items, ratings) in enumerate(dataloader):
        users, items, ratings = users.to(device), items.to(device), ratings.to(device)
        
        optimizer.zero_grad()
        outputs = model(users, items)
        loss = criterion(outputs, ratings)
        loss.backward()
        optimizer.step()
        
        total_loss += loss.item()
    
    return total_loss / len(dataloader)

# Training
num_epochs = 50
for epoch in range(num_epochs):
    train_loss = train(model, train_loader, criterion, optimizer, device)
    print(f'Epoch [{epoch+1}/{num_epochs}], Train Loss: {train_loss:.4f}')

Epoch [1/50], Train Loss: 0.0179
Epoch [2/50], Train Loss: 0.0179
Epoch [3/50], Train Loss: 0.0178
Epoch [4/50], Train Loss: 0.0180
Epoch [5/50], Train Loss: 0.0180
Epoch [6/50], Train Loss: 0.0179
Epoch [7/50], Train Loss: 0.0184
Epoch [8/50], Train Loss: 0.0177
Epoch [9/50], Train Loss: 0.0177
Epoch [10/50], Train Loss: 0.0177
Epoch [11/50], Train Loss: 0.0179
Epoch [12/50], Train Loss: 0.0179
Epoch [13/50], Train Loss: 0.0178
Epoch [14/50], Train Loss: 0.0181
Epoch [15/50], Train Loss: 0.0179
Epoch [16/50], Train Loss: 0.0177
Epoch [17/50], Train Loss: 0.0177
Epoch [18/50], Train Loss: 0.0177
Epoch [19/50], Train Loss: 0.0181
Epoch [20/50], Train Loss: 0.0177
Epoch [21/50], Train Loss: 0.0180
Epoch [22/50], Train Loss: 0.0179
Epoch [23/50], Train Loss: 0.0176
Epoch [24/50], Train Loss: 0.0177
Epoch [25/50], Train Loss: 0.0180
Epoch [26/50], Train Loss: 0.0178
Epoch [27/50], Train Loss: 0.0177
Epoch [28/50], Train Loss: 0.0180
Epoch [29/50], Train Loss: 0.0178
Epoch [30/50], Train Lo

In [47]:
def evaluate(model, dataloader, criterion, device):
    model.eval()
    total_loss = 0.0
    with torch.no_grad():
        for users, items, ratings in dataloader:
            users, items, ratings = users.to(device), items.to(device), ratings.to(device)
            outputs = model(users, items)
            total_loss += criterion(outputs, ratings).item()
    return (total_loss / len(dataloader)) ** 0.5  # Convert MSE to RMSE

test_rmse = evaluate(model, test_loader, nn.MSELoss(), device)
print(f'Test RMSE: {test_rmse:.4f}')

Test RMSE: 0.3325


In [28]:
def get_top_k_recommendations(model, user_id, user_to_idx, item_ids, item_to_idx, device, k=10, rated_items=None):
    """
    Get top-K recommendations for a user
    
    Args:
        model: Trained NeuMF model
        user_id: ID of the user to recommend for
        user_to_idx: User ID to index mapping
        item_ids: List of all item IDs in the dataset
        item_to_idx: Item ID to index mapping
        device: Device to run computations on
        k: Number of recommendations to return
        rated_items: Set of items the user has already rated (to exclude from recommendations)
    """
    model.eval()
    
    # Convert user ID to index
    if isinstance(user_id, str):
        user_idx = torch.LongTensor([user_to_idx[user_id]])
    else:
        user_idx = torch.LongTensor([user_id])
    user_idx = user_idx.to(device)
    
    # Prepare all item indices
    all_item_indices = torch.LongTensor([item_to_idx[item] for item in item_ids]).to(device)
    
    # Create user tensor with same length as items (for batch prediction)
    user_indices = user_idx.repeat(len(all_item_indices))
    
    # Predict ratings for all items
    with torch.no_grad():
        predictions = model(user_indices, all_item_indices)
        predictions = torch.clamp(predictions, min=0.5, max=5.0)  # Clip to rating range
    
    # Convert predictions to numpy array
    predictions = predictions.cpu().numpy()
    
    # Create a dictionary of item IDs to predicted ratings
    item_ratings = {item_id: pred for item_id, pred in zip(item_ids, predictions)}
    
    # Filter out items the user has already rated if provided
    if rated_items is not None:
        item_ratings = {item: rating for item, rating in item_ratings.items() 
                       if item not in rated_items}
    
    # Sort items by predicted rating
    sorted_ratings = sorted(item_ratings.items(), key=lambda x: x[1], reverse=True)
    
    # Get top-K items
    top_k = sorted_ratings[:k]
    
    return top_k

# Example usage:
movie_df = pd.read_csv(r"D:\ITI\Rec_Sys_Intake_45\project_descrption\project\data\ml-latest-small\movies.csv")

# First, get the items the user has already rated (to exclude them)
user_id = 85  # Example user ID
rated_items = set(df[df["userId"] == user_id]["movieId"].values)

# Get top 10 recommendations
top_k = get_top_k_recommendations(model, user_id, user_to_idx, item_ids, item_to_idx, device, 
                                 k=10, rated_items=rated_items)

print(f"Top 10 recommendations for user {user_id}:")
for i, (item_id, predicted_rating) in enumerate(top_k, 1):
    movie_name = movie_df[movie_df["movieId"] == item_id]["title"].values[0]  
    print(f"{i}. Item ID: {item_id}, moviename: {movie_name} Predicted rating: {predicted_rating* 4.5 + 0.5:.2f}")

Top 10 recommendations for user 85:
1. Item ID: 920, moviename: Gone with the Wind (1939) Predicted rating: 6.98
2. Item ID: 31658, moviename: Howl's Moving Castle (Hauru no ugoku shiro) (2004) Predicted rating: 6.17
3. Item ID: 899, moviename: Singin' in the Rain (1952) Predicted rating: 5.99
4. Item ID: 1090, moviename: Platoon (1986) Predicted rating: 5.73
5. Item ID: 6615, moviename: Freddy vs. Jason (2003) Predicted rating: 5.63
6. Item ID: 4799, moviename: It's a Mad, Mad, Mad, Mad World (1963) Predicted rating: 5.61
7. Item ID: 2709, moviename: Muppets From Space (1999) Predicted rating: 5.57
8. Item ID: 68237, moviename: Moon (2009) Predicted rating: 5.56
9. Item ID: 3480, moviename: Lucas (1986) Predicted rating: 5.55
10. Item ID: 234, moviename: Exit to Eden (1994) Predicted rating: 5.53


In [48]:
def predict_rating(model, user_id, item_id, user_to_idx, item_to_idx, device):
    model.eval()
    
    if isinstance(user_id, str):
        user_idx = torch.LongTensor([user_to_idx[user_id]])
    else:
        user_idx = torch.LongTensor([user_id])
    
    if isinstance(item_id, str):
        item_idx = torch.LongTensor([item_to_idx[item_id]])
    else:
        item_idx = torch.LongTensor([item_id])
    
    with torch.no_grad():
        user_idx = user_idx.to(device)
        item_idx = item_idx.to(device)
        prediction = model(user_idx, item_idx)
        prediction = torch.clamp(prediction, min=0.5, max=5.0)  # Clip to rating range
    
    return prediction.item()

# Example prediction
user_id = 431  # or use original user ID if it's a string
item_id = 4730  # or use original item ID if it's a string
prediction = predict_rating(model, user_id, item_id, user_to_idx, item_to_idx, device)
prediction = prediction * 4.5 + 0.5
print(f'Predicted rating: {prediction:.4f}')

Predicted rating: 2.7988


In [38]:
class PyTorchWrapper:
    """Wrapper to make PyTorch model compatible with Surprise's interface"""
    def __init__(self, model, user_to_idx, item_to_idx, device):
        self.model = model
        self.user_to_idx = user_to_idx
        self.item_to_idx = item_to_idx
        self.device = device
        
    def predict(self, uid, iid, r_ui=None):
        # Convert string IDs to indices
        try:
            user_idx = torch.LongTensor([self.user_to_idx[str(uid)]])
            item_idx = torch.LongTensor([self.item_to_idx[str(iid)]])
        except KeyError as e:
            # Return neutral prediction if user/item not in model
            return Prediction(uid, iid, r_ui, 3.0, {'was_impossible': True})
        
        with torch.no_grad():
            user_idx = user_idx.to(self.device)
            item_idx = item_idx.to(self.device)
            prediction = self.model(user_idx, item_idx)
            prediction = torch.clamp(prediction, min=0.5, max=5.0).item()
        
        # Scale prediction if needed (remove if already scaled)
        prediction = prediction * 4.5 + 0.5
        
        return Prediction(uid, iid, r_ui, prediction, {})

# Hybird System

In [3]:
np.random.seed(0)
random.seed(0)

# Load up common data set for the recommender algorithms
(ml, evaluationData, rankings) = LoadMovieLensData()

# Construct an Evaluator to, you know, evaluate them
evaluator = Evaluator(evaluationData, rankings)

#Simple RBM
SVDPlusPlus = SVDpp()
#Content
ContentKNN = ContentKNNAlgorithm()

#Combine them
Hybrid = HybridAlgorithm([SVDPlusPlus, ContentKNN], [0.5, 0.5])

evaluator.AddAlgorithm(SVDPlusPlus, "SVD")
evaluator.AddAlgorithm(ContentKNN, "ContentKNN")
evaluator.AddAlgorithm(Hybrid, "Hybrid")

# Fight!
evaluator.Evaluate(False)

evaluator.SampleTopNRecs(ml)

Loading movie ratings...

Computing movie popularity ranks so we can measure novelty later...
Estimating biases using als...
Computing the cosine similarity matrix...
Done computing similarity matrix.
Evaluating  SVD ...
Evaluating accuracy...
Analysis complete.
Evaluating  ContentKNN ...
Evaluating accuracy...
Computing content-based similarity matrix...
0  of  8775
100  of  8775
200  of  8775
300  of  8775
400  of  8775
500  of  8775
600  of  8775
700  of  8775
800  of  8775
900  of  8775
1000  of  8775
1100  of  8775
1200  of  8775
1300  of  8775
1400  of  8775
1500  of  8775
1600  of  8775
1700  of  8775
1800  of  8775
1900  of  8775
2000  of  8775
2100  of  8775
2200  of  8775
2300  of  8775
2400  of  8775
2500  of  8775
2600  of  8775
2700  of  8775
2800  of  8775
2900  of  8775
3000  of  8775
3100  of  8775
3200  of  8775
3300  of  8775
3400  of  8775
3500  of  8775
3600  of  8775
3700  of  8775
3800  of  8775
3900  of  8775
4000  of  8775
4100  of  8775
4200  of  8775
4300  of 

| Algorithm   | RMSE    | MAE     |
|-------------|---------|---------|
| SVD++         | 0.8695  | 0.6652  |
| ContentKNN  | 0.9055  | 0.6983  |
| Hybrid      | 0.8622  | 0.6615  |

In [50]:
from surprise import Prediction
from surprise import accuracy
from surprise.model_selection import train_test_split as surprise_train_test_split

pytorch_model = PyTorchWrapper(model, user_to_idx, item_to_idx, device)
SVDPlusPlus = SVDpp()
ItemKNN = KNNBasic(sim_options={'name': 'cosine', 'user_based': False})
(ml, evaluationData, rankings) = LoadMovieLensData()
# Hybrid prediction function that combines all three
def hybrid_predict(testset, weights=[0.2, 0.4, 0.4]):
    predictions = []
    for uid, iid, true_r in testset:
        try:
            pred_pytorch = pytorch_model.predict(uid, iid).est
            pred_svd = SVDPlusPlus.predict(uid, iid).est
            pred_knn = ItemKNN.predict(uid, iid).est
            
            hybrid_est = (weights[0]*pred_pytorch + 
                         weights[1]*pred_svd + 
                         weights[2]*pred_knn)
            
            predictions.append(Prediction(uid, iid, true_r, hybrid_est, {}))
        except Exception as e:
            print(f"Skipping prediction for {uid}, {iid}: {str(e)}")
            continue
    return predictions

# Train the Surprise algorithms
trainset, testset = surprise_train_test_split(evaluationData, test_size=0.25)
SVDPlusPlus.fit(trainset)
ItemKNN.fit(trainset)

# Generate hybrid predictions

hybrid_preds = hybrid_predict(testset)

# Evaluate
def print_metrics(predictions, name):
    rmse = accuracy.rmse(predictions, verbose=False)
    mae = accuracy.mae(predictions, verbose=False)
    print(f"{name:<15} RMSE: {rmse:.4f}, MAE: {mae:.4f}")

print_metrics([pytorch_model.predict(uid, iid, r) for (uid, iid, r) in testset], "PyTorch")
print_metrics([SVDPlusPlus.predict(uid, iid, r) for (uid, iid, r) in testset], "SVD++")
print_metrics([ItemKNN.predict(uid, iid, r) for (uid, iid, r) in testset], "ItemKNN")
print_metrics(hybrid_preds, "Hybrid")

# Example prediction
user_id = "431"  # Using original user ID
item_id = "4730"  # Using original item ID
print(f"\nExample Hybrid Prediction for user {user_id}, item {item_id}:")
print(f"PyTorch: {pytorch_model.predict(user_id, item_id).est:.2f}")
print(f"SVD++: {SVDPlusPlus.predict(user_id, item_id).est:.2f}")
print(f"ItemKNN: {ItemKNN.predict(user_id, item_id).est:.2f}")
print(f"Hybrid: {hybrid_predict([(user_id, item_id, None)])[0].est:.2f}")

Loading movie ratings...

Computing movie popularity ranks so we can measure novelty later...
Computing the cosine similarity matrix...
Done computing similarity matrix.
PyTorch         RMSE: 1.1616, MAE: 0.9427
SVD++           RMSE: 0.8763, MAE: 0.6711
ItemKNN         RMSE: 0.9889, MAE: 0.7673
Hybrid          RMSE: 0.9507, MAE: 0.7630

Example Hybrid Prediction for user 431, item 4730:
PyTorch: 3.00
SVD++: 2.83
ItemKNN: 3.50
Hybrid: 3.10


In [53]:
from surprise.model_selection import train_test_split
np.random.seed(0)
random.seed(0)

# Load data
(ml, evaluationData, rankings) = LoadMovieLensData()

# Initialize algorithms
SVDPlusPlus = SVDpp()
ItemKNN = KNNBasic(sim_options={'name': 'cosine', 'user_based': False})
trainset, testset = train_test_split(evaluationData, test_size=0.25)

# Train both algorithms
SVDPlusPlus.fit(trainset)
ItemKNN.fit(trainset)



# Function to get weighted average predictions
pytorch_model = PyTorchWrapper(model, user_to_idx, item_to_idx, device)

# Hybrid prediction function
def get_hybrid_predictions(algo1, algo2, pytorch_model, testset, weights=[0.2, 0.4, 0.4]):
    predictions = []
    for uid, iid, true_r in testset:
        try:
            pred_pytorch = pytorch_model.predict(uid, iid).est
            pred_svd = SVDPlusPlus.predict(uid, iid).est
            pred_knn = ItemKNN.predict(uid, iid).est
            
            hybrid_est = (weights[0]*pred_pytorch + 
                         weights[1]*pred_svd + 
                         weights[2]*pred_knn)
            
            predictions.append(Prediction(uid, iid, true_r, hybrid_est, {}))
        except Exception as e:
            print(f"Skipping prediction for {uid}, {iid}: {str(e)}")
            continue
    return predictions

# Get predictions (40% SVD++, 30% ItemKNN, 30% PyTorch)
hybrid_preds = get_hybrid_predictions(SVDPlusPlus, ItemKNN, pytorch_model, testset)

# Top-N recommendations function
def get_top_n_hybrid(user_id, hybrid_preds, n=10):
    user_preds = [pred for pred in hybrid_preds if pred[0] == user_id]
    user_preds.sort(key=lambda x: x[2], reverse=True)
    return user_preds[:n]


# Example: Get top 10 hybrid recommendations for user 1
top_hybrid = get_top_n_hybrid('10', hybrid_preds)
print("Hybrid Top 10 Recommendations for User 85:")
for item in top_hybrid:
    print(f"Item {item[1]}: Predicted rating {item[2]:.2f}")


Loading movie ratings...

Computing movie popularity ranks so we can measure novelty later...
Computing the cosine similarity matrix...
Done computing similarity matrix.
Hybrid Top 10 Recommendations for User 85:
Item 33794: Predicted rating 5.00
Item 92259: Predicted rating 5.00
Item 106696: Predicted rating 4.50
Item 69406: Predicted rating 4.50
Item 51705: Predicted rating 4.50
Item 68954: Predicted rating 4.00
Item 6942: Predicted rating 4.00
Item 95167: Predicted rating 4.00
Item 588: Predicted rating 4.00
Item 7375: Predicted rating 4.00
