# In [1]:  (notebook cell marker kept as a comment so the file parses as Python)
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.decomposition import NMF

# Load dataset.
# The rename below targets the Amazon review field names (reviewerID / asin /
# overall). When the file does not carry those names the rename is a silent
# no-op and every later access to "userId" fails with KeyError, so inspect
# the header row before deciding how to read the file.
RATINGS_PATH = "/Users/mohdmaazraeen/Desktop/Sem 5/PBL/ratings_Electronics.csv"
COLUMN_ALIASES = {
    "reviewerID": "userId",
    "asin": "ProductId",
    "overall": "rating",
}
REQUIRED_COLUMNS = ["userId", "ProductId", "rating"]

# nrows=0 parses only the first line, so this peek is cheap even for a big file.
header_only = pd.read_csv(RATINGS_PATH, nrows=0).rename(columns=COLUMN_ALIASES)
if set(REQUIRED_COLUMNS).issubset(header_only.columns):
    # Rename columns to standard format
    ratings = pd.read_csv(RATINGS_PATH).rename(columns=COLUMN_ALIASES)
else:
    # NOTE(review): assumes a headerless file laid out as
    # user, product, rating, timestamp -- confirm against the actual CSV.
    ratings = pd.read_csv(
        RATINGS_PATH,
        header=None,
        names=REQUIRED_COLUMNS + ["timestamp"],
    )

# Fail early (ValueError) if the rating column is not numeric, e.g. because a
# header row with unexpected names was read as data by the fallback above.
ratings["rating"] = pd.to_numeric(ratings["rating"])

# Train-test split (done before building the matrix so that held-out ratings
# never reach the data the model is fitted on).
train, test = train_test_split(ratings, test_size=0.2, random_state=42)

# Create user-item matrix from the training ratings only.
# pivot_table (unlike pivot) tolerates a user rating the same product more
# than once by averaging the duplicates; unrated cells are filled with 0.
# NOTE: this is a dense users x products frame, so its memory footprint grows
# with the product of both counts -- filter the ratings first if it is too big.
user_item_matrix = train.pivot_table(
    index="userId",
    columns="ProductId",
    values="rating",
    aggfunc="mean",
    fill_value=0,
)

def train_recommender(n_components=20, max_iter=200):
    """Fit an NMF factorization of ``user_item_matrix`` and print the test RMSE.

    Reads the module-level ``user_item_matrix`` (users as rows, products as
    columns) and the module-level ``test`` frame (columns ``userId``,
    ``ProductId``, ``rating``).

    NOTE: the reported RMSE only measures generalization if
    ``user_item_matrix`` does not already contain the ratings held out in
    ``test``.

    Args:
        n_components: Number of latent factors for the NMF model.
        max_iter: Maximum number of NMF solver iterations.

    Returns:
        The fitted ``NMF`` model.
    """
    # Build matrix factorization model
    model = NMF(n_components=n_components, max_iter=max_iter, random_state=42)
    user_factors = model.fit_transform(user_item_matrix)
    item_factors = model.components_

    # Locate every held-out (user, product) pair in the factor matrices.
    # get_indexer returns -1 for labels that are absent from the matrix.
    user_pos = user_item_matrix.index.get_indexer(test["userId"])
    item_pos = user_item_matrix.columns.get_indexer(test["ProductId"])
    known = (user_pos >= 0) & (item_pos >= 0)

    if not known.any():
        # Nothing to score: the metric would raise on empty input.
        print("Model trained; no test ratings overlap the user-item matrix, RMSE not computed.")
        return model

    # Row-wise dot product of each user's factors with its product's factors.
    preds = (
        user_factors[user_pos[known]] * item_factors[:, item_pos[known]].T
    ).sum(axis=1)
    truths = test["rating"].to_numpy()[known]

    # np.sqrt(MSE) instead of squared=False, which newer scikit-learn
    # releases no longer accept.
    rmse = np.sqrt(mean_squared_error(truths, preds))
    print(f"Model trained with RMSE: {rmse:.4f}")
    return model

# Fit the recommender with 30 latent components and up to 300 NMF iterations;
# the call prints the test RMSE and returns the fitted model.
trained_model = train_recommender(n_components=30, max_iter=300)


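# Recorded output of the original cell run:
#   KeyError: 'userId'
# i.e. the loaded frame had no "userId" column after the rename -- presumably
# because the CSV's columns are not named reviewerID / asin / overall.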