# Importing dataset

In [27]:
import pandas as pd
import numpy as np

# Fetch the raw MovieLens 100k ratings file. It is tab-separated and has no
# header row, so the column names are supplied explicitly.
url = 'http://files.grouplens.org/datasets/movielens/ml-100k/u.data'
columns = ['user_id', 'item_id', 'rating', 'timestamp']
data = pd.read_csv(
    url,
    sep='\t',
    names=columns,
)

# Reshape to an item x user table: one row per item_id, one column per
# user_id, cells holding the rating (NaN where no rating exists).
matrix = pd.pivot_table(
    data,
    index='item_id',
    columns='user_id',
    values='rating',
)


# Item based

In [None]:
from sklearn.model_selection import KFold
from sklearn.metrics.pairwise import cosine_similarity

# Item-based k-NN evaluated with K-fold cross-validation over the rows of
# `matrix` (one row per item): every fold holds out whole items, predicts
# their per-user ratings from the K most similar training items, and reports
# the mean absolute error for each neighbourhood size.
n_splits = 5

K_values = [10, 20, 30, 40, 50]

kf = KFold(n_splits=n_splits, shuffle=True, random_state=42)

# One MAE per (fold, K) pair, appended fold by fold in K_values order.
mae_values = []
for train_index, test_index in kf.split(matrix):

    # Missing ratings are replaced by 0 and treated as ordinary values from
    # here on, both in the predictions and in the error computation.
    train_data = matrix.iloc[train_index].fillna(0)
    test_data = matrix.iloc[test_index].fillna(0)
    train_values = train_data.to_numpy()
    test_values = test_data.to_numpy()

    # The similarities and their sort order do not depend on K, so compute
    # them once per fold instead of once per (K, test item).
    # similarities[i, j] = cosine similarity of test item i and train item j.
    similarities = cosine_similarity(test_values, train_values, dense_output=True)
    neighbor_order = np.argsort(similarities, axis=1)

    for K in K_values:
        predicted_ratings = np.empty(test_values.shape, dtype=float)

        for row, (order, item_similarities) in enumerate(zip(neighbor_order, similarities)):
            # argsort is ascending, so the last K positions are the K most
            # similar training items.
            neighbor_indices = order[-K:]
            weights = item_similarities[neighbor_indices]
            neighbor_ratings = train_values[neighbor_indices]

            # Similarity-weighted average of the neighbours' rating rows,
            # reduced explicitly over the neighbour axis so the result is one
            # prediction per user. If every selected similarity is 0 this is
            # 0/0 and yields NaN, which then propagates into the MAE.
            predicted_ratings[row] = (weights @ neighbor_ratings) / weights.sum()

        # NOTE(review): the mean is taken over every cell of the held-out
        # rows, including the cells that were filled with 0 above -- confirm
        # that scoring unrated cells is intended.
        mae = np.abs(test_values - predicted_ratings).mean()
        mae_values.append(mae)
        print(f"Item-based, K={K}: MAE = {mae}")



# Variance Weighting


In [None]:
from sklearn.model_selection import KFold
from sklearn.metrics.pairwise import cosine_similarity

# Variance-weighted variant of the neighbourhood experiment: the K nearest
# training items are still chosen by cosine similarity, but the neighbours'
# ratings are weighted per user by the inverse variance of that user's
# ratings across the K neighbours.

# folds
n_splits = 5

K_values = [10, 20, 30, 40, 50]
kf = KFold(n_splits=n_splits, shuffle=True, random_state=1)

# One MAE per (fold, K) pair, appended fold by fold in K_values order.
mae_values_1 = []

# Added to every variance so that a zero variance does not divide by zero.
eps1 = 1e-6

for train_index, test_index in kf.split(matrix):

    # Training and testing splits; missing ratings are replaced by 0.
    train_data = matrix.iloc[train_index].fillna(0)
    test_data = matrix.iloc[test_index].fillna(0)
    train_values = train_data.to_numpy()
    test_values = test_data.to_numpy()

    # Only the neighbour ordering is needed here, and it does not depend on
    # K, so compute the similarities and sort them once per fold.
    similarities = cosine_similarity(test_values, train_values, dense_output=True)
    neighbor_order = np.argsort(similarities, axis=1)

    for K in K_values:

        predicted_ratings_1 = np.empty(test_values.shape, dtype=float)

        for row, order in enumerate(neighbor_order):
            # argsort is ascending: the last K positions are the K nearest
            # neighbours of this test item.
            neighbor_indices_1 = order[-K:]
            neighbor_ratings_1 = train_values[neighbor_indices_1]

            # Population variance (ddof=0) of each user's ratings across the
            # K neighbours, and its inverse as that user's weight.
            variances = neighbor_ratings_1.var(axis=0)
            weights_1 = 1.0 / (variances + eps1)

            # Each user's column is scaled by that user's weight, then the
            # neighbour axis is reduced explicitly so the result holds one
            # value per user.
            weighted_ratings_1 = neighbor_ratings_1 * weights_1

            # NOTE(review): the numerator sums over the K neighbours while
            # the denominator sums the weights over all users, so this is not
            # a per-user weighted average. Kept exactly as the original
            # computes it -- confirm the intended formula.
            predicted_ratings_1[row] = weighted_ratings_1.sum(axis=0) / weights_1.sum()

        # MAE for this value of K, taken over every cell of the held-out rows
        # (including the cells that were filled with 0 above).
        mae_1 = np.abs(test_values - predicted_ratings_1).mean()

        print(f"Item-based, K={K}: MAE = {mae_1}")

        mae_values_1.append(mae_1)

