In [None]:
# Implementation of a very simple and much to improve user-based collaborative filtering (CF) recommender.
# Author: Markus Schedl

# Load required modules
import csv
import numpy as np
import h5py
from scipy import sparse


# Input files and parameters used by the script below.
# Matlab .mat file where the listening events are stored (opened with h5py in read_UAM)
UAM_MATLAB_FILE = 'LFM-1b_LEs.mat'
# Tab-separated artist metadata for the UAM; the main program reads column 1 from it
ARTISTS_FILE = "LFM-1b_artists.txt"
# Tab-separated user metadata for the UAM; the main program reads column 0 from it
# and prints those values as the "Seed user-id"
USERS_FILE = "output-4.txt"
# Maximum number of seed's neighbors to select (the K most similar users are kept)
K = 3


# Read the user-artist-matrix and corresponding artist and user indices from Matlab file
def read_UAM(m_file, max_users=55771):
    """Load the user-artist matrix (UAM) and its id vectors from a Matlab file.

    The file is opened read-only through h5py and is closed again before the
    function returns; everything that is returned lives in memory.

    Args:
        m_file: Path of the Matlab file. It must contain the datasets
            ``idx_users`` and ``idx_artists`` and a group ``LEs`` holding the
            datasets ``data``, ``ir`` and ``jc``.
        max_users: Keep only the first ``max_users`` entries along the first
            axis of ``idx_users``. The default (55771) reproduces the limit
            that used to be hard-coded here; pass ``None`` to keep all ids.
            NOTE(review): only the id vector is truncated, the UAM itself is
            returned in full, so both can disagree on the number of users.

    Returns:
        A tuple ``(UAM, UAM_user_idx, UAM_artist_idx, user_ids, artist_ids)``:
        ``UAM`` is the transposed sparse matrix built from ``LEs``,
        ``UAM_user_idx`` is ``UAM.indices``, ``UAM_artist_idx`` is
        ``UAM.indptr``, and ``user_ids`` / ``artist_ids`` are int64 arrays.
    """
    # The context manager releases the HDF5 handle even if reading fails.
    with h5py.File(m_file, 'r') as mf:
        user_ids = np.array(mf.get('idx_users')).astype(np.int64)[:max_users]
        artist_ids = np.array(mf.get('idx_artists')).astype(np.int64)
        # Read the three sparse-matrix arrays completely ([()]) while the file
        # is still open, so the matrix does not depend on the open file.
        les = mf['/LEs/']
        data = les["data"][()]
        ir = les["ir"][()]
        jc = les["jc"][()]
    # (data, ir, jc) are interpreted as CSR (data, indices, indptr) and the
    # result is transposed; SciPy returns the transpose of a CSR matrix in CSC
    # format. Presumably this yields one row per user and one column per
    # artist -- verify against the data file.
    UAM = sparse.csr_matrix((data, ir, jc)).transpose()
    # Raw index arrays of the transposed matrix (for a COO matrix these would
    # be UAM.row / UAM.col).
    # NOTE(review): indptr is a pointer array rather than a per-entry index;
    # the name is kept because callers unpack the tuple positionally.
    UAM_user_idx = UAM.indices
    UAM_artist_idx = UAM.indptr
    return UAM, UAM_user_idx, UAM_artist_idx, user_ids, artist_ids


# Function to read metadata (users or artists)
def read_from_file(filename, col):
    """Read one column of a tab-separated metadata file (users or artists).

    The first line is treated as a header and skipped. Completely blank lines
    are ignored. Rows that are not blank but have no column ``col`` still
    raise ``IndexError``, because silently dropping them would shift every
    following entry relative to the matrix rows they are meant to label.

    Args:
        filename: Path of the tab-separated text file.
        col: Zero-based index of the column to read.

    Returns:
        A list with the string found in column ``col`` of every data row, in
        file order. An empty (or header-only) file yields an empty list.
    """
    # newline='' lets the csv module do its own line-ending handling, as the
    # csv documentation recommends for files passed to csv.reader.
    with open(filename, 'r', newline='') as f:
        reader = csv.reader(f, delimiter='\t')
        # Skip the header; the default keeps an empty file from raising
        # StopIteration.
        next(reader, None)
        # csv.reader yields [] for blank lines; `if row` filters those out.
        return [row[col] for row in reader if row]


# Main program
if __name__ == '__main__':
    # Script entry point: for every user, find the K most similar users
    # (dot product of playcount vectors) and print the artists those
    # neighbors listened to that the seed user has not listened to.
    import heapq

    # Read UAM
    UAM, UAM_user_idx, UAM_artist_idx, user_ids, artist_ids = read_UAM(UAM_MATLAB_FILE)
    # Flatten the id vector so that it can be indexed by UAM row number no
    # matter whether it was stored as a row or as a column vector.
    user_ids = np.ravel(user_ids)
    print ('Users: ', user_ids.size)
    print ('Artists: ', artist_ids.size)

    # Load metadata from provided files into lists
    artists = read_from_file(ARTISTS_FILE, 1)
    users = read_from_file(USERS_FILE, 0)

    # The UAM can contain more rows than there are entries in the user file or
    # in user_ids. Looping over every UAM row then fails with an IndexError on
    # users[u] or user_ids[...]. Only the leading rows that have metadata are
    # used, both as seeds and as neighbor candidates.
    # NOTE(review): this assumes row u of the UAM, users[u] and user_ids[u]
    # describe the same user, as the original indexing already did.
    n_users = min(UAM.shape[0], len(users), user_ids.size)
    if n_users < UAM.shape[0]:
        print ("Restricting UAM to the first " + str(n_users) + " of "
               + str(UAM.shape[0]) + " users (no metadata for the rest)")
        UAM = UAM[:n_users]

    # The transposed matrix is the same for every seed user, so build it once.
    UAM_T = UAM.transpose()

    # For all users
    for u in range(n_users):
        print ("Seed user-id: " + str(users[u]))

        # get (normalized) playcount vector for current user u
        # (getrow returns a 1 x n CSR matrix, so .indices are column indices)
        pc_vec = UAM.getrow(u)

        # Compute similarities as dot product between playcount vector of user
        # and all users via UAM (assuming that UAM is already normalized)
        uU_sim = pc_vec.dot(UAM_T).tocoo()
        uU_user_idx = uU_sim.col
        uU_data = uU_sim.data

        #
        # Determine nearest neighbors to seed based on uUM
        #

        # Drop the seed user itself (so that it is not selected as its own NN)
        # and every user with zero similarity
        is_candidate = (uU_user_idx != u) & (uU_data != 0)
        uU_user_idx = uU_user_idx[is_candidate]
        uU_data = uU_data[is_candidate]

        # Sort users according to the similarity (uU_data), ascending
        sort_index = np.argsort(uU_data)

        # Select the K nearest neighbors among all users
        # Note that uU_user_idx indeed provides the indices for users in UAM
        recommended_user_idx = uU_user_idx[sort_index[-K:]]
        # Get user_ids corresponding to nearest neighbors
        recommended_user_ids = user_ids[recommended_user_idx]
        # Get similarity score for nearest neighbors
        recommended_user_scores = uU_data[sort_index[-K:]]

        print ("Nearest K=" + str(K) + " neighbors\' user-ids: ", recommended_user_ids.flatten())
        print ("Scores/similarities:" + str(recommended_user_scores))
        print ("Index in UAM for recommended user-ids:" + str(recommended_user_idx))

        #
        # Determine set of recommended artists
        #

        # Collect the artists of all neighbors; the set removes duplicates
        neighbor_artists_idx = set()
        for u_idx in recommended_user_idx:
            neighbor_artists_idx.update(UAM.getrow(u_idx).indices)

        # Remove artists already known to seed user (result is sorted, unique)
        recommended_artists_idx = np.setdiff1d(sorted(neighbor_artists_idx), pc_vec.indices)

        # Keep at most 10 recommended artist indices.
        # NOTE(review): this takes the 10 smallest artist indices, it does not
        # rank the candidates by any relevance score.
        top_10_artists_idx = heapq.nsmallest(10, recommended_artists_idx)

        print("Indices of 10 recommended artists: ", top_10_artists_idx)


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Seed user-id: 48675529
Seed user-id: 48675725
Seed user-id: 48677172
Seed user-id: 48677295
Seed user-id: 48677380
Seed user-id: 48677398
Seed user-id: 48677510
Seed user-id: 48677943
Seed user-id: 48678539
Seed user-id: 48678623
Seed user-id: 48678643
Seed user-id: 48678874
Seed user-id: 48679404
Seed user-id: 48679420
Seed user-id: 48679556
Seed user-id: 48679562
Seed user-id: 48679575
Seed user-id: 48679815
Seed user-id: 48679862
Seed user-id: 48679867
Seed user-id: 48679875
Seed user-id: 48680098
Seed user-id: 48680337
Seed user-id: 48680476
Seed user-id: 48680533
Seed user-id: 48680634
Seed user-id: 48680680
Seed user-id: 48680725
Seed user-id: 48680827
Seed user-id: 48681071
Seed user-id: 48681135
Seed user-id: 48681202
Seed user-id: 48681306
Seed user-id: 48681828
Seed user-id: 48682322
Seed user-id: 48682460
Seed user-id: 48683517
Seed user-id: 48684391
Seed user-id: 48684586
Seed user-id: 48684816
Seed user-id: 4

IndexError: ignored