In [1]:
import numpy as np
import pandas as pd
import sys
import scipy.sparse as sparse
from scipy.sparse.linalg import spsolve
import random
from sklearn.preprocessing import MinMaxScaler
import implicit
from sklearn.model_selection import train_test_split

In [2]:
# Load the raw listening log from CSV and drop rows that are exact duplicates.
data = pd.read_csv(
    "/Users/MAC/Desktop/MIP 2019/Code/mydata.txt",
    header=0,
    sep=',',
    low_memory=False,
).drop_duplicates()

In [3]:
#### BUILDING 2D RECOMMENDER SYSTEM ###
# Scaling factor applied to the raw interaction counts before fitting ALS.
alpha_val = 15

# Distinct values of the three entities present in the data set.
users = list(np.sort(data.user_id.unique()))
artists = list(data.artists_name.unique())
songs = list(data.track_title.unique())

# Integer codes for each entity; they are used as row/column indices of the
# sparse interaction matrices built further down.
data['usercatcode'] = data['user_id'].astype("category").cat.codes
data['artistcatcode'] = data['artist_id'].astype("category").cat.codes
data['ttime'] = data['tweet_time'].astype("category").cat.codes
# The original call was astype('category', songs): the second positional
# argument of Series.astype is `copy`, so `songs` was never used as the
# category list. Dropping it keeps the resulting codes and removes the
# misleading argument.
data['songcatcode'] = data['track_title'].astype('category').cat.codes

# Build train and test dataset.
# A fixed random_state makes the split (and therefore every number derived
# from it) reproducible across kernel restarts.
train, test = train_test_split(data, test_size=0.2, random_state=42)

In [4]:
#----------------------------------
# ARTISTS RECOMMENDATION MODEL 
#----------------------------------
# Interaction counts (number of tweets) per (user, artist) pair.
# artist_train only sees the training rows; artist_data covers the full data
# set and is kept for inspection only.
artist_train = train.groupby(['user_id', 'artists_name', 'artistcatcode', 'usercatcode'])['tweet_id'].count().reset_index(name='tweet_count')
artist_data = data.groupby(['user_id', 'artists_name', 'artistcatcode', 'usercatcode'])['tweet_id'].count().reset_index(name='tweet_count')

# The category codes were assigned on the full data set, so the matrix shape
# must be taken from `data`. Without an explicit shape the training matrix
# would silently shrink whenever the highest-coded user or artist happens to
# fall entirely into the test split, and later index lookups would fail.
n_artist_codes = int(data['artistcatcode'].max()) + 1
n_user_codes = int(data['usercatcode'].max()) + 1

# Create sparse matrices
sparse_artist_user = sparse.csr_matrix(
    (artist_train['tweet_count'], (artist_train['artistcatcode'], artist_train['usercatcode'])),
    shape=(n_artist_codes, n_user_codes),
)
# The user-by-artist matrix is handed to model.recommend(), which by default
# filters out every artist already present in the user's row. It therefore has
# to contain the TRAINING interactions only: building it from the full data
# (as before) removed the held-out artists from the recommendations as well,
# so the evaluation could never find a match.
sparse_user_artist = sparse_artist_user.T.tocsr()

# Calculate the confidence by multiplying it by our alpha value.
userartist_conf = (sparse_artist_user * alpha_val).astype('double')

# Initialize the als model_artist and fit it using the sparse artist-user matrix
model_artist = implicit.als.AlternatingLeastSquares(factors=20, regularization=0.1, iterations=50)
model_artist.fit(userartist_conf) 

100%|██████████| 50.0/50 [00:27<00:00,  1.83it/s]


In [7]:
#---------------------
# FIND SIMILAR ARTISTS
#---------------------
# Number of similar artists requested from the model.
n_similar = 20

def similar_artists_func(name=None):
    """Print the `n_similar` artists the model considers closest to `name`.

    Parameters
    ----------
    name : optional
        Artist name as it appears in ``data.artists_name``. When omitted, a
        random artist is drawn from ``artists`` on every call. (The previous
        default, ``random.choice(artists)``, was evaluated once when the
        function was defined, so the "random" artist never changed.)

    Returns
    -------
    None
        The result is printed, one artist per line, with each similarity
        expressed as a percentage of the top-ranked score.
    """
    if name is None:
        name = random.choice(artists)

    # Translate the artist name into the integer code the model was trained on.
    testartist = data.artistcatcode.loc[data.artists_name == name].iloc[0]

    # Use implicit to get similar items.
    similar = model_artist.similar_items(testartist, n_similar)

    # Print the names of our most similar artists
    print('{} similar artists to {}:'.format(n_similar, name))
    # Scores are normalised against the first (highest-ranked) hit; presumably
    # that hit is the query artist itself -- TODO confirm.
    max_similarity = similar[0][1]
    for idx, score in similar:
        percentage = (score / max_similarity) * 100
        print(data.artists_name.loc[data.artistcatcode == idx].iloc[0], "- with a similarity of {:.2f}%".format(percentage))

#-----------------------------------------------
# CREATE RECOMMENDATIONS FOR USER
#-----------------------------------------------

def recommend_artists_func(user_id, n_recommend=10, verbose=False):
    """Recommend artists to a user and score them against the user's history.

    Parameters
    ----------
    user_id :
        Value from ``data.user_id`` identifying the user.
    n_recommend : int, default 10
        Number of recommendations requested from the model. It is also the
        denominator of the returned percentage. The previous code requested
        the model's default number of items but divided by ``n_similar``
        (the unrelated similar-artists setting), which skewed the score.
    verbose : bool, default False
        When True, print the recommendations, the artists found in the user's
        history and the overlap between the two.

    Returns
    -------
    float
        Percentage (0-100) of the requested recommendations that also appear
        among the artists recorded for this user in ``data``.

    Raises
    ------
    ValueError
        If ``n_recommend`` is not positive.

    Notes
    -----
    ``model_artist.recommend`` filters out artists already present in the
    user's row of ``sparse_user_artist`` by default. If that matrix contains
    the user's full history, the overlap computed here is necessarily empty.
    """
    if n_recommend <= 0:
        raise ValueError("n_recommend must be a positive integer")

    # Translate the external user id into the integer code used by the model.
    testuser = data.usercatcode.loc[data.user_id == user_id].iloc[0]

    # Use the implicit recommender.
    recommended = model_artist.recommend(testuser, sparse_user_artist, N=n_recommend)

    # Get artist names from ids
    recommend_artist = {
        data.artists_name.loc[data.artistcatcode == idx].iloc[0]
        for idx, score in recommended
    }

    # Artists recorded for this user. Taking unique names directly avoids
    # iterating a groupby with a one-element key list, which yields 1-tuples
    # instead of plain names on recent pandas and would make every
    # intersection empty.
    real_artist = set(
        data.loc[data['user_id'] == user_id, 'artists_name'].dropna().unique()
    )

    match = recommend_artist.intersection(real_artist)
    match_percentage = len(match) / n_recommend * 100

    if verbose:
        print('{} artists recommendations for user {}: \n{}'.format(n_recommend, user_id, recommend_artist), '\n')
        print('User {} actually listened to artist: {}'.format(user_id, real_artist), '\n')
        if match_percentage != 0:
            print('Match: {} with the percentage of {}%'.format(match, match_percentage))
        else:
            print('No match found')

    return match_percentage
    

In [6]:
#Test on all users 
# Score the recommender for every distinct user in the held-out split.
# A dedicated name is used for the user list so that the module-level `users`
# list created during setup is not overwritten by this cell.
test_users = list(test.user_id.unique())
results = [recommend_artists_func(user) for user in test_users]
TestResults = pd.DataFrame({'user': test_users, 'match': results})

KeyboardInterrupt: 

In [None]:
# Display the ten users with the highest match percentage.
TestResults.sort_values(by='match', ascending=False).iloc[:10]