In [4]:
from torch import nn
import pandas as pd

In [3]:
class Recommender(nn.Module):
    """Dot-product recommender with a scalar bias per user and per artist.

    Every user and every artist owns a ``num_factors``-wide embedding row and a
    one-element bias row. The score of a (user, artist) pair is the dot product
    of the two embedding rows plus both biases. All four tables are overwritten
    with values drawn uniformly from [-0.01, 0.01].

    Args:
        num_users: number of rows in the user tables.
        num_artists: number of rows in the artist tables.
        num_factors: width of each latent-factor embedding.
    """

    def __init__(self, num_users, num_artists, num_factors):
        super().__init__()
        # Latent-factor tables first, then bias tables; the creation and
        # initialisation order is fixed so seeded runs draw the same numbers.
        self.u = nn.Embedding(num_users, num_factors)
        self.a = nn.Embedding(num_artists, num_factors)
        for factor_table in (self.u, self.a):
            nn.init.uniform_(factor_table.weight, -0.01, 0.01)
        self.ub = nn.Embedding(num_users, 1)
        self.ab = nn.Embedding(num_artists, 1)
        for bias_table in (self.ub, self.ab):
            nn.init.uniform_(bias_table.weight, -0.01, 0.01)

    def forward(self, cats, conts):
        """Score each (user, artist) row of ``cats``.

        Args:
            cats: 2-D index tensor; column 0 holds user indices and column 1
                holds artist indices.
            conts: accepted but not used by this model.

        Returns:
            One score per row of ``cats``.
        """
        user_idx = cats[:, 0]
        artist_idx = cats[:, 1]
        # Row-wise dot product of the two embedding lookups.
        interaction = (self.u(user_idx) * self.a(artist_idx)).sum(1)
        # Bias lookups come back with a trailing size-1 dimension; drop it.
        user_bias = self.ub(user_idx).squeeze()
        artist_bias = self.ab(artist_idx).squeeze()
        return interaction + user_bias + artist_bias

In [11]:
# Load the tab-separated user/artist table and preview its first rows.
user_artist = pd.read_csv(
    "data/hetrec2011-lastfm-2k/user_artists.dat",
    sep="\t",
)
user_artist.head()

Unnamed: 0,userID,artistID,weight
0,2,51,13883
1,2,52,11690
2,2,53,11351
3,2,54,10300
4,2,55,8983


In [14]:
# Size summary: non-null userID entries, then distinct users and artists.
print(
    "Total records: {0}".format(user_artist['userID'].count()),
    "Total users: {0}".format(user_artist['userID'].nunique()),
    "Total artists: {0}".format(user_artist['artistID'].nunique()),
    sep="\n",
)

Total records: 92834
Total users: 1892
Total artists: 17632


In [22]:
def reverse_dict(x):
    """Return a new dict mapping each value of ``x`` back to its key.

    If several keys share a value, the key that appears last in ``x`` wins.
    """
    return {value: key for key, value in x.items()}


# Lookup tables from a 0-based index to the original ID, numbered in order of
# first appearance in the frame.
user_remap = {idx: raw_id for idx, raw_id in enumerate(user_artist['userID'].unique())}
artist_remap = {idx: raw_id for idx, raw_id in enumerate(user_artist['artistID'].unique())}

# Swap the original IDs in the frame for their indices (the inverse lookups).
user_artist['userID'] = user_artist['userID'].map(
    {raw_id: idx for idx, raw_id in user_remap.items()}
)
user_artist['artistID'] = user_artist['artistID'].map(
    {raw_id: idx for idx, raw_id in artist_remap.items()}
)
user_artist.head()

Unnamed: 0,userID,artistID,weight
0,0,0,13883
1,0,1,11690
2,0,2,11351
3,0,3,10300
4,0,4,8983


In [23]:
# Standardise the weight column: subtract its mean and divide by its standard
# deviation (pandas' default `Series.std`, i.e. ddof=1).
user_artist['weight'] = user_artist['weight'].pipe(
    lambda col: (col - col.mean()) / col.std()
)
user_artist.head()

Unnamed: 0,userID,artistID,weight
0,0,0,3.502167
1,0,1,2.917573
2,0,2,2.827205
3,0,3,2.547037
4,0,4,2.195961
