In [1]:
# %cd /content/drive/MyDrive/Colab Notebooks/fastai-practice

# !git config --global user.email 'intaek428@gmail.com'
# !git config --global user.name 'ITHwang'

# !git add .
# !git commit -m ''
# !git push

!pip install -Uqq fastbook
from fastai.collab import *
from fastai.tabular.all import *

[K     |████████████████████████████████| 720 kB 5.1 MB/s 
[K     |████████████████████████████████| 48 kB 4.8 MB/s 
[K     |████████████████████████████████| 1.2 MB 41.4 MB/s 
[K     |████████████████████████████████| 189 kB 45.9 MB/s 
[K     |████████████████████████████████| 55 kB 3.5 MB/s 
[K     |████████████████████████████████| 558 kB 45.7 MB/s 
[K     |████████████████████████████████| 51 kB 325 kB/s 
[K     |████████████████████████████████| 130 kB 41.4 MB/s 
[?25h

In [2]:
# Resolve the MovieLens 100k dataset via fastai's untar_data; later cells read
# 'u.data' and 'u.item' relative to this `path`.
path = untar_data(URLs.ML_100k)

In [3]:
# u.data is read as tab-separated text with no header row, so the four
# column names are supplied explicitly.
ratings = pd.read_csv(
    path/'u.data',
    sep='\t',
    header=None,
    names=['user', 'movie', 'rating', 'timestamp'],
)
ratings.head()

Unnamed: 0,user,movie,rating,timestamp
0,196,242,3,881250949
1,186,302,3,891717742
2,22,377,1,878887116
3,244,51,2,880606923
4,166,346,1,886397596


In [4]:
# u.item is pipe-separated and latin-1 encoded; only its first two columns
# are kept and they are named 'movie' and 'title'.
movies = pd.read_csv(
    path/'u.item',
    sep='|',
    encoding='latin-1',
    header=None,
    usecols=(0, 1),
    names=('movie', 'title'),
)
movies.head()

Unnamed: 0,movie,title
0,1,Toy Story (1995)
1,2,GoldenEye (1995)
2,3,Four Rooms (1995)
3,4,Get Shorty (1995)
4,5,Copycat (1995)


In [5]:
# Attach each movie's title to its ratings. No `on=` is given, so pandas joins on
# the column names the two frames have in common (here 'movie').
# NOTE(review): this rebinds `ratings`, so the title-less frame loaded earlier is
# no longer reachable once this cell has run.
ratings = ratings.merge(movies)
ratings.head()

Unnamed: 0,user,movie,rating,timestamp,title
0,196,242,3,881250949,Kolya (1996)
1,63,242,3,875747190,Kolya (1996)
2,226,242,5,883888671,Kolya (1996)
3,154,242,3,879138235,Kolya (1996)
4,306,242,5,876503793,Kolya (1996)


In [6]:
# Build collaborative-filtering DataLoaders from the merged frame, using the
# 'title' column (rather than the numeric movie id) as the item, batch size 64.
# NOTE(review): no seed is passed, so the train/validation split presumably differs
# between runs — confirm whether a fixed seed should be supplied for reproducibility.
dls = CollabDataLoaders.from_df(ratings, item_name='title', bs=64)
dls.show_batch()

Unnamed: 0,user,title,rating
0,8,Event Horizon (1997),3
1,476,Drop Dead Fred (1991),2
2,130,"Fox and the Hound, The (1981)",4
3,758,Star Trek: First Contact (1996),4
4,11,Chinatown (1974),3
5,363,Star Trek IV: The Voyage Home (1986),2
6,911,Strictly Ballroom (1992),5
7,64,Monty Python and the Holy Grail (1974),5
8,385,Sunset Blvd. (1950),5
9,551,Pretty Woman (1990),4


In [7]:
# Number of distinct user / title categories known to the DataLoaders.
n_users = len(dls.classes['user'])
n_movies = len(dls.classes['title'])
# Width of each latent-factor vector for this hand-built example
# (the models trained further down pass 50 instead).
n_factors = 5

# Randomly initialised factor matrices: one row of n_factors values per user / per movie.
user_factors = torch.randn(n_users, n_factors)
movie_factors = torch.randn(n_movies, n_factors)

In [None]:
# Float version of `one_hot(3, n_users)`; going by the helper's name this is a
# length-n_users vector marking index 3 — TODO confirm against the helper's docs.
# NOTE(review): `one_hot` is not defined in this notebook; presumably it comes from
# the fastai star imports. This cell has no execution count, so it was never run.
one_hot_3 = one_hot(3, n_users).float()
one_hot_3

In [18]:
class DotProduct(Module):
    "Scores a (user, movie) pair as the dot product of their embedding vectors."
    def __init__(self, n_users, n_movies, n_factors):
        # One `n_factors`-wide embedding row per user and per movie.
        self.user_factors = Embedding(n_users, n_factors)
        self.movie_factors = Embedding(n_movies, n_factors)

    def forward(self, x):
        # Column 0 of the batch is looked up in the user table, column 1 in the movie table.
        user_idx, movie_idx = x[:, 0], x[:, 1]
        user_vecs = self.user_factors(user_idx)
        movie_vecs = self.movie_factors(movie_idx)
        # Element-wise product summed over dim 1 gives one dot product per row.
        return (user_vecs * movie_vecs).sum(dim=1)

In [19]:
# Grab a single batch from the DataLoaders and inspect the shape of the inputs.
x, y = dls.one_batch()
x.shape


torch.Size([64, 2])

In [20]:
# Shape of the targets that came with the batch fetched above.
y.shape

torch.Size([64, 1])

In [21]:
# Plain dot-product model with 50 factors per user/movie (not the n_factors = 5 set earlier).
model = DotProduct(n_users, n_movies, 50)
# Learner wired with fastai's MSELossFlat as the loss function.
learn = Learner(dls, model, loss_func=MSELossFlat())
# 5 epochs of fit_one_cycle with a learning-rate argument of 3e-3.
learn.fit_one_cycle(5, 3e-3)

epoch,train_loss,valid_loss,time
0,2.002991,1.721531,00:09
1,1.033409,1.0147,00:09
2,0.970238,0.945035,00:09
3,0.891573,0.886417,00:09
4,0.851011,0.878609,00:09


In [22]:
def sigmoid_range(x, lo, hi):
    "Map `x` through a sigmoid rescaled to lie between `lo` and `hi`."
    span = hi - lo
    return torch.sigmoid(x) * span + lo

In [32]:
class DotProductBias(Module):
    "Dot-product model with a per-user and per-movie bias, squashed into `y_range`."
    def __init__(self, n_users, n_movies, n_factors, y_range=(0,5.5)):
        # Factor tables are `n_factors` wide; bias tables hold a single value per row.
        self.user_factors = Embedding(n_users, n_factors)
        self.user_bias = Embedding(n_users, 1)
        self.movie_factors = Embedding(n_movies, n_factors)
        self.movie_bias = Embedding(n_movies, 1)
        self.y_range = y_range

    def forward(self, x):
        # Column 0 indexes the user tables, column 1 the movie tables.
        user_idx, movie_idx = x[:, 0], x[:, 1]
        interaction = self.user_factors(user_idx) * self.movie_factors(movie_idx)
        # keepdim=True keeps a trailing size-1 axis so the score lines up with the
        # width-1 bias embeddings.
        score = interaction.sum(dim=1, keepdim=True)
        bias = self.user_bias(user_idx) + self.movie_bias(movie_idx)
        return sigmoid_range(score + bias, *self.y_range)

In [33]:
# Bias-augmented model, again with 50 factors; y_range is left at its default.
model = DotProductBias(n_users, n_movies, 50)
# Learner wired with fastai's MSELossFlat as the loss function.
learn = Learner(dls, model, loss_func=MSELossFlat())
# 5 epochs of fit_one_cycle, learning-rate argument 5e-3, with wd=0.1 passed through.
learn.fit_one_cycle(5, 5e-3, wd=0.1)

epoch,train_loss,valid_loss,time
0,0.943718,0.919295,00:10
1,0.844525,0.850556,00:10
2,0.748569,0.81202,00:10
3,0.58593,0.802597,00:10
4,0.473396,0.803814,00:10


In [38]:
def create_params(size):
    "Return an `nn.Parameter` of shape `size` filled with draws from N(0, 0.01)."
    weights = torch.zeros(*size)
    # In-place fill with normal noise: mean 0, standard deviation 0.01.
    weights.normal_(mean=0, std=0.01)
    return nn.Parameter(weights)

class DotProductBias(Module):
    """Dot-product-plus-bias model built on raw parameter tensors indexed directly.

    This reuses the name `DotProductBias`, so once this cell runs it replaces the
    `Embedding`-based class defined earlier in the notebook.
    """
    def __init__(self, n_users, n_movies, n_factors, y_range=(0, 5.5)):
        # Factor tables are 2-D (rows x n_factors); bias tables are 1-D (one value per row).
        self.user_factors = create_params([n_users, n_factors])
        self.user_bias = create_params([n_users])
        self.movie_factors = create_params([n_movies, n_factors])
        self.movie_bias = create_params([n_movies])
        self.y_range = y_range

    def forward(self, x):
        # Column 0 indexes the user tensors, column 1 the movie tensors.
        user_idx, movie_idx = x[:, 0], x[:, 1]
        interaction = self.user_factors[user_idx] * self.movie_factors[movie_idx]
        bias = self.user_bias[user_idx] + self.movie_bias[movie_idx]
        # No keepdim here: the summed score and the 1-D bias lookups are both one value per row.
        return sigmoid_range(interaction.sum(dim=1) + bias, *self.y_range)

In [39]:
# Same training recipe as the previous run, now using the parameter-based DotProductBias.
model = DotProductBias(n_users, n_movies, 50)
# Learner wired with fastai's MSELossFlat as the loss function.
learn = Learner(dls, model, loss_func=MSELossFlat())
# 5 epochs of fit_one_cycle, learning-rate argument 5e-3, with wd=0.1 passed through.
learn.fit_one_cycle(5, 5e-3, wd=0.1)

epoch,train_loss,valid_loss,time
0,0.963713,0.931876,00:21
1,0.850839,0.851061,00:21
2,0.730462,0.818289,00:11
3,0.573088,0.804226,00:11
4,0.47507,0.805363,00:11
