In [None]:
# Requires PyTorch >= 0.4 for graph visualization; the fastai conda install currently ships PyTorch 0.3.
# pip install tensorflow
# pip install git+https://github.com/lanpa/tensorboard-pytorch
# tensorboard --logdir="<directory containing the log files; the default location is PATH/logs>"

In [None]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline

from fastai.learner import *
from fastai.column_data import *
from tensorboard_cb import *

Dataset: MovieLens `ml-latest-small`, available from [http://files.grouplens.org/datasets/movielens/ml-latest-small.zip](http://files.grouplens.org/datasets/movielens/ml-latest-small.zip)

In [None]:
# Directory the MovieLens CSV files (ratings.csv, movies.csv) are read from,
# given relative to the notebook's working directory.
PATH = Path('../data/ml-latest-small')

In [None]:
# Load the ratings table and the movie metadata table.
ratings = pd.read_csv(PATH/'ratings.csv')
movies = pd.read_csv(PATH/'movies.csv')

# Row indices handed to the model data object below as the validation split.
val_idxs = get_cv_idxs(len(ratings))

# Re-encode raw user ids as contiguous 0-based indices (numbered in order of
# first appearance) so they can be fed straight into an embedding lookup.
u_uniq = ratings['userId'].unique()
user2idx = dict(zip(u_uniq, range(len(u_uniq))))
ratings['userId'] = ratings['userId'].map(user2idx)

# Same re-encoding for movie ids.
m_uniq = ratings['movieId'].unique()
movie2idx = dict(zip(m_uniq, range(len(m_uniq))))
ratings['movieId'] = ratings['movieId'].map(movie2idx)

# Number of distinct users / movies, used to size the embedding tables.
n_users = int(ratings['userId'].nunique())
n_movies = int(ratings['movieId'].nunique())

# Model inputs are every column except the target and the timestamp;
# the target is the rating itself as float32.
x = ratings.drop(['rating', 'timestamp'], axis=1)
y = ratings['rating'].astype(np.float32)

# Observed rating bounds; the network reads these when rescaling its output.
min_rating = ratings['rating'].min()
max_rating = ratings['rating'].max()

# Both input columns are treated as categorical fields.
# NOTE(review): the trailing 64 is presumably the batch size -- confirm against
# ColumnarModelData.from_data_frame.
md = ColumnarModelData.from_data_frame(
    PATH, val_idxs, x, y, ['userId', 'movieId'], 64)


In [None]:
class EmbeddingNet(nn.Module):
    """Feed-forward rating predictor over learned user and movie embeddings.

    A user embedding and a movie embedding are concatenated, passed through
    dropout -> linear -> ReLU -> dropout -> linear, squashed with a sigmoid and
    rescaled to the rating range.

    NOTE: ``forward`` reads the notebook-level globals ``min_rating`` and
    ``max_rating``; they must be defined before the model is called.
    """
    def __init__(self, n_users, n_movies, n_factors=50, nh=10, p1=0.05, p2=0.5):
        """Create the embedding tables and the two-layer head.

        Args:
            n_users: number of rows in the user embedding table.
            n_movies: number of rows in the movie embedding table.
            n_factors: width of each embedding vector.
            nh: number of hidden units in the first linear layer.
            p1: dropout probability applied to the concatenated embeddings.
            p2: dropout probability applied after the hidden layer.
        """
        super().__init__()
        self.u = nn.Embedding(n_users,n_factors)
        self.m = nn.Embedding(n_movies, n_factors)
        # Start both embedding tables from small uniform weights in [-0.01, 0.01].
        self.u.weight.data.uniform_(-0.01,0.01)
        self.m.weight.data.uniform_(-0.01,0.01)
        self.lin1 = nn.Linear(n_factors*2, nh)
        self.lin2 = nn.Linear(nh,1)
        self.drop1 = nn.Dropout(p1)
        self.drop2 = nn.Dropout(p2)

    def forward(self, cats, conts):
        """Predict one rating per row of ``cats``.

        Args:
            cats: 2-D integer tensor; column 0 holds user indices and column 1
                holds movie indices.
            conts: unused; accepted so the signature matches what the caller
                passes.

        Returns:
            Tensor with one column, each value lying strictly between
            ``min_rating - 0.5`` and ``max_rating + 0.5``.
        """
        users,movies = cats[:,0],cats[:,1]
        x = torch.cat([self.u(users),self.m(movies)], dim=1)
        x = self.drop1(x)
        x = self.lin1(x)
        x = F.relu(x)
        x = self.drop2(x)
        x = self.lin2(x)
        # torch.sigmoid is the supported spelling; F.sigmoid is deprecated.
        x = torch.sigmoid(x)
        # Stretch (0, 1) to span half a rating step beyond each observed bound.
        return x * (max_rating-min_rating+1) + min_rating-0.5

In [None]:
# Instantiate the network, pass it through to_gpu, wrap it in SingleModel and
# hand it to a Learner together with the model data.
learn = Learner(
    md,
    SingleModel(to_gpu(EmbeddingNet(n_users, n_movies))),
)
# Optimiser factory, loss function, and clip setting (None), assigned in the
# same left-to-right order as before.
learn.opt_fn, learn.crit, learn.clip = optim.Adam, F.mse_loss, None

In [None]:
def rmsle(y_pred, targ):
    """Return the root of the mean squared difference between two inputs.

    Despite the name, no logarithm is applied: this is a plain RMSE.

    Args:
        y_pred: predicted values; must support subtraction, ``** 2`` and
            ``.mean()``.
        targ: target values, compatible with ``y_pred`` for subtraction.

    Returns:
        A Python float.
    """
    squared_error = (y_pred - targ) ** 2
    return math.sqrt(squared_error.mean())

In [None]:
# Build the TensorBoard logging callback from the learner's model, the model
# data object, the label "test", and the display name of the tracked metric.
# NOTE(review): the meaning of each positional argument is defined in
# tensorboard_cb.TensorboardLogger, which is not visible here -- confirm there.
tb_logger = TensorboardLogger(learn.model, md, "test", metrics_names=["rmsle"])

In [None]:
# Train the model, reporting the rmsle metric and passing the TensorBoard
# logger as a callback.
# NOTE(review): 1e-3 and 2 are presumably the learning rate and the number of
# cycles, and wds presumably weight decay -- confirm against Learner.fit.
learn.fit(1e-3, 2, cycle_len=2, cycle_mult=2, wds=1e-5, metrics=[rmsle], callbacks=[tb_logger])