# Recommendation Model
## Matrix Factorization Recommendation Model

## Required Libraries

In [60]:
import pandas as pd
import numpy as np

## Importing of DataSet

In [43]:
# Load the ratings table; later cells read its `userid`, `movieid` and `rating` columns.
data = pd.read_csv('user-movie-rating 5000*3.csv')

## Splitting the Dataset into Training and Validation Sets

In [44]:
# Fix NumPy's global RNG seed so the random split below is reproducible.
np.random.seed(3)
# Boolean mask: each row is drawn independently and is True (training) with probability 0.8.
msk = np.random.rand(len(data)) < 0.8
# Take explicit copies so each split is its own DataFrame.
train = data[msk].copy()
val = data[~msk].copy()

## Encoding the Dataset
Encode the user and movie IDs as contiguous integer indices (0, 1, 2, ...) so that they can be used to index the embedding tables.

In [45]:
def proc_col(col, train_col=None):
    """Map the values of a column to contiguous integer ids.

    The vocabulary is taken from `train_col` when it is given, otherwise
    from `col` itself. Ids follow the order of first appearance in the
    vocabulary source.

    Returns a tuple `(name2idx, codes, n_unique)`:
      * name2idx -- dict from original value to integer id,
      * codes    -- numpy array with the id of every entry of `col`
                    (-1 for values that are not in the vocabulary),
      * n_unique -- size of the vocabulary.
    """
    vocab_source = col if train_col is None else train_col
    categories = vocab_source.unique()
    index_of = dict(zip(categories, range(len(categories))))
    # Values absent from the vocabulary are marked with -1.
    codes = np.array([index_of.get(value, -1) for value in col])
    return index_of, codes, len(categories)

In [46]:
def encode_data(df, train=None):
    """Return a copy of `df` with `userid` and `movieid` replaced by integer ids.

    When `train` is given, the ids come from the values seen in `train`,
    and rows of `df` whose user or movie does not occur in `train` are
    dropped. Without `train`, `df` supplies its own vocabulary.
    """
    encoded = df.copy()
    for key in ("userid", "movieid"):
        reference = train[key] if train is not None else None
        codes = proc_col(encoded[key], reference)[1]
        encoded[key] = codes
        # proc_col marks out-of-vocabulary values with -1; discard those rows.
        in_vocab = encoded[key] >= 0
        encoded = encoded[in_vocab]
    return encoded

In [47]:
# Encode the training split with its own vocabulary, then encode the validation
# split against the training vocabulary (validation rows with users or movies
# not present in `train` are dropped by encode_data).
df_train = encode_data(train)
df_val = encode_data(val, train)
# Number of distinct movies in the encoded training set (shown as the cell output).
df_train.movieid.nunique()

186

## Required Libraries for the Model

In [48]:
import torch
import torch.nn as nn
import torch.nn.functional as F

In [49]:
# Vocabulary sizes of the encoded training set; passed to the model below to
# size its user and item embedding tables.
num_users = len(df_train.userid.unique())
num_items = len(df_train.movieid.unique())
print(num_users, num_items)

3907 186


## Model

In [50]:
def train_epocs(model, epochs=10, lr=0.01, wd=0.0, unsqueeze=False):
    """Train `model` full-batch on `df_train` with Adam, then print the validation loss.

    Every epoch is a single optimisation step over the whole training set,
    using mean squared error against `df_train.rating`. The training loss of
    each epoch is printed, and `test_loss` is called once at the end.

    Args:
        model: module called as `model(users, items)` that returns predictions.
        epochs: number of full-batch optimisation steps.
        lr: Adam learning rate.
        wd: Adam weight decay.
        unsqueeze: if True, reshape the targets from (N,) to (N, 1) so that
            they match a model whose output has a trailing dimension of 1.

    Note:
        A new Adam optimizer is created on every call, so optimizer state is
        not carried over between successive calls.
    """
    optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=wd)

    # Put the inputs on the model's own device instead of hard-coding CUDA, so
    # the function also works for a CPU model or on a machine without a GPU.
    device = next(model.parameters()).device

    # The training data does not change between epochs: build and transfer the
    # tensors once instead of once per epoch.
    users = torch.LongTensor(df_train.userid.values).to(device)
    items = torch.LongTensor(df_train.movieid.values).to(device)
    ratings = torch.FloatTensor(df_train.rating.values).to(device)
    if unsqueeze:
        ratings = ratings.unsqueeze(1)

    model.train()
    for _ in range(epochs):
        y_hat = model(users, items)
        loss = F.mse_loss(y_hat, ratings)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        print(loss.item())
    test_loss(model, unsqueeze)

In [51]:
# Shape check: unsqueeze(1) turns the (N,) ratings vector into an (N, 1) column.
# NOTE: `.cuda()` requires a CUDA-capable GPU.
ratings = torch.FloatTensor(df_train.rating.values)
print(ratings.shape)
ratings = ratings.unsqueeze(1).cuda()
print(ratings.shape)

torch.Size([4002])
torch.Size([4002, 1])


In [52]:
def test_loss(model, unsqueeze=False):
    """Print the mean squared error of `model` on the validation set `df_val`.

    Args:
        model: module called as `model(users, items)` that returns predictions.
        unsqueeze: if True, reshape the targets from (N,) to (N, 1) so that
            they match a model whose output has a trailing dimension of 1.

    Note:
        The model is switched to eval mode and left in that mode.
    """
    model.eval()

    # Use the model's own device instead of hard-coding CUDA; if the model has
    # no parameters, fall back to CUDA when available, otherwise to the CPU.
    first_param = next(model.parameters(), None)
    if first_param is not None:
        device = first_param.device
    else:
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    users = torch.LongTensor(df_val.userid.values).to(device)
    items = torch.LongTensor(df_val.movieid.values).to(device)
    ratings = torch.FloatTensor(df_val.rating.values).to(device)
    if unsqueeze:
        ratings = ratings.unsqueeze(1)

    # Evaluation needs no gradients: skip building the autograd graph.
    with torch.no_grad():
        y_hat = model(users, items)
        loss = F.mse_loss(y_hat, ratings)
    print("test loss %.3f " % loss.item())

In [53]:
class CollabFNet(nn.Module):
    """Neural collaborative-filtering model over user and item embeddings.

    The user and item embeddings are concatenated, passed through ReLU and
    dropout, then through one hidden linear layer with ReLU and a final
    linear layer that produces a single value per (user, item) pair.

    Args:
        num_users: number of rows in the user embedding table.
        num_items: number of rows in the item embedding table.
        emb_size: dimension of each embedding vector.
        n_hidden: width of the hidden linear layer.
        dropout: dropout probability applied to the concatenated embeddings
            (defaults to 0.1, the value that used to be hard-coded).
    """

    def __init__(self, num_users, num_items, emb_size=100, n_hidden=10, dropout=0.1):
        super().__init__()
        # Layer creation order is kept so that seeded initialisation is unchanged.
        self.user_emb = nn.Embedding(num_users, emb_size)
        self.item_emb = nn.Embedding(num_items, emb_size)
        self.lin1 = nn.Linear(emb_size * 2, n_hidden)
        self.lin2 = nn.Linear(n_hidden, 1)
        self.drop1 = nn.Dropout(dropout)

    def forward(self, u, v):
        """Return predictions of shape (batch, 1) for index tensors `u` and `v`.

        `u` and `v` are 1-D integer tensors of equal length holding user and
        item indices respectively.
        """
        U = self.user_emb(u)
        V = self.item_emb(v)
        # Concatenate along the feature axis: (batch, 2 * emb_size).
        x = F.relu(torch.cat([U, V], dim=1))
        x = self.drop1(x)
        x = F.relu(self.lin1(x))
        x = self.lin2(x)
        return x

In [54]:
# Build the network sized to the training vocabularies and move it to the GPU
# (`.cuda()` requires a CUDA-capable device).
model = CollabFNet(num_users, num_items, emb_size=100).cuda()

## Training and Evaluating the Model

In [55]:
# Stage 1: 15 full-batch epochs at lr=0.05. unsqueeze=True reshapes the targets
# to (N, 1) to match the model's single-column output.
train_epocs(model, epochs=15, lr=0.05, wd=1e-6, unsqueeze=True)

20.279193878173828
1.7777460813522339
4.661050319671631
1.3562098741531372
2.3295164108276367
2.587092638015747
1.501908302307129
0.9334666728973389
1.5094772577285767
1.5235658884048462
0.8413286209106445
0.5870599150657654
0.8007135391235352
0.9177024960517883
0.7000640034675598
test loss 0.872 


In [56]:
# Stage 2: keep training the same model for 10 epochs at a lower lr of 0.01
# (train_epocs creates a fresh Adam optimizer on each call).
train_epocs(model, epochs=10, lr=0.01, wd=1e-6, unsqueeze=True)

0.41349950432777405
0.5494076609611511
0.363182932138443
0.33686360716819763
0.3887214660644531
0.3501995801925659
0.2855682969093323
0.25902846455574036
0.2799547612667084
0.27691203355789185
test loss 0.952 


In [57]:
# Stage 3: 10 more epochs on the same model at lr=0.001.
train_epocs(model, epochs=10, lr=0.001, wd=1e-6, unsqueeze=True)


0.24252627789974213
0.22614786028862
0.22374558448791504
0.21740210056304932
0.2143857628107071
0.21771125495433807
0.21459698677062988
0.212958425283432
0.21111255884170532
0.20831890404224396
test loss 0.859 
