<a href="https://colab.research.google.com/github/NajmusSaqib/FastAI_Lessons/blob/main/Collaborative_Filtering.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
#hide
!pip install -Uqq fastbook
import fastbook
fastbook.setup_book()

[K     |████████████████████████████████| 720 kB 7.6 MB/s 
[K     |████████████████████████████████| 1.2 MB 40.2 MB/s 
[K     |████████████████████████████████| 189 kB 44.8 MB/s 
[K     |████████████████████████████████| 46 kB 4.9 MB/s 
[K     |████████████████████████████████| 56 kB 4.9 MB/s 
[K     |████████████████████████████████| 51 kB 356 kB/s 
[?25hMounted at /content/gdrive


In [4]:
#hide
from fastbook import *

In [5]:
from fastai.collab import *
from fastai.tabular.all import *
path = untar_data(URLs.ML_100k)

In [6]:
ratings = pd.read_csv(path/'u.data', delimiter='\t', header=None,
                      names=['user','movie','rating','timestamp'])
ratings.head()

Unnamed: 0,user,movie,rating,timestamp
0,196,242,3,881250949
1,186,302,3,891717742
2,22,377,1,878887116
3,244,51,2,880606923
4,166,346,1,886397596


In [7]:
last_skywalker = np.array([0.98,0.9,-0.9])

In [8]:
user1 = np.array([0.9,0.8,-0.6])

In [9]:
(user1*last_skywalker).sum()

2.1420000000000003

In [10]:
casablanca = np.array([-0.99,-0.3,0.8])

In [11]:
(user1*casablanca).sum()

-1.611

In [12]:
movies = pd.read_csv(path/'u.item',  delimiter='|', encoding='latin-1',
                     usecols=(0,1), names=('movie','title'), header=None)
movies.head()

Unnamed: 0,movie,title
0,1,Toy Story (1995)
1,2,GoldenEye (1995)
2,3,Four Rooms (1995)
3,4,Get Shorty (1995)
4,5,Copycat (1995)


In [13]:
ratings = ratings.merge(movies)
ratings.head()

Unnamed: 0,user,movie,rating,timestamp,title
0,196,242,3,881250949,Kolya (1996)
1,63,242,3,875747190,Kolya (1996)
2,226,242,5,883888671,Kolya (1996)
3,154,242,3,879138235,Kolya (1996)
4,306,242,5,876503793,Kolya (1996)


In [14]:
dls = CollabDataLoaders.from_df(ratings, item_name='title', bs=64)
dls.show_batch()

Unnamed: 0,user,title,rating
0,542,My Left Foot (1989),4
1,422,Event Horizon (1997),3
2,311,"African Queen, The (1951)",4
3,595,Face/Off (1997),4
4,617,Evil Dead II (1987),1
5,158,Jurassic Park (1993),5
6,836,Chasing Amy (1997),3
7,474,Emma (1996),3
8,466,Jackie Chan's First Strike (1996),3
9,554,Scream (1996),3


In [15]:
n_users = len(dls.classes['user'])
n_movies = len(dls.classes['title'])
n_factors = 5

In [17]:
user_factors = torch.randn(n_users,n_factors)
movie_factors = torch.randn(n_factors,n_factors)

In [18]:
one_hot_3 = one_hot(3,n_users).float()
user_factors.t() @one_hot_3

tensor([-0.4586, -0.9915, -0.4052, -0.3621, -0.5908])

In [19]:
user_factors[3]

tensor([-0.4586, -0.9915, -0.4052, -0.3621, -0.5908])

In [20]:
#collaborative filtering from scratch 

In [30]:
class DotProduct(Module):
  def __init__(self,n_users,n_movies,n_factories):
     self.user_factors = Embedding(n_users,n_factors)
     self.movie_factors = Embedding(n_movies,n_factors)


  def forward(self,x):    
    users = self.user_factors(x[:,0])
    movies = self.movie_factors(x[:,1])
    return (users*movies).sum(dim=1) 


In [31]:
x , y = dls.one_batch()
x.shape

torch.Size([64, 2])

In [32]:
model = DotProduct(n_users,n_movies,50)
learn = Learner(dls,model,loss_func = MSELossFlat())

In [33]:
learn.fit_one_cycle(5,5e-3)

epoch,train_loss,valid_loss,time
0,4.499391,3.765964,00:07
1,1.102756,1.140128,00:07
2,0.988053,1.009402,00:07
3,0.932944,0.974472,00:07
4,0.871686,0.971667,00:07


In [34]:
class DotProduct(Module):
      def __init__(self, n_users, n_movies, n_factors, y_range=(0,5.5)):
            self.user_factors = Embedding(n_users, n_factors)
            self.movie_factors = Embedding(n_movies, n_factors)
            self.y_range = y_range
     
      def forward(self, x):
          users = self.user_factors(x[:,0])
          movies = self.movie_factors(x[:,1])
          return sigmoid_range((users * movies).sum(dim=1), *self.y_range)
   
   
model = DotProduct(n_users, n_movies, 50)
learn = Learner(dls, model, loss_func=MSELossFlat())
learn.fit_one_cycle(5, 5e-3)

epoch,train_loss,valid_loss,time
0,0.978939,1.002083,00:09
1,0.879071,0.908885,00:09
2,0.686157,0.87253,00:09
3,0.50276,0.870668,00:08
4,0.361506,0.874904,00:09


In [36]:
class DotProductBias(Module):
  def __init__(self,n_users,n_movies,n_factors,y_range=(0,5.5)):
    self.user_factors = Embedding(n_users,n_factors)
    self.user_bias = Embedding(n_users,1)
    self.movie_factors = Embedding(n_movies, n_factors)
    self.movie_bias = Embedding(n_movies, 1)
    self.y_range = y_range

    
  def forward(self, x): 
    users = self.user_factors(x[:,0])
    movies = self.movie_factors(x[:,1])
    res = (users * movies).sum(dim=1, keepdim=True)
    res += self.user_bias(x[:,0]) + self.movie_bias(x[:,1]) 
    return sigmoid_range(res, *self.y_range)

In [37]:
model = DotProductBias(n_users,n_movies,50)
learn = Learner(dls,model,loss_func=MSELossFlat())
learn.fit_one_cycle(5,5e-3)

epoch,train_loss,valid_loss,time
0,0.954575,0.946176,00:08
1,0.803265,0.86881,00:09
2,0.614177,0.862008,00:09
3,0.430558,0.883941,00:09
4,0.308505,0.890569,00:08


In [38]:
#WeigthDecay L2 regularisation

In [43]:
#loss_with_wd = loss +wd*(2**parameters).sum()
#parameters.grad += wd * 2 * parameters
model = DotProductBias(n_users,n_movies,50)
learn = Learner(dls,model,loss_func=MSELossFlat())
learn.fit_one_cycle(5,5e-3,wd=0.1)

epoch,train_loss,valid_loss,time
0,0.946054,0.955268,00:09
1,0.874612,0.87954,00:08
2,0.7248,0.834866,00:09
3,0.605803,0.820783,00:09
4,0.49379,0.821467,00:08


In [44]:
#Creating our own embedded module 

In [45]:
class T(Module):
  def __init__(self):self.a = torch.ones(3)

  

In [46]:
L(T().parameters())

(#0) []

In [47]:
class T(Module):
  def __init__(self):self.a = nn.Parameter(torch.ones(3))
  

In [48]:
L(T().parameters())

(#1) [Parameter containing:
tensor([1., 1., 1.], requires_grad=True)]

In [51]:
class T(Module):
  def __init__(self):self.a = nn.Linear(1,3,bias = False)



In [53]:
t = T()
L(t.parameters())

(#1) [Parameter containing:
tensor([[-0.3722],
        [ 0.1187],
        [ 0.5696]], requires_grad=True)]

In [54]:
type(t.a.weight)

torch.nn.parameter.Parameter

In [55]:
def create_params(size):
  return nn.Parameter(torch.zeros(*size).normal_(0,0.01))

In [56]:
class DotProductBias(Module):
  def __init__(self,n_users,n_movies,n_factors,y_range=(0,0.5)):
    self.user_factors = create_params([n_users,n_factors])
    self.user_bias= create_params([n_users,n_factors])
    self.movie_factors = create_params([n_movies, n_factors])
    self.movie_bias = create_params([n_movies])
    self.y_range = y_range

  def forward(self,x):
    users = self.user_factors[x[:,0]]
    movies = self.movie_factors[x[:,1]]
    res =(users*movies).sum(dim = 1) 
    res += self.user_bias[x[:,0]] + self.movie_bias[x[:,1]] 
    return sigmoid_range(res,*self.y_range)

In [59]:
class DotProductBias(Module):
  def __init__(self, n_users, n_movies, n_factors, y_range=(0,5.5)):
            self.user_factors = create_params([n_users, n_factors])
            self.user_bias = create_params([n_users])
            self.movie_factors = create_params([n_movies, n_factors])
            self.movie_bias = create_params([n_movies])
            self.y_range = y_range

  def forward(self, x):
      users = self.user_factors[x[:,0]]
      movies = self.movie_factors[x[:,1]]
      res = (users*movies).sum(dim=1)
      res += self.user_bias[x[:,0]] + self.movie_bias[x[:,1]] 
      return sigmoid_range(res, *self.y_range)



model = DotProductBias(n_users,n_movies,50)
learn = Learner(dls,model,loss_func=MSELossFlat())
learn.fit_one_cycle(5,5e-3,wd=0.1)

epoch,train_loss,valid_loss,time
0,0.937083,0.952719,00:08
1,0.841608,0.877041,00:08
2,0.71758,0.835066,00:08
3,0.591144,0.822426,00:08
4,0.47191,0.823228,00:08


In [60]:
#Interpreting Embeddings and biases

In [61]:
movie_bias= learn.model.movie_bias.squeeze()
idxs= movie_bias.argsort()[:5]
[dls.classes['title'][i] for i in idxs]

['Children of the Corn: The Gathering (1996)',
 'Lawnmower Man 2: Beyond Cyberspace (1996)',
 'Robocop 3 (1993)',
 'Mortal Kombat: Annihilation (1997)',
 'Bio-Dome (1996)']

In [63]:
idxs = movie_bias.argsort(descending = True)[:5]
[dls.classes['title'][i] for i in idxs]

['Titanic (1997)',
 'L.A. Confidential (1997)',
 'Shawshank Redemption, The (1994)',
 'Silence of the Lambs, The (1991)',
 'Rear Window (1954)']

In [64]:
learn = collab_learner(dls,n_factors = 50,y_range=(0,5.5))
learn.fit_one_cycle(5,5e-3,wd=0.1)

epoch,train_loss,valid_loss,time
0,0.947183,0.956425,00:09
1,0.86796,0.873976,00:09
2,0.721915,0.838498,00:09
3,0.598081,0.822186,00:08
4,0.483816,0.82227,00:08


In [65]:
learn.model

EmbeddingDotBias(
  (u_weight): Embedding(944, 50)
  (i_weight): Embedding(1665, 50)
  (u_bias): Embedding(944, 1)
  (i_bias): Embedding(1665, 1)
)

In [66]:
movie_bias = learn.model.i_bias.weight.squeeze()
idxs = movie_bias.argsort(descending= True)[:5]
[dls.classes['title'][i] for i in idxs] 

['L.A. Confidential (1997)',
 "Schindler's List (1993)",
 'Silence of the Lambs, The (1991)',
 'Titanic (1997)',
 'Star Wars (1977)']

In [68]:
movie_factors = learn.model.i_weight.weight
idx  = dls.classes['title'].o2i['Silence of the lamnbs,The (1991)']
distances  = nn.CosineSimilarity(dim =1 )(movie_factors,movie_factors[idx][None])
idx = distances.argsort(descending = True)[1]
dls.classes['title'][idx]

'Mother Night (1996)'

In [69]:
#Bootstraping a Collaborative Filtering Model 

In [70]:
#DeepLearning for collaborative filtering 

In [71]:
embs = get_emb_sz(dls)
embs

[(944, 74), (1665, 102)]

In [79]:
class CollabNN(Module):
  def __init__(self,user_sz,item_sz,y_range =(0,5.5),n_act = 100):
    self.user_factors = Embedding(*user_sz)
    self.item_factors = Embedding(*item_sz)
    self.layers = nn.Sequential(nn.Linear(user_sz[1]+item_sz[1], n_act),nn.ReLU(),nn.Linear(n_act,1))
    self.y_range = y_range

  def forward(self,x):
    embs = self.user_factors(x[:,0]),self.item_factors(x[:,1])
    x = self.layers(torch.cat(embs,dim = 1))
    return sigmoid_range(x,*self.y_range)  

In [80]:
model = CollabNN(*embs)
learn = Learner(dls,model,loss_func=MSELossFlat())
learn.fit_one_cycle(5,5e-3,wd= 0.01)


epoch,train_loss,valid_loss,time
0,0.946222,0.952826,00:10
1,0.883764,0.910645,00:10
2,0.881372,0.884657,00:10
3,0.813196,0.87106,00:10
4,0.745367,0.875766,00:10


In [81]:
learn = collab_learner(dls,use_nn = True ,y_range =(0,5.5),layers= [100,50])
learn.fit_one_cycle(5,5e-3,wd = 0.1)


epoch,train_loss,valid_loss,time
0,0.990741,0.978144,00:12
1,0.913738,0.926937,00:12
2,0.890519,0.883574,00:12
3,0.80986,0.85069,00:12
4,0.722844,0.859319,00:12


In [82]:
@delegates(TabularModel)
class EmbeddingNN(TabularModel):
    def __init__(self, emb_szs, layers, **kwargs):
        super().__init__(emb_szs, layers=layers, n_cont=0, out_sz=1, **kwargs)
