In [1]:
# base imports
import pandas as pd
import numpy as np

In [16]:
# Column met_o was stored as 1/2; it has to be 0/1.
# The CSV is rewritten in place, so the shift is applied only while a value
# above 1 is still present. Without this guard every re-run of the cell would
# subtract 1 again and turn the already-converted 0/1 column into -1/0.
df = pd.read_csv('./data_for_matrix.csv')
if df['met_o'].max() > 1:
    # Vectorized subtraction; same result as applying `x - 1` element by element.
    df['met_o'] = df['met_o'] - 1
    df.to_csv('data_for_matrix.csv', index=False)

### Below you can see code that prepares data for the basic matrix factorization.
The base matrix only holds information about matches, so we need a set of vectors in which each vector describes one date (it holds both ids and the information about the match).

The task will require two base matrices.
1. Matrix where men are "users" and women are "products". It will then be used to recommend women to men because matrix will say what's the predicted rating of a woman in eyes of man. Basically it will answer the question: **"How likely is that a man will like a woman?"**. Let's call this matrix/data frame **"men_like_women"**.
1. Matrix where women are "users" and men are "products". It will then be used to recommend men to women because matrix will say what's the predicted rating of a man in eyes of woman. Basically it will answer the question: **"How likely is that a woman will like a man?"**. Let's call this matrix/data frame **"women_like_men"**.

Why such analogies? They may help to understand how this human relations task translates into the world of recommender systems.

These matrices will be used to train models (with matrix factorization) which will then be saved into csv files.

In [78]:
# Split the dates into the two interaction tables described above.
base_df = pd.read_csv('./data_for_matrix.csv')

# One row per date: the rating person ('id'), the rated partner ('pid') and the
# outcome ('match'). Selecting columns instead of looping over `iterrows()`
# avoids a Python-level loop and keeps each column's own dtype (`iterrows()`
# returns every row as a single-dtype Series).
dates_df = base_df[['iid', 'pid', 'match']].rename(columns={'iid': 'id'})

# Rows with gender == 0 go to women_like_men; every other row (including a
# missing gender, exactly as in the former if/else) goes to men_like_women.
gender_is_zero = base_df['gender'] == 0
men_like_women_df = dates_df[~gender_is_zero].reset_index(drop=True)
women_like_men_df = dates_df[gender_is_zero].reset_index(drop=True)

# Record lists kept under their original names in case later cells use them.
men_like_women_data = men_like_women_df.to_dict('records')
women_like_men_data = women_like_men_df.to_dict('records')

print("men_like_women_df:")
print(men_like_women_df)
print("\nwomen_like_men_df:")
print(women_like_men_df)

men_like_women_df:
         id    pid  match
0      11.0    1.0    0.0
1      11.0    2.0    0.0
2      11.0    3.0    0.0
3      11.0    4.0    0.0
4      11.0    5.0    0.0
...     ...    ...    ...
4179  552.0  526.0    0.0
4180  552.0  527.0    0.0
4181  552.0  528.0    0.0
4182  552.0  529.0    0.0
4183  552.0  530.0    0.0

[4184 rows x 3 columns]

women_like_men_df:
         id    pid  match
0       1.0   11.0    0.0
1       1.0   12.0    0.0
2       1.0   13.0    1.0
3       1.0   14.0    1.0
4       1.0   15.0    1.0
...     ...    ...    ...
4179  530.0  548.0    0.0
4180  530.0  549.0    0.0
4181  530.0  550.0    0.0
4182  530.0  551.0    0.0
4183  530.0  552.0    0.0

[4184 rows x 3 columns]


In [79]:
import torch
import torch.nn as nn
import torch.nn.functional as F

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
# (To force the CPU, replace the expression below with the plain string 'cpu'.)
dev = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

## Let's create matrix factorization models

We will create and train 4 base MF (matrix factorization) models:
1. without bias and uniform (0, 0.2) weight initialization,
2. without bias and xavier initialization,
3. with bias and uniform (0, 0.2) weight initialization,
4. with bias and xavier initialization,

For each model we will do cross-validation to find the best hyperparameters, and then we will compare the results and choose the best model.

Some general explanations for models:
* Models are trained on a single batch because our data set is rather small.

Good reading resource: https://towardsdatascience.com/weight-initialization-techniques-in-neural-networks-26c649eb3b78

### First matrix factorization without bias

In [80]:
class MatrixFactorizationWithoutBiasNoXavier(nn.Module):
    """Bias-free matrix factorization with uniform(0, 0.2) initial embeddings.

    Each person and each partner owns an ``emb_size``-dimensional embedding;
    the prediction for a (person, partner) pair is the sigmoid of the dot
    product of the two embeddings.
    """

    def __init__(self, num_people, num_partners, emb_size=100):
        super().__init__()
        # Both tables are created first and re-initialised afterwards, in this
        # order, so the sequence of random draws stays the same.
        self.person_emb = nn.Embedding(num_people, emb_size)
        self.partner_emb = nn.Embedding(num_partners, emb_size)
        for table in (self.person_emb, self.partner_emb):
            nn.init.uniform_(table.weight, 0, 0.2)

    def forward(self, u, v):
        """Return one prediction per (u[i], v[i]) index pair."""
        person_vecs = self.person_emb(u)
        partner_vecs = self.partner_emb(v)
        # Element-wise product summed over the embedding axis = row-wise dot product.
        dot = torch.sum(person_vecs * partner_vecs, dim=1)
        return torch.sigmoid(dot)
    
    
class MatrixFactorizationWithoutBiasXavier(nn.Module):
    """Bias-free matrix factorization with Xavier-uniform initial embeddings.

    Each person and each partner owns an ``emb_size``-dimensional embedding;
    the prediction for a (person, partner) pair is the sigmoid of the dot
    product of the two embeddings.
    """

    def __init__(self, num_people, num_partners, emb_size=100):
        super().__init__()
        # Both tables are created first and re-initialised afterwards, in this
        # order, so the sequence of random draws stays the same.
        self.person_emb = nn.Embedding(num_people, emb_size)
        self.partner_emb = nn.Embedding(num_partners, emb_size)
        for table in (self.person_emb, self.partner_emb):
            nn.init.xavier_uniform_(table.weight)

    def forward(self, u, v):
        """Return one prediction per (u[i], v[i]) index pair."""
        person_vecs = self.person_emb(u)
        partner_vecs = self.partner_emb(v)
        # Element-wise product summed over the embedding axis = row-wise dot product.
        dot = torch.sum(person_vecs * partner_vecs, dim=1)
        return torch.sigmoid(dot)

    
# Tiny models (10 people, 10 partners, 3-dimensional embeddings) whose
# parameters are printed to show what each initialisation scheme produces.
example_model_no_xavier = MatrixFactorizationWithoutBiasNoXavier(10, 10, 3)
example_model_xavier = MatrixFactorizationWithoutBiasXavier(10, 10, 3)

print("Model with without xavier weights are:\n")
print(*example_model_no_xavier.parameters(), sep='\n')
print('\n\n', '='*20)
print("\n\nModel with with xavier weights are:\n")
print(*example_model_xavier.parameters(), sep='\n')

Model with without xavier weights are:

Parameter containing:
tensor([[0.1165, 0.1454, 0.1457],
        [0.1668, 0.1625, 0.0381],
        [0.1179, 0.1348, 0.0313],
        [0.1238, 0.0428, 0.1744],
        [0.0044, 0.1080, 0.0856],
        [0.1158, 0.1235, 0.0748],
        [0.0955, 0.0570, 0.0803],
        [0.0704, 0.0698, 0.1251],
        [0.1515, 0.0341, 0.0454],
        [0.0787, 0.1212, 0.1604]], requires_grad=True)
Parameter containing:
tensor([[0.0804, 0.1107, 0.1519],
        [0.0745, 0.1109, 0.0147],
        [0.0850, 0.0024, 0.0661],
        [0.1420, 0.1830, 0.0662],
        [0.0666, 0.0978, 0.0128],
        [0.0281, 0.0421, 0.1227],
        [0.1042, 0.0273, 0.1008],
        [0.1603, 0.0317, 0.1695],
        [0.0523, 0.0807, 0.1506],
        [0.1386, 0.1896, 0.1830]], requires_grad=True)




Model with with xavier weights are:

Parameter containing:
tensor([[ 0.5133,  0.4492,  0.2632],
        [ 0.0120,  0.3370,  0.4605],
        [ 0.3839,  0.3146,  0.2130],
        [ 0.3835,  0

### Second matrix factorization: with bias

In [81]:
class MatrixFactorizationWithBiasXavier(nn.Module):
    """Matrix factorization with per-entity biases and Xavier-uniform embeddings.

    The prediction for a (person, partner) pair is
    ``sigmoid(dot(person_emb, partner_emb) + person_bias + partner_bias)``.

    This class used to be declared under the name of the uniform-initialised
    variant below, which shadowed it and made it unreachable.
    """

    def __init__(self, num_people, num_partners, emb_size=100):
        super().__init__()
        self.person_emb = nn.Embedding(num_people, emb_size)
        self.person_bias = nn.Embedding(num_people, 1)
        self.partner_emb = nn.Embedding(num_partners, emb_size)
        # NOTE: the attribute name is misspelled ("parnter"); it is kept as-is
        # so existing attribute access and state_dict keys keep working.
        self.parnter_bias = nn.Embedding(num_partners, 1)
        torch.nn.init.xavier_uniform_(self.person_emb.weight)
        torch.nn.init.xavier_uniform_(self.partner_emb.weight)
        self.person_bias.weight.data.uniform_(-0.01, 0.01)
        self.parnter_bias.weight.data.uniform_(-0.01, 0.01)

    def forward(self, u, v):
        """Return one prediction per (u[i], v[i]) index pair."""
        person_vecs = self.person_emb(u)
        partner_vecs = self.partner_emb(v)
        # Bias tables have a trailing dimension of size 1; drop it so the
        # biases line up with the per-pair dot products.
        person_b = self.person_bias(u).squeeze(-1)
        partner_b = self.parnter_bias(v).squeeze(-1)
        # u*v is an element-wise product; summing over dim 1 gives the dot product.
        dot = (person_vecs * partner_vecs).sum(1)
        return torch.sigmoid(dot + person_b + partner_b)


class MatrixFactorizationWithBiasNoXavier(nn.Module):
    """Matrix factorization with per-entity biases and uniform(0, 0.2) embeddings.

    The prediction for a (person, partner) pair is
    ``sigmoid(dot(person_emb, partner_emb) + person_bias + partner_bias)``.
    """

    def __init__(self, num_people, num_partners, emb_size=100):
        super().__init__()
        self.person_emb = nn.Embedding(num_people, emb_size)
        self.person_bias = nn.Embedding(num_people, 1)
        self.partner_emb = nn.Embedding(num_partners, emb_size)
        # NOTE: the attribute name is misspelled ("parnter"); it is kept as-is
        # so existing attribute access and state_dict keys keep working.
        self.parnter_bias = nn.Embedding(num_partners, 1)
        self.person_emb.weight.data.uniform_(0, 0.2)
        self.partner_emb.weight.data.uniform_(0, 0.2)
        self.person_bias.weight.data.uniform_(-0.01, 0.01)
        self.parnter_bias.weight.data.uniform_(-0.01, 0.01)

    def forward(self, u, v):
        """Return one prediction per (u[i], v[i]) index pair."""
        person_vecs = self.person_emb(u)
        partner_vecs = self.partner_emb(v)
        # Bias tables have a trailing dimension of size 1; drop it so the
        # biases line up with the per-pair dot products.
        person_b = self.person_bias(u).squeeze(-1)
        partner_b = self.parnter_bias(v).squeeze(-1)
        # u*v is an element-wise product; summing over dim 1 gives the dot product.
        dot = (person_vecs * partner_vecs).sum(1)
        return torch.sigmoid(dot + person_b + partner_b)


# Example small models demonstrating weights (one model per initialisation scheme).
example_model_no_xavier = MatrixFactorizationWithBiasNoXavier(10, 10, 3)
example_model_xavier = MatrixFactorizationWithBiasXavier(10, 10, 3)
print("Model with without xavier weights are:\n")
for p in example_model_no_xavier.parameters():
    print(p)
print('\n\n', '='*20)
print("\n\nModel with with xavier weights are:\n")
for p in example_model_xavier.parameters():
    print(p)

Model with without xavier weights are:

Parameter containing:
tensor([[0.0545, 0.1007, 0.0864],
        [0.0694, 0.0766, 0.0097],
        [0.0142, 0.1923, 0.0251],
        [0.1078, 0.1211, 0.1873],
        [0.1948, 0.0419, 0.1350],
        [0.0832, 0.0890, 0.0360],
        [0.0747, 0.0085, 0.1849],
        [0.1025, 0.1268, 0.0129],
        [0.0519, 0.1296, 0.0545],
        [0.0055, 0.1816, 0.0199]], requires_grad=True)
Parameter containing:
tensor([[-9.4937e-04],
        [ 5.7499e-03],
        [ 3.0023e-03],
        [ 9.9753e-05],
        [-8.8526e-03],
        [ 3.7127e-03],
        [-8.8180e-03],
        [-2.0045e-03],
        [-9.3507e-03],
        [ 4.6299e-03]], requires_grad=True)
Parameter containing:
tensor([[0.1702, 0.1083, 0.0865],
        [0.1194, 0.1894, 0.0809],
        [0.1433, 0.0724, 0.0087],
        [0.1735, 0.1368, 0.0478],
        [0.1554, 0.1679, 0.0237],
        [0.0425, 0.1670, 0.0501],
        [0.1350, 0.1373, 0.0028],
        [0.1156, 0.1057, 0.1406],
        [0

### Training and testing functions are below

In [82]:
def test(model, df_test):
    model.eval()
    # .to(dev) puts code on either gpu or cpu.
    people = torch.LongTensor(df_val.userId.values).to(dev)
    partners = torch.LongTensor(df_val.movieId.values).to(dev)
    attraction = torch.FloatTensor(df_val.rating.values).to(dev)
    y_hat = model(people, partners)
    loss = F.mse_loss(y_hat, attraction)
    print('test loss %.3f ' % loss.item())


# Default values assigned below are ones that I found online.
# Cross-validation will be done later but it's good to have some defaults.
def train(model, df_train, epochs=100, learning_rate=0.01, weight_decay=1e-5):
    """Train `model` on `df_train` with Adam and an MSE loss, one full batch per epoch.

    Args:
        model: module called as ``model(people, partners)`` with two index
            tensors, returning one prediction per pair.
        df_train: DataFrame with the columns ``id``, ``pid`` and ``match``.
        epochs: number of full-batch optimisation steps.
        learning_rate: Adam learning rate.
        weight_decay: Adam weight decay.

    The training loss is printed every 10th epoch (epochs 0, 10, 20, ...).
    """
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)
    model.train()

    # Put the inputs on whatever device the model's parameters live on.
    device = next(model.parameters()).device
    # The training tensors do not change between epochs, so they are built
    # once. Ids can arrive as floats (the frames printed earlier in the
    # notebook show 11.0, 552.0, ...), hence the explicit int64 cast.
    people = torch.tensor(df_train['id'].to_numpy(dtype='int64'), device=device)
    partners = torch.tensor(df_train['pid'].to_numpy(dtype='int64'), device=device)
    attraction = torch.tensor(df_train['match'].to_numpy(dtype='float32'), device=device)

    for epoch in range(epochs):
        # Calls the forward method of the model.
        y_hat = model(people, partners)
        # Mean squared error loss.
        loss = F.mse_loss(y_hat, attraction)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # `epoch % 10` alone is truthy on every epoch that is NOT a multiple
        # of 10; compare with 0 to log once per 10 epochs.
        if epoch % 10 == 0:
            print(loss.item())

### Let's prepare train and test data sets
The standard sklearn function `train_test_split` doesn't do the job here because both the test and train data sets should include all people. So after using `train_test_split` we need to move some people between the sets to ensure that both sets are correct.

In [127]:
from sklearn.model_selection import train_test_split as tts

def train_test_split(df, test_size=0.2):
    """Split `df` into train/test frames and make sure every person in train is also in test.

    A plain sklearn split is done first. Then, for every `id` that appears in
    the train part but not in the test part, one of that person's train rows
    is swapped with one test row of a randomly chosen person who has more than
    one test row, so both parts keep their sizes.

    Args:
        df: DataFrame with an ``id`` column identifying the person of each row.
        test_size: forwarded to sklearn's ``train_test_split``.

    Returns:
        ``(df_train, df_test)``, each sorted by ``id``.

    Raises:
        ValueError: if no person in the test part has more than one row, so
            nothing can be given back to the train part.

    NOTE(review): only people missing from the test part are repaired; a
    person whose rows all landed in the test part is not handled here, and a
    person with a single row overall ends up in the test part only.
    """
    # Start from the standard sklearn split.
    df_train, df_test = tts(df, test_size=test_size)
    # People that are present in train but missing in test.
    missing_from_test = np.setdiff1d(df_train['id'].unique(), df_test['id'].unique())
    for person_id in missing_from_test:
        # 1-row frame slices (instead of row Series turned back into frames)
        # keep every column's dtype when the rows are concatenated below.
        row_to_test = df_train.loc[df_train['id'] == person_id].iloc[[0]]

        # Donors are people with more than one row in test, so they stay in
        # test after giving one row away. The pick is uniform among donors.
        test_counts = df_test['id'].value_counts()
        donor_ids = test_counts.index[test_counts > 1].to_numpy()
        if len(donor_ids) == 0:
            raise ValueError("Couldn't find any element to send from test to train.")
        donor_id = np.random.choice(donor_ids)
        row_to_train = df_test.loc[df_test['id'] == donor_id].iloc[[0]]

        # Swap the two rows between the sets; the index is rebuilt each time.
        df_train = pd.concat(
            [df_train.drop(index=row_to_test.index), row_to_train], ignore_index=True)
        df_test = pd.concat(
            [df_test.drop(index=row_to_train.index), row_to_test], ignore_index=True)
    return df_train.sort_values(by='id'), df_test.sort_values(by='id')


# The raw ids are used directly as embedding indices and they are not
# contiguous (the split above reports 277 distinct ids while id values go up
# to 552), so every embedding table needs `max id + 1` rows. Counting unique
# ids would make the larger ids index out of range. Sizes are taken from the
# full frames so that ids from both the train and the test part are valid.
men_like_women_train_df, men_like_women_test_df = train_test_split(men_like_women_df, test_size=0.2)
men_like_women_no_men = int(men_like_women_df.id.max()) + 1
men_like_women_no_women = int(men_like_women_df.pid.max()) + 1

women_like_men_train_df, women_like_men_test_df = train_test_split(women_like_men_df, test_size=0.2)
women_like_men_no_women = int(women_like_men_df.id.max()) + 1
women_like_men_no_men = int(women_like_men_df.pid.max()) + 1

         id    pid  match
0      11.0    1.0    0.0
1      11.0    2.0    0.0
2      11.0    3.0    0.0
3      11.0    4.0    0.0
4      11.0    5.0    0.0
...     ...    ...    ...
4179  552.0  526.0    0.0
4180  552.0  527.0    0.0
4181  552.0  528.0    0.0
4182  552.0  529.0    0.0
4183  552.0  530.0    0.0

[4184 rows x 3 columns]
         id    pid  match
3132   11.0   10.0    0.0
1083   11.0    1.0    0.0
2074   11.0    9.0    0.0
1073   11.0    8.0    0.0
3014   11.0    4.0    0.0
...     ...    ...    ...
2252  552.0  519.0    0.0
351   552.0  510.0    1.0
3304  552.0  526.0    0.0
2448  552.0  522.0    0.0
2833  552.0  530.0    0.0

[3347 rows x 3 columns]
277
277


## Let's finally train models and choose the best one

### Let's first train men_like_women

In [None]:
# Bias-free model with uniform initialisation for the men_like_women data,
# moved to the selected device before training.
model_no_bias_no_xavier = MatrixFactorizationWithoutBiasNoXavier(
    num_people=men_like_women_no_men,
    num_partners=men_like_women_no_women,
).to(dev)

# Fit on the train part, then report the loss on the held-out part.
train(model_no_bias_no_xavier, df_train=men_like_women_train_df, weight_decay=0.01)
test(model_no_bias_no_xavier, df_test=men_like_women_test_df)