In [3]:
import ast
import copy
import json
import os
import random
import statistics
import time
from datetime import datetime

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader

from DataSet import RecipeRecommendation

In [4]:
# Pick the first CUDA device when one is available, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# Report GPU usage in the cell output; nothing is printed on a CPU-only machine.
if device.type == "cuda":
    print("gpu")

gpu


In [5]:
# Load the recipe table. The columns used later in this notebook are 'i' (recipe id)
# and 'indexes' (the recipe's ingredient index list, stored as text).
recipe2vec = pd.read_csv('recipe2vec_1203.csv')

In [6]:
# Preview the first rows to check the available columns.
recipe2vec.head()

Unnamed: 0,i,ingredients,vector,indexes
0,0,"['great northern beans', 'yellow onion', 'dice...","[0.04360911116666666, -0.005143107888888889, 0...","[0, 1, 2, 3, 4, 5, 6, 7, 8, 7032, 7032, 7032, ..."
1,1,"[""devil's food cake mix"", 'vegetable oil', 'eg...","[-0.17808505125000001, -0.18320775649999999, 0...","[9, 10, 11, 12, 7032, 7032, 7032, 7032, 7032, ..."
2,2,"['mayonnaise', 'salsa', 'cheddar cheese', 'ref...","[-0.045958727461538465, 0.15744469853846155, 0...","[13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 2..."
3,3,"['chicken tenders', 'flour', 'garlic powder', ...","[0.010844033750000003, -0.11082101774999999, -...","[26, 27, 4, 28, 29, 30, 10, 31, 32, 33, 34, 35..."
4,4,"['lamb shoulder', 'salt', 'ground black pepper...","[0.026242995312500003, -0.16281941465, 0.00755...","[36, 28, 37, 10, 38, 39, 27, 40, 41, 42, 43, 3..."


In [7]:
# Lookup table: recipe id ('i') -> ingredient index list, kept as the raw text read from the CSV
# (callers parse it with json.loads when they need the actual list).
recipe2vec_dict = dict(zip(recipe2vec['i'], recipe2vec['indexes']))

In [8]:
# Parse the ingredient index lists of recipes 0 and 1 (stored as JSON text in recipe2vec_dict).
# These two lists are reused further down as sample input for a model smoke test, so this
# cell must actually execute for a fresh "Restart & Run All" to succeed.
list_1 = json.loads(recipe2vec_dict[0])
list_2 = json.loads(recipe2vec_dict[1])
print(list_1)
print(list_2)

[0, 1, 2, 3, 4, 5, 6, 7, 8, 7032, 7032, 7032, 7032, 7032, 7032, 7032, 7032, 7032, 7032, 7032]
[9, 10, 11, 12, 7032, 7032, 7032, 7032, 7032, 7032, 7032, 7032, 7032, 7032, 7032, 7032, 7032, 7032, 7032, 7032]


In [9]:
# Every known interaction; used to reject sampled negatives that were actually observed.
whole_data_path = 'newdataset/whole_data_1128.csv'
# Interactions used for fitting the model.
training_data_path = 'newdataset/interactions_train_1128.csv'
# Held-out interactions. Original note: "test becomes validation" -- presumably the former
# test split is what is used for validation here.
validation_data_path = 'newdataset/interactions_validation_1128.csv'

In [10]:
def check_negative_sample(whole_data_path):
    """Build a lookup of all recorded interactions.

    Reads the CSV at ``whole_data_path`` (columns 'u', 'i' and 'rating' are required)
    and returns a dict mapping each ``(u, i)`` tuple to its rating. When the same
    pair occurs on several rows, the last row wins.
    """
    interactions = pd.read_csv(whole_data_path)
    # Keys are (user, recipe) tuples so membership can be tested with a single lookup.
    user_recipe_pairs = zip(interactions['u'], interactions['i'])
    return dict(zip(user_recipe_pairs, interactions['rating']))

In [11]:
# (u, i) -> rating for every row of the whole-data file; used below to make sure that
# randomly drawn "negative" pairs are not actually recorded interactions.
data_dict = check_negative_sample(whole_data_path)

In [12]:
class RecipeNCFModel_v2(torch.nn.Module):
    """Neural collaborative filtering model scoring (user, recipe-ingredients) pairs.

    A recipe is represented by its list of ingredient indexes. The ingredient
    vectors are looked up in a frozen, pretrained embedding table, passed through
    three parallel convolutions (windows of 4, 3 and 2 ingredients) with global
    max pooling, projected to ``r_emb_dimension`` features, concatenated with the
    user embedding and fed to an MLP ending in a sigmoid.

    Args:
        u_emb_size: number of rows of the user embedding table.
        u_emb_dimension: width of a user embedding.
        r_emb_size: number of rows of the recipe embedding table.
        r_emb_dimension: width of the recipe representation fed to the MLP.
        pretrained_weight: optional 2-D float tensor installed (trainable) as the
            recipe embedding table.
        ingre_embedding_tensors: path of a file readable by ``torch.load`` that
            holds the 2-D ingredient embedding tensor, or that tensor itself.

    Note:
        ``forward`` reshapes the ingredient vectors to ``(-1, 1, 20, 100)``, i.e. it
        expects 20 ingredient slots per recipe and 100-dimensional ingredient vectors.
        ``recipe_embedding`` is kept as a module (so saved state dicts keep the same
        keys) but is not used by ``forward``.
    """

    def __init__(self, u_emb_size, u_emb_dimension, r_emb_size, r_emb_dimension, pretrained_weight=None, ingre_embedding_tensors='Ingredient_embeddings_1202.pt'):

        super(RecipeNCFModel_v2, self).__init__()
        self.u_emb_size = u_emb_size
        self.u_emb_dimension = u_emb_dimension
        self.r_emb_size = r_emb_size
        self.r_emb_dimension = r_emb_dimension
        self.input_size = self.u_emb_dimension + self.r_emb_dimension

        if isinstance(ingre_embedding_tensors, torch.Tensor):
            self.ingre_embedding_tensors = ingre_embedding_tensors
        else:
            # Load on CPU so the model can be built on any machine; the embedding is
            # moved together with the rest of the module by ``model.to(device)``.
            self.ingre_embedding_tensors = torch.load(ingre_embedding_tensors, map_location='cpu')

        self.pretrained_weight = pretrained_weight

        # from_pretrained freezes the table by default: ingredient vectors are not trained.
        self.ingredient_embedding = nn.Embedding.from_pretrained(self.ingre_embedding_tensors)

        self.users_embedding = nn.Embedding(self.u_emb_size, self.u_emb_dimension)
        self.recipe_embedding = nn.Embedding(self.r_emb_size, self.r_emb_dimension)

        self.hidden = nn.Sequential(nn.Linear(self.input_size, 16*2),
                                    nn.ReLU(),
                                    nn.Linear(16*2, 8*2),
                                    nn.ReLU())

        # Projects the 6 pooled CNN features (3 convolutions x 2 channels) to the recipe
        # width that forward() concatenates with the user embedding.
        self.fc = nn.Sequential(nn.Linear(6, self.r_emb_dimension), nn.ReLU())

        self.cnn1 = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channels=2, kernel_size=(4, 100), stride=1, padding=0),
            nn.ReLU())

        self.cnn2 = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channels=2, kernel_size=(3, 100), stride=1, padding=0),
            nn.ReLU())

        self.cnn3 = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channels=2, kernel_size=(2, 100), stride=1, padding=0),
            nn.ReLU())

        self.sigmoid = nn.Sigmoid()

        self.out = nn.Linear(8*2, 1)

        self._init()

    def _init(self):
        """Initialise the embedding tables and the Linear layers of ``hidden`` and ``fc``."""

        def init(m):
            if isinstance(m, nn.Linear):
                torch.nn.init.xavier_uniform_(m.weight)
                m.bias.data.fill_(0.01)

        self.users_embedding.weight.data.uniform_(-0.05, 0.05)
        if self.pretrained_weight is not None:
            # from_pretrained is a classmethod that returns a NEW embedding module, so the
            # result has to be assigned; calling it on the instance alone changes nothing.
            self.recipe_embedding = nn.Embedding.from_pretrained(self.pretrained_weight, freeze=False)
            print('Use recipes embeddings successfully.')
        else:
            self.recipe_embedding.weight.data.uniform_(-0.05, 0.05)
        # apply() walks the sub-modules, which is required to reach the Linear layers
        # wrapped inside the nn.Sequential containers.
        self.hidden.apply(init)
        self.fc.apply(init)
        print('Weights have been initialized.')

    @staticmethod
    def _conv_max_pool(conv, x):
        """Run ``conv`` on ``x`` and max-pool over all window positions -> (batch, channels, 1)."""
        feat = conv(x)
        channels, steps = feat.shape[1], feat.shape[2]
        return F.max_pool1d(feat.view(-1, channels, steps), steps)

    def forward(self, u, r):
        """Score user/recipe pairs.

        Args:
            u: LongTensor of user indexes, one row per sample (e.g. shape (batch, 1)).
            r: LongTensor of ingredient indexes, 20 per sample (shape (batch, 20), or
               (20,) for a single sample).

        Returns:
            Tensor of shape (batch, 1) with sigmoid outputs.
        """
        u_emb = self.users_embedding(u)
        # (batch, 1, 20, 100): one input channel, 20 ingredient slots, 100-d vectors.
        ingre = self.ingredient_embedding(r).view(-1, 1, 20, 100)
        pooled = [self._conv_max_pool(conv, ingre) for conv in (self.cnn1, self.cnn2, self.cnn3)]
        # (batch, 6, 1) -> (batch, 6) -> (batch, r_emb_dimension)
        cat = torch.cat(pooled, dim=1)
        cat = self.fc(cat.view(-1, 6))
        x = torch.cat((u_emb, cat.view(-1, 1, self.r_emb_dimension)), dim=2)

        x = x.view(-1, self.input_size)
        x = self.hidden(x)
        out = self.out(x)
        return self.sigmoid(out)

In [13]:
# Embedding table sizes (number of rows) and widths for users and recipes.
# Valid user ids are 0..25075 and valid recipe ids are 0..178264.
u_emb_size, u_emb_dimension = 25076, 32
r_emb_size, r_emb_dimension = 178265, 32

In [14]:
# Smoke test on CPU: score users 1 and 2 against the ingredient index lists of recipes 0 and 1.
# The lists are parsed right here so the cell does not rely on variables left over from
# another cell.
sample_indexes = [json.loads(recipe2vec_dict[recipe_id]) for recipe_id in (0, 1)]
model = RecipeNCFModel_v2(u_emb_size, u_emb_dimension, r_emb_size, r_emb_dimension)
model(torch.LongTensor([[1],[2]]), torch.LongTensor(sample_indexes))

Weights have been initialized.


tensor([[0.5073],
        [0.5129]], grad_fn=<SigmoidBackward>)

<p>Load the pretrained model and print each layer's information.</p>

<p>Use the pretrained model.</p>

In [15]:
# Disabled: would parse the 'vector' column of an older recipe file into a float tensor
# (presumably meant to be passed as `pretrained_weight` -- TODO confirm before re-enabling).
# recipe2vec = pd.read_csv('recipe2vec_1127.csv')
# recipe2vec_array = recipe2vec['vector'].to_numpy()
# recipe2vec_ = torch.Tensor([json.loads(each) for each in recipe2vec_array])

In [16]:
# Build a fresh model for training and place it on the selected device in one step.
model = RecipeNCFModel_v2(
    u_emb_size,
    u_emb_dimension,
    r_emb_size,
    r_emb_dimension,
).to(device)

Weights have been initialized.


In [17]:
# --- Optimisation settings ---
batch_size = 512
learningRate = 1e-4
# Upper bound only; training is expected to stop through early stopping.
epochs = 100000

# Binary cross-entropy on the sigmoid output of the model.
# criterion = nn.MSELoss()
criterion = nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learningRate)
# optimizer = torch.optim.SGD(model.parameters(), lr=learningRate, momentum=0.9)

# --- Early-stopping state ---
# Stop after `patience` consecutive epochs without a better hit ratio.
patience = 4
no_improvements = 0
# Starts below any reachable hit ratio so the first epoch always counts as an improvement.
best_ratio = -3
best_weights = None

# --- Per-epoch history ---
training_loss_list, validation_loss_list = [], []

In [18]:
# Training split: dataset plus a shuffled mini-batch loader.
train_dataset = RecipeRecommendation(training_data_path, scale=True)
train_data = DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=0,
    collate_fn=train_dataset.collate,
)

# Validation split: served one interaction at a time.
validation_dataset = RecipeRecommendation(validation_data_path, scale=True)
validation_data = DataLoader(
    validation_dataset,
    batch_size=1,
    shuffle=True,
    num_workers=0,
    collate_fn=validation_dataset.collate,
)

print('{} samples for training, {} samples for validation'.format(len(train_dataset), len(validation_dataset)))

580015 samples for training, 5312 samples for validation


In [19]:
# Scratch check (disabled): shape of a length-3 LongTensor filled with random integers below 5.
# torch.empty(3, dtype=torch.long).random_(5).shape

In [20]:
# Number of negative (user, recipe) pairs drawn for every positive training interaction.
negative_sample_ratio = 8
# Hit ratio measured on the validation data after each epoch.
hit_ratio_list = []
# Registry for sampled negative pairs.
negative_samples_dict = {}

In [21]:
# Scores of the candidates ranked during validation, keyed by (user, recipe).
result_dict = dict()

In [1]:
# Training with negative sampling, followed after every epoch by a hit-ratio evaluation and
# early stopping. This cell needs the import, data, model and configuration cells above to
# have been executed in the same kernel (run the notebook top to bottom).

# Evaluation protocol: rank each validation positive against this many sampled negatives
# and count a hit when it lands in the top TOP_K.
N_VALIDATION_NEGATIVES = 100
TOP_K = 10


def _sample_negative_recipes(user, n_samples, n_recipes, observed):
    """Draw `n_samples` recipe ids for `user` that are not recorded interactions.

    Candidates are drawn uniformly from [0, n_recipes - 1] and rejected while
    (user, candidate) is a key of `observed`. The same recipe may be drawn twice.
    """
    sampled = []
    while len(sampled) < n_samples:
        candidate = random.randint(0, n_recipes - 1)
        if (user, candidate) in observed:
            continue
        sampled.append(candidate)
    return sampled


def _ingredient_index_lists(recipe_ids, recipe_to_indexes):
    """Return the parsed (JSON-decoded) ingredient index list of every recipe id."""
    return [json.loads(recipe_to_indexes[int(recipe_id)]) for recipe_id in recipe_ids]


print('Training is started....')
# record the start time
start_time = time.time()

for epoch in range(epochs):
    epoch_training_loss = []

    for index_train, (u, r, rating) in enumerate(train_data):
        # Flatten with reshape(-1) instead of squeeze(): a batch holding a single
        # interaction would otherwise collapse to a 0-d array that cannot be iterated.
        users = u.numpy().reshape(-1).astype(np.int64).tolist()
        recipes = r.numpy().reshape(-1).astype(np.int64).tolist()

        # Rows are (user, recipe, label): the observed interactions first (label 1) ...
        batch_rows = [(user, recipe, 1) for user, recipe in zip(users, recipes)]
        # ... then `negative_sample_ratio` unobserved recipes per positive (label 0).
        # The sampled pairs are deliberately not accumulated in a global dict: that would
        # add bs * negative_sample_ratio keys on every step without ever being read.
        for user in users:
            negatives = _sample_negative_recipes(user, negative_sample_ratio, r_emb_size, data_dict)
            batch_rows.extend((user, recipe, 0) for recipe in negatives)

        # Integer array so ids are never round-tripped through floats.
        generated_batch = np.array(batch_rows, dtype=np.int64)
        # Shuffle rows so positives and negatives are mixed within the batch.
        np.random.shuffle(generated_batch)

        indexes_train = torch.LongTensor(
            _ingredient_index_lists(generated_batch[:, 1], recipe2vec_dict)
        ).to(device)
        u_train = torch.tensor(generated_batch[:, 0], dtype=torch.long).view(-1, 1).to(device)
        label_train = torch.tensor(generated_batch[:, 2], dtype=torch.float32).view(-1, 1).to(device)

        # clear gradient, forward, backward, update
        optimizer.zero_grad()
        output = model(u_train, indexes_train)
        loss = criterion(output, label_train)
        loss.backward()
        optimizer.step()

        current_loss = loss.item()
        if index_train % 50 == 0:
            print(current_loss)
        epoch_training_loss.append(current_loss)

    training_loss_list.append(statistics.mean(epoch_training_loss))
    # Use the last entry rather than indexing by `epoch`, so the printed value stays
    # correct when the history list already holds entries from an earlier run of this cell.
    print('epoch {}, training loss {:.4f}'.format(epoch, training_loss_list[-1]))

    # evaluate
    hit = 0
    n_validation = 0

    with torch.no_grad():
        for index_validation, (u_validation, r_validation, rating_validation) in enumerate(validation_data):
            user = int(u_validation.numpy().squeeze())
            positive_recipe = int(r_validation.numpy().squeeze())
            positive_u_r = (user, positive_recipe)

            # Score of the held-out positive interaction.
            positive_indexes = torch.LongTensor(json.loads(recipe2vec_dict[positive_recipe])).to(device)
            probability = float(model(u_validation.to(device), positive_indexes).cpu().numpy().squeeze())

            # Scores of the sampled negatives for the same user.
            negative_validation_r = _sample_negative_recipes(user, N_VALIDATION_NEGATIVES, r_emb_size, data_dict)
            negative_r_val = torch.LongTensor(
                _ingredient_index_lists(negative_validation_r, recipe2vec_dict)
            ).to(device)
            negative_u_val = torch.LongTensor([user] * len(negative_validation_r)).view(-1, 1).to(device)
            probabilities = model(negative_u_val, negative_r_val).cpu().numpy().reshape(-1)

            # (user, recipe) -> score for every ranked candidate; a recipe drawn twice is kept once.
            result_dict = {
                (user, recipe): float(score)
                for recipe, score in zip(negative_validation_r, probabilities)
            }
            result_dict[positive_u_r] = probability

            # The positive is in the top TOP_K when fewer than TOP_K negatives score at least
            # as high; ties count against the positive so a constant model cannot score hits.
            n_ranked_above = sum(
                1 for pair, score in result_dict.items()
                if pair != positive_u_r and score >= probability
            )
            if n_ranked_above < TOP_K:
                hit = hit + 1
            n_validation += 1

    print(hit, n_validation)
    hit_ratio = hit / n_validation if n_validation else 0.0
    hit_ratio_list.append(hit_ratio)

    if hit_ratio > best_ratio:
        best_ratio = hit_ratio
        # Snapshot the weights of the best epoch so they can be saved after training.
        best_weights = copy.deepcopy(model.state_dict())
        no_improvements = 0
    else:
        no_improvements += 1
    print('hit ratio: {:.3f}'.format(hit_ratio))

    if no_improvements >= patience:
        break

end_time = time.time()
total_time = end_time - start_time
print('Total time {:.2f} min'.format(total_time / 60))

Training is started....


NameError: name 'time' is not defined

GMF: 10% HR
<br>
NCF: 13% HR

In [None]:
# Persist the training history and the best weights of this run.

# Time of day (hour_minute_second) used to keep the files of different runs apart.
t = time.localtime()
current_time = time.strftime("%H_%M_%S", t)

# Create the output folders up front so a missing directory cannot discard the results
# of a finished training run.
os.makedirs('result1202', exist_ok=True)
os.makedirs('model1202', exist_ok=True)

training_loss_df = pd.DataFrame(training_loss_list,columns=['training_loss'])
training_loss_df.to_csv('result1202/train_loss_{}_1202.csv'.format(current_time), index=False)

# Separate name for the frame so the float `hit_ratio` of the last epoch is not overwritten.
hit_ratio_df = pd.DataFrame(hit_ratio_list, columns=['hit_ratio'])
hit_ratio_df.to_csv('result1202/hr_{}_1202.csv'.format(current_time), index=False)


model_path = 'model1202/model_{}_1202.pth'.format(current_time)
if best_weights is None:
    # No epoch finished, so there is no state dict to store.
    print('No best weights recorded; skipping model save.')
else:
    torch.save(best_weights, model_path)