In [None]:
from tqdm import tqdm 
import os 

import numpy as np 
import pandas as pd

import scipy.sparse  as sp 

from sklearn.model_selection import train_test_split 

import torch 
from torch import nn, optim 
from torch.utils.data import Dataset, DataLoader 

In [None]:
class args:
    """Notebook-wide settings, read (and later extended) as class attributes."""

    # Seed handed to the train/validation/test splitting calls.
    seed = 42

    # Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    # Model depth setting and mini-batch size used by the data loaders.
    num_layers = 3
    batch_size = 512

    # Directory in which the training loop stores model parameters.
    SAVE_PATH = 'Parameters'

In [None]:
# Ratings file with four `::`-separated fields per record and no header row.
# A multi-character separator is only handled by pandas' Python parsing
# engine, so it is requested explicitly instead of relying on the
# warning-emitting automatic fallback from the C engine.
d_set = pd.read_csv(
    'dataset/MovieLens1M/ratings.dat',
    sep='::',
    names=['user_id', 'business_id', 'stars', 'ts'],
    encoding='latin-1',
    header=None,
    engine='python',
)
# The timestamp column is not used anywhere downstream.
d_set = d_set.drop(columns=['ts'])

In [None]:
# 60 % of the ratings go to training; the remaining 40 % is cut in half to
# give validation and test splits of 20 % each (same seed for both cuts).
d_train, d_holdout = train_test_split(d_set, train_size=0.6, random_state=args.seed)
d_valid, d_test = train_test_split(d_holdout, train_size=0.5, random_state=args.seed)

In [None]:
# Store both id columns as pandas categoricals in every split.
id_dtypes = {'user_id': 'category', 'business_id': 'category'}
d_train, d_valid, d_test = (
    split.astype(id_dtypes) for split in (d_train, d_valid, d_test)
)

In [None]:
# Id vocabularies (category indexes) of the training split.
u_cat, b_cat = (
    d_train[id_col].cat.categories for id_col in ('user_id', 'business_id')
)

In [None]:
# Re-express the validation and test ids over the training vocabularies, so
# that the same id maps to the same category position in every split.
for split in (d_valid, d_test):
    split['user_id'] = split['user_id'].cat.set_categories(u_cat)
    split['business_id'] = split['business_id'].cat.set_categories(b_cat)

In [None]:
# Replace every categorical id by its integer category code.
# NOTE: pandas encodes a missing category as code -1 (not NaN).
for split in (d_train, d_valid, d_test):
    for id_col in ('user_id', 'business_id'):
        split[id_col] = split[id_col].cat.codes

In [None]:
def _keep_known_ids(frame):
    """Return `frame` without missing values and without unknown user/item ids.

    `Series.cat.codes` encodes a value outside the category vocabulary as -1
    rather than NaN, so `dropna()` on its own keeps those rows. A -1 id would
    then be used as a tensor index, where negative indices wrap around and
    silently select an unrelated embedding row.

    Args:
        frame: DataFrame with integer-coded `user_id` and `business_id` columns.

    Returns:
        A new DataFrame with a fresh 0..n-1 index.
    """
    known = (frame['user_id'] >= 0) & (frame['business_id'] >= 0)
    return frame[known].dropna().reset_index(drop=True)


d_train = _keep_known_ids(d_train)
d_valid = _keep_known_ids(d_valid)
d_test = _keep_known_ids(d_test)

In [None]:
# Cast both id columns to the built-in `int` dtype in every split.
int_id_dtypes = {'user_id': int, 'business_id': int}
d_train, d_valid, d_test = (
    split.astype(int_id_dtypes) for split in (d_train, d_valid, d_test)
)

In [None]:
# Ids are zero-based codes, so each table size is "largest training code + 1".
args.num_users = d_train['user_id'].max() + 1
args.num_items = d_train['business_id'].max() + 1

# Embedding width and number of training epochs.
args.latent_dim = 64
args.num_epochs = 50

In [None]:
class GNNLayer(nn.Module):
    """One graph propagation layer.

    Computes ``W1((L + I) E) + W2((L E) * E)``, where ``E`` is the node
    feature matrix, ``L`` the (sparse) propagation matrix, ``I`` the (sparse)
    self-loop matrix and ``*`` the element-wise product.

    Args:
        in_feats: width of the incoming node features.
        out_feats: width of the produced node features.
    """

    def __init__(self, in_feats, out_feats):
        super().__init__()
        self.in_feats = in_feats
        self.out_feats = out_feats

        self.W1 = nn.Linear(in_feats, out_feats)
        self.W2 = nn.Linear(in_feats, out_feats)

    def forward(self, L, SelfLoop, feats):
        """Propagate `feats` once over the graph.

        Args:
            L: sparse (num_nodes, num_nodes) propagation matrix.
            SelfLoop: sparse (num_nodes, num_nodes) self-loop matrix.
            feats: dense (num_nodes, in_feats) node features.

        Returns:
            Dense (num_nodes, out_feats) tensor on the device of `feats`.
        """
        # Follow the device of the features instead of hard-coding CUDA, so the
        # layer also runs on CPU-only machines. `.to` is a no-op when the
        # matrices already live on that device.
        device = feats.device
        L = L.to(device)
        sf_L = L + SelfLoop.to(device)

        # (L + I) E W_1
        left_part = self.W1(torch.sparse.mm(sf_L, feats))

        # (L E  odot  E) W_2, odot being the element-wise product
        LE = torch.sparse.mm(L, feats)
        right_part = self.W2(torch.mul(LE, feats))

        return left_part + right_part

class NGCF(nn.Module):
    """Rating predictor built on graph propagation over the user-item graph.

    Users and items share one node index space: users occupy
    ``0 .. num_users - 1`` and item ``i`` is node ``num_users + i``. The raw
    embeddings and the output of each propagation layer are concatenated per
    node, and an MLP maps the concatenated (user, item) pair to one score.

    ``args.num_layers - 1`` propagation layers are created; together with the
    raw embeddings this gives ``num_layers`` blocks of ``latent_dim`` features
    per node, which is what the first MLP layer is sized for.

    Args:
        args: namespace providing ``num_users``, ``num_items``, ``latent_dim``,
            ``num_layers`` and ``device``.
        matrix: DataFrame of training interactions with ``user_id``,
            ``business_id`` and ``stars`` columns (zero-based integer ids).
    """

    def __init__(self, args, matrix):
        super().__init__()
        # Plain ints, so the sizes can be used for shapes and tensor arithmetic
        # regardless of whether the caller passed NumPy scalars.
        self.num_users = int(args.num_users)
        self.num_items = int(args.num_items)
        self.latent_dim = args.latent_dim
        self.device = args.device

        self.user_emb = nn.Embedding(self.num_users, self.latent_dim)
        self.item_emb = nn.Embedding(self.num_items, self.latent_dim)

        self.num_layers = args.num_layers
        # Registered as buffers so that `.to(device)` moves the graph together
        # with the parameters; non-persistent because both are rebuilt from the
        # training data and do not belong in a saved state dict.
        self.register_buffer('L', self.LaplacianMatrix(matrix), persistent=False)
        self.register_buffer(
            'I', self.SelfLoop(self.num_users + self.num_items), persistent=False
        )

        self.leakyrelu = nn.LeakyReLU()
        self.GNNLayers = nn.ModuleList()

        for _ in range(self.num_layers - 1):
            self.GNNLayers.append(GNNLayer(self.latent_dim, self.latent_dim))

        self.fc_layer = nn.Sequential(
            nn.Linear(self.latent_dim * self.num_layers * 2, 64),
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, 1)
        )

    def SelfLoop(self, num):
        """Return the sparse float32 identity matrix of shape (num, num)."""
        num = int(num)
        diagonal = torch.arange(num, dtype=torch.long)
        return torch.sparse_coo_tensor(
            torch.stack([diagonal, diagonal]),
            torch.ones(num, dtype=torch.float32),
            size=(num, num),
        ).coalesce()

    def LaplacianMatrix(self, ratings):
        """Build the normalised propagation matrix ``D^-1/2 A D^-1/2``.

        ``A`` is the symmetric bipartite adjacency over all user and item
        nodes whose entries are the ``stars`` values; ``D`` holds the number of
        non-zero entries of each row of ``A``.

        Args:
            ratings: DataFrame with ``user_id``, ``business_id`` and ``stars``.

        Returns:
            Sparse float32 tensor of shape (num_users + num_items,) * 2.
        """
        n_users = self.num_users
        n_nodes = self.num_users + self.num_items

        users = np.asarray(ratings['user_id'], dtype=np.int64)
        items = np.asarray(ratings['business_id'], dtype=np.int64) + n_users
        stars = np.asarray(ratings['stars'], dtype=np.float64)

        # The shape is given explicitly: inferring it from the largest id that
        # happens to be present yields a non-square matrix whenever the
        # highest-numbered user or item has no interaction.
        A = sp.coo_matrix(
            (
                np.concatenate([stars, stars]),
                (np.concatenate([users, items]), np.concatenate([items, users])),
            ),
            shape=(n_nodes, n_nodes),
        ).tocsr()

        degree = np.asarray((A > 0).sum(axis=1)).ravel()
        # Isolated nodes keep a zero scaling factor instead of inf, which would
        # otherwise turn into NaN entries in the product below.
        d_inv_sqrt = np.zeros(n_nodes, dtype=np.float64)
        connected = degree > 0
        d_inv_sqrt[connected] = np.power(degree[connected].astype(np.float64), -0.5)
        D = sp.diags(d_inv_sqrt)

        # `@` is a matrix product for both scipy sparse matrices and arrays.
        L = (D @ A @ D).tocoo()
        idx = torch.from_numpy(np.stack([L.row, L.col]).astype(np.int64))
        data = torch.from_numpy(L.data.astype(np.float32))
        return torch.sparse_coo_tensor(idx, data, size=(n_nodes, n_nodes)).coalesce()

    def FeatureMatrix(self):
        """Return all user embeddings stacked on top of all item embeddings."""
        # Equivalent to looking up every index in order, without building index
        # tensors on every call; the result lives wherever the module lives.
        return torch.cat([self.user_emb.weight, self.item_emb.weight], dim=0)

    def forward(self, uids, iids):
        """Score each (user, item) pair.

        Args:
            uids: integer tensor of zero-based user ids.
            iids: integer tensor of zero-based item ids, same length as `uids`.

        Returns:
            1-D tensor with one predicted score per pair.
        """
        # Items are stored after the users in the shared node index space.
        iids = self.num_users + iids

        features = self.FeatureMatrix()
        per_layer = [features]

        for gnn in self.GNNLayers:
            features = self.leakyrelu(gnn(self.L, self.I, features))
            per_layer.append(features)

        final_emb = torch.cat(per_layer, dim=-1)

        user_emb = final_emb[uids]
        item_emb = final_emb[iids]

        outs = self.fc_layer(torch.cat([user_emb, item_emb], dim=-1))
        return outs.flatten()

In [None]:
class GraphDataset(Dataset):
    """Map-style dataset of (user id, item id, rating) triples.

    Args:
        dataframe: DataFrame with `user_id`, `business_id` and `stars` columns.
    """

    def __init__(self, dataframe):
        # `super(Dataset, self)` starts the MRO lookup *after* `Dataset` and so
        # bypasses it; the zero-argument form resolves from this class.
        super().__init__()

        # `.tolist()` yields native Python scalars in a single pass.
        self.uid = dataframe['user_id'].tolist()
        self.iid = dataframe['business_id'].tolist()
        self.ratings = dataframe['stars'].tolist()

    def __len__(self):
        """Number of rating records."""
        return len(self.uid)

    def __getitem__(self, idx):
        """Return the `(user id, item id, rating)` triple stored at `idx`."""
        return (self.uid[idx], self.iid[idx], self.ratings[idx])

In [None]:
def get_loader(args, dataset, num_workers, shuffle=False):
    """Wrap a ratings DataFrame in a `GraphDataset` and return a `DataLoader`.

    Args:
        args: namespace providing `batch_size`.
        dataset: DataFrame with `user_id`, `business_id` and `stars` columns.
        num_workers: number of worker processes used by the loader.
        shuffle: reshuffle the records at every epoch when True. Defaults to
            False, which keeps the records in DataFrame order.

    Returns:
        A `DataLoader` yielding `(user ids, item ids, ratings)` batches.
    """
    graph_dataset = GraphDataset(dataset)
    return DataLoader(
        graph_dataset,
        batch_size=args.batch_size,
        shuffle=shuffle,
        num_workers=num_workers,
    )

In [None]:
# Training batches are reshuffled at every epoch so that mini-batch gradient
# descent does not see the records in the same fixed order each time.
train_loader = DataLoader(
    GraphDataset(d_train),
    batch_size=args.batch_size,
    shuffle=True,
    num_workers=4,
)
# Evaluation loaders keep the DataFrame order, so predictions line up with rows.
valid_loader = get_loader(args, d_valid, 4)
test_loader = get_loader(args, d_test, 4)



In [None]:
def graph_evaluate(args, model, test_loader, criterion):
    """Evaluate `model` on every batch of `test_loader` with gradients disabled.

    Args:
        args: namespace providing `device`.
        model: module called as `model(uids=..., iids=...)`.
        test_loader: iterable of `(user ids, item ids, ratings)` batches.
        criterion: loss callable applied to `(predictions, ratings)`.

    Returns:
        Tuple `(loss, predictions)`: the mean over batches of the square root
        of the criterion value, and the list of per-batch prediction tensors.
    """
    predictions = []
    running_loss = 0

    model.eval()
    with torch.no_grad():
        for raw_batch in tqdm(test_loader, desc='evaluating...'):
            on_device = tuple(part.to(args.device) for part in raw_batch)
            target = on_device[2].float()

            scores = model(uids=on_device[0], iids=on_device[1])
            predictions.append(scores)

            # Square root of the per-batch criterion value.
            running_loss += torch.sqrt(criterion(scores, target)).item()

    return running_loss / len(test_loader), predictions

In [None]:
def graph_train(args, model, train_loader, valid_loader, optimizer, criterion):
    """Train `model`, validating after every epoch and saving the best weights.

    The optimised quantity is the square root of `criterion(pred, gold)` per
    batch. Whenever the validation loss improves, the model's state dict is
    written to `<args.SAVE_PATH>/<model class name>_parameters.pt`.

    Args:
        args: namespace providing `num_epochs`, `device` and `SAVE_PATH`.
        model: module called as `model(uids=..., iids=...)`.
        train_loader: iterable of `(user ids, item ids, ratings)` batches.
        valid_loader: loader passed to `graph_evaluate` after each epoch.
        optimizer: optimizer over the parameters of `model`.
        criterion: loss callable applied to `(predictions, ratings)`.

    Returns:
        Tuple `(history, outputs)`: `history` maps `'train_loss'` and
        `'valid_loss'` to their per-epoch lists; `outputs` holds the validation
        predictions of the last epoch (an empty list when no epoch ran).
    """
    best_loss = float('inf')
    train_losses, valid_losses = [], []
    # Defined up front so the return statement is valid when num_epochs < 1.
    outputs = []
    for epoch in range(1, args.num_epochs + 1):
        train_loss = 0.0

        model.train()
        for batch in tqdm(train_loader, desc='training...'):
            batch = tuple(b.to(args.device) for b in batch)
            inputs = {'uids':   batch[0],
                      'iids':   batch[1]}

            gold_y = batch[2].float()

            pred_y = model(**inputs)

            loss = criterion(pred_y, gold_y)
            loss = torch.sqrt(loss)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            train_loss += loss.item()
        train_loss /= len(train_loader)
        train_losses.append(train_loss)

        valid_loss, outputs = graph_evaluate(args, model, valid_loader, criterion)
        valid_losses.append(valid_loss)

        print(f'Epoch: [{epoch}/{args.num_epochs}]')
        print(f'Train Loss: {train_loss:.4f}\tValid Loss: {valid_loss:.4f}')

        if best_loss > valid_loss:
            best_loss = valid_loss
            # exist_ok avoids the check-then-create race of exists()/makedirs().
            os.makedirs(args.SAVE_PATH, exist_ok=True)
            torch.save(model.state_dict(), os.path.join(args.SAVE_PATH, f'{model._get_name()}_parameters.pt'))

    return {
        'train_loss': train_losses,
        'valid_loss': valid_losses
    }, outputs

In [None]:
# Seed PyTorch before the model is built so that parameter initialisation is
# repeatable across kernel restarts.
torch.manual_seed(args.seed)

models = NGCF(args, d_train).to(args.device)

optimizer = optim.Adam(models.parameters(), lr=1e-3)
# The training and evaluation loops take the square root of the criterion
# value, which is a root-mean-squared error only when the criterion is the
# mean squared error (with L1Loss the reported number was sqrt(MAE)).
criterion = nn.MSELoss()

In [None]:
# `results` is the pair returned by graph_train: the per-epoch loss history
# and the validation predictions of the final epoch.
results = graph_train(
    args=args,
    model=models,
    train_loader=train_loader,
    valid_loader=valid_loader,
    optimizer=optimizer,
    criterion=criterion,
)

training...: 100%|██████████| 59/59 [00:04<00:00, 12.35it/s]
evaluating...: 100%|██████████| 20/20 [00:00<00:00, 34.58it/s]


Epoch: [1/50]
Train Loss: 1.1447	Valid Loss: 1.0922


training...: 100%|██████████| 59/59 [00:01<00:00, 31.77it/s]
evaluating...: 100%|██████████| 20/20 [00:00<00:00, 27.65it/s]


Epoch: [2/50]
Train Loss: 0.8451	Valid Loss: 1.0653


training...: 100%|██████████| 59/59 [00:02<00:00, 25.52it/s]
evaluating...: 100%|██████████| 20/20 [00:00<00:00, 35.26it/s]


Epoch: [3/50]
Train Loss: 0.8139	Valid Loss: 1.0725


training...: 100%|██████████| 59/59 [00:01<00:00, 31.62it/s]
evaluating...: 100%|██████████| 20/20 [00:00<00:00, 33.49it/s]


Epoch: [4/50]
Train Loss: 0.8091	Valid Loss: 1.0692


training...: 100%|██████████| 59/59 [00:01<00:00, 32.08it/s]
evaluating...: 100%|██████████| 20/20 [00:00<00:00, 34.50it/s]


Epoch: [5/50]
Train Loss: 0.7950	Valid Loss: 1.1000


training...: 100%|██████████| 59/59 [00:01<00:00, 31.39it/s]
evaluating...: 100%|██████████| 20/20 [00:00<00:00, 34.35it/s]


Epoch: [6/50]
Train Loss: 0.7801	Valid Loss: 1.0620


training...: 100%|██████████| 59/59 [00:02<00:00, 29.40it/s]
evaluating...: 100%|██████████| 20/20 [00:00<00:00, 24.46it/s]


Epoch: [7/50]
Train Loss: 0.7613	Valid Loss: 1.0611


training...: 100%|██████████| 59/59 [00:02<00:00, 27.27it/s]
evaluating...: 100%|██████████| 20/20 [00:00<00:00, 33.67it/s]


Epoch: [8/50]
Train Loss: 0.7491	Valid Loss: 1.0926


training...: 100%|██████████| 59/59 [00:01<00:00, 32.00it/s]
evaluating...: 100%|██████████| 20/20 [00:00<00:00, 35.04it/s]


Epoch: [9/50]
Train Loss: 0.7469	Valid Loss: 1.1331


training...: 100%|██████████| 59/59 [00:01<00:00, 31.61it/s]
evaluating...: 100%|██████████| 20/20 [00:00<00:00, 34.53it/s]


Epoch: [10/50]
Train Loss: 0.7291	Valid Loss: 1.1388


training...: 100%|██████████| 59/59 [00:01<00:00, 30.98it/s]
evaluating...: 100%|██████████| 20/20 [00:00<00:00, 35.23it/s]


Epoch: [11/50]
Train Loss: 0.7351	Valid Loss: 1.1446


training...: 100%|██████████| 59/59 [00:02<00:00, 26.83it/s]
evaluating...: 100%|██████████| 20/20 [00:00<00:00, 23.59it/s]


Epoch: [12/50]
Train Loss: 0.7311	Valid Loss: 1.1252


training...: 100%|██████████| 59/59 [00:01<00:00, 29.99it/s]
evaluating...: 100%|██████████| 20/20 [00:00<00:00, 33.99it/s]


Epoch: [13/50]
Train Loss: 0.7313	Valid Loss: 1.1013


training...: 100%|██████████| 59/59 [00:01<00:00, 31.80it/s]
evaluating...: 100%|██████████| 20/20 [00:00<00:00, 34.18it/s]


Epoch: [14/50]
Train Loss: 0.7187	Valid Loss: 1.1461


training...: 100%|██████████| 59/59 [00:01<00:00, 31.62it/s]
evaluating...: 100%|██████████| 20/20 [00:00<00:00, 35.19it/s]


Epoch: [15/50]
Train Loss: 0.6831	Valid Loss: 1.1349


training...: 100%|██████████| 59/59 [00:01<00:00, 31.74it/s]
evaluating...: 100%|██████████| 20/20 [00:00<00:00, 34.18it/s]


Epoch: [16/50]
Train Loss: 0.7189	Valid Loss: 1.1184


training...: 100%|██████████| 59/59 [00:02<00:00, 25.40it/s]
evaluating...: 100%|██████████| 20/20 [00:00<00:00, 23.87it/s]


Epoch: [17/50]
Train Loss: 0.7056	Valid Loss: 1.1753


training...: 100%|██████████| 59/59 [00:01<00:00, 31.64it/s]
evaluating...: 100%|██████████| 20/20 [00:00<00:00, 35.11it/s]


Epoch: [18/50]
Train Loss: 0.6288	Valid Loss: 1.1889


training...: 100%|██████████| 59/59 [00:01<00:00, 31.87it/s]
evaluating...: 100%|██████████| 20/20 [00:00<00:00, 34.18it/s]


Epoch: [19/50]
Train Loss: 0.5974	Valid Loss: 1.1835


training...: 100%|██████████| 59/59 [00:01<00:00, 31.74it/s]
evaluating...: 100%|██████████| 20/20 [00:00<00:00, 32.74it/s]


Epoch: [20/50]
Train Loss: 0.5964	Valid Loss: 1.1662


training...: 100%|██████████| 59/59 [00:01<00:00, 31.71it/s]
evaluating...: 100%|██████████| 20/20 [00:00<00:00, 34.20it/s]


Epoch: [21/50]
Train Loss: 0.6605	Valid Loss: 1.2439


training...: 100%|██████████| 59/59 [00:02<00:00, 23.97it/s]
evaluating...: 100%|██████████| 20/20 [00:00<00:00, 28.85it/s]


Epoch: [22/50]
Train Loss: 0.6340	Valid Loss: 1.2203


training...: 100%|██████████| 59/59 [00:01<00:00, 31.85it/s]
evaluating...: 100%|██████████| 20/20 [00:00<00:00, 33.71it/s]


Epoch: [23/50]
Train Loss: 0.6079	Valid Loss: 1.2273


training...: 100%|██████████| 59/59 [00:02<00:00, 25.75it/s]
evaluating...: 100%|██████████| 20/20 [00:01<00:00, 17.01it/s]


Epoch: [24/50]
Train Loss: 0.6023	Valid Loss: 1.2247


training...: 100%|██████████| 59/59 [00:01<00:00, 31.44it/s]
evaluating...: 100%|██████████| 20/20 [00:00<00:00, 33.99it/s]


Epoch: [25/50]
Train Loss: 0.6409	Valid Loss: 1.1450


training...: 100%|██████████| 59/59 [00:02<00:00, 28.44it/s]
evaluating...: 100%|██████████| 20/20 [00:00<00:00, 23.55it/s]


Epoch: [26/50]
Train Loss: 0.6423	Valid Loss: 1.1680


training...: 100%|██████████| 59/59 [00:02<00:00, 28.63it/s]
evaluating...: 100%|██████████| 20/20 [00:00<00:00, 34.26it/s]


Epoch: [27/50]
Train Loss: 0.6363	Valid Loss: 1.1722


training...: 100%|██████████| 59/59 [00:01<00:00, 31.55it/s]
evaluating...: 100%|██████████| 20/20 [00:00<00:00, 33.09it/s]


Epoch: [28/50]
Train Loss: 0.6567	Valid Loss: 1.1794


training...: 100%|██████████| 59/59 [00:01<00:00, 31.64it/s]
evaluating...: 100%|██████████| 20/20 [00:00<00:00, 34.73it/s]


Epoch: [29/50]
Train Loss: 0.6566	Valid Loss: 1.2296


training...: 100%|██████████| 59/59 [00:01<00:00, 31.23it/s]
evaluating...: 100%|██████████| 20/20 [00:00<00:00, 33.87it/s]


Epoch: [30/50]
Train Loss: 0.6378	Valid Loss: 1.2298


training...: 100%|██████████| 59/59 [00:02<00:00, 25.59it/s]
evaluating...: 100%|██████████| 20/20 [00:00<00:00, 24.28it/s]


Epoch: [31/50]
Train Loss: 0.6078	Valid Loss: 1.2210


training...: 100%|██████████| 59/59 [00:01<00:00, 31.05it/s]
evaluating...: 100%|██████████| 20/20 [00:00<00:00, 34.54it/s]


Epoch: [32/50]
Train Loss: 0.6007	Valid Loss: 1.1778


training...: 100%|██████████| 59/59 [00:01<00:00, 31.47it/s]
evaluating...: 100%|██████████| 20/20 [00:00<00:00, 33.04it/s]


Epoch: [33/50]
Train Loss: 0.6309	Valid Loss: 1.1680


training...: 100%|██████████| 59/59 [00:01<00:00, 31.44it/s]
evaluating...: 100%|██████████| 20/20 [00:00<00:00, 33.24it/s]


Epoch: [34/50]
Train Loss: 0.5573	Valid Loss: 1.1712


training...: 100%|██████████| 59/59 [00:01<00:00, 30.95it/s]
evaluating...: 100%|██████████| 20/20 [00:00<00:00, 34.67it/s]


Epoch: [35/50]
Train Loss: 0.5411	Valid Loss: 1.1753


training...: 100%|██████████| 59/59 [00:02<00:00, 23.50it/s]
evaluating...: 100%|██████████| 20/20 [00:00<00:00, 30.18it/s]


Epoch: [36/50]
Train Loss: 0.5830	Valid Loss: 1.2604


training...: 100%|██████████| 59/59 [00:01<00:00, 31.46it/s]
evaluating...: 100%|██████████| 20/20 [00:00<00:00, 34.32it/s]


Epoch: [37/50]
Train Loss: 0.6067	Valid Loss: 1.2566


training...: 100%|██████████| 59/59 [00:01<00:00, 31.31it/s]
evaluating...: 100%|██████████| 20/20 [00:00<00:00, 34.56it/s]


Epoch: [38/50]
Train Loss: 0.5735	Valid Loss: 1.2543


training...: 100%|██████████| 59/59 [00:01<00:00, 31.41it/s]
evaluating...: 100%|██████████| 20/20 [00:00<00:00, 34.77it/s]


Epoch: [39/50]
Train Loss: 0.5164	Valid Loss: 1.2527


training...: 100%|██████████| 59/59 [00:01<00:00, 31.72it/s]
evaluating...: 100%|██████████| 20/20 [00:00<00:00, 28.44it/s]


Epoch: [40/50]
Train Loss: 0.4946	Valid Loss: 1.2665


training...: 100%|██████████| 59/59 [00:02<00:00, 24.99it/s]
evaluating...: 100%|██████████| 20/20 [00:00<00:00, 34.82it/s]


Epoch: [41/50]
Train Loss: 0.5229	Valid Loss: 1.2286


training...: 100%|██████████| 59/59 [00:01<00:00, 31.61it/s]
evaluating...: 100%|██████████| 20/20 [00:00<00:00, 34.68it/s]


Epoch: [42/50]
Train Loss: 0.5883	Valid Loss: 1.1932


training...: 100%|██████████| 59/59 [00:01<00:00, 31.28it/s]
evaluating...: 100%|██████████| 20/20 [00:00<00:00, 34.23it/s]


Epoch: [43/50]
Train Loss: 0.5850	Valid Loss: 1.2231


training...: 100%|██████████| 59/59 [00:01<00:00, 31.62it/s]
evaluating...: 100%|██████████| 20/20 [00:00<00:00, 34.93it/s]


Epoch: [44/50]
Train Loss: 0.5645	Valid Loss: 1.2465


training...: 100%|██████████| 59/59 [00:01<00:00, 30.14it/s]
evaluating...: 100%|██████████| 20/20 [00:00<00:00, 23.41it/s]


Epoch: [45/50]
Train Loss: 0.5059	Valid Loss: 1.2426


training...: 100%|██████████| 59/59 [00:02<00:00, 27.32it/s]
evaluating...: 100%|██████████| 20/20 [00:00<00:00, 34.71it/s]


Epoch: [46/50]
Train Loss: 0.4732	Valid Loss: 1.2298


training...: 100%|██████████| 59/59 [00:01<00:00, 31.73it/s]
evaluating...: 100%|██████████| 20/20 [00:00<00:00, 33.86it/s]


Epoch: [47/50]
Train Loss: 0.4798	Valid Loss: 1.1924


training...: 100%|██████████| 59/59 [00:01<00:00, 31.26it/s]
evaluating...: 100%|██████████| 20/20 [00:00<00:00, 33.89it/s]


Epoch: [48/50]
Train Loss: 0.5183	Valid Loss: 1.1831


training...: 100%|██████████| 59/59 [00:01<00:00, 31.73it/s]
evaluating...: 100%|██████████| 20/20 [00:00<00:00, 33.88it/s]


Epoch: [49/50]
Train Loss: 0.5492	Valid Loss: 1.2235


training...: 100%|██████████| 59/59 [00:02<00:00, 27.01it/s]
evaluating...: 100%|██████████| 20/20 [00:00<00:00, 23.67it/s]

Epoch: [50/50]
Train Loss: 0.4633	Valid Loss: 1.2201





In [None]:
# Environment check: list the CUDA architectures this PyTorch build was compiled for.
torch.cuda.get_arch_list()

['sm_37', 'sm_50', 'sm_60', 'sm_70', 'sm_75', 'sm_80', 'sm_86', 'sm_90']