## Système de recommandation basé sur un Graph Neural Network

**Importer les données**



In [None]:
# Fetch the dataset repository; the loaders in this notebook read their
# input files from /content/GNN_data.
!git clone https://github.com/yassinetoumi18798/GNN_data

In [None]:
import pandas as pd
from os import path




def load1Ratings1(file_name='/content/GNN_data/ytrain.txt', nrows=100000):
    """Load a tab-separated ratings file and re-index its item ids.

    Args:
        file_name: Path of the tab-separated file. It is read without a
            header row and its four columns are named ``userId``,
            ``itemId``, ``timestamp`` and ``rating`` in that order.
        nrows: Number of leading rows to keep; ``None`` keeps every row.

    Returns:
        A DataFrame in which ``itemId`` holds contiguous integer ids that
        start at 1 and are numbered in order of first appearance.
    """
    df = pd.read_table(
        file_name,
        sep='\t',
        names=['userId', 'itemId', 'timestamp', 'rating'],
        nrows=nrows,
    )

    # factorize assigns 0-based codes in a single pass, which replaces the
    # quadratic list.index lookup per row and gives a numbering that does not
    # depend on set iteration order.
    codes, _ = pd.factorize(df['itemId'])
    df['itemId'] = codes + 1

    return df


def loadRatings(file_name='/content/GNN_data/yow_userstudy_raw.xls'):
    """Load the ``clientlog`` sheet and turn it into a ratings table.

    Args:
        file_name: Path of the Excel workbook to read.

    Returns:
        A DataFrame with the columns ``userId``, ``itemId``, ``rating`` and
        ``time``. User and item ids are re-indexed to contiguous integers
        starting at 1 (numbered in order of first appearance) and ``rating``
        is the absolute value of the ``user_like`` column.
    """
    list_columns = ['user_id', 'DOC_ID', 'user_like', 'TimeVisit']

    # Only the 'clientlog' sheet is used, so there is no need to parse the
    # whole workbook.
    sheet = pd.read_excel(file_name, sheet_name='clientlog')

    # Work on an explicit copy so the assignments below never write into a
    # view of the parsed sheet.
    data_df = sheet[list_columns].copy()
    data_df.columns = ['userId', 'itemId', 'rating', 'time']

    # Vectorised re-indexing replaces the per-row iloc writes and the
    # quadratic list.index lookups.
    # NOTE(review): factorize gives missing ids the code -1, i.e. id 0 here;
    # confirm the sheet has no empty user_id / DOC_ID cells.
    data_df['userId'] = pd.factorize(data_df['userId'])[0] + 1
    data_df['itemId'] = pd.factorize(data_df['itemId'])[0] + 1
    data_df['rating'] = data_df['rating'].abs()

    return data_df




**Le modèle GCN**

In [None]:
import torch
import torch.nn as nn
from torch.nn import Module
from scipy.sparse import coo_matrix
from scipy.sparse import vstack
from scipy import sparse
import numpy as np




class GNNLayer(Module):
    """One propagation layer: a linear term plus an element-wise interaction term.

    Args:
        inF: Width of the incoming node features.
        outF: Width of the features produced by the layer.
    """

    def __init__(self,inF,outF):

        super(GNNLayer,self).__init__()
        self.inF = inF
        self.outF = outF
        self.linear = torch.nn.Linear(in_features=inF,out_features=outF)
        self.interActTransform = torch.nn.Linear(in_features=inF,out_features=outF)

    def forward(self, laplacianMat,selfLoop,features):
        """Propagate ``features`` over the graph.

        Args:
            laplacianMat: Sparse square matrix with one row per node.
            selfLoop: Sparse matrix of the same shape, added to
                ``laplacianMat`` for the linear term.
            features: Dense tensor with one row per node and ``inF`` columns.

        Returns:
            Dense tensor with one row per node and ``outF`` columns.
        """
        # Follow the device of the features instead of forcing CUDA, so the
        # layer also runs on CPU-only hosts.
        device = features.device
        with_self_loop = (laplacianMat + selfLoop).to(device)
        adjacency = laplacianMat.to(device)

        # Neighbourhood aggregate multiplied element-wise with each node's
        # own features.
        neighbour_sum = torch.sparse.mm(adjacency, features)
        interaction = torch.mul(neighbour_sum, features)

        linear_term = self.linear(torch.sparse.mm(with_self_loop, features))
        interaction_term = self.interActTransform(torch.sparse.mm(adjacency, interaction))

        return linear_term + interaction_term

class GCF(Module):
    """Graph collaborative-filtering model scoring (user, item) pairs.

    User and item embeddings are stacked into one feature matrix (users
    first), propagated through the ``GNNLayer`` stack, and the outputs of
    every stage are concatenated per node. The concatenated user and item
    rows then go through a small MLP that yields one score per pair.

    Args:
        userNum: Number of users; user ids are expected in ``[0, userNum)``.
        itemNum: Number of items; item ids are expected in ``[0, itemNum)``.
        rt: Table with ``userId``, ``itemId`` and ``rating`` columns used to
            build the normalised adjacency matrix.
        embedSize: Width of the user / item embeddings.
        layers: Widths of the propagation stack. ``layers[0]`` is the input
            width of the first layer and each following entry is the output
            width of one layer. Defaults to ``[100, 80, 50]``.
        useCuda: Kept for backward compatibility and stored on the instance;
            tensors follow the device of the model parameters.

    Raises:
        ValueError: If propagation layers are requested and ``layers[0]``
            differs from ``embedSize``.
    """

    def __init__(self,userNum,itemNum,rt,embedSize=100,layers=None,useCuda=True):

        super(GCF,self).__init__()
        # A fresh list per instance avoids sharing a mutable default.
        layers = [100,80,50] if layers is None else list(layers)
        if len(layers) > 1 and layers[0] != embedSize:
            raise ValueError(
                'layers[0] (%s) must equal embedSize (%s)' % (layers[0], embedSize))

        self.useCuda = useCuda
        self.userNum = userNum
        self.itemNum = itemNum
        self.uEmbd = nn.Embedding(userNum,embedSize)
        self.iEmbd = nn.Embedding(itemNum,embedSize)
        self.GNNlayers = torch.nn.ModuleList()
        self.LaplacianMat = self.buildLaplacianMat(rt) # sparse format
        self.leakyRelu = nn.LeakyReLU()
        self.selfLoop = self.getSparseEye(self.userNum+self.itemNum)

        # forward() concatenates the raw embeddings with the output of every
        # layer, so each node ends up with embedSize + sum(layers[1:])
        # columns; the MLP sees a user row and an item row side by side.
        nodeWidth = embedSize + sum(layers[1:])
        self.transForm1 = nn.Linear(in_features=nodeWidth*2,out_features=64)
        self.transForm2 = nn.Linear(in_features=64,out_features=32)
        self.transForm3 = nn.Linear(in_features=32,out_features=1)

        for From,To in zip(layers[:-1],layers[1:]):
            self.GNNlayers.append(GNNLayer(From,To))

    def getSparseEye(self,num):
        """Return a sparse ``num`` x ``num`` identity matrix."""
        num = int(num)
        idx = torch.arange(num).repeat(2,1)
        return torch.sparse_coo_tensor(idx,torch.ones(num),(num,num))

    def buildLaplacianMat(self,rt):
        """Build the symmetrically normalised user-item adjacency matrix.

        Nodes are ordered users first, then items. Every rating adds the
        entries (user, item) and (item, user); each entry is scaled by
        ``1 / sqrt(deg(row) * deg(col))`` where the degree of a node is the
        number of its strictly positive entries.

        Returns:
            A sparse float tensor of shape
            ``(userNum + itemNum, userNum + itemNum)``.
        """
        userNum = int(self.userNum)
        itemNum = int(self.itemNum)
        nodeNum = userNum + itemNum

        users = np.asarray(rt['userId'])
        items = np.asarray(rt['itemId'])
        ratings = np.asarray(rt['rating'])

        # Explicit shapes keep the matrix square even when the highest ids
        # have no interaction.
        upperPart = coo_matrix((ratings,(users,items+userNum)),shape=(userNum,nodeNum))
        lowerPart = coo_matrix((ratings,(items,users)),shape=(itemNum,nodeNum))
        A = sparse.vstack([upperPart,lowerPart])

        degree = np.asarray((A>0).sum(axis=1)).ravel()
        # Leave isolated nodes at 0 rather than inf: an explicit zero rating
        # on such a node would otherwise produce inf * 0 = NaN.
        invSqrt = np.zeros(nodeNum,dtype=np.float64)
        connected = degree > 0
        invSqrt[connected] = np.power(degree[connected].astype(np.float64),-0.5)

        D = sparse.diags(invSqrt)
        L = sparse.coo_matrix(D @ A @ D)

        indices = torch.as_tensor(np.vstack([L.row,L.col]),dtype=torch.long)
        values = torch.as_tensor(L.data,dtype=torch.float32)
        return torch.sparse_coo_tensor(indices,values,(nodeNum,nodeNum))

    def getFeatureMat(self):
        """Return all user embeddings followed by all item embeddings."""
        # Looking up every index in order returns the full weight tables, so
        # they are concatenated directly; this also keeps the result on
        # whatever device the parameters live on.
        return torch.cat([self.uEmbd.weight,self.iEmbd.weight],dim=0)

    def forward(self,userIdx,itemIdx):
        """Score each (user, item) pair.

        Args:
            userIdx: 1-D integer tensor of user ids.
            itemIdx: 1-D integer tensor of item ids, same length.

        Returns:
            1-D tensor with one predicted score per pair.
        """
        # Item nodes are stored after the user nodes.
        itemIdx = itemIdx + self.userNum

        # gcf data propagation
        features = self.getFeatureMat()
        stages = [features]
        for gnn in self.GNNlayers:
            features = torch.relu(gnn(self.LaplacianMat,self.selfLoop,features))
            stages.append(features)
        finalEmbd = torch.cat(stages,dim=1)

        # Index with tensors rather than Python lists of 0-d tensors: it
        # avoids a host round-trip, and short lists of tensors may be read as
        # a tuple of per-dimension indices depending on the PyTorch version.
        userIdx = userIdx.to(device=finalEmbd.device,dtype=torch.long)
        itemIdx = itemIdx.to(device=finalEmbd.device,dtype=torch.long)
        userEmbd = finalEmbd[userIdx]
        itemEmbd = finalEmbd[itemIdx]
        embd = torch.cat([userEmbd,itemEmbd],dim=1)

        embd = torch.relu(self.transForm1(embd))
        embd = self.transForm2(embd)
        embd = self.transForm3(embd)
        prediction = embd.flatten()

        return prediction


In [None]:
from torch.utils.data import Dataset



class ML1K(Dataset):
    """Map-style dataset of (userId, itemId, rating) triples.

    Args:
        rt: Mapping or table exposing ``userId``, ``itemId`` and ``rating``
            columns of equal length. The columns are copied into plain lists,
            so sample ``k`` corresponds to row ``k`` of ``rt``.
    """

    def __init__(self,rt):
        # super(Dataset, self) would start the MRO lookup *after* Dataset and
        # skip its initialiser; name this class instead.
        super(ML1K,self).__init__()
        self.uId = list(rt['userId'])
        self.iId = list(rt['itemId'])
        self.rt = list(rt['rating'])

    def __len__(self):
        """Return the number of samples."""
        return len(self.uId)

    def __getitem__(self, item):
        """Return the ``(userId, itemId, rating)`` triple at position ``item``."""
        return (self.uId[item],self.iId[item],self.rt[item])

**Entraînement du modèle**

In [None]:
import torch
from torch import nn as nn

from scipy.sparse import coo_matrix
import pandas as pd
import numpy as np
from numpy import diag

from torch.utils.data import DataLoader

from torch.utils.data import random_split
from torch.optim import Adam
from torch.nn import MSELoss


# loadRatings() returns contiguous 1-based ids, so the largest id is also the
# number of distinct users / items.
rt = loadRatings()
userNum = rt['userId'].max()
itemNum = rt['itemId'].max()

# Shift to 0-based ids so they can be used directly as embedding indices.
rt['userId'] = rt['userId'] - 1
rt['itemId'] = rt['itemId'] - 1

para = {
    'epoch':1500,
    'lr':0.00015,
    'batch_size':2048,
    'train':0.8
}

# Choose the device once so every transfer below targets the same place.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

ds = ML1K(rt)
trainLen = int(para['train']*len(ds))
train,test = random_split(ds,[trainLen,len(ds)-trainLen])
dl = DataLoader(train,batch_size=para['batch_size'],shuffle=True,
                pin_memory=(device.type == 'cuda'))

model = GCF(userNum, itemNum, rt, 80, layers=[80,80,80,],
            useCuda=(device.type == 'cuda')).to(device)


optim = Adam(model.parameters(), lr=para['lr'],weight_decay=0.02)
lossfn = MSELoss()

for i in range(para['epoch']):

    # batch_idx instead of `id`, which would shadow the builtin.
    for batch_idx,(users,items,ratings) in enumerate(dl):
        print('epoch:',i,' batch:',batch_idx)
        optim.zero_grad()
        prediction = model(users.to(device), items.to(device))
        # MSELoss takes (input, target): prediction first, ground truth second.
        loss = lossfn(prediction, ratings.float().to(device))
        loss.backward()
        optim.step()
        print(loss)


# Held-out evaluation in a single batch; gradients are not needed here, so
# disable autograd to avoid keeping the graph for the whole test set.
testdl = DataLoader(test,batch_size=len(test),)
model.eval()
with torch.no_grad():
    for data in testdl:
        prediction = model(data[0].to(device),data[1].to(device))
        loss = lossfn(prediction, data[2].float().to(device))

print(loss) # MSEloss

**Evaluation**

In [None]:
from itertools import compress
def recommendation(idUsers):
  """Predict a rating for every known item for one user.

  Args:
    idUsers: Id of the user, in the same numbering as ``rt['userId']``.

  Returns:
    A list of predicted ratings, one per distinct item id found in ``rt``,
    ordered by ascending item id.
  """
  # Sort so that position k is tied to the k-th smallest item id instead of
  # depending on set iteration order.
  listItem = sorted(set(rt['itemId']))

  # Build the index tensors directly on the model's device; a DataLoader is
  # not needed for a single full-size batch.
  device = next(model.parameters()).device
  items = torch.tensor(listItem, dtype=torch.long, device=device)
  users = torch.full_like(items, int(idUsers))

  # Inference only: skip autograd bookkeeping.
  with torch.no_grad():
    prediction = model(users, items)
  return prediction.tolist()

# Rating rows of the held-out split. The dataset was built from rt in row
# order, so the positions stored in the split map straight back to rows of rt.
# Selecting by position avoids one full-table scan per test sample and cannot
# pull in extra rows when a (user, item) pair occurs more than once.
r = rt.iloc[list(test.indices)]

def getlistItem(userId):
  """Pair predicted and true ratings for the held-out items of one user.

  Args:
    userId: Id of the user whose rows are selected from ``r``.

  Returns:
    A list of ``(predicted_rating, true_rating)`` tuples, one per row of
    ``r`` that belongs to the user.
  """
  held_out = r[r['userId'] == userId]
  actual = np.array(held_out['rating'])
  # recommendation() scores every item; keep only the held-out ones.
  predicted = np.array(recommendation(userId))[held_out['itemId']]
  return [(predicted[pos], actual[pos]) for pos in range(len(actual))]

def recall(threshold,k):
  """Compute recall@k for every user present in the held-out rows ``r``.

  An item counts as relevant when its true rating is at least ``threshold``
  and as recommended when its predicted rating is at least ``threshold``.

  Args:
    threshold: Minimum rating for an item to be relevant / recommended.
    k: Number of top-predicted items considered per user.

  Returns:
    A list with one recall value per user; users without any relevant item
    contribute 0.
  """
  recalls = []
  for user in set(r['userId']):
    # Highest predicted rating first.
    user_ratings = sorted(getlistItem(user), key=lambda pair: pair[0], reverse=True)

    n_rel = sum((true_r >= threshold) for (_, true_r) in user_ratings)

    n_rel_and_rec_k = sum(((true_r >= threshold) and (est >= threshold))
                          for (est, true_r) in user_ratings[:k])

    recalls.append(n_rel_and_rec_k / n_rel if n_rel != 0 else 0)
  return recalls
  
# Mean recall@20 over all held-out users, with 3.5 as the relevance threshold.
recalls = recall(3.5, 20)
mean_recall = sum(recalls) / len(recalls)
print(mean_recall)

**Recommendation**

In [None]:
def recommendationForUser(idUsers, threshold=3.5):
  """Return the items whose predicted rating for a user exceeds a threshold.

  Args:
    idUsers: Id of the user, in the same numbering as ``rt['userId']``.
    threshold: Predicted ratings strictly above this value are recommended.

  Returns:
    A 1-D NumPy array of item ids, in ascending order.
  """
  listItem = sorted(set(rt['itemId']))

  # Build the index tensors directly on the model's device; a DataLoader is
  # not needed for a single full-size batch.
  device = next(model.parameters()).device
  items = torch.tensor(listItem, dtype=torch.long, device=device)
  users = torch.full_like(items, int(idUsers))

  # Inference only: skip autograd bookkeeping.
  with torch.no_grad():
    prediction = model(users, items)

  keep = (prediction > threshold).cpu().numpy()
  return np.asarray(listItem)[keep]

In [None]:
# Recommendations for user 1. The id is passed to the model unchanged, so it
# refers to the 0-based user index used in rt after the "- 1" shift.
recommendationForUser(1)