# Header files

In [7]:
from numpy import *
import networkx as nx
import matplotlib.pyplot as plt
import math
import numpy as np
import pandas as pd
from numpy.linalg import *

In [8]:
pip install scipy==1.8.1

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


# Other Functions


In [34]:
def degree(G,u):
    return len([n for n in G.neighbors(u)])

# Link Prediction Model

In [35]:
def CN(G,u,v):
    G=G[0]
    return len([i for i in G.neighbors(u) if i in G.neighbors(v)])

def SaltonIndex(Graph,u,v):
    G=Graph[0]
    ku=degree(G,u)
    kv=degree(G,v)
    return CN(Graph,u,v)/((ku*kv)**0.5)

def jaccardIndex(Graph,u,v):
    G=Graph[0]
    return CN(Graph,u,v)/len(set([i for i in G.neighbors(u)]+[i for i in G.neighbors(u)]))

def Sorensen(Graph,u,v):
    G=Graph[0]
    ku=degree(G,u)
    kv=degree(G,v)
    return (2*CN(Graph,u,v))/(ku+kv)

def HPI(G,u,v):
    G=Graph[0]
    ku=degree(G,u)
    kv=degree(G,v)
    return (CN(Graph,u,v))/min(ku,kv)

def HDI(G,u,v):
    G=Graph[0]
    ku=degree(G,u)
    kv=degree(G,v)
    return (CN(Graph,u,v))/max(ku,kv)

def LHN1(G,u,v):
    G=Graph[0]
    ku=degree(G,u)
    kv=degree(G,v)
    return (2*CN(Graph,u,v))/(ku*kv)

def PA(G,u,v):
    G=Graph[0]
    ku=degree(G,u)
    kv=degree(G,v)
    return ku*kv

def AA(G,u,v):
    G=Graph[0]
    z=[i for i in G.neighbors(u) if i in G.neighbors(v)]
    return sum([1/math.log(degree(G,i)) for i in z])

def RA(G,u,v):
    G=Graph[0]
    z=[i for i in G.neighbors(u) if i in G.neighbors(v)]
    return sum([1/degree(G,i) for i in z])

def LP(A,u,v):
    l=A[2]
    A=A[1]
    u=l.index(u)
    v=l.index(v)
    epsilon=0.01
    A2=matrix_power(A,2)
    A3=epsilon*matrix_power(A,3)
    return A2[u][v]+A3[u][v]

def Katz(A,u,v):
    l=A[2]
    A=A[1]
    u=l.index(u)
    v=l.index(v)
    beta=0.01
    I=np.identity(len(l))
    rslt=matrix_power(I-beta*A,-1)-I
    return rslt[u,v]

def SCT(L,u,v):
  l=L[2]
  L=L[3]
  u=l.index(u)
  v=l.index(v)
  return 1/(L[u,u]+L[v,v]-2*L[u,v])

def cosplus(L,u,v):
  l=L[2]
  L=L[3]
  u=l.index(u)
  v=l.index(v)
  return L[u,v]/sqrt(L[u,u]*L[v,v])

# Data Import

In [11]:
from pandas.core.window.expanding import template_returns
data={}
datat=[]
with open(r"/content/drive/MyDrive/Colab Notebooks/user_taggedartists.dat") as datFile:
    datat.append([data.split()[0] for data in datFile])
with open(r"/content/drive/MyDrive/Colab Notebooks/user_taggedartists.dat") as datFile:
    datat.append([data.split()[1] for data in datFile])
with open(r"/content/drive/MyDrive/Colab Notebooks/user_taggedartists.dat") as datFile:
    datat.append([data.split()[2] for data in datFile])

for i in datat:
    data[i[0]]=i[1:]

data=pd.DataFrame(data)

print(data)
part = data.sample(frac = 0.95)
rest = data.drop(part.index)

#print(data)
#for i,j,k in zip(rest.userID,rest.artistID,rest.tagID):
#  data=data.append({'userID':i,'artistID':j,'tagID':0},ignore_index=True)
#print(data)

listUser=['u'+i for i in data['userID']]
listTag=[i for i in data['tagID']]
listItem=['i'+i for i in data['artistID']]

partUser=['u'+i for i in part['userID']]
partTag=[i for i in part['tagID']]
partItem=['i'+i for i in part['artistID']]

restUser=['u'+i for i in rest['userID']]
restTag=[i for i in rest['tagID']]
restItem=['i'+i for i in rest['artistID']]

G=nx.Graph()
G.add_nodes_from(listUser, bipartite=0)
G.add_nodes_from(listTag, bipartite=1)
G.add_nodes_from(listItem, bipartite=2)

for i,j,k in zip(partUser,partTag,partItem):
    G.add_edge(i,j)
    G.add_edge(k,j)
    G.add_edge(i,k)

for i,k in zip(restUser,restItem):
    G.add_edge(i,k)

nodes = G.nodes()
nodes_0  = set([n for n in nodes if  G.nodes[n]['bipartite']==0])
nodes_1  = set([n for n in nodes if  G.nodes[n]['bipartite']==1])
nodes_2  = set([n for n in nodes if  G.nodes[n]['bipartite']==2])

pos = dict()
pos.update( (n, (1, i)) for i, n in enumerate(nodes_0) ) 
pos.update( (n, (2, i)) for i, n in enumerate(nodes_1) )
pos.update( (n, (3, i)) for i, n in enumerate(nodes_2) ) 

#nx.draw(G, pos=pos,with_labels=True)
#plt.show()

GList=[i for i in G.nodes()]
sort(GList)
temp=nx.adjacency_matrix(G,GList)
temp=temp.todense()
A=temp

DiagMat=np.zeros((len(GList),len(GList)))
for i,j in enumerate(GList):
  DiagMat[i,i]=degree(G,j)

LapMat=DiagMat-A
Graph=(G,A,GList,LapMat)

       userID artistID tagID
0           2       52    13
1           2       52    15
2           2       52    18
3           2       52    21
4           2       52    41
...       ...      ...   ...
186474   2100    16437     4
186475   2100    16437   292
186476   2100    16437  2087
186477   2100    16437  2801
186478   2100    16437  3335

[186479 rows x 3 columns]


# Prediction

In [12]:
pred=rest.copy(deep=True)
def predict(G,u,i,score,tags,n):
  temp={}
  count=-1
  for t in tags:
    count=count+1
    if count>n:
      break
    temp[t]=score(G,u,t)+score(G,i,t)
  NTemp = sorted(temp.items(), key=lambda x:x[1])
  NTemp=dict(NTemp[::-1][:n])
  return list(NTemp.keys())[0]

In [None]:
from sklearn.metrics import accuracy_score, confusion_matrix, roc_auc_score, roc_curve, f1_score

metrics=[CN,SaltonIndex,jaccardIndex,Sorensen,HPI,HDI,LHN1,PA,AA,RA,LP,Katz,SCT,cosplus]
for f in metrics:
  for k,i,j in zip (pred.index,pred.userID,pred.artistID):
    pred.loc[k,'tagID']=predict(Graph,'u'+i,'i'+j,f,listTag,1)
  print('\n',str(f))
  print(f1_score(rest.tagID,pred.tagID,average='weighted'))
  print(accuracy_score(rest.tagID,pred.tagID))


 <function CN at 0x7f19a1d8fee0>
0.00014029197589010466
0.007078507078507079

 <function SaltonIndex at 0x7f19a1d8f1f0>
0.00016763091697605498
0.006649506649506649

 <function jaccardIndex at 0x7f19a1ed3160>
0.0001320322060687172
0.006971256971256971

 <function Sorensen at 0x7f19a1ed3d30>
0.0001487199386459353
0.006327756327756328

 <function HPI at 0x7f19a1ed3e50>
0.00014674411138198968
0.006971256971256971

 <function HDI at 0x7f19a1dd8670>
0.000131752706662246
0.005577005577005577

 <function LHN1 at 0x7f19a1d87700>
0.0001030068910253242
0.004826254826254826

 <function PA at 0x7f19a1d87a60>
9.950616979370974e-05
0.007078507078507079

 <function AA at 0x7f19a1d870d0>
0.00014029197589010466
0.007078507078507079

 <function RA at 0x7f19a1d87430>
0.00013338255215931925
0.007078507078507079
