In this Colab notebook we generate node embeddings with Metapath2Vec and then compute the cosine similarity between them. The goal is to find the most similar recipes according to cosine similarity.

In [None]:
!pip install stellargraph



In [None]:
import pandas as pd
import numpy as np

In [None]:
# Only the interaction log is needed: it already carries the contiguous user ('u')
# and recipe ('i') ids from which the node tables are derived below.
# PP_users.csv and PP_recipes.csv used to be read here into `users` / `recipes`,
# but both names are reassigned from `interactions` before ever being read,
# so those two loads were dead work and have been dropped.
interactions = pd.read_csv("/content/drive/MyDrive/food recipes/interactions_train.csv")

**Find unique users & recipes and give them appropriate indexes**

In [None]:
# One row per distinct internal user id ('u'), in order of first appearance
# in the interaction log.
unique_user_ids = interactions['u'].unique()
users = pd.DataFrame({'u': unique_user_ids})
# Label row k as "user_k"; these labels are the node ids used by the edge list.
users.index = "user_" + users.index.astype(str)
users

Unnamed: 0,u
user_0,22095
user_1,24732
user_2,1674
user_3,20667
user_4,19047
...,...
user_25071,18127
user_25072,5201
user_25073,13311
user_25074,7790


In [None]:
# One row per distinct internal recipe id ('i'), in order of first appearance
# in the interaction log.
unique_recipe_ids = interactions['i'].unique()
recipes = pd.DataFrame({"i": unique_recipe_ids})
# Label row k as "recipe_k"; these labels are the node ids used by the edge list.
recipes.index = 'recipe_' + recipes.index.astype(str)
recipes

Unnamed: 0,i
recipe_0,44367
recipe_1,87844
recipe_2,138181
recipe_3,93054
recipe_4,101723
...,...
recipe_160896,35126
recipe_160897,112580
recipe_160898,89109
recipe_160899,51525


In [None]:
# Display the interaction log; its 'u' and 'i' columns are the ids the user and
# recipe node tables were built from.
interactions

Unnamed: 0,user_id,recipe_id,date,rating,u,i
0,2046,4684,2000-02-25,5.0,22095,44367
1,2046,517,2000-02-25,5.0,22095,87844
2,1773,7435,2000-03-13,5.0,24732,138181
3,1773,278,2000-03-13,4.0,24732,93054
4,2046,3431,2000-04-07,5.0,22095,101723
...,...,...,...,...,...,...
698896,926904,457971,2018-12-18,5.0,13681,141067
698897,2002312797,27208,2018-12-18,5.0,14897,99787
698898,1290903,131607,2018-12-18,5.0,11605,76163
698899,226867,363072,2018-12-18,5.0,3604,29101


In [None]:
# Load the precomputed user-recipe edge list and discard the leftover
# 'Unnamed: 0' column (a saved row index) in a single non-mutating chain.
edges = (
    pd.read_csv("/content/drive/MyDrive/food recipes/edges_final.csv")
    .drop(columns=['Unnamed: 0'])
)
edges

Unnamed: 0,target,source,date
0,user_0,recipe_0,2000-02-25
1,user_0,recipe_1,2000-02-25
2,user_1,recipe_2,2000-03-13
3,user_1,recipe_3,2000-03-13
4,user_0,recipe_4,2000-04-07
...,...,...,...
698896,user_21665,recipe_142940,2018-12-18
698897,user_25075,recipe_2669,2018-12-18
698898,user_22591,recipe_49370,2018-12-18
698899,user_10668,recipe_134757,2018-12-18


**Create the graph**

In [None]:
from stellargraph import StellarGraph

# Only the endpoints ('source' / 'target') are passed on as edges; the
# interaction date is left out of the edge table.
edges_graph = edges.drop(columns=['date'])

# Two node types, keyed by the names that the metapaths refer to later.
node_tables = {"users": users, "recipes": recipes}
graph = StellarGraph(node_tables, edges_graph)
print(graph.info())

StellarGraph: Undirected multigraph
 Nodes: 185977, Edges: 698901

 Node types:
  recipes: [160901]
    Features: float32 vector, length 1
    Edge types: recipes-default->users
  users: [25076]
    Features: float32 vector, length 1
    Edge types: users-default->recipes

 Edge types:
    recipes-default->users: [698901]
        Weights: all 1 (default)
        Features: none


# **Metapath2Vec**

In [None]:
import matplotlib.pyplot as plt
from sklearn.manifold import TSNE
from sklearn.decomposition import PCA
import os
import networkx as nx
import numpy as np
import pandas as pd
from stellargraph import datasets
from IPython.display import display, HTML

%matplotlib inline

In [None]:
walk_length = 120  # maximum length of a random walk to use throughout this notebook

# Metapath schemas as a list of lists of node types. Each schema starts and ends
# on the same node type so it can be repeated until `walk_length` is reached.
#
# The graph is bipartite: its only edge type connects users and recipes, so every
# hop in a schema must switch node type. The previous schemas
# ["recipes", "users", "users", "recipes"] and ["recipes", "recipes"] asked for
# users->users and recipes->recipes hops that do not exist in this graph, so walks
# rooted at recipes could not get past their first one or two nodes. They are
# replaced by the single valid recipe-rooted schema below.
metapaths = [
    ["users", "recipes", "users"],
    ["recipes", "users", "recipes"],
]

In [None]:
from stellargraph.data import UniformRandomMetaPathWalk

# Fixed seed so that the sampled walks are the same on every run; without it the
# walk corpus changes between executions.
WALK_SEED = 42

# Create the random walker
rw = UniformRandomMetaPathWalk(graph)

walks = rw.run(
    nodes=list(graph.nodes()),  # every node is used as a walk root
    length=walk_length,  # maximum length of a random walk
    n=1,  # number of random walks per root node (and per matching metapath)
    metapaths=metapaths,  # the metapath schemas to follow
    seed=WALK_SEED,  # makes the sampling reproducible
)

print(f"Number of random walks: {len(walks)}")

Number of random walks: 346878


In [None]:
from gensim.models import Word2Vec

# Train node embeddings on the walk corpus, treating each walk as a sentence
# and each node id as a word.
# NOTE: `size` and `iter` are the gensim 3.x keyword names
# (gensim >= 4 renamed them to `vector_size` and `epochs`).
model = Word2Vec(
    walks,
    size=128,     # embedding dimensionality
    window=5,     # context window within a walk
    min_count=0,  # keep every node, however rarely it was visited
    sg=1,         # skip-gram
    workers=2,
    iter=1,       # a single pass over the walks
)

In [None]:
# Shape of the learned embedding matrix: (number of embedded nodes, embedding size).
model.wv.vectors.shape 

(185977, 128)

In [None]:
# Retrieve node embeddings and corresponding subjects
# Node ids in the model's vocabulary order, and the embedding matrix itself:
# row k of `node_embeddings` is the vector for `node_ids[k]`.
node_ids = model.wv.index2word
node_embeddings = model.wv.vectors
node_embeddings

array([[ 4.9685150e-02,  2.5385190e-02, -1.4418734e-03, ...,
         8.9795506e-03,  4.4722357e-03, -1.1564555e-02],
       [ 2.0248305e-02, -1.3669088e-02,  2.2583427e-04, ...,
        -5.1572354e-04,  1.2055436e-02, -2.1957196e-02],
       [ 8.8100694e-03, -1.2057280e-02,  1.9685335e-03, ...,
         2.0633705e-02, -1.4310701e-02, -8.3530759e-03],
       ...,
       [-1.0182718e-03, -2.8971757e-03,  5.1995332e-04, ...,
         2.8876609e-03, -2.2294440e-03,  4.4694720e-05],
       [-3.0971675e-03,  3.0562051e-03, -1.2397080e-03, ...,
        -8.8931993e-04, -3.0645297e-03,  3.8372127e-03],
       [-3.7197389e-03,  1.2593233e-03, -3.6064521e-03, ...,
        -3.3696310e-03, -2.1357776e-03,  3.0402397e-04]], dtype=float32)

In [None]:
!pip install sentence-transformers



In [None]:
import pandas as pd
import numpy as np
import os 
import ast
import sentence_transformers  #### Package providing pretrained sentence-embedding models
import matplotlib.pyplot as plt 
import networkx as nx #### NetworkX, a library for graph-based algorithms
import pickle ### Used to save results to disk for later access
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity ### Pairwise cosine similarity
from scipy import sparse ### Sparse matrices
# NOTE(review): this rebinds `model`, which up to this point referred to the
# Word2Vec model trained earlier. Re-running any earlier cell that reads
# `model.wv` after this line will fail; consider a distinct name.
model = SentenceTransformer('bert-large-nli-stsb-mean-tokens') ### Pretrained sentence encoder

Some weights of the model checkpoint at /root/.cache/torch/sentence_transformers/sbert.net_models_bert-large-nli-stsb-mean-tokens/0_BERT were not used when initializing BertModel: ['classifier.weight', 'classifier.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [None]:
# Reuse embeddings cached by an earlier run when the file is present. On a fresh
# run the cache does not exist yet (it is only written further down), so fall
# back to the embeddings produced by Word2Vec in this session instead of failing
# with FileNotFoundError.
# NOTE(review): the cache stores only the matrix, not the node ids, so a cached
# matrix is not guaranteed to line up row-for-row with the current `node_ids`.
embeddings_path = '/content/drive/MyDrive/food recipes/embeddings.npy'
if os.path.exists(embeddings_path):
    all_embeddings = np.load(embeddings_path)
else:
    all_embeddings = np.array(node_embeddings)
all_embeddings

array([[ 4.9685150e-02,  2.5385190e-02, -1.4418734e-03, ...,
         8.9795506e-03,  4.4722357e-03, -1.1564555e-02],
       [ 2.0248305e-02, -1.3669088e-02,  2.2583427e-04, ...,
        -5.1572354e-04,  1.2055436e-02, -2.1957196e-02],
       [ 8.8100694e-03, -1.2057280e-02,  1.9685335e-03, ...,
         2.0633705e-02, -1.4310701e-02, -8.3530759e-03],
       ...,
       [-1.0182718e-03, -2.8971757e-03,  5.1995332e-04, ...,
         2.8876609e-03, -2.2294440e-03,  4.4694720e-05],
       [-3.0971675e-03,  3.0562051e-03, -1.2397080e-03, ...,
        -8.8931993e-04, -3.0645297e-03,  3.8372127e-03],
       [-3.7197389e-03,  1.2593233e-03, -3.6064521e-03, ...,
        -3.3696310e-03, -2.1357776e-03,  3.0402397e-04]], dtype=float32)

In [None]:
# Use a copy of the embeddings from the Word2Vec model trained in this session;
# this replaces whatever `all_embeddings` held before (e.g. a matrix loaded from disk).
all_embeddings = np.array(node_embeddings)

In [None]:
# Persist the embedding matrix to Drive.
# NOTE(review): only the matrix is written; the matching `node_ids` are not saved,
# so the file alone does not say which node each row belongs to.
np.save('/content/drive/MyDrive/food recipes/embeddings.npy', all_embeddings)

In [None]:
# The goal is recipe-to-recipe similarity, but `all_embeddings` holds one row per
# graph node (users and recipes alike) in Word2Vec vocabulary order. A plain
# `[:10000]` slice therefore mixes user and recipe vectors. Keep only rows whose
# node id marks a recipe, then take the first 10000 of those.
assert len(node_ids) == len(all_embeddings), "node_ids and all_embeddings are misaligned"
recipe_rows = np.flatnonzero(
    [str(node_id).startswith("recipe_") for node_id in node_ids]
)[:10000]
# Node id for each row of `test`, so positions in the similarity matrix can be
# mapped back to actual recipes.
test_node_ids = [node_ids[row] for row in recipe_rows]
test = all_embeddings[recipe_rows]

In [None]:
# Pairwise cosine similarity between every pair of rows in `test`;
# the result is a square matrix with one row and one column per row of `test`.
result = cosine_similarity(test)

In [None]:
# Display the similarity matrix (the diagonal is each vector compared with itself, ~1.0).
result

array([[ 0.99999994,  0.10762647,  0.2445693 , ..., -0.06991005,
        -0.00136704, -0.09638256],
       [ 0.10762647,  0.9999999 ,  0.19804443, ..., -0.07662005,
         0.08817258, -0.0758438 ],
       [ 0.2445693 ,  0.19804443,  0.9999998 , ..., -0.02602956,
        -0.05381593, -0.10568848],
       ...,
       [-0.06991005, -0.07662005, -0.02602956, ...,  0.9999999 ,
        -0.08281579,  0.06086101],
       [-0.00136704,  0.08817258, -0.05381593, ..., -0.08281579,
         1.        ,  0.11282407],
       [-0.09638256, -0.0758438 , -0.10568848, ...,  0.06086101,
         0.11282407,  1.0000001 ]], dtype=float32)

In [None]:
# Wrap the similarity matrix in a DataFrame; rows and columns get the default
# positional labels 0..n-1.
df1 = pd.DataFrame(result)

# Write through a context manager so the file handle is flushed and closed as
# soon as the dump finishes (the bare `open(...)` used before was never closed).
with open('result_similarity.pickle', 'wb') as pickle_file:
    pickle.dump(df1, pickle_file)

In [None]:
# Display the similarity matrix as a DataFrame.
df1

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,...,9960,9961,9962,9963,9964,9965,9966,9967,9968,9969,9970,9971,9972,9973,9974,9975,9976,9977,9978,9979,9980,9981,9982,9983,9984,9985,9986,9987,9988,9989,9990,9991,9992,9993,9994,9995,9996,9997,9998,9999
0,1.000000,0.107626,0.244569,0.387225,0.149793,0.004153,0.078170,0.110121,0.242933,-0.034670,0.066224,0.184790,0.285583,0.064707,0.042022,0.151528,0.091522,0.092624,0.114893,0.094982,0.046838,0.156096,0.055717,0.366096,0.165240,-0.038261,-0.011481,-0.008284,0.156469,-0.043601,0.061384,0.274359,0.136946,0.118224,0.191227,0.160152,0.065404,0.125629,0.142584,0.165514,...,0.047859,-0.008223,0.163904,-0.048483,0.007661,-0.255118,0.061991,0.118731,0.050195,-0.083351,-0.032996,-0.068559,-0.147720,-0.012748,-0.000978,-0.087309,-0.072267,0.026165,0.019132,0.115644,0.047058,0.143001,-0.001826,0.099834,-0.126207,-0.061878,-0.138241,-0.048044,0.126588,0.056496,0.039995,0.087009,0.042649,0.090258,0.006247,-0.025019,0.005435,-0.069910,-0.001367,-0.096383
1,0.107626,1.000000,0.198044,0.017539,0.225090,0.217055,0.079414,0.234134,0.339981,0.229991,0.309193,0.304357,0.147866,0.161819,0.303292,0.139407,0.265430,0.242058,0.167345,0.339932,0.262982,0.142932,0.173917,0.205252,0.282148,0.083598,0.185072,0.186532,0.078164,0.182123,0.252435,0.144546,0.141923,0.164072,0.142672,0.308173,0.075675,0.112283,0.013700,0.339316,...,0.120401,0.003588,-0.121294,0.008167,0.017059,-0.032192,0.211048,0.065922,0.148838,0.043689,-0.065396,0.030618,-0.139915,-0.079044,-0.183274,0.008996,-0.096031,-0.010287,0.012558,0.022880,-0.026490,0.054173,0.020705,-0.113542,-0.083100,-0.097127,-0.047249,-0.137661,-0.088066,0.051733,0.094542,-0.016519,-0.029769,-0.015361,-0.003352,0.058877,-0.168570,-0.076620,0.088173,-0.075844
2,0.244569,0.198044,1.000000,0.195981,0.292916,0.197194,0.267026,0.181301,0.157770,0.229667,0.299083,0.402666,0.327653,0.280243,0.107124,0.095974,0.300027,0.305818,0.205051,0.318471,0.274610,0.228124,0.119875,0.348800,0.256228,0.225569,0.091537,0.108430,0.215546,0.175351,0.159248,0.248964,0.192440,0.115139,0.360975,0.176838,0.102362,0.248981,0.127563,0.342575,...,-0.085665,-0.104948,0.168680,0.134510,0.134186,-0.040440,0.061312,-0.141189,-0.056395,-0.111222,-0.011089,-0.151747,-0.078980,0.026990,0.005884,0.019194,0.052162,-0.028849,0.110302,-0.043228,-0.057274,0.021818,-0.014373,0.169732,-0.026856,0.012380,-0.018292,-0.078680,0.045676,-0.096309,-0.033798,0.080208,-0.006308,0.012516,0.042922,0.060330,0.105475,-0.026030,-0.053816,-0.105688
3,0.387225,0.017539,0.195981,1.000000,0.344129,0.110473,0.223609,0.031685,0.197299,0.133452,0.280653,0.150820,0.281180,0.150332,-0.021844,0.125174,0.279670,0.145705,0.216137,0.216133,0.151761,0.090916,0.052240,0.186052,0.055142,0.019905,0.084219,0.167980,0.288636,0.163648,0.096865,0.209554,0.201555,0.191296,0.249575,0.098752,0.199444,0.052157,0.106708,0.138515,...,0.121084,0.076330,0.081265,-0.029240,-0.062166,-0.061332,0.102452,0.133553,0.056999,-0.073041,0.004849,-0.057813,0.045833,-0.080226,0.063621,-0.081274,-0.078666,0.035505,-0.095023,0.046334,0.052178,0.040452,0.065509,0.046826,-0.045281,-0.013410,0.015235,0.009501,0.095995,0.022624,-0.124981,-0.103987,0.070909,0.077347,0.125036,-0.070891,0.050161,0.032434,0.053072,-0.090863
4,0.149793,0.225090,0.292916,0.344129,1.000000,0.188859,0.293246,0.326844,0.234627,0.326799,0.384398,0.308458,0.160903,0.179535,0.254925,0.146785,0.280249,0.304117,0.177463,0.307476,0.296090,0.238740,0.134982,0.300973,0.207990,0.011753,0.257900,0.228585,0.273068,0.307471,0.230597,0.314258,0.257680,0.249426,0.399067,0.048650,0.180865,0.306936,0.151839,0.311303,...,0.013584,0.038238,0.005423,-0.015941,-0.068150,-0.054413,0.073003,0.045608,0.105884,0.019817,-0.069154,0.000077,-0.006773,-0.102099,0.007647,0.125935,-0.129933,-0.005814,0.045428,0.018750,0.080329,0.074622,0.037506,0.047661,0.014597,-0.055790,-0.079774,-0.062601,0.047532,0.012686,-0.168350,-0.160365,-0.062821,-0.093670,-0.069847,0.032383,0.206877,-0.008231,-0.069738,0.023420
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,-0.025019,0.058877,0.060330,-0.070891,0.032383,-0.053147,0.081353,-0.048211,-0.109189,-0.058652,-0.111713,-0.005228,0.057825,0.034275,-0.008138,-0.129712,-0.094150,0.006883,-0.097098,0.003165,0.027514,-0.041691,-0.021821,0.049731,-0.058687,0.056889,-0.147856,-0.133512,-0.087985,-0.047298,-0.040702,-0.059293,-0.086603,-0.015133,0.050305,-0.034991,0.130466,0.077923,-0.076549,0.005883,...,0.003609,0.015665,-0.062112,-0.078991,0.170677,-0.006019,-0.086795,-0.181738,-0.012844,-0.029127,0.125540,-0.004861,0.007529,-0.067702,-0.142907,0.020073,-0.132180,-0.067609,0.073683,-0.096254,-0.052050,-0.034114,0.084886,0.139730,-0.103289,-0.126092,-0.023220,0.066603,0.042473,-0.047787,-0.122767,0.061328,0.205369,-0.191415,-0.047418,1.000000,0.122392,0.117226,-0.052421,0.036444
9996,0.005435,-0.168570,0.105475,0.050161,0.206877,0.103190,0.074348,0.053721,-0.083468,0.004858,-0.069889,0.095595,0.027671,-0.038938,0.104242,-0.114996,0.131375,0.122001,-0.018666,-0.002247,0.087267,0.045403,0.074148,0.065390,-0.047630,0.185289,0.005257,0.033221,-0.001815,0.184925,0.004830,0.192293,0.037537,-0.014884,0.011455,-0.126324,0.156642,0.269763,0.058489,-0.029457,...,-0.180787,0.032111,0.045076,0.009794,-0.107771,0.003079,-0.053991,0.026766,-0.093010,-0.205097,0.156103,0.151144,-0.059000,-0.118944,0.095814,0.104764,-0.130005,-0.026517,0.168952,-0.090536,-0.129783,0.108501,-0.044415,0.133889,0.024903,-0.123479,-0.061853,0.054246,0.030016,-0.052042,0.017619,0.084631,0.095805,-0.067358,-0.032955,0.122392,1.000000,-0.039985,-0.087027,0.092553
9997,-0.069910,-0.076620,-0.026030,0.032434,-0.008231,-0.023710,-0.067721,0.028913,0.017590,0.103405,-0.107114,-0.071976,-0.062117,0.031500,-0.012724,0.023191,0.027347,0.097903,-0.115071,-0.069104,0.056150,-0.081502,-0.003313,-0.030953,0.043696,0.078690,-0.056781,0.056889,-0.037380,-0.012643,-0.015843,0.039489,-0.021634,-0.115492,0.074539,-0.067219,0.006339,0.113069,-0.014928,-0.098634,...,-0.130547,0.105896,0.057086,0.054835,0.143121,-0.071753,-0.125466,0.042106,-0.054827,-0.044556,-0.045589,-0.004327,-0.004478,-0.023929,0.086128,-0.141050,0.050326,0.120786,-0.042563,-0.239649,-0.061672,-0.068550,-0.051309,-0.002237,0.087673,-0.030338,0.052562,0.011485,-0.031894,0.085600,-0.122478,0.022904,0.133674,0.013236,0.110649,0.117226,-0.039985,1.000000,-0.082816,0.060861
9998,-0.001367,0.088173,-0.053816,0.053072,-0.069738,-0.067898,-0.046048,-0.034729,-0.166556,-0.120872,0.051055,-0.012539,0.064267,0.049296,0.136711,-0.098852,-0.006405,0.007824,-0.083555,-0.097436,-0.174164,-0.054899,0.070956,0.006817,0.016234,-0.171763,0.051939,-0.013872,-0.176900,-0.049890,0.097451,0.088224,-0.130637,-0.125371,0.006447,-0.106612,-0.054422,-0.188950,-0.010778,0.030632,...,-0.054440,-0.057559,0.053049,0.031851,-0.066475,-0.150397,0.197115,-0.113372,-0.018471,-0.064698,0.070082,0.032378,-0.015581,0.095995,-0.111206,0.065978,0.047415,-0.021278,-0.166679,0.042420,-0.112425,0.040285,0.115508,-0.007457,-0.116667,0.053166,-0.016049,0.088888,0.045220,-0.038269,0.019042,-0.039235,-0.012633,0.027560,0.206545,-0.052421,-0.087027,-0.082816,1.000000,0.112824


In [None]:
# Flatten the square matrix into long form: one row per (column label, row label)
# pair together with its similarity value.
data_similarity = df1.unstack().reset_index()

In [None]:
# Name the three long-form columns.
# NOTE(review): 'recipe1' / 'recipe2' hold the positional labels of `df1`
# (i.e. row positions within `test`), not recipe ids.
data_similarity.columns = ['recipe1','recipe2','cosine_similarity']

In [None]:
# Display the long-form similarity table.
data_similarity

Unnamed: 0,recipe1,recipe2,cosine_similarity
0,0,0,1.000000
1,0,1,0.107626
2,0,2,0.244569
3,0,3,0.387225
4,0,4,0.149793
...,...,...,...
99999995,9999,9995,0.036444
99999996,9999,9996,0.092553
99999997,9999,9997,0.060861
99999998,9999,9998,0.112824


In [None]:
# Keep only pairs of *different* items whose similarity exceeds the threshold.
#
# Self-pairs are removed by comparing the two labels. The previous
# `cosine_similarity < 0.9999` cut-off also threw away distinct pairs that are
# (almost) identical, which are exactly the most similar pairs this analysis is
# looking for.
MIN_SIMILARITY = 0.3
is_self_pair = data_similarity['recipe1'] == data_similarity['recipe2']
is_similar = data_similarity['cosine_similarity'] > MIN_SIMILARITY
data_similarity = data_similarity[~is_self_pair & is_similar]
print(data_similarity.shape)

(31646, 3)


In [None]:
# Display the pairs that survived the similarity filter.
data_similarity

Unnamed: 0,recipe1,recipe2,cosine_similarity
3,0,3,0.387225
23,0,23,0.366096
63,0,63,0.366290
280,0,280,0.325494
2250,0,2250,0.347412
...,...,...,...
99988254,9998,8254,0.327445
99993621,9999,3621,0.306014
99993993,9999,3993,0.303178
99994765,9999,4765,0.328923
