### Cypher Queries

In [None]:
# Create a uniqueness constraint on category ID. This prevents duplicate nodes and also creates an index, which improves query execution.

In [None]:
//yelp category constraint
// Enforce that no two Category nodes share the same `id` value.
CREATE CONSTRAINT ON (category:Category)
ASSERT category.id IS UNIQUE

In [21]:
# Add categories from json

In [None]:
//Create categories
// Read categories.json and upsert one Category node per record, keyed on
// `category_id`; the `name` property is (re)written on every run.
CALL apoc.load.json("file:///categories.json")
YIELD value AS row
MERGE (category:Category {id: row.category_id})
SET category.name = row.category_name

In [None]:
# Create uniqueness constraint on restaurant ID - to avoid duplicates, also creates indexing which improves query execution

In [None]:
//a 4 yelp restaurant constraint
// Enforce that no two Restaurant nodes share the same `id` value.
CREATE CONSTRAINT ON (restaurant:Restaurant)
ASSERT restaurant.id IS UNIQUE

In [None]:
# Add restaurants from json

In [None]:
//a yelp create restaurant
// Batched load of restaurants.json via apoc.periodic.iterate:
//   1st statement - streams one `value` map per JSON record;
//   2nd statement - upserts a Restaurant keyed on `business_id` and sets its
//                   stars, review_count and open properties from the record.
// Work is committed in batches of 10000 records.
CALL apoc.periodic.iterate(
'
CALL apoc.load.json("file:///restaurants.json")
YIELD value
',
'
MERGE (r:Restaurant {id: value.business_id})
SET r.stars = value.business_stars, r.review_count = value.business_review_count, r.open = value.is_open
',
{ batchSize: 10000 }
)

In [None]:
# load restaurant to category connections

In [None]:
//a 5 yelp Restaurant to category
// Connects restaurants to their categories. Each record of
// restaurant_to_category.json supplies a (business_id, category_id) pair;
// records are processed in batches of 10000.
//
// MATCH is used instead of OPTIONAL MATCH on purpose: with OPTIONAL MATCH a
// record whose restaurant or category is not in the graph binds both
// variables to null, the MERGE then raises an error, and the whole batch that
// contains the record is rolled back. With MATCH such records are skipped and
// the remaining records in the batch are still linked.
//
// The relationship pattern is left undirected, so an existing IS_CATEGORY
// relationship in either direction is reused rather than duplicated.
CALL apoc.periodic.iterate('
CALL apoc.load.json("file:///restaurant_to_category.json")
YIELD value
','
MATCH (r:Restaurant {id: value.business_id}),(c:Category {id: value.category_id})
MERGE (r)-[:IS_CATEGORY]-(c)
',{batchSize:10000})

In [None]:
# Create uniqueness constraint on user ID - to avoid duplicates, also creates indexing which improves query execution

In [None]:
//a 8 yelp user constraints
// Enforce that no two User nodes share the same `id` value.
CREATE CONSTRAINT ON (user:User)
ASSERT user.id IS UNIQUE

In [None]:
# load users in batches

In [None]:
//a 8 yelp create users batch
// Batched load of users.json via apoc.periodic.iterate:
//   1st statement - streams one `value` map per JSON record;
//   2nd statement - upserts a User keyed on `user_id` and copies the vote
//                   counts, review count, fans, average stars and mean
//                   compliment score from the record onto the node.
// Work is committed in batches of 10000 records.
CALL apoc.periodic.iterate(
'
CALL apoc.load.json("file:///users.json")
YIELD value
',
'
MERGE (u:User {id: value.user_id})
SET u.votes_useful = value.useful, u.votes_funny = value.funny, u.votes_cool = value.cool, u.user_review_count = value.review_count, u.fans = value.fans, u.average_stars = value.average_stars, u.average_compliment = value.mean_compliment_score
',
{ batchSize: 10000 }
)

In [None]:
# load users to restaurant connections in batches

In [None]:
//a 93 yelp user to restaurant
// Connects users to the restaurants they reviewed. Each record of
// user_to_restaurant.json supplies a user_id, a business_id and the star
// rating of the review; records are processed in batches of 10000.
//
// MATCH is used instead of OPTIONAL MATCH on purpose: with OPTIONAL MATCH a
// record whose user or restaurant is not in the graph binds both variables to
// null, the MERGE then raises an error, and the whole batch that contains the
// record is rolled back. With MATCH such records are skipped and the
// remaining records in the batch are still linked.
//
// review_stars is part of the MERGE pattern, so a user/restaurant pair gets
// one REVIEWS relationship per distinct star rating.
CALL apoc.periodic.iterate('
CALL apoc.load.json("file:///user_to_restaurant.json")
YIELD value
','
MATCH (r:Restaurant {id: value.business_id}),(u:User {id: value.user_id})
MERGE (u)-[:REVIEWS {review_stars: value.review_stars}]-(r)
',{batchSize:10000})

In [None]:
# Create an in-memory graph of the full graph to run Graph Data Science library algorithms in Neo4j

In [None]:
//a 95 create inmemory full graph
// Project every node label and every relationship type ('*' wildcards) into
// an in-memory GDS graph registered under the name 'full_yelp'.
CALL gds.graph.create(
  'full_yelp',
  '*',
  '*'
)

In [None]:
#Train graphsage model on full graph --> this resulted in same embedding for all the nodes of same kind. 

In [None]:
// a 96 graphsage yelp full
// Train a GraphSAGE model on the 'full_yelp' in-memory graph and store it in
// the model catalog as 'graphsage_yelp_full'. No featureProperties are
// listed; degreeAsProperty: true supplies node degree as the input feature.
CALL gds.beta.graphSage.train(
  'full_yelp',
  {
    modelName: 'graphsage_yelp_full',
    aggregator: 'mean',
    activationFunction: 'relu',
    sampleSizes: [25, 10],
    degreeAsProperty: true,
    projectedFeatureDimension: 5
  }
)

In [None]:
# Write graphsage embeddings as node properties

In [None]:
//a 98 yelp graphsage embeddings
// Run the trained 'graphsage_yelp_full' model over the 'full_yelp' in-memory
// graph and persist each node's embedding in the `graphsage_embedding`
// node property.
CALL gds.beta.graphSage.write(
  'full_yelp',
  {
    modelName: 'graphsage_yelp_full',
    writeProperty: 'graphsage_embedding'
  }
);

In [None]:
# Create an in-memory graph of the subgraph to run Graph Data Science library algorithms in Neo4j

In [None]:
// a 99 yelp rest cat full inmemory subgraph
// Project the Restaurant/Category subgraph into an in-memory GDS graph named
// 'restaurant_and_category_full'.
//   - Restaurant nodes carry review_count, stars and open.
//   - Category nodes carry id.
//   - IS_CATEGORY relationships are projected as UNDIRECTED.
CALL gds.graph.create(
  'restaurant_and_category_full',
  {
    Restaurant: {
      label: 'Restaurant',
      properties: ['review_count', 'stars', 'open']
    },
    Category: {
      label: 'Category',
      properties: ['id']
    }
  },
  {
    IS_CATEGORY: {
      type: 'IS_CATEGORY',
      orientation: 'UNDIRECTED'
    }
  }
)

In [None]:
#Train graphsage model on a sub graph --> this also resulted in same embedding for all the nodes of same kind.

In [None]:
//a 991 yelp restcat graphsage
// Train a GraphSAGE model on the 'restaurant_and_category_full' in-memory
// graph and store it in the model catalog as 'graphsage_rest_cat'.
CALL gds.beta.graphSage.train(
  'restaurant_and_category_full',
  {
    modelName: 'graphsage_rest_cat',

    // input features: three node properties plus node degree
    featureProperties: ['review_count', 'stars', 'open'],
    degreeAsProperty: true,
    projectedFeatureDimension: 3,

    // network configuration
    aggregator: 'mean',
    activationFunction: 'relu',
    sampleSizes: [10, 5],

    // training configuration
    epochs: 5,
    searchDepth: 5,
    negativeSampleWeight: 10
  }
)

In [None]:
# Write graphsage embeddings trained on subgraph as node properties

In [None]:
//a 98 yelp graphsage embeddings
// Run the trained 'graphsage_rest_cat' model over the
// 'restaurant_and_category_full' in-memory graph and persist each node's
// embedding in the `graphsage_embedding_subgraph` node property.
CALL gds.beta.graphSage.write(
  'restaurant_and_category_full',
  {
    modelName: 'graphsage_rest_cat',
    writeProperty: 'graphsage_embedding_subgraph'
  }
);

In [None]:
# Write FastRP (Fast Random Projection) node embeddings as a node property on all nodes of the subgraph. On the full graph this produced many all-zero embedding vectors.

In [None]:
//a 992 yelp fastRP full
// Compute FastRP embeddings on the 'restaurant_and_category_full' in-memory
// graph and persist them in the `fastRP` node property. Three iteration
// weights are given, with the third weighted 4x the first two.
CALL gds.fastRP.write(
  'restaurant_and_category_full',
  {
    writeProperty: 'fastRP',
    embeddingDimension: 5,
    iterationWeights: [1.0, 1.0, 4.0],
    normalizationStrength: -0.9
  }
)