In [1]:
# Import-a-thon.

import os
from getpass import getpass

# Randomizer for user and/or product selection.
from random import randint

import pandas as pd
from igraph import Graph as IGraph
from py2neo import Graph

In [2]:
# Connect to the local Neo4j instance over Bolt.
# 7687 is Neo4j's Bolt port, so it is supplied as `bolt_port`; passing it as
# `http_port` would point the HTTP endpoint at the Bolt listener.
# The password comes from the NEO4J_PASSWORD environment variable (with an
# interactive prompt as fallback) so no credential is stored in the notebook.
graph = Graph(
    bolt=True,
    host="localhost",
    bolt_port=7687,
    user='neo4j',
    password=os.environ.get("NEO4J_PASSWORD") or getpass("Neo4j password: "),
)

In [33]:
# Ten (a.id, d.id) User pairs with the highest number of matches of the
# pattern below, restricted to matches whose `b` BOUGHT relationship has
# order_total > 50. `weight` is the per-pair match count.
top_pairs_query = """
MATCH (a:User)<-[b:BOUGHT]-()-[:BOUGHT]->(d:User)
WHERE b.order_total > 50
RETURN a.id, d.id, COUNT(*) AS weight
ORDER BY weight DESC
LIMIT 10"""
top_pairs_records = graph.data(top_pairs_query)
pd.DataFrame(top_pairs_records)

In [4]:
# For each (aisle name, department name) pair linked through a common node
# (FOUND_IN towards the Aisle, TYPE_OF towards the Department), return the
# number of such matches as `weight`.
# NOTE(review): graph.run presumably returns a cursor that can be consumed
# only once -- re-run this cell before rebuilding the igraph graph; confirm.
aisle_department_query = """
MATCH (a:Aisle)<-[:FOUND_IN]-()-[:TYPE_OF]->(d:Department)
RETURN a.name AS aisleName, d.name AS departmentName, COUNT(*) AS weight"""
cluster = graph.run(aisle_department_query)

In [5]:
# Build an igraph graph from the query rows held in `cluster`
# (aisleName, departmentName, weight). weights=True is passed so the extra
# column is kept as an edge weight rather than discarded.
ig = IGraph.TupleList(edges=cluster, weights=True)
ig

<igraph.Graph at 0x1166ff138>

In [25]:
# Run walktrap community detection on `ig`, passing the 'weight' edge
# attribute as weights, then convert the result into a clustering and
# report how many communities it contains.
walktrap_result = ig.community_walktrap(weights='weight')
clusters = walktrap_result.as_clustering()
len(clusters)

16

In [26]:
# Let's take a look at the 'clusters': build one record per vertex of `ig`,
# tagged with the community ("group") that `clusters` assigned to it.
# `clusters.membership` is indexed by vertex index, so it is zipped directly
# with the vertex sequence instead of looking every vertex up by name again.
nodes = [
    {'id': vertex['name'], 'label': vertex['name'], 'group': group}
    for vertex, group in zip(ig.vs, clusters.membership)
]

nodes[:20]

[{'group': 0,
  'id': 'canned fruit applesauce',
  'label': 'canned fruit applesauce'},
 {'group': 0, 'id': 'canned goods', 'label': 'canned goods'},
 {'group': 1, 'id': 'trail mix snack mix', 'label': 'trail mix snack mix'},
 {'group': 1, 'id': 'snacks', 'label': 'snacks'},
 {'group': 1, 'id': 'chips pretzels', 'label': 'chips pretzels'},
 {'group': 2, 'id': 'fresh herbs', 'label': 'fresh herbs'},
 {'group': 2, 'id': 'produce', 'label': 'produce'},
 {'group': 3, 'id': 'prepared soups salads', 'label': 'prepared soups salads'},
 {'group': 3, 'id': 'deli', 'label': 'deli'},
 {'group': 4,
  'id': 'preserved dips spreads',
  'label': 'preserved dips spreads'},
 {'group': 4, 'id': 'pantry', 'label': 'pantry'},
 {'group': 5,
  'id': 'specialty wines champagnes',
  'label': 'specialty wines champagnes'},
 {'group': 5, 'id': 'alcohol', 'label': 'alcohol'},
 {'group': 6, 'id': 'eggs', 'label': 'eggs'},
 {'group': 6, 'id': 'dairy eggs', 'label': 'dairy eggs'},
 {'group': 7, 'id': 'frozen desser

In [27]:
# Print every field value (one per line) of each node record whose
# 'group' is 2.
group_two_records = (record for record in nodes if record['group'] == 2)
for record in group_two_records:
    for value in record.values():
        print(value)

fresh herbs
fresh herbs
2
produce
produce
2
packaged produce
packaged produce
2
packaged vegetables fruits
packaged vegetables fruits
2
fresh vegetables
fresh vegetables
2
fresh fruits
fresh fruits
2


In [30]:
# Same walktrap detection as before but with steps=2, kept under a separate
# name (`clusters_`) so it can be compared with the default-step result.
short_walk_result = ig.community_walktrap(weights='weight', steps=2)
clusters_ = short_walk_result.as_clustering()
len(clusters_)

16

In [31]:
# Rebuild the node records using the steps=2 clustering (`clusters_`).
# Reading `clusters.membership` here would silently show the default-step
# communities again instead of the ones computed with steps=2.
# `membership` is indexed by vertex index, so it is zipped directly with the
# vertex sequence rather than looking every vertex up by name.
nodes = [
    {'id': vertex['name'], 'label': vertex['name'], 'group': group}
    for vertex, group in zip(ig.vs, clusters_.membership)
]

nodes[:20]

[{'group': 0,
  'id': 'canned fruit applesauce',
  'label': 'canned fruit applesauce'},
 {'group': 0, 'id': 'canned goods', 'label': 'canned goods'},
 {'group': 1, 'id': 'trail mix snack mix', 'label': 'trail mix snack mix'},
 {'group': 1, 'id': 'snacks', 'label': 'snacks'},
 {'group': 1, 'id': 'chips pretzels', 'label': 'chips pretzels'},
 {'group': 2, 'id': 'fresh herbs', 'label': 'fresh herbs'},
 {'group': 2, 'id': 'produce', 'label': 'produce'},
 {'group': 3, 'id': 'prepared soups salads', 'label': 'prepared soups salads'},
 {'group': 3, 'id': 'deli', 'label': 'deli'},
 {'group': 4,
  'id': 'preserved dips spreads',
  'label': 'preserved dips spreads'},
 {'group': 4, 'id': 'pantry', 'label': 'pantry'},
 {'group': 5,
  'id': 'specialty wines champagnes',
  'label': 'specialty wines champagnes'},
 {'group': 5, 'id': 'alcohol', 'label': 'alcohol'},
 {'group': 6, 'id': 'eggs', 'label': 'eggs'},
 {'group': 6, 'id': 'dairy eggs', 'label': 'dairy eggs'},
 {'group': 7, 'id': 'frozen desser