Reference: Neo4j Graph Data Science documentation for the Louvain algorithm — https://neo4j.com/docs/graph-data-science/current/algorithms/louvain/

In [1]:
import os

import pandas as pd
from py2neo import Graph
# Connection settings come from the environment so that no credentials are
# stored in the notebook. NEO4J_URI and NEO4J_USER fall back to the local
# defaults; NEO4J_PASSWORD has no default and must be set before running.
NEO4J_URI = os.environ.get("NEO4J_URI", "bolt://localhost:7687")
NEO4J_USER = os.environ.get("NEO4J_USER", "neo4j")
try:
    NEO4J_PASSWORD = os.environ["NEO4J_PASSWORD"]
except KeyError:
    # Fail early with a clear message instead of an opaque authentication error.
    raise RuntimeError(
        "Set the NEO4J_PASSWORD environment variable before connecting to Neo4j."
    ) from None

# Shared py2neo handle used by every query cell below.
graph = Graph(NEO4J_URI, user=NEO4J_USER, password=NEO4J_PASSWORD)

In [2]:
# Ask the database for its schema visualization and display the returned
# records (node labels and relationship types).
schema_result = graph.run("CALL db.schema.visualization()")
schema_result.data()

[{'nodes': [(_-11:Merchant {constraints: ['CONSTRAINT ON ( merchant:Merchant ) ASSERT (merchant.franchisename) IS UNIQUE'], indexes: [], name: 'Merchant'}),
   (_-10:Client {constraints: ['CONSTRAINT ON ( client:Client ) ASSERT (client.dedupestatic) IS UNIQUE'], indexes: [], name: 'Client'}),
   (_-12:Segment {constraints: ['CONSTRAINT ON ( segment:Segment ) ASSERT (segment.seg_l3_num) IS UNIQUE'], indexes: [], name: 'Segment'})],
  'relationships': [(Client)-[:TRANSACTED_AT {}]->(Merchant),
   (Merchant)-[:MERCHANT_VALUE_LINK {}]->(Merchant),
   (Merchant)-[:MERCHANT_LINK {}]->(Merchant),
   (Merchant)-[:MERCHANT_FEET_LINK {}]->(Merchant)]}]

In [3]:
# Cypher for a streamed Louvain run over an anonymous projection of Merchant
# nodes linked by MERCHANT_FEET_LINK relationships (orientation 'undirected').
# Each row carries the merchant's franchisename and companyname, its final
# communityId and the intermediate community ids, ordered by franchisename.
louvain_query = """
CALL gds.louvain.stream({
    nodeProjection: 'Merchant',
    relationshipProjection: {
        TYPE: {
            type: 'MERCHANT_FEET_LINK',
            orientation: 'undirected',
            aggregation: 'NONE'
        }
    },
    includeIntermediateCommunities: true
}) YIELD nodeId, communityId, intermediateCommunityIds
RETURN gds.util.asNode(nodeId).franchisename AS franchisename, 
gds.util.asNode(nodeId).companyname AS companyname,
communityId, intermediateCommunityIds
ORDER BY franchisename ASC
"""

# Execute the query, then materialise the streamed rows as a DataFrame.
louvain_result = graph.run(louvain_query)
df = louvain_result.to_data_frame()

In [4]:
# Keep only rows whose companyname is DISCHEM or CLICKS; copy so that later
# edits do not touch the unfiltered frame.
is_target_company = df["companyname"].isin(['DISCHEM', 'CLICKS'])
df = df.loc[is_target_company].copy()

In [10]:
# Number of distinct communityId values among rows whose companyname is DISCHEM.
is_dischem = df["companyname"] == "DISCHEM"
len(set(df.loc[is_dischem, "communityId"]))

211

In [11]:
# Number of distinct communityId values among rows whose companyname is CLICKS.
is_clicks = df["companyname"] == "CLICKS"
len(set(df.loc[is_clicks, "communityId"]))

180

In [12]:
# Show every row assigned to community 12410.
in_community_12410 = df["communityId"] == 12410
df.loc[in_community_12410]

Unnamed: 0,franchisename,companyname,communityId,intermediateCommunityIds
20072,CLICKS ACORNHOEK MALL,CLICKS,12410,"[12410, 12410, 12410]"
20074,CLICKS ADDERLEY STREET,CLICKS,12410,"[12410, 12410, 12410]"
20076,CLICKS ALBERTON,CLICKS,12410,"[12410, 12410, 12410]"
20077,CLICKS AMAJUBA MALL,CLICKS,12410,"[12410, 12410, 12410]"
20078,CLICKS ANCHOR BAY,CLICKS,12410,"[12410, 12410, 12410]"
...,...,...,...,...
27317,DISCHEM WOODLANDS,DISCHEM,12410,"[12410, 12410, 12410]"
27318,DISCHEM WOODLANDS BOULEVA,DISCHEM,12410,"[12410, 12410, 12410]"
27319,DISCHEM WOODMEAD,DISCHEM,12410,"[12410, 12410, 12410]"
27320,DISCHEM WORCESTER,DISCHEM,12410,"[12410, 12410, 12410]"


In [None]:
# Community id of every DISCHEM row, one entry per row (duplicates kept).
dischem_rows = df["companyname"] == "DISCHEM"
list(df.loc[dischem_rows, "communityId"])

In [7]:
import collections

In [8]:
# Tally how many rows fall into each community and print the
# (communityId, count) pairs from most to least common.
community_counts = collections.Counter(list(df["communityId"]))
print(community_counts.most_common())

[(74830, 1336), (12410, 619), (32339, 601), (24494, 332), (84771, 116), (41793, 22), (73101, 2), (70976, 2), (30632, 2), (18675, 1), (71573, 1), (89815, 1), (70380, 1), (89670, 1), (68191, 1), (45143, 1), (68316, 1), (72363, 1), (42341, 1), (12879, 1), (70039, 1), (90325, 1), (89925, 1), (72332, 1), (68105, 1), (73999, 1), (48866, 1), (71088, 1), (89571, 1), (71381, 1), (67802, 1), (65077, 1), (70391, 1), (73743, 1), (72825, 1), (62988, 1), (73226, 1), (50031, 1), (90490, 1), (69659, 1), (43126, 1), (88046, 1), (65589, 1), (73030, 1), (57526, 1), (73068, 1), (71421, 1), (53693, 1), (67651, 1), (9429, 1), (51942, 1), (40841, 1), (37733, 1), (51856, 1), (68857, 1), (74170, 1), (71422, 1), (70280, 1), (68190, 1), (89563, 1), (73788, 1), (58967, 1), (72511, 1), (91181, 1), (41055, 1), (47497, 1), (5842, 1), (38511, 1), (49608, 1), (2517, 1), (16825, 1), (4530, 1), (30895, 1), (17662, 1), (32707, 1), (27865, 1), (5182, 1), (11076, 1), (12233, 1), (10765, 1), (7424, 1), (6011, 1), (41424, 1)

In [None]:
# Show every row assigned to community 3129.
in_community_3129 = df["communityId"] == 3129
df.loc[in_community_3129]

In [None]:
# Look up the row(s) whose franchisename is exactly "DIS-CHEM DAINFERN".
is_dainfern = df["franchisename"] == "DIS-CHEM DAINFERN"
df.loc[is_dainfern]