https://neo4j.com/docs/graph-data-science/current/algorithms/louvain/

https://towardsdatascience.com/louvain-algorithm-93fde589f58c

In [1]:
import os
from getpass import getpass

import pandas as pd
from py2neo import Graph
# Connection settings are read from the environment so that credentials are not
# stored in the notebook. NEO4J_URI and NEO4J_USER fall back to the local
# defaults; the password is prompted for interactively when NEO4J_PASSWORD is unset.
NEO4J_URI = os.environ.get("NEO4J_URI", "bolt://localhost:7687")
NEO4J_USER = os.environ.get("NEO4J_USER", "neo4j")
NEO4J_PASSWORD = os.environ.get("NEO4J_PASSWORD") or getpass("Neo4j password: ")

# Shared py2neo handle used by every query cell below.
graph = Graph(NEO4J_URI, user=NEO4J_USER, password=NEO4J_PASSWORD)

In [2]:
# Ask the database for its schema overview and show the returned records
# (node labels with their constraints, plus the relationship types between them).
schema_cursor = graph.run("CALL db.schema.visualization()")
schema_cursor.data()

[{'nodes': [(_-5:Merchant {constraints: ['CONSTRAINT ON ( merchant:Merchant ) ASSERT (merchant.franchisename) IS UNIQUE'], indexes: [], name: 'Merchant'}),
   (_-4:Client {constraints: ['CONSTRAINT ON ( client:Client ) ASSERT (client.dedupestatic) IS UNIQUE'], indexes: [], name: 'Client'}),
   (_-6:Segment {constraints: ['CONSTRAINT ON ( segment:Segment ) ASSERT (segment.seg_l3_num) IS UNIQUE'], indexes: [], name: 'Segment'})],
  'relationships': [(Client)-[:TRANSACTED_AT {}]->(Merchant),
   (Merchant)-[:MERCHANT_VALUE_LINK {}]->(Merchant),
   (Merchant)-[:MERCHANT_LINK {}]->(Merchant),
   (Merchant)-[:MERCHANT_FEET_LINK {}]->(Merchant)]}]

Louvain is an unsupervised community-detection algorithm: it does not need the number of communities or their sizes as input. It works in two phases, Modularity Optimization and Community Aggregation.

In [3]:
# Stream Louvain communities for Merchant nodes connected by MERCHANT_FEET_LINK
# relationships (projected as undirected, without aggregating parallel edges).
# includeIntermediateCommunities makes the procedure also yield the community
# of each node at every intermediate level, not only the final one.
# The node is resolved once through WITH instead of calling gds.util.asNode
# separately for every returned property.
louvain_query = """
CALL gds.louvain.stream({
    nodeProjection: 'Merchant',
    relationshipProjection: {
        TYPE: {
            type: 'MERCHANT_FEET_LINK',
            orientation: 'undirected',
            aggregation: 'NONE'
        }
    },
    includeIntermediateCommunities: true
}) YIELD nodeId, communityId, intermediateCommunityIds
WITH gds.util.asNode(nodeId) AS merchant, communityId, intermediateCommunityIds
RETURN merchant.franchisename AS franchisename,
       merchant.companyname AS companyname,
       communityId, intermediateCommunityIds
ORDER BY franchisename ASC
"""

# One row per merchant: franchisename, companyname, communityId, intermediateCommunityIds.
df = graph.run(louvain_query).to_data_frame()

In [4]:
# Show the Louvain result frame; the rendered output is truncated to the first and last rows.
df

Unnamed: 0,franchisename,companyname,communityId,intermediateCommunityIds
0,"""DEBONAIRS BELLVILLE""",DEBONAIRS,50587,"[50587, 50587, 50587]"
1,*ACKERMANS,ACKERMANS,89041,"[89041, 89041, 89041]"
2,*ACKERMANS SWAKOP WATERF,ACKERMANS,57479,"[57479, 57479, 57479]"
3,*AHK MOTOR SPARES CC,SPAR,36071,"[36071, 36071, 36071]"
4,*AHK MOTOR SPARES CC JO,SPAR,34087,"[34087, 34087, 34087]"
...,...,...,...,...
91937,www.game.com.tw,GAME,27347,"[27347, 27347, 27347]"
91938,www.sparkhaus-shop.com,SPAR,7429,"[7429, 7429, 7429]"
91939,xmglobal limit,XMGLOBAL,72124,"[72124, 72124, 72124]"
91940,xmglobal limited,XMGLOBAL,17916,"[17916, 17916, 17916]"


In [5]:
# Keep only the merchants of the two companies under comparison. `.copy()`
# detaches the subset from the original frame.
# NOTE: this rebinds `df`, so the full Louvain result is only available again
# after re-running the query cell.
df = df.loc[df["companyname"].isin(["DISCHEM", "CLICKS"])].copy()

In [7]:
# Show the current contents of `df` (the rendered output is truncated to the first and last rows).
df

Unnamed: 0,franchisename,companyname,communityId,intermediateCommunityIds
20051,CLICKS,CLICKS,42607,"[42607, 42607, 42607]"
20052,CLICKS A/,CLICKS,86853,"[86853, 86853, 86853]"
20053,CLICKS DU,CLICKS,89815,"[89815, 89815, 89815]"
20054,CLICKS DUNES,CLICKS,28502,"[28502, 28502, 28502]"
20055,CLICKS KE,CLICKS,85660,"[85660, 85660, 85660]"
...,...,...,...,...
91847,payD Zapper*Dis-Chem L,DISCHEM,45453,"[45453, 45453, 45453]"
91848,payD Zapper*Dis-Chem M,DISCHEM,50169,"[50169, 50169, 50169]"
91849,payD Zapper*Dis-Chem O,DISCHEM,45793,"[45793, 45793, 45793]"
91850,payD Zapper*Dis-Chem P,DISCHEM,43897,"[43897, 43897, 43897]"


In [6]:
# Number of distinct final communities that contain at least one DISCHEM merchant.
df.loc[df["companyname"] == "DISCHEM", "communityId"].nunique()

211

In [8]:
# Number of distinct final communities that contain at least one CLICKS merchant.
df.loc[df["companyname"] == "CLICKS", "communityId"].nunique()

180

In [13]:
# List the merchants whose final community is 40007.
# NOTE(review): the id is hard-coded from an earlier run's output; presumably it
# must be updated if the Louvain query is re-run and ids change — confirm.
df.loc[df["communityId"] == 40007]

Unnamed: 0,franchisename,companyname,communityId,intermediateCommunityIds
20072,CLICKS ACORNHOEK MALL,CLICKS,40007,"[40007, 40007, 40007]"
20074,CLICKS ADDERLEY STREET,CLICKS,40007,"[40007, 40007, 40007]"
20076,CLICKS ALBERTON,CLICKS,40007,"[40007, 40007, 40007]"
20077,CLICKS AMAJUBA MALL,CLICKS,40007,"[40007, 40007, 40007]"
20078,CLICKS ANCHOR BAY,CLICKS,40007,"[40007, 40007, 40007]"
...,...,...,...,...
27317,DISCHEM WOODLANDS,DISCHEM,40007,"[40007, 40007, 40007]"
27318,DISCHEM WOODLANDS BOULEVA,DISCHEM,40007,"[40007, 40007, 40007]"
27319,DISCHEM WOODMEAD,DISCHEM,40007,"[40007, 40007, 40007]"
27320,DISCHEM WORCESTER,DISCHEM,40007,"[40007, 40007, 40007]"


In [None]:
# Final community id of every DISCHEM merchant, as a plain Python list (duplicates kept).
df.loc[df["companyname"] == "DISCHEM", "communityId"].tolist()

In [10]:
import collections

In [11]:
# Tally how many rows of `df` fall into each final community and print the
# (communityId, count) pairs ordered from the largest community to the smallest.
community_sizes = collections.Counter(df["communityId"].tolist())
print(community_sizes.most_common())

[(16764, 1333), (40007, 619), (22975, 604), (16010, 332), (35760, 116), (28502, 22), (88381, 2), (89791, 2), (21878, 2), (42607, 1), (86853, 1), (89815, 1), (85660, 1), (89670, 1), (83471, 1), (45143, 1), (83596, 1), (87643, 1), (33587, 1), (36811, 1), (85319, 1), (90325, 1), (89925, 1), (87612, 1), (83385, 1), (89279, 1), (48866, 1), (86368, 1), (89571, 1), (86661, 1), (83082, 1), (65077, 1), (85671, 1), (89023, 1), (88105, 1), (62988, 1), (88506, 1), (50031, 1), (90490, 1), (84939, 1), (34372, 1), (81270, 1), (65589, 1), (88310, 1), (57526, 1), (88348, 1), (86701, 1), (53693, 1), (82931, 1), (9429, 1), (51942, 1), (32087, 1), (28979, 1), (51856, 1), (84137, 1), (89450, 1), (86702, 1), (85560, 1), (83470, 1), (89563, 1), (89068, 1), (58967, 1), (87791, 1), (91181, 1), (32301, 1), (47497, 1), (5842, 1), (29757, 1), (49608, 1), (2517, 1), (40757, 1), (4530, 1), (22141, 1), (41594, 1), (23953, 1), (19111, 1), (5182, 1), (11076, 1), (36165, 1), (10765, 1), (7424, 1), (6011, 1), (32670, 1)

In [16]:
# List the merchants whose final community is 88381 (one of the two-member communities in the tally).
df.loc[df["communityId"] == 88381]

Unnamed: 0,franchisename,companyname,communityId,intermediateCommunityIds
20066,CLICKS SW,CLICKS,88381,"[88381, 88381, 88381]"
27022,DIS-CHEM SWAKO,DISCHEM,88381,"[88381, 88381, 88381]"


In [14]:
# Look up a single merchant by its exact franchise name to see its final and intermediate communities.
df.loc[df["franchisename"] == "DIS-CHEM DAINFERN"]

Unnamed: 0,franchisename,companyname,communityId,intermediateCommunityIds
26590,DIS-CHEM DAINFERN,DISCHEM,16764,"[13327, 13327, 16764]"


In [15]:
# List the merchants whose final community is 16764 or 13327.
# NOTE(review): in the output shown, 13327 only appears inside
# intermediateCommunityIds. This filter tests the final communityId column
# only, so 13327 may never match — confirm whether an intermediate-level
# filter was intended.
df.loc[df["communityId"].isin([16764, 13327])]

Unnamed: 0,franchisename,companyname,communityId,intermediateCommunityIds
20068,CLICKS TH,CLICKS,16764,"[60513, 13327, 16764]"
20134,CLICKS CARNIVA,CLICKS,16764,"[18274, 13327, 16764]"
20136,CLICKS CARNIVALMALLNCC,CLICKS,16764,"[18274, 13327, 16764]"
20137,CLICKS CARNIVALMALLNCCVAL,CLICKS,16764,"[18274, 13327, 16764]"
20161,CLICKS CROWT,CLICKS,16764,"[13327, 13327, 16764]"
...,...,...,...,...
91482,ZAPDisChemNicolw,DISCHEM,16764,"[49576, 13327, 16764]"
91488,ZAPDischemAtho,DISCHEM,16764,"[13327, 13327, 16764]"
91489,ZAPDischemAtholO,DISCHEM,16764,"[13327, 13327, 16764]"
91803,http://www.clicks.co.z,CLICKS,16764,"[13327, 13327, 16764]"
