# Análisis de la red social del foro de discusión de piratería digital

#### Importación de librerías a emplear
`pip install neo4j pandas`

In [1]:
import os

from IPython.display import IFrame
from neo4j import GraphDatabase, basic_auth
from pandas import DataFrame

#### Creación del driver de la base de datos

In [2]:
# Connection settings are read from the environment so that no secret is
# stored in the notebook. The URI and user fall back to the local defaults;
# the password has no fallback on purpose.
# NOTE(review): the password that used to be committed here should be rotated.
NEO4J_URI = os.environ.get('NEO4J_URI', 'bolt://localhost:7687')
NEO4J_USER = os.environ.get('NEO4J_USER', 'neo4j')
NEO4J_PASSWORD = os.environ.get('NEO4J_PASSWORD')
if not NEO4J_PASSWORD:
    raise RuntimeError('Set the NEO4J_PASSWORD environment variable before running this notebook.')

# Shared driver used by every cell below; the connection is unencrypted
# (encrypted=False), as in the original configuration.
driver = GraphDatabase.driver(uri=NEO4J_URI, auth=basic_auth(NEO4J_USER, NEO4J_PASSWORD), encrypted=False)

#### Relación derivada de LIKES

In [3]:
# Derive member-to-member interactions from likes: for every path
# (m1)-[:LIKES]->(:Post)-[:HAS_CREATOR]->(m2), create an INTERACTS_WITH
# relationship from m1 to m2 with weight 1, or add 1 to its weight if it
# already exists.
# NOTE: this cell is not idempotent. Running it again increments the weights
# again, because of the ON MATCH clause.
with driver.session() as session:
    session.run(
        'MATCH (m1:Member)-[:LIKES]->(:Post)-[:HAS_CREATOR]->(m2:Member) '
        'MERGE (m1)-[r:INTERACTS_WITH]->(m2) ON CREATE SET r.weight = 1 ON MATCH SET r.weight = r.weight + 1'
    ).consume()  # consume explicitly so a server-side error is raised in this cell

#### Relación derivada de REPLY_OF

In [4]:
# Derive member-to-member interactions from replies: for every path
# (m1)<-[:HAS_CREATOR]-(:Post)-[:REPLY_OF]->(:Post)-[:HAS_CREATOR]->(m2),
# create an INTERACTS_WITH relationship from m1 to m2 with weight 1, or add 1
# to its weight if it already exists.
# NOTE: this cell is not idempotent. Running it again increments the weights
# again, because of the ON MATCH clause.
with driver.session() as session:
    session.run(
        'MATCH (m1:Member)<-[:HAS_CREATOR]-(:Post)-[:REPLY_OF]->(:Post)-[:HAS_CREATOR]->(m2:Member) '
        'MERGE (m1)-[r:INTERACTS_WITH]->(m2) ON CREATE SET r.weight = 1 ON MATCH SET r.weight = r.weight + 1'
    ).consume()  # consume explicitly so a server-side error is raised in this cell

#### Carácter vacío para no visualizar nombres en los nodos

In [5]:
# Set an `empty` property on every Member node.
# NOTE(review): the quoted value appears to hold a single invisible character
# rather than an empty string (the section heading says it is used so that
# node names are not shown). Do not retype the literal; copy it verbatim.
with driver.session() as session:
    session.run('MATCH (m:Member) SET m.empty = "‎"')

### Algoritmos de centralidad

#### PageRank

In [6]:
# Run gds.pageRank.write over Member nodes and INTERACTS_WITH relationships
# (natural orientation), using the relationship property "weight", a damping
# factor of 0.85 and at most 100 iterations. The score is written to the
# "pagerank" node property; the procedure's returned record(s) are kept in
# the `pagerank` frame.
with driver.session() as db_session:
    pagerank = DataFrame.from_records(
        db_session.run(
            'CALL gds.pageRank.write({nodeProjection:"Member",'
            'relationshipProjection:{relType:{type:"INTERACTS_WITH",orientation:"NATURAL",properties:{}}},'
            'relationshipWeightProperty:"weight",relationshipProperties:["weight"],dampingFactor:0.85,'
            'maxIterations:100,writeProperty:"pagerank"})'
        ).data()
    )

In [7]:
# Display the frame built from the gds.pageRank.write call in the previous cell.
pagerank

Unnamed: 0,writeMillis,nodePropertiesWritten,ranIterations,didConverge,centralityDistribution,postProcessingMillis,createMillis,computeMillis,configuration
0,671,84970,100,False,"{'p99': 2.9368276596069336, 'min': 0.149999618...",412,584,1297,"{'maxIterations': 100, 'writeConcurrency': 4, ..."


##### Resultados

In [8]:
# Fetch the ten members with the highest stored `pagerank` value
# (member_id and score, in descending order) into the `pagerank` frame.
with driver.session() as db_session:
    pagerank = DataFrame.from_records(
        db_session.run(
            'MATCH (m:Member) RETURN m.member_id, m.pagerank ORDER BY m.pagerank DESC LIMIT 10'
        ).data()
    )

In [9]:
# Display the top-10 PageRank frame built in the previous cell.
pagerank

Unnamed: 0,m.member_id,m.pagerank
0,10154,2967.233316
1,86944,2604.744064
2,276885,1834.218798
3,5,1669.732633
4,11468,865.467899
5,147341,864.941414
6,119603,810.96463
7,23985,512.704352
8,38157,447.449322
9,17465,404.828443


In [10]:
# Embed the local file ./pagerank.html in a 550x550 inline frame
# (presumably a pre-rendered graph view of the PageRank result; confirm).
IFrame(src='./pagerank.html', width=550, height=550)

#### Betweenness Centrality

In [11]:
# Run gds.betweenness.write over Member nodes and INTERACTS_WITH
# relationships (natural orientation). The score is written to the
# "betweenness" node property; the procedure's returned record(s) are kept
# in the `betweenness` frame.
with driver.session() as db_session:
    betweenness = DataFrame.from_records(
        db_session.run(
            'CALL gds.betweenness.write({nodeProjection:"Member",'
            'relationshipProjection:{relType:{type:"INTERACTS_WITH",orientation:"NATURAL",properties:{}}},'
            'writeProperty:"betweenness"})'
        ).data()
    )

In [12]:
# Display the frame built from the gds.betweenness.write call in the previous cell.
betweenness

Unnamed: 0,nodePropertiesWritten,writeMillis,centralityDistribution,postProcessingMillis,createMillis,computeMillis,configuration
0,84970,583,"{'p99': 72887.99999988079, 'min': 0.0, 'max': ...",384,141,255656,"{'writeConcurrency': 4, 'writeProperty': 'betw..."


##### Resultados

In [13]:
# Fetch the ten members with the highest stored `betweenness` value
# (member_id and score, in descending order) into the `betweenness` frame.
with driver.session() as db_session:
    betweenness = DataFrame.from_records(
        db_session.run(
            'MATCH (m:Member) RETURN m.member_id, m.betweenness ORDER BY m.betweenness DESC LIMIT 10'
        ).data()
    )

In [14]:
# Display the top-10 betweenness frame built in the previous cell.
betweenness

Unnamed: 0,m.member_id,m.betweenness
0,10154,424469600.0
1,86944,291622900.0
2,276885,270743600.0
3,5,160270600.0
4,18305,123342200.0
5,147341,91579830.0
6,38157,74742730.0
7,11468,73947770.0
8,119603,72542860.0
9,23985,69141630.0


In [15]:
# Embed the local file ./betweenness.html in a 550x550 inline frame
# (presumably a pre-rendered graph view of the betweenness result; confirm).
IFrame(src='./betweenness.html', width=550, height=550)

### Algoritmos de comunidades

#### Label Propagation

In [16]:
# Run gds.labelPropagation.write over Member nodes and INTERACTS_WITH
# relationships (natural orientation), using the relationship property
# "weight" and at most 10 iterations. The community id is written to the
# "label_propagation" node property; the procedure's returned record(s) are
# kept in the `label_propagation` frame.
with driver.session() as db_session:
    label_propagation = DataFrame.from_records(
        db_session.run(
            'CALL gds.labelPropagation.write({nodeProjection:"Member",'
            'relationshipProjection:{relType:{type:"INTERACTS_WITH",orientation:"NATURAL",properties:{}}},'
            'relationshipWeightProperty:"weight",relationshipProperties:["weight"],'
            'maxIterations:10,writeProperty:"label_propagation"})'
        ).data()
    )

In [17]:
# Display the frame built from the gds.labelPropagation.write call in the previous cell.
label_propagation

Unnamed: 0,writeMillis,nodePropertiesWritten,ranIterations,didConverge,communityCount,communityDistribution,postProcessingMillis,createMillis,computeMillis,configuration
0,290,84970,5,True,34113,"{'p99': 2, 'min': 1, 'max': 50328, 'mean': 2.4...",47,143,1047,"{'maxIterations': 10, 'writeConcurrency': 4, '..."


##### Resultados

In [18]:
# Count members per stored `label_propagation` value and keep the ten
# largest groups (value and member count, in descending order of count).
with driver.session() as db_session:
    label_propagation = DataFrame.from_records(
        db_session.run(
            'MATCH (m:Member) WITH m.label_propagation AS label_propagation, COUNT(m) as count '
            'RETURN label_propagation, count ORDER BY count DESC LIMIT 10'
        ).data()
    )

In [19]:
# Display the ten largest label-propagation groups computed in the previous cell.
label_propagation

Unnamed: 0,label_propagation,count
0,1331,50328
1,16853,38
2,155628,21
3,280719,8
4,131470,6
5,7455,5
6,79967,4
7,41829,4
8,28195,4
9,39018,4


In [20]:
# Embed the local file ./label_propagation.html in a 550x550 inline frame
# (presumably a pre-rendered graph view of the label-propagation result; confirm).
IFrame(src='./label_propagation.html', width=550, height=550)

#### Louvain Modularity

In [21]:
# Run gds.louvain.write over Member nodes and INTERACTS_WITH relationships
# (natural orientation), using the relationship property "weight" and at
# most 10 iterations. The community id is written to the "louvain" node
# property; the procedure's returned record(s) are kept in the `louvain` frame.
with driver.session() as db_session:
    louvain = DataFrame.from_records(
        db_session.run(
            'CALL gds.louvain.write({nodeProjection:"Member",'
            'relationshipProjection:{relType:{type:"INTERACTS_WITH",orientation:"NATURAL",properties:{}}},'
            'relationshipWeightProperty:"weight",relationshipProperties:["weight"],'
            'maxIterations:10,writeProperty:"louvain"})'
        ).data()
    )

In [22]:
# Display the frame built from the gds.louvain.write call in the previous cell.
louvain

Unnamed: 0,writeMillis,nodePropertiesWritten,modularity,modularities,ranLevels,communityCount,communityDistribution,postProcessingMillis,createMillis,computeMillis,configuration
0,442,84970,0.183013,[0.18301344141900502],1,34221,"{'p99': 2, 'min': 1, 'max': 13736, 'mean': 2.4...",20,98,2154,"{'maxIterations': 10, 'writeConcurrency': 4, '..."


##### Resultados

In [23]:
# Count members per stored `louvain` value and keep the ten largest groups
# (value and member count, in descending order of count).
with driver.session() as db_session:
    louvain = DataFrame.from_records(
        db_session.run(
            'MATCH (m:Member) WITH m.louvain AS louvain, COUNT(m) as count '
            'RETURN louvain, count ORDER BY count DESC LIMIT 10'
        ).data()
    )

In [24]:
# Display the ten largest Louvain groups computed in the previous cell.
louvain

Unnamed: 0,louvain,count
0,37234,13736
1,37767,8075
2,37237,7281
3,37754,4394
4,37318,4066
5,37231,2659
6,37809,1257
7,38973,1178
8,38617,1145
9,37922,799


In [25]:
# Embed the local file ./louvain.html in a 550x550 inline frame
# (presumably a pre-rendered graph view of the Louvain result; confirm).
IFrame(src='./louvain.html', width=550, height=550)