### Real World application 5 (Chapitre 4)
create database twitch 

use twitch 

In [14]:
from py2neo import Graph
from py2neo import Node
from py2neo import Relationship
import pandas as pd

# Connecter à la base de données Neo4j
graph = Graph("bolt://localhost:7687", auth=("neo4j", "password_tuts"), name="twitch")

# Charger l'ensemble de données Twitch
target_df = pd.read_csv('FR_target.csv')
edges_df = pd.read_csv('FR_edges.csv')

# Convertir les colonnes 'from' et 'to' en type int
edges_df['from'] = edges_df['from'].astype(int)
edges_df['to'] = edges_df['to'].astype(int)

# Fonction pour créer ou obtenir un nœud streamer
def get_streamer_node(streamer_id):
    query = f"MATCH (s:Streamer) WHERE s.id = '{streamer_id}' RETURN s"
    result = graph.run(query).data()
    if result:
        return result[0]['s']
    else:
        streamer_node = Node("Streamer", id=streamer_id)
        graph.create(streamer_node)
        return streamer_node

# Insérer les nœuds streamer
for index, row in target_df.iterrows():
    streamer_id = row['id']
    get_streamer_node(streamer_id)

# Insérer les arêtes entre les streamers
for index, row in edges_df.iterrows():
    # Convertir les valeurs numpy.int64 en int natifs
    source_id = int(row['from'])
    target_id = int(row['to'])
    source_node = get_streamer_node(source_id)
    target_node = get_streamer_node(target_id)
    graph.create(Relationship(source_node, "COLLABORATED_WITH", target_node))

# Requête: Donner les IDs des 10 meilleurs streamers ayant au moins une collaboration
query = """
MATCH (s1:Streamer)-[:COLLABORATED_WITH]->(s2:Streamer)
WITH s1, count(DISTINCT s2) AS collaborations
ORDER BY collaborations DESC
LIMIT 10
RETURN s1.id AS streamer_id
"""
result = graph.run(query).to_table()
print("Top 10 streamers with at least one collaboration:")
print(result)

Top 10 streamers with at least one collaboration:
 streamer_id 
-------------
           0 
           0 
           0 
           0 
           0 
           0 
           0 
           0 
        6251 
           0 



In [5]:
print(target_df.head())

          id  days  mature  views  partner  new_id
0  150417538   577    True    775    False    4867
1  125723704   861    True   2585    False    3692
2  155971814   523   False   1566    False    3816
3   35832890  2234   False   9713    False     416
4   46787750  1896    True  44529    False    4502


In [6]:
print(edges_df.head())

   from    to
0     0  6420
1     0  2941
2     0  3051
3     0  5511
4     0  1020


In [17]:
# 2. Density of node degrees
density_query = """
MATCH (s:Streamer)
OPTIONAL MATCH (s)-[:COLLABORATED_WITH]->(other)
WITH s, count(DISTINCT other) AS degree
RETURN degree, count(*) AS density
ORDER BY degree
"""
density_result = graph.run(density_query).to_table()
print("\nDensity of node degrees:")
print(density_result)


Density of node degrees:
 degree | density 
--------|---------
      0 |  119217 
      1 |  112666 



In [None]:
# 3. Number of streamers with the "mature" tag
mature_query = """
MATCH (s:Streamer)
WHERE s.tags CONTAINS 'mature'
RETURN count(s) AS mature_streamers
"""
mature_result = graph.run(mature_query).to_table()
print("\nNumber of streamers with the 'mature' tag:")
print(mature_result)

In [None]:
# Connect to the Neo4j database
graph = Graph("bolt://localhost:7687", auth=("neo4j", "password_tuts"), name="twitch")

# 4. Number of collaborations involving at least one streamer with the "mature" tag
collaborations_mature_query = """
MATCH (s1:Streamer)-[r:COLLABORATED_WITH]->(s2:Streamer)
WHERE s1.tags CONTAINS 'mature' OR s2.tags CONTAINS 'mature'
RETURN count(r) AS collaborations_with_mature_tag
"""
collaborations_mature_result = graph.run(collaborations_mature_query).to_table()
print("\nNumber of collaborations involving at least one streamer with the 'mature' tag:")
print(collaborations_mature_result)

In [None]:
# Connect to the Neo4j database
graph = Graph("bolt://localhost:7687", auth=("neo4j", "password_tuts"), name="twitch")

# 5. Average number of collaborations for partners and non-partners
partners_collaborations_query = """
MATCH (s1:Streamer)-[:COLLABORATED_WITH]->(s2:Streamer)
WITH s1, s2, count(*) AS collaborations
WITH s1, s2, collaborations, (s1)-[:PARTNER]->(s2) AS is_partner
RETURN is_partner, avg(collaborations) AS avg_collaborations
"""
partners_collaborations_result = graph.run(partners_collaborations_query).to_table()
print("\nAverage number of collaborations for partners and non-partners:")
print(partners_collaborations_result) 