### Connection

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as stats
from graphdatascience import GraphDataScience

URI = "neo4j://localhost:7687"
NAME_DB = "movies"

gds = GraphDataScience(URI, aura_ds=False, database=NAME_DB)

  from .autonotebook import tqdm as notebook_tqdm


# Graph Analytics

- similarità tra utente e tra film
- provare ad aggiungere proprietà di grado o degree
- trovare community
- confrontare community
- recommendation system in base a quale community ti trovi

In [147]:
result = gds.run_cypher(
    """
    CALL gds.graph.project(
    'movies-analyzed',
    {
        Movie:{properties:['minRating','maxRating','avgRating','stdRating','favoriteYear','favoriteMonth','year']},
        User:{properties: ['binaryGender','age','occupation','favoriteGenreId', 'ratings','minRating','maxRating','avgRating','stdRating','favoriteYear','favoriteMonth']},
        Genre:{properties: ['ratings']}
    },
    {
        RATED: {properties: 'rating', orientation: 'UNDIRECTED'},
        IN_GENRE:{},
        LIKES:{}
    }
    )
    YIELD
    graphName AS graph, nodeProjection, nodeCount AS nodes, relationshipProjection, relationshipCount AS rels
"""
)
result

Unnamed: 0,graph,nodeProjection,nodes,relationshipProjection,rels
0,movies-analyzed,"{'User': {'label': 'User', 'properties': {'fav...",9941,"{'RATED': {'aggregation': 'DEFAULT', 'orientat...",2024939


In [190]:
gds.run_cypher(
    """
    CALL gds.graph.list('movies-analyzed')
    YIELD density,degreeDistribution;
"""
)

Unnamed: 0,density,degreeDistribution
0,0.021759,"{'min': 0, 'max': 3443, 'p90': 521, 'p999': 25..."


## Similarity Analysis

In [285]:
result = gds.run_cypher(
    """
    CALL gds.nodeSimilarity.write('movies-analyzed',{nodeLabels:['User','Genre'],relationshipTypes:['LIKES'], similarityCutoff: 0.7,writeRelationshipType: 'SIMILAR',
    writeProperty: 'score'}) 
    """
)
print(f"Similarity relations created: {result.loc[0]['relationshipsWritten']}\n")
print(result.loc[0]["similarityDistribution"])

Similarity relations created: 58038

{'min': 1.0, 'p5': 1.0, 'max': 1.0, 'p99': 1.0, 'p1': 1.0, 'p10': 1.0, 'p90': 1.0, 'p50': 1.0, 'p25': 1.0, 'p75': 1.0, 'p95': 1.0, 'mean': 1.0, 'p100': 1.0, 'stdDev': 0.0}


In [284]:
result = gds.run_cypher(
    """
    CALL gds.nodeSimilarity.write('movies-analyzed',{nodeLabels:['Movie','Genre'],relationshipTypes:['IN_GENRE'], similarityCutoff: 0.7,writeRelationshipType: 'SIMILAR',
    writeProperty: 'score'}) 
    """
)
print(f"Similarity relations created: {result.loc[0]['relationshipsWritten']}\n")
print(result.loc[0]["similarityDistribution"])

Similarity relations created: 36208

{'min': 0.75, 'p5': 1.0000038146972656, 'max': 1.0000038146972656, 'p99': 1.0000038146972656, 'p1': 0.75, 'p10': 1.0000038146972656, 'p90': 1.0000038146972656, 'p50': 1.0000038146972656, 'p25': 1.0000038146972656, 'p75': 1.0000038146972656, 'p95': 1.0000038146972656, 'mean': 0.9906410962356386, 'p100': 1.0000038146972656, 'stdDev': 0.04720379496811561}


### KNN

In [286]:
result = gds.run_cypher(
    """
    CALL gds.knn.write('movies-analyzed' ,{nodeLabels:['User'],writeRelationshipType: 'SIMILAR_KNN',
    writeProperty: 'score',nodeProperties: ['binaryGender','age','occupation', 'ratings','minRating','maxRating','avgRating','stdRating','favoriteGenreId','favoriteYear','favoriteMonth'],topK: 3, concurrency: 1, randomSeed: 42,similarityCutoff: 0.7})
    """
)
print(f"Similarity relations created: {result.loc[0]['relationshipsWritten']}\n")
print(result.loc[0]["similarityDistribution"])

Similarity relations created: 18095

{'min': 0.6999969482421875, 'p5': 0.7797164916992188, 'max': 1.0000038146972656, 'p99': 0.93939208984375, 'p1': 0.7415046691894531, 'p10': 0.794403076171875, 'p90': 0.9104652404785156, 'p50': 0.8638916015625, 'p25': 0.8256378173828125, 'p75': 0.8766212463378906, 'p95': 0.9138755798339844, 'mean': 0.8542342275306973, 'p100': 1.0000038146972656, 'stdDev': 0.04399845235461064}


In [287]:
result = gds.run_cypher(
    """
    CALL gds.knn.write('movies-analyzed' ,{nodeLabels:['Movie'],writeRelationshipType: 'SIMILAR_KNN',
    writeProperty: 'score',nodeProperties: ['minRating','maxRating','avgRating','stdRating','favoriteYear','favoriteMonth','year'],topK: 3, concurrency: 1, randomSeed: 42,similarityCutoff: 0.7})
    """
)
print(f"Similarity relations created: {result.loc[0]['relationshipsWritten']}\n")
print(result.loc[0]["similarityDistribution"])

Similarity relations created: 11636

{'min': 0.7142829895019531, 'p5': 0.8611106872558594, 'max': 1.0000038146972656, 'p99': 1.0000038146972656, 'p1': 0.7906379699707031, 'p10': 0.8928565979003906, 'p90': 1.0000038146972656, 'p50': 1.0000038146972656, 'p25': 0.9285697937011719, 'p75': 1.0000038146972656, 'p95': 1.0000038146972656, 'mean': 0.967150499337971, 'p100': 1.0000038146972656, 'stdDev': 0.051536760920852735}


## Recommendation Systems

In [212]:
import random

random.seed(0)

### Movie based

In [259]:
movieId = random.randint(0, 3952)

gds.run_cypher(
    f"""
    MATCH (m:Movie) WHERE m.movieId = {movieId}
    RETURN m.title as title
    """
).loc[0]["title"]

'Mystery Alaska (1999)'

In [288]:
gds.run_cypher(
    f"""
    MATCH (m:Movie WHERE m.movieId = {movieId})-[s:SIMILAR]-(m2:Movie)
    RETURN m2.title as title, s.score as score
    ORDER BY score DESC
    limit 10
    """
)

Unnamed: 0,title,score
0,Mighty Aphrodite (1995),1.0
1,Black Sheep (1996),1.0
2,It Takes Two (1995),1.0
3,Father of the Bride Part II (1995),1.0
4,Bio-Dome (1996),1.0
5,In the Bleak Midwinter (1995),1.0
6,Don't Be a Menace to South Central While Drink...,1.0
7,Friday (1995),1.0
8,Bottle Rocket (1996),1.0
9,Don't Be a Menace to South Central While Drink...,1.0


In [289]:
gds.run_cypher(
    f"""
    MATCH (m:Movie WHERE m.movieId = {movieId})-[s:SIMILAR_KNN]-(m2:Movie)
    RETURN m2.title as title, s.score as score
    ORDER BY score DESC
    limit 10
    """
)

Unnamed: 0,title,score
0,Mansfield Park (1999),1.0
1,In Too Deep (1999),1.0
2,In Too Deep (1999),1.0
3,Liberty Heights (1999),1.0
4,Mansfield Park (1999),1.0


### User based

In [211]:
userId = random.randint(1, 6040)

gds.run_cypher(
    f"""
    MATCH (u:User {{userId: {userId}}})
    RETURN u.userId, u.age, u.occupation, u.binaryGender, u.favoriteGenreId, u.favoriteYear, u.favoriteMonth, u.ratings, u.minRating, u.maxRating, u.avgRating, u.stdRating
    """
)

Unnamed: 0,u.userId,u.age,u.occupation,u.binaryGender,u.favoriteGenreId,u.favoriteYear,u.favoriteMonth,u.ratings,u.minRating,u.maxRating,u.avgRating,u.stdRating
0,3156,35,7,0,3,2000,9,64,1,5,3,1


In [296]:
gds.run_cypher(
    f"""
    MATCH (u:User {{userId: {userId}}})-[s:SIMILAR]-(u2:User)
    WITH u2.userId as similarUserId, s.score as score
    ORDER BY score DESC
    limit 5
    CALL
    {{ 
        WITH similarUserId
        MATCH (u:User {{userId: similarUserId}})-[r:RATED]->(m:Movie)
        RETURN m.title as title, r.rating as rating
        ORDER BY rating DESC
        LIMIT 2
    }}
    RETURN similarUserId,title,score
    """
)

Unnamed: 0,similarUserId,title,score
0,49,Jurassic Park (1993),1.0
1,49,Saving Private Ryan (1998),1.0
2,122,Hurricane The (1999),1.0
3,122,Rock The (1996),1.0
4,96,Fugitive The (1993),1.0
5,96,Schindler's List (1993),1.0
6,33,Dead Poets Society (1989),1.0
7,33,Witness (1985),1.0
8,125,Terminator The (1984),1.0
9,125,Glory (1989),1.0


In [297]:
gds.run_cypher(
    f"""
    MATCH (u:User {{userId: {userId}}})-[s:SIMILAR_KNN]-(u2:User)
    WITH u2.userId as similarUserId, s.score as score
    ORDER BY score DESC
    limit 5
    CALL
    {{ 
        WITH similarUserId
        MATCH (u:User {{userId: similarUserId}})-[r:RATED]->(m:Movie)
        RETURN m.title as title, r.rating as rating
        ORDER BY rating DESC
        LIMIT 2
    }}
    RETURN similarUserId,title,score
    """
)

Unnamed: 0,similarUserId,title,score
0,4098,Terminator The (1984),0.865419
1,4098,Reservoir Dogs (1992),0.865419
2,4098,Terminator The (1984),0.865419
3,4098,Reservoir Dogs (1992),0.865419
4,2890,Terminator The (1984),0.864802
5,2890,Glory (1989),0.864802
6,2890,Terminator The (1984),0.864802
7,2890,Glory (1989),0.864802
8,672,Bridge on the River Kwai The (1957),0.853535
9,672,Dances with Wolves (1990),0.853535


## Community detection

- Create projection
- Create community
- Try to optimize 
- basic analysis

In [None]:
## TO redoo
result = gds.run_cypher(
    """
    CALL gds.graph.project(
    'movies-analyzed',
    {
        Movie:{properties:['minRating','maxRating','avgRating','stdRating','favoriteYear','favoriteMonth','year']},
        User:{properties: ['binaryGender','age','occupation','favoriteGenreId', 'ratings','minRating','maxRating','avgRating','stdRating','favoriteYear','favoriteMonth']},
        Genre:{properties: ['ratings']}
    },
    {
        RATED: {properties: 'rating', orientation: 'UNDIRECTED'},
        IN_GENRE:{},
        LIKES:{}
    }
    )
    YIELD
    graphName AS graph, nodeProjection, nodeCount AS nodes, relationshipProjection, relationshipCount AS rels
"""
)
result

In [300]:
gds.run_cypher(
    """
    CALL gds.louvain.stats('movies-analyzed')
    YIELD communityCount, modularity, modularities
"""
)

Unnamed: 0,communityCount,modularity,modularities
0,8,0.201548,"[0.20062556692991296, 0.20154750452010453]"
