In [None]:
!pip install neo4j-driver
!pip install graphdatascience

In [6]:
#importing all the required packages 
import sys
from pathlib import Path
import pandas as pd
import numpy as np
from graphdatascience import GraphDataScience

In [17]:
# Connecting to Neo4j with all the required details.
DB_ULR = 'bolt://localhost:7687'
DB_USER = 'neo4j'
DB_PASS = '12345678'
gds = GraphDataScience(DB_ULR, auth=(DB_USER, DB_PASS))
gds.version()

'2.3.5'

In [18]:
# checking for nodes and relationship in an empty instances
nodes = gds.run_cypher('''
    MATCH (n)
    RETURN COUNT(n)
''') 
nodes.head()

Unnamed: 0,COUNT(n)
0,0


In [None]:
movies = pd.read_csv('/Users/vigneshm/Desktop/movies.dat',sep='::',encoding = "ISO-8859-1",names=['MovieID','Title','Genres'])
ratings = pd.read_csv('/Users/vigneshm/Desktop/ratings.dat',sep='::',encoding = "ISO-8859-1",names=['UserID','MovieID','Rating','Timestamp'])
users = pd.read_csv('/Users/vigneshm/Desktop/users.dat',sep='::',encoding = "ISO-8859-1",names=['UserID','Gender','Age','Occupation','Zip_code'])
     

In [20]:
# displaying the top 5 rows of each dataset
display(movies.head())
display(users.head())
display(ratings.head())

Unnamed: 0,MovieID,Title,Genres
0,1,Toy Story (1995),Animation|Children's|Comedy
1,2,Jumanji (1995),Adventure|Children's|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama
4,5,Father of the Bride Part II (1995),Comedy


Unnamed: 0,UserID,Gender,Age,Occupation,Zip_code
0,1,F,1,10,48067
1,2,M,56,16,70072
2,3,M,25,15,55117
3,4,M,45,7,2460
4,5,M,25,20,55455


Unnamed: 0,UserID,MovieID,Rating,Timestamp
0,1,1193,5,978300760
1,1,661,3,978302109
2,1,914,3,978301968
3,1,3408,4,978300275
4,1,2355,5,978824291


In [29]:
#inserting data to graph database by creating user nodes
gds.run_cypher('create constraint if not exists for (n:User) require (n.id) is node key')
create_customer_res = gds.run_cypher('''
    unwind $data as row
    merge (n:User{id: row.UserID})
    set n.Gender = row.Gender
    set n.Age =  row.Age 
    return count(*) as custmers_created
''', params = {'data': users.to_dict('records')})
create_customer_res.head()

Unnamed: 0,custmers_created
0,6040


In [30]:
# inserting data to graph database by creating movies nodes
gds.run_cypher('create constraint if not exists for (n:Movie) require (n.id) is node key')
create_customer_res = gds.run_cypher('''
    unwind $data as row
    merge (n:Movie{id: row.MovieID})
    set n.Title = row.Title
    set n.Genres =  row.Genres 
    return count(*) as movies_created
''', params = {'data': movies.to_dict('records')})
create_customer_res.head()

Unnamed: 0,movies_created
0,3883


In [23]:
# Created rated relationship by spliting it into pieces and then upload one piece at a time  
i=1
for chunk in np.array_split(ratings,200):
  if i%10 == 0:
    print(i)
  create_rated = gds.run_cypher('''
    unwind $data as row
    match (u:User{id: row.UserID}), (m:Movie{id: row.MovieID})
    merge (u)-[r:RATED]->(m)
    set r.Rating = row.Rating
    return count(*) as create_rated
    ''', params = {'data': chunk.to_dict('records')})
  i = i+1
create_rated.head()

10
20
30
40
50
60
70
80
90
100
110
120
130
140
150
160
170
180
190
200


Unnamed: 0,create_rated
0,5001


In [24]:
# Check similar movies as per the users previous watch 
similar_movies = gds.run_cypher('''
    MATCH(m1:Movie)-[r1]-(u:User)-[r2]-(m2:Movie)
    WHERE m1.Title CONTAINS 'Toy Story (1995)'
      AND m2.Title<>'Toy Story (1995)'
      AND r1.Rating=5 AND r2.Rating=5
    RETURN m2.Title,m2.Genres,count(DISTINCT(u)) as common_users
    ORDER BY common_users DESC
''')
similar_movies.head()
     

Unnamed: 0,m2.Title,m2.Genres,common_users
0,Star Wars: Episode IV - A New Hope (1977),Action|Adventure|Fantasy|Sci-Fi,401
1,Toy Story 2 (1999),Animation|Children's|Comedy,385
2,Raiders of the Lost Ark (1981),Action|Adventure,373
3,Star Wars: Episode V - The Empire Strikes Back...,Action|Adventure|Drama|Sci-Fi|War,346
4,"Shawshank Redemption, The (1994)",Drama,327


In [31]:
# Check for similar movies
similar_movies = gds.run_cypher('''
    MATCH(m1:Movie)-[r1]-(u:User)-[r2]-(m2:Movie)
    WHERE m1.Title CONTAINS $title
        AND m2.Title<>$title
        AND r1.Rating=5 AND r2.Rating=5
    RETURN m2.Title,m2.Genres,count(DISTINCT(u)) as common_users
    ORDER BY common_users DESC
''',params = {'title':'Matrix, The (1999)'})
similar_movies.head()

Unnamed: 0,m2.Title,m2.Genres,common_users
0,Star Wars: Episode IV - A New Hope (1977),Action|Adventure|Fantasy|Sci-Fi,799
1,Star Wars: Episode V - The Empire Strikes Back...,Action|Adventure|Drama|Sci-Fi|War,686
2,Raiders of the Lost Ark (1981),Action|Adventure,613
3,American Beauty (1999),Comedy|Drama,590
4,"Sixth Sense, The (1999)",Thriller,557


In [28]:
#displaying the top 5 rows from movies 
display(movies.head())

Unnamed: 0,MovieID,Title,Genres
0,1,Toy Story (1995),Animation|Children's|Comedy
1,2,Jumanji (1995),Adventure|Children's|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama
4,5,Father of the Bride Part II (1995),Comedy


In [32]:
#displaying top 5 movies from the similar_movies dataframe 
similar_movies.head(50)

Unnamed: 0,m2.Title,m2.Genres,common_users
0,Star Wars: Episode IV - A New Hope (1977),Action|Adventure|Fantasy|Sci-Fi,799
1,Star Wars: Episode V - The Empire Strikes Back...,Action|Adventure|Drama|Sci-Fi|War,686
2,Raiders of the Lost Ark (1981),Action|Adventure,613
3,American Beauty (1999),Comedy|Drama,590
4,"Sixth Sense, The (1999)",Thriller,557
5,Braveheart (1995),Action|Drama|War,539
6,Saving Private Ryan (1998),Action|Drama|War,534
7,Terminator 2: Judgment Day (1991),Action|Sci-Fi|Thriller,531
8,"Shawshank Redemption, The (1994)",Drama,523
9,Star Wars: Episode VI - Return of the Jedi (1983),Action|Adventure|Romance|Sci-Fi|War,483


In [33]:
# we create recommendations for user by using projection
create_projection = gds.run_cypher('''
   CALL gds.graph.project(
    'myGraph',
    ['User', 'Movie'],
    {
        RATED: {properties:  'Rating'}
            } 
);
''')
create_projection.head()

Unnamed: 0,nodeProjection,relationshipProjection,graphName,nodeCount,relationshipCount,projectMillis
0,"{'Movie': {'label': 'Movie', 'properties': {}}...","{'RATED': {'orientation': 'NATURAL', 'indexInv...",myGraph,9923,1000209,959


In [34]:
# Get user similarity
users_similarity = gds.run_cypher('''
CALL gds.nodeSimilarity.stream('myGraph')
YIELD node1, node2, similarity
RETURN gds.util.asNode(node1).id AS UserID1, gds.util.asNode(node2).id AS UserID2, similarity
ORDER BY similarity DESCENDING, UserID1, UserID2
''')
users_similarity.head()

Unnamed: 0,UserID1,UserID2,similarity
0,4725,4808,0.755415
1,4808,4725,0.755415
2,1122,2126,0.632
3,2126,1122,0.632
4,1272,2837,0.601852


In [35]:
# create a new relationship between users. Similarity score between them is add as an attribute
i=1
for chunk in np.array_split(users_similarity.query('UserID1>UserID2'),10):
  print(i)
  create_similar = gds.run_cypher('''
    unwind $data as row
    match (u1:User{id: row.UserID1}), (u2:User{id: row.UserID2})
    merge (u1)-[r:SIMILAR]->(u2)
    set r.Similarity=row.similarity
    return count(*) as create_rated
    ''', params = {'data': chunk.to_dict('records')})
  i = i+1
create_similar.head()

1
2
3
4
5
6
7
8
9
10


Unnamed: 0,create_rated
0,3179


In [36]:
# Check similar movies with similar users and then movies they have rated
similar_movies_for_user = gds.run_cypher('''
    MATCH (u1:User)-[r1:SIMILAR]-(u2)-[r2:RATED]-(m:Movie)
    WHERE id(u1)=$id
    AND NOT ( (u1)-[]-(m))
    RETURN m.Title,m.Genres,Sum(r1.Similarity*r2.Rating)/sum(r1.Similarity)+log(count(r2)) as score
    ORDER BY score DESC
''',params = {'id':4725})
similar_movies_for_user.head(10)

Unnamed: 0,m.Title,m.Genres,score
0,Schindler's List (1993),Drama|War,6.864803
1,Toy Story (1995),Animation|Children's|Comedy,6.497486
2,October Sky (1999),Drama,6.390129
3,Almost Famous (2000),Comedy|Drama,6.389066
4,Bulworth (1998),Comedy,6.283263
5,Boys Don't Cry (1999),Drama,6.259083
6,Apollo 13 (1995),Drama,6.209804
7,Boogie Nights (1997),Drama,6.169459
8,GoodFellas (1990),Crime|Drama,6.156693
9,"Simple Plan, A (1998)",Crime|Thriller,6.120458
