In [None]:
import pandas as pd
import numpy as np
import datetime
from collections import Counter
from sklearn.metrics.pairwise import cosine_similarity


In [None]:
genome_scores_data = pd.read_csv("genome_scores.csv")
movies_data = pd.read_csv("movie.csv")
ratings_data = pd.read_csv("rating.csv")

In [None]:
# Calculate the number of rows to select (30%)
num_rows_to_select1 = int(len(genome_scores_data) * 0.1)
num_rows_to_select2 = int(len(movies_data) * 0.1)
num_rows_to_select3 = int(len(ratings_data) * 0.1)

# Select the first 30% of data for each DataFrame
genome_scores_data = genome_scores_data.head(num_rows_to_select1)
movies_data = movies_data.head(num_rows_to_select2)
ratings_data = ratings_data.head(num_rows_to_select3)

In [None]:
genome_scores_data.head()

Unnamed: 0,movieId,tagId,relevance
0,1,1,0.025
1,1,2,0.025
2,1,3,0.05775
3,1,4,0.09675
4,1,5,0.14675


In [None]:
movies_data.head()

Unnamed: 0,movieId,title,genres,year
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,1995
1,2,Jumanji (1995),Adventure|Children|Fantasy,1995
2,3,Grumpier Old Men (1995),Comedy|Romance,1995
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance,1995
4,5,Father of the Bride Part II (1995),Comedy,1995


In [None]:
ratings_data.head()


Unnamed: 0,userId,movieId,rating,timestamp
0,1,2,3.5,2005-04-02 23:53:47
1,1,29,3.5,2005-04-02 23:31:16
2,1,32,3.5,2005-04-02 23:33:39
3,1,47,3.5,2005-04-02 23:32:07
4,1,50,3.5,2005-04-02 23:29:40


In [None]:
scores_pivot = genome_scores_data.pivot_table(index = ["movieId"],columns = ["tagId"],values = "relevance").reset_index()
scores_pivot.head()

tagId,movieId,1,2,3,4,5,6,7,8,9,...,1119,1120,1121,1122,1123,1124,1125,1126,1127,1128
0,1,0.025,0.025,0.05775,0.09675,0.14675,0.217,0.067,0.26275,0.262,...,0.0395,0.018,0.04575,0.03275,0.125,0.0415,0.01925,0.03625,0.07775,0.023
1,2,0.03975,0.04375,0.03775,0.048,0.11025,0.0725,0.04775,0.10975,0.09925,...,0.04175,0.01925,0.01725,0.02425,0.1255,0.0225,0.0155,0.01475,0.09025,0.01875
2,3,0.0435,0.05475,0.028,0.077,0.054,0.0685,0.056,0.185,0.04925,...,0.0415,0.02675,0.02775,0.03425,0.1555,0.03675,0.017,0.0195,0.097,0.0185
3,4,0.03725,0.0395,0.03675,0.031,0.06825,0.0405,0.02325,0.087,0.05125,...,0.0575,0.03375,0.02275,0.03975,0.18525,0.05925,0.015,0.01525,0.0645,0.013
4,5,0.042,0.05275,0.05925,0.03675,0.07525,0.12525,0.0285,0.085,0.0295,...,0.0425,0.02825,0.0215,0.026,0.14275,0.02075,0.0165,0.01675,0.1075,0.01825


In [None]:
#join
mov_tag_df = movies_data.merge(scores_pivot, left_on="movieId", right_on="movieId", how="left")
mov_tag_df = mov_tag_df.fillna(0)
mov_tag_df = mov_tag_df.drop(['title','genres'], axis = 1)
mov_tag_df.head()

Unnamed: 0,movieId,year,1,2,3,4,5,6,7,8,...,1119,1120,1121,1122,1123,1124,1125,1126,1127,1128
0,1,1995,0.025,0.025,0.05775,0.09675,0.14675,0.217,0.067,0.26275,...,0.0395,0.018,0.04575,0.03275,0.125,0.0415,0.01925,0.03625,0.07775,0.023
1,2,1995,0.03975,0.04375,0.03775,0.048,0.11025,0.0725,0.04775,0.10975,...,0.04175,0.01925,0.01725,0.02425,0.1255,0.0225,0.0155,0.01475,0.09025,0.01875
2,3,1995,0.0435,0.05475,0.028,0.077,0.054,0.0685,0.056,0.185,...,0.0415,0.02675,0.02775,0.03425,0.1555,0.03675,0.017,0.0195,0.097,0.0185
3,4,1995,0.03725,0.0395,0.03675,0.031,0.06825,0.0405,0.02325,0.087,...,0.0575,0.03375,0.02275,0.03975,0.18525,0.05925,0.015,0.01525,0.0645,0.013
4,5,1995,0.042,0.05275,0.05925,0.03675,0.07525,0.12525,0.0285,0.085,...,0.0425,0.02825,0.0215,0.026,0.14275,0.02075,0.0165,0.01675,0.1075,0.01825


In [None]:
def set_genres(genres,col):
    if genres in col.split('|'): return 1
    else: return 0

In [None]:
genres = ['Action', 'Adventure', 'Animation', 'Children', 'Comedy', 'Crime', 'Documentary', 'Drama', 'Fantasy', 'Film-Noir', 'Horror', 'Musical', 'Mystery', 'Romance', 'Sci-Fi', 'Thriller', 'War', 'Western', '(no genres listed)']

# Create a DataFrame to hold genre information
mov_genres_df = movies_data.copy()

# Initialize genre columns with zeros
for genre in genres:
    mov_genres_df[genre] = 0

In [None]:
mov_genres_df["Action"] = mov_genres_df.apply(lambda x: set_genres("Action",x['genres']), axis=1)
mov_genres_df["Adventure"] = mov_genres_df.apply(lambda x: set_genres("Adventure",x['genres']), axis=1)
mov_genres_df["Animation"] = mov_genres_df.apply(lambda x: set_genres("Animation",x['genres']), axis=1)
mov_genres_df["Children"] = mov_genres_df.apply(lambda x: set_genres("Children",x['genres']), axis=1)
mov_genres_df["Comedy"] = mov_genres_df.apply(lambda x: set_genres("Comedy",x['genres']), axis=1)
mov_genres_df["Crime"] = mov_genres_df.apply(lambda x: set_genres("Crime",x['genres']), axis=1)
mov_genres_df["Documentary"] = mov_genres_df.apply(lambda x: set_genres("Documentary",x['genres']), axis=1)
mov_genres_df["Drama"] = mov_genres_df.apply(lambda x: set_genres("Drama",x['genres']), axis=1)
mov_genres_df["Fantasy"] = mov_genres_df.apply(lambda x: set_genres("Fantasy",x['genres']), axis=1)
mov_genres_df["Film-Noir"] = mov_genres_df.apply(lambda x: set_genres("Film-Noir",x['genres']), axis=1)
mov_genres_df["Horror"] = mov_genres_df.apply(lambda x: set_genres("Horror",x['genres']), axis=1)
mov_genres_df["Musical"] = mov_genres_df.apply(lambda x: set_genres("Musical",x['genres']), axis=1)
mov_genres_df["Mystery"] = mov_genres_df.apply(lambda x: set_genres("Mystery",x['genres']), axis=1)
mov_genres_df["Romance"] = mov_genres_df.apply(lambda x: set_genres("Romance",x['genres']), axis=1)
mov_genres_df["Sci-Fi"] = mov_genres_df.apply(lambda x: set_genres("Sci-Fi",x['genres']), axis=1)
mov_genres_df["Thriller"] = mov_genres_df.apply(lambda x: set_genres("Thriller",x['genres']), axis=1)
mov_genres_df["War"] = mov_genres_df.apply(lambda x: set_genres("War",x['genres']), axis=1)
mov_genres_df["Western"] = mov_genres_df.apply(lambda x: set_genres("Western",x['genres']), axis=1)
mov_genres_df["(no genres listed)"] = mov_genres_df.apply(lambda x: set_genres("(no genres listed)",x['genres']), axis=1)

In [None]:
mov_genres_df.drop(['title','genres'], axis = 1, inplace=True)
mov_genres_df.head()

Unnamed: 0,movieId,year,Action,Adventure,Animation,Children,Comedy,Crime,Documentary,Drama,...,Film-Noir,Horror,Musical,Mystery,Romance,Sci-Fi,Thriller,War,Western,(no genres listed)
0,1,1995,0,1,1,1,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,2,1995,0,1,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,3,1995,0,0,0,0,1,0,0,0,...,0,0,0,0,1,0,0,0,0,0
3,4,1995,0,0,0,0,1,0,0,1,...,0,0,0,0,1,0,0,0,0,0
4,5,1995,0,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [None]:
def set_year(title):
    year = title.strip()[-5:-1]
    if year.isnumeric():
        return int(year)
    else:
        return 1800

# Add the 'year' field to the movies_data DataFrame
movies_data['year'] = movies_data['title'].apply(lambda x: set_year(x))

# Create a new DataFrame without the 'genres' column
movies = movies_data.drop('genres', axis=1)

# Display the first few rows
movies.head()


Unnamed: 0,movieId,title,year
0,1,Toy Story (1995),1995
1,2,Jumanji (1995),1995
2,3,Grumpier Old Men (1995),1995
3,4,Waiting to Exhale (1995),1995
4,5,Father of the Bride Part II (1995),1995


In [None]:
#define function to group years
def set_year_group(year):
    if (year < 1900): return 0
    elif (1900 <= year <= 1975): return 1
    elif (1976 <= year <= 1995): return 2
    elif (1996 <= year <= 2003): return 3
    elif (2004 <= year <= 2009): return 4
    elif (2010 <= year): return 5
    else: return 0
movies['year_group'] = movies.apply(lambda x: set_year_group(x['year']), axis=1)
#no need title and year fields
movies.drop(['title','year'], axis = 1, inplace=True)

In [None]:
agg_movies_rat = ratings_data.groupby(['movieId']).agg({'rating': [np.size, np.mean]}).reset_index()
agg_movies_rat.columns = ['movieId','rating_counts', 'rating_mean']
agg_movies_rat.head()

Unnamed: 0,movieId,rating_counts,rating_mean
0,1,21,4.02381
1,2,6,3.083333
2,3,7,3.571429
3,4,1,2.0
4,5,5,2.5


In [None]:
#define function to group rating counts
def set_rating_group(rating_counts):
    if (rating_counts <= 1): return 0
    elif (2 <= rating_counts <= 10): return 1
    elif (11 <= rating_counts <= 100): return 2
    elif (101 <= rating_counts <= 1000): return 3
    elif (1001 <= rating_counts <= 5000): return 4
    elif (5001 <= rating_counts): return 5
    else: return 0
agg_movies_rat['rating_group'] = agg_movies_rat.apply(lambda x: set_rating_group(x['rating_counts']), axis=1)
#no need rating_counts field
agg_movies_rat.drop('rating_counts', axis = 1, inplace=True)
mov_rating_df = movies.merge(agg_movies_rat, left_on='movieId', right_on='movieId', how='left')
mov_rating_df = mov_rating_df.fillna(0)
mov_rating_df.head()

Unnamed: 0,movieId,year_group,rating_mean,rating_group
0,1,2,4.02381,2.0
1,2,2,3.083333,1.0
2,3,2,3.571429,1.0
3,4,2,2.0,0.0
4,5,2,2.5,1.0


In [None]:
mov_tag_df = mov_tag_df.set_index('movieId')
mov_genres_df = mov_genres_df.set_index('movieId')
mov_rating_df = mov_rating_df.set_index('movieId')

In [None]:
#cosine similarity for mov_tag_df
cos_tag = cosine_similarity(mov_tag_df.values)*0.5
#cosine similarity for mov_genres_df
cos_genres = cosine_similarity(mov_genres_df.values)*0.25
#cosine similarity for mov_rating_df
cos_rating = cosine_similarity(mov_rating_df.values)*0.25
#mix
cos = cos_tag+cos_genres+cos_rating

In [None]:
cols = mov_tag_df.index.values
inx = mov_tag_df.index
movies_sim = pd.DataFrame(cos, columns=cols, index=inx)
movies_sim.head()

Unnamed: 0_level_0,1,2,3,4,5,6,7,8,9,10,...,45,46,47,48,49,50,51,52,53,54
movieId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,1.0,0.99564,0.995727,0.966503,0.993369,0.999796,0.995418,0.976562,0.978392,0.995827,...,0.995583,0.976562,0.999987,0.995072,0.851653,0.99972,0.851653,0.995625,0.851654,0.851654
2,0.99564,1.0,0.99942,0.985929,0.998692,0.995342,0.998822,0.991208,0.989721,0.999808,...,0.999119,0.991208,0.995651,0.998267,0.881272,0.995484,0.881272,0.999201,0.881272,0.881272
3,0.995727,0.99942,1.0,0.983738,0.996381,0.994775,0.999894,0.992127,0.99259,0.999892,...,0.999967,0.992127,0.995876,0.99969,0.86866,0.996344,0.868659,0.999981,0.868659,0.86866
4,0.966503,0.985929,0.983738,1.0,0.98717,0.966353,0.982475,0.995144,0.987169,0.98481,...,0.983073,0.995144,0.966492,0.981453,0.926775,0.966237,0.926775,0.983248,0.926775,0.926775
5,0.993369,0.998692,0.996381,0.98717,1.0,0.99406,0.99505,0.987731,0.983331,0.997511,...,0.99568,0.987731,0.993178,0.993973,0.899069,0.992059,0.899069,0.995864,0.899069,0.899069


In [None]:
def get_similar(movieId):
    df = movies_sim.loc[movies_sim.index == movieId].reset_index(). \
            melt(id_vars='movieId', var_name='sim_moveId', value_name='relevance'). \
            sort_values('relevance', axis=0, ascending=False)[1:6]
    return df
#create empty df
movies_similarity = pd.DataFrame(columns=['movieId','sim_moveId','relevance'])

In [None]:
for x in movies_sim.index.tolist():
    movies_similarity = movies_similarity.append(get_similar(x))
movies_similarity.head()

  movies_similarity = movies_similarity.append(get_similar(x))
  movies_similarity = movies_similarity.append(get_similar(x))
  movies_similarity = movies_similarity.append(get_similar(x))
  movies_similarity = movies_similarity.append(get_similar(x))
  movies_similarity = movies_similarity.append(get_similar(x))
  movies_similarity = movies_similarity.append(get_similar(x))
  movies_similarity = movies_similarity.append(get_similar(x))
  movies_similarity = movies_similarity.append(get_similar(x))
  movies_similarity = movies_similarity.append(get_similar(x))
  movies_similarity = movies_similarity.append(get_similar(x))
  movies_similarity = movies_similarity.append(get_similar(x))
  movies_similarity = movies_similarity.append(get_similar(x))
  movies_similarity = movies_similarity.append(get_similar(x))
  movies_similarity = movies_similarity.append(get_similar(x))
  movies_similarity = movies_similarity.append(get_similar(x))
  movies_similarity = movies_similarity.append(get_simi

Unnamed: 0,movieId,sim_moveId,relevance
10,1,11,0.999988
46,1,47,0.999987
31,1,32,0.99984
5,1,6,0.999796
49,1,50,0.99972


In [None]:
def movie_recommender(movieId):
    df = movies_sim.loc[movies_sim.index == movieId].reset_index(). \
            melt(id_vars='movieId', var_name='sim_moveId', value_name='relevance'). \
            sort_values('relevance', axis=0, ascending=False)[1:6]
    df['sim_moveId'] = df['sim_moveId'].astype(int)
    sim_df = movies_data.merge(df, left_on='movieId', right_on='sim_moveId', how='inner'). \
                sort_values('relevance', axis=0, ascending=False). \
                loc[: , ['movieId_y','title','genres']]. \
                rename(columns={ 'movieId_y': "movieId" })
    return sim_df

In [None]:
#get recommendation for Toy Story
movie_recommender(52)

Unnamed: 0,movieId,title,genres
4,52,To Die For (1995),Comedy|Drama|Thriller
2,52,Dangerous Minds (1995),Drama
0,52,Grumpier Old Men (1995),Comedy|Romance
1,52,Casino (1995),Crime|Drama
3,52,Dead Man Walking (1995),Crime|Drama


In [None]:
users_df = pd.DataFrame(ratings_data['userId'].unique(), columns=['userId'])
users_df.head()

Unnamed: 0,userId
0,1
1,2
2,3
3,4
4,5


In [None]:
#create movies_df
movies_df = movies_data.drop('genres', axis = 1)
#calculate mean of ratings for each movies
agg_rating_avg = ratings_data.groupby(['movieId']).agg({'rating': np.mean}).reset_index()
agg_rating_avg.columns = ['movieId', 'rating_mean']
#merge
movies_df = movies_df.merge(agg_rating_avg, left_on='movieId', right_on='movieId', how='left')
movies_df.head()

Unnamed: 0,movieId,title,year,rating_mean
0,1,Toy Story (1995),1995,4.02381
1,2,Jumanji (1995),1995,3.083333
2,3,Grumpier Old Men (1995),1995,3.571429
3,4,Waiting to Exhale (1995),1995,2.0
4,5,Father of the Bride Part II (1995),1995,2.5


In [None]:
genres = [
    "Action",
    "Adventure",
    "Animation",
    "Children",
    "Comedy",
    "Crime",
    "Documentary",
    "Drama",
    "Fantasy",
    "Film-Noir",
    "Horror",
    "Musical",
    "Mystery",
    "Romance",
    "Sci-Fi",
    "Thriller",
    "War",
    "Western",
    "(no genres listed)"]
genres_df = pd.DataFrame(genres, columns=['genres'])
genres_df.head()

Unnamed: 0,genres
0,Action
1,Adventure
2,Animation
3,Children
4,Comedy


In [None]:
users_movies_df = ratings_data.drop('timestamp', axis = 1)
users_movies_df.head()

Unnamed: 0,userId,movieId,rating
0,1,2,3.5
1,1,29,3.5
2,1,32,3.5
3,1,47,3.5
4,1,50,3.5


In [None]:
movies_genres_df = movies_data.drop('title', axis = 1)


In [None]:
#define a function to split genres field
def get_movie_genres(movieId):
    movie = movies_genres_df[movies_genres_df['movieId']==movieId]
    genres = movie['genres'].tolist()
    df = pd.DataFrame([b for a in [i.split('|') for i in genres] for b in a], columns=['genres'])
    df.insert(loc=0, column='movieId', value=movieId)
    return df

In [None]:
#create empty df
movies_genres=pd.DataFrame(columns=['movieId','genres'])
for x in movies_genres_df['movieId'].tolist():
    movies_genres=movies_genres.append(get_movie_genres(x))
movies_genres.head()

  movies_genres=movies_genres.append(get_movie_genres(x))
  movies_genres=movies_genres.append(get_movie_genres(x))
  movies_genres=movies_genres.append(get_movie_genres(x))
  movies_genres=movies_genres.append(get_movie_genres(x))
  movies_genres=movies_genres.append(get_movie_genres(x))
  movies_genres=movies_genres.append(get_movie_genres(x))
  movies_genres=movies_genres.append(get_movie_genres(x))
  movies_genres=movies_genres.append(get_movie_genres(x))
  movies_genres=movies_genres.append(get_movie_genres(x))
  movies_genres=movies_genres.append(get_movie_genres(x))
  movies_genres=movies_genres.append(get_movie_genres(x))
  movies_genres=movies_genres.append(get_movie_genres(x))
  movies_genres=movies_genres.append(get_movie_genres(x))
  movies_genres=movies_genres.append(get_movie_genres(x))
  movies_genres=movies_genres.append(get_movie_genres(x))
  movies_genres=movies_genres.append(get_movie_genres(x))
  movies_genres=movies_genres.append(get_movie_genres(x))
  movies_genre

Unnamed: 0,movieId,genres
0,1,Adventure
1,1,Animation
2,1,Children
3,1,Comedy
4,1,Fantasy


In [None]:
#join to movies data to get genre information
user_genres_df = ratings_data.merge(movies_data, left_on='movieId', right_on='movieId', how='left')
#drop columns that will not be used
user_genres_df.drop(['movieId','rating','timestamp','title'], axis = 1, inplace=True)
user_genres_df.head()

Unnamed: 0,userId,genres,year
0,1,Adventure|Children|Fantasy,1995.0
1,1,Adventure|Drama|Fantasy|Mystery|Sci-Fi,1995.0
2,1,Mystery|Sci-Fi|Thriller,1995.0
3,1,Mystery|Thriller,1995.0
4,1,Crime|Mystery|Thriller,1995.0


In [None]:
def get_favorite_genre(userId):
    user = user_genres_df[user_genres_df['userId']==userId]
    genres = user['genres'].tolist()
    if not genres:
        return "No suggestion"
    else:
        movie_list = [b for a in [i.split('|') for i in genres] for b in a]
        counter = Counter(movie_list)
        return counter.most_common(1)[0][0]

In [None]:
user_genres_df.dropna(inplace=True)

In [None]:
#create empty df
users_genres = pd.DataFrame(columns=['userId','genre'])
for x in users_df['userId'].tolist():
    users_genres = users_genres.append(pd.DataFrame([[x,get_favorite_genre(x)]], columns=['userId','genre']))
users_genres.head()

  users_genres = users_genres.append(pd.DataFrame([[x,get_favorite_genre(x)]], columns=['userId','genre']))
  users_genres = users_genres.append(pd.DataFrame([[x,get_favorite_genre(x)]], columns=['userId','genre']))
  users_genres = users_genres.append(pd.DataFrame([[x,get_favorite_genre(x)]], columns=['userId','genre']))
  users_genres = users_genres.append(pd.DataFrame([[x,get_favorite_genre(x)]], columns=['userId','genre']))
  users_genres = users_genres.append(pd.DataFrame([[x,get_favorite_genre(x)]], columns=['userId','genre']))
  users_genres = users_genres.append(pd.DataFrame([[x,get_favorite_genre(x)]], columns=['userId','genre']))
  users_genres = users_genres.append(pd.DataFrame([[x,get_favorite_genre(x)]], columns=['userId','genre']))
  users_genres = users_genres.append(pd.DataFrame([[x,get_favorite_genre(x)]], columns=['userId','genre']))
  users_genres = users_genres.append(pd.DataFrame([[x,get_favorite_genre(x)]], columns=['userId','genre']))
  users_genres = users_genre

Unnamed: 0,userId,genre
0,1,Mystery
0,2,Comedy
0,3,Sci-Fi
0,4,Thriller
0,5,Drama


In [None]:
users_df.to_csv('users.csv', sep='|', header=True, index=False)
movies_df.to_csv('movies.csv', sep='|', header=True, index=False)
genres_df.to_csv('genres.csv', sep='|', header=True, index=False)
users_movies_df.to_csv('users_movies.csv', sep='|', header=True, index=False)
movies_genres.to_csv('movies_genres.csv', sep='|', header=True, index=False)
users_genres.to_csv('users_genres.csv', sep='|', header=True, index=False)
movies_similarity.to_csv('movies_similarity.csv', sep='|', header=True, index=False)

In [None]:
from neo4j import GraphDatabase


In [None]:
# Define a connection to the database
uri = "bolt://localhost:7687"  # Replace with your database URI
username = "neo4j"     # Replace with your username
password = "password"     # Replace with your password

# Create a connection to the database
driver = GraphDatabase.driver(uri, auth=(username, password))

In [None]:
session=driver.session()

In [None]:
cypher_query = """
LOAD CSV WITH HEADERS FROM "file:///users.csv" AS row
FIELDTERMINATOR '|'
CREATE (:Users {userId: row.userId});
"""

# Function to execute the Cypher query
def execute_cypher_query(query):
    with GraphDatabase.driver(uri, auth=(username, password)) as driver:
        with driver.session() as session:
            result = session.write_transaction(lambda tx: tx.run(query))
            # Perform any necessary processing or error handling here
            return

# Execute the Cypher query
execute_cypher_query(cypher_query)

  result = session.write_transaction(lambda tx: tx.run(query))


In [None]:
q1='MATCH (n:Users) RETURN n LIMIT 10'
nodes=session.run(q1)
for node in nodes:
    print(node)

<Record n=<Node element_id='4:cd7ae953-9f93-4cfa-8372-358331f017a8:0' labels=frozenset({'Users'}) properties={'userId': '1'}>>
<Record n=<Node element_id='4:cd7ae953-9f93-4cfa-8372-358331f017a8:1' labels=frozenset({'Users'}) properties={'userId': '2'}>>
<Record n=<Node element_id='4:cd7ae953-9f93-4cfa-8372-358331f017a8:2' labels=frozenset({'Users'}) properties={'userId': '3'}>>
<Record n=<Node element_id='4:cd7ae953-9f93-4cfa-8372-358331f017a8:3' labels=frozenset({'Users'}) properties={'userId': '4'}>>
<Record n=<Node element_id='4:cd7ae953-9f93-4cfa-8372-358331f017a8:4' labels=frozenset({'Users'}) properties={'userId': '5'}>>
<Record n=<Node element_id='4:cd7ae953-9f93-4cfa-8372-358331f017a8:5' labels=frozenset({'Users'}) properties={'userId': '6'}>>
<Record n=<Node element_id='4:cd7ae953-9f93-4cfa-8372-358331f017a8:6' labels=frozenset({'Users'}) properties={'userId': '7'}>>
<Record n=<Node element_id='4:cd7ae953-9f93-4cfa-8372-358331f017a8:7' labels=frozenset({'Users'}) properties={'

In [None]:
cypher_query = """
LOAD CSV WITH HEADERS FROM 'file:///movies.csv' AS row
FIELDTERMINATOR '|'
CREATE (:Movies {movieId: row.movieId, title: row.title, rating_mean: row.rating_mean});
"""

# Function to execute the Cypher query
def execute_cypher_query(query):
    with GraphDatabase.driver(uri, auth=(username, password)) as driver:
        with driver.session() as session:
            result = session.write_transaction(lambda tx: tx.run(query))
            # Perform any necessary processing or error handling here
            return

# Execute the Cypher query
execute_cypher_query(cypher_query)

  result = session.write_transaction(lambda tx: tx.run(query))


In [None]:
q1='MATCH (n:Movies) RETURN n LIMIT 10'
nodes1=session.run(q1)
for node in nodes1:
    print(node)

<Record n=<Node element_id='4:cd7ae953-9f93-4cfa-8372-358331f017a8:27221' labels=frozenset({'Movies'}) properties={'rating_mean': '3.9129589853738365', 'movieId': '1', 'title': 'Toy Story (1995)'}>>
<Record n=<Node element_id='4:cd7ae953-9f93-4cfa-8372-358331f017a8:27222' labels=frozenset({'Movies'}) properties={'rating_mean': '3.2004984141368373', 'movieId': '2', 'title': 'Jumanji (1995)'}>>
<Record n=<Node element_id='4:cd7ae953-9f93-4cfa-8372-358331f017a8:27223' labels=frozenset({'Movies'}) properties={'rating_mean': '3.1476095617529882', 'movieId': '3', 'title': 'Grumpier Old Men (1995)'}>>
<Record n=<Node element_id='4:cd7ae953-9f93-4cfa-8372-358331f017a8:27224' labels=frozenset({'Movies'}) properties={'rating_mean': '2.8787610619469026', 'movieId': '4', 'title': 'Waiting to Exhale (1995)'}>>
<Record n=<Node element_id='4:cd7ae953-9f93-4cfa-8372-358331f017a8:27225' labels=frozenset({'Movies'}) properties={'rating_mean': '3.0719394194362644', 'movieId': '5', 'title': 'Father of the

In [None]:
cypher_query = """
LOAD CSV WITH HEADERS FROM 'file:///genres.csv' AS row
FIELDTERMINATOR '|'
CREATE (:Genres {genres: row.genres});
"""

# Function to execute the Cypher query
def execute_cypher_query(query):
    with GraphDatabase.driver(uri, auth=(username, password)) as driver:
        with driver.session() as session:
            result = session.write_transaction(lambda tx: tx.run(query))
            # Perform any necessary processing or error handling here
            return

# Execute the Cypher query
execute_cypher_query(cypher_query)

  result = session.write_transaction(lambda tx: tx.run(query))


In [None]:
q1='MATCH (n:Genres) RETURN n LIMIT 25'
nodes2=session.run(q1)
for node in nodes2:
    print(node)

<Record n=<Node element_id='4:cd7ae953-9f93-4cfa-8372-358331f017a8:32676' labels=frozenset({'Genres'}) properties={'genres': 'Action'}>>
<Record n=<Node element_id='4:cd7ae953-9f93-4cfa-8372-358331f017a8:32677' labels=frozenset({'Genres'}) properties={'genres': 'Adventure'}>>
<Record n=<Node element_id='4:cd7ae953-9f93-4cfa-8372-358331f017a8:32678' labels=frozenset({'Genres'}) properties={'genres': 'Animation'}>>
<Record n=<Node element_id='4:cd7ae953-9f93-4cfa-8372-358331f017a8:32679' labels=frozenset({'Genres'}) properties={'genres': 'Children'}>>
<Record n=<Node element_id='4:cd7ae953-9f93-4cfa-8372-358331f017a8:32680' labels=frozenset({'Genres'}) properties={'genres': 'Comedy'}>>
<Record n=<Node element_id='4:cd7ae953-9f93-4cfa-8372-358331f017a8:32681' labels=frozenset({'Genres'}) properties={'genres': 'Crime'}>>
<Record n=<Node element_id='4:cd7ae953-9f93-4cfa-8372-358331f017a8:32682' labels=frozenset({'Genres'}) properties={'genres': 'Documentary'}>>
<Record n=<Node element_id='4

In [None]:
cypher_query = """
LOAD CSV WITH HEADERS FROM 'file:///users_movies.csv' AS row
FIELDTERMINATOR '|'
MATCH (user:Users {userId: row.userId})
MATCH (movie:Movies {movieId: row.movieId})
MERGE (user)-[:WATCHED {rating: row.rating}]->(movie);
"""

# Function to execute the Cypher query
def execute_cypher_query(query):
    with GraphDatabase.driver(uri, auth=(username, password)) as driver:
        with driver.session() as session:
            result = session.write_transaction(lambda tx: tx.run(query))
            # Perform any necessary processing or error handling here
            return

# Execute the Cypher query
execute_cypher_query(cypher_query)

  result = session.write_transaction(lambda tx: tx.run(query))


In [None]:
q1='MATCH p=()-[r:WATCHED]->() RETURN p LIMIT 25'
nodes3=session.run(q1)
for node in nodes3:
    print(node)

<Record p=<Path start=<Node element_id='4:cd7ae953-9f93-4cfa-8372-358331f017a8:0' labels=frozenset({'Users'}) properties={'userId': '1'}> end=<Node element_id='4:cd7ae953-9f93-4cfa-8372-358331f017a8:29275' labels=frozenset({'Movies'}) properties={'rating_mean': '3.6993185689948893', 'movieId': '2138', 'title': 'Watership Down (1978)'}> size=1>>
<Record p=<Path start=<Node element_id='4:cd7ae953-9f93-4cfa-8372-358331f017a8:0' labels=frozenset({'Users'}) properties={'userId': '1'}> end=<Node element_id='4:cd7ae953-9f93-4cfa-8372-358331f017a8:28577' labels=frozenset({'Movies'}) properties={'rating_mean': '3.7977159656264132', 'movieId': '1387', 'title': 'Jaws (1975)'}> size=1>>
<Record p=<Path start=<Node element_id='4:cd7ae953-9f93-4cfa-8372-358331f017a8:0' labels=frozenset({'Users'}) properties={'userId': '1'}> end=<Node element_id='4:cd7ae953-9f93-4cfa-8372-358331f017a8:101552' labels=frozenset({'Movies'}) properties={'rating_mean': '3.9246311738293778', 'movieId': '1036', 'title': 'Di

In [None]:
cypher_query = """
LOAD CSV WITH HEADERS FROM 'file:///users_genres.csv' AS row
FIELDTERMINATOR '|'
MATCH (user:Users {userId: row.userId})
MATCH (genres:Genres {genres: row.genre})
MERGE (user)-[:FAVORITE]->(genres);
"""

# Function to execute the Cypher query
def execute_cypher_query(query):
    with GraphDatabase.driver(uri, auth=(username, password)) as driver:
        with driver.session() as session:
            result = session.write_transaction(lambda tx: tx.run(query))
            # Perform any necessary processing or error handling here
            return

# Execute the Cypher query
execute_cypher_query(cypher_query)

  result = session.write_transaction(lambda tx: tx.run(query))


In [None]:
q1='MATCH p=()-[:FAVORITE]->() RETURN p LIMIT 25'
nodes4=session.run(q1)
for node in nodes4:
    print(node)

<Record p=<Path start=<Node element_id='4:cd7ae953-9f93-4cfa-8372-358331f017a8:0' labels=frozenset({'Users'}) properties={'userId': '1'}> end=<Node element_id='4:cd7ae953-9f93-4cfa-8372-358331f017a8:32677' labels=frozenset({'Genres'}) properties={'genres': 'Adventure'}> size=1>>
<Record p=<Path start=<Node element_id='4:cd7ae953-9f93-4cfa-8372-358331f017a8:0' labels=frozenset({'Users'}) properties={'userId': '1'}> end=<Node element_id='4:cd7ae953-9f93-4cfa-8372-358331f017a8:103301' labels=frozenset({'Genres'}) properties={'genres': 'Adventure'}> size=1>>
<Record p=<Path start=<Node element_id='4:cd7ae953-9f93-4cfa-8372-358331f017a8:0' labels=frozenset({'Users'}) properties={'userId': '1'}> end=<Node element_id='4:cd7ae953-9f93-4cfa-8372-358331f017a8:103282' labels=frozenset({'Genres'}) properties={'genres': 'Adventure'}> size=1>>
<Record p=<Path start=<Node element_id='4:cd7ae953-9f93-4cfa-8372-358331f017a8:0' labels=frozenset({'Users'}) properties={'userId': '1'}> end=<Node element_id

In [None]:
cypher_query = """
LOAD CSV WITH HEADERS FROM 'file:///movies_genres.csv' AS row
FIELDTERMINATOR '|'
MATCH (movie:Movies {movieId: row.movieId})
MATCH (genres:Genres {genres: row.genres})
MERGE (movie)-[:GENRES]->(genres);
"""

# Function to execute the Cypher query
def execute_cypher_query(query):
    with GraphDatabase.driver(uri, auth=(username, password)) as driver:
        with driver.session() as session:
            result = session.write_transaction(lambda tx: tx.run(query))
            # Perform any necessary processing or error handling here
            return

# Execute the Cypher query
execute_cypher_query(cypher_query)

  result = session.write_transaction(lambda tx: tx.run(query))


In [None]:
q1='MATCH p=()-[:GENRES]->() RETURN p LIMIT 25'
nodes5=session.run(q1)
for node in nodes5:
    print(node)

<Record p=<Path start=<Node element_id='4:cd7ae953-9f93-4cfa-8372-358331f017a8:27221' labels=frozenset({'Movies'}) properties={'rating_mean': '3.9129589853738365', 'movieId': '1', 'title': 'Toy Story (1995)'}> end=<Node element_id='4:cd7ae953-9f93-4cfa-8372-358331f017a8:32677' labels=frozenset({'Genres'}) properties={'genres': 'Adventure'}> size=1>>
<Record p=<Path start=<Node element_id='4:cd7ae953-9f93-4cfa-8372-358331f017a8:27221' labels=frozenset({'Movies'}) properties={'rating_mean': '3.9129589853738365', 'movieId': '1', 'title': 'Toy Story (1995)'}> end=<Node element_id='4:cd7ae953-9f93-4cfa-8372-358331f017a8:32678' labels=frozenset({'Genres'}) properties={'genres': 'Animation'}> size=1>>
<Record p=<Path start=<Node element_id='4:cd7ae953-9f93-4cfa-8372-358331f017a8:27221' labels=frozenset({'Movies'}) properties={'rating_mean': '3.9129589853738365', 'movieId': '1', 'title': 'Toy Story (1995)'}> end=<Node element_id='4:cd7ae953-9f93-4cfa-8372-358331f017a8:32679' labels=frozenset({

In [None]:
cypher_query = """
LOAD CSV WITH HEADERS FROM 'file:///movies_similarity.csv' AS row
FIELDTERMINATOR '|'
MATCH (movie1:Movies {movieId: row.movieId})
MATCH (movie2:Movies {movieId: row.sim_moveId})
MERGE (movie1)-[:SIMILAR {relevance: row.relevance}]->(movie2);
"""
execute_cypher_query(cypher_query)

  result = session.write_transaction(lambda tx: tx.run(query))


In [None]:
q1='MATCH p=()-[:SIMILAR]->() RETURN p LIMIT 25'
nodes6=session.run(q1)
for node in nodes6:
    print(node)

<Record p=<Path start=<Node element_id='4:cd7ae953-9f93-4cfa-8372-358331f017a8:27221' labels=frozenset({'Movies'}) properties={'rating_mean': '3.9129589853738365', 'movieId': '1', 'title': 'Toy Story (1995)'}> end=<Node element_id='4:cd7ae953-9f93-4cfa-8372-358331f017a8:27803' labels=frozenset({'Movies'}) properties={'rating_mean': '3.673583057901824', 'movieId': '588', 'title': 'Aladdin (1992)'}> size=1>>
<Record p=<Path start=<Node element_id='4:cd7ae953-9f93-4cfa-8372-358331f017a8:27221' labels=frozenset({'Movies'}) properties={'rating_mean': '3.9129589853738365', 'movieId': '1', 'title': 'Toy Story (1995)'}> end=<Node element_id='4:cd7ae953-9f93-4cfa-8372-358331f017a8:27953' labels=frozenset({'Movies'}) properties={'rating_mean': '4.157047260560435', 'movieId': '745', 'title': 'Wallace & Gromit: A Close Shave (1995)'}> size=1>>
<Record p=<Path start=<Node element_id='4:cd7ae953-9f93-4cfa-8372-358331f017a8:27221' labels=frozenset({'Movies'}) properties={'rating_mean': '3.91295898537

In [None]:
# Define your Cypher query
q2 = "MATCH (u:Users)-[:WATCHED]->(m1:Movies) WHERE u.userId =~ '4' RETURN u.userId, m1.title, m1.rating_mean"

# Establish a session and run the query
with GraphDatabase.driver("bolt://localhost:7687", auth=("neo4j", "password")) as driver:
    with driver.session() as session:
        result = session.run(q2)

        # Initialize a counter
        count = 0

        # Iterate through the result and print the first 5 records
        for record in result:
            if count < 5:
                print("user:",record["u.userId"], "Movie:",record["m1.title"],"rating:",record["m1.rating_mean"])
                count += 1
            else:
                break  # Exit the loop after printing 5 records


user: 4 Movie: Dave (1993) rating: 3.6056766345666498
user: 4 Movie: Terminal Velocity (1994) rating: 2.918560606060606
user: 4 Movie: Secret Garden, The (1993) rating: 3.528159340659341
user: 4 Movie: Twelve Monkeys (a.k.a. 12 Monkeys) (1995) rating: 3.8912209433536447
user: 4 Movie: Ace Ventura: When Nature Calls (1995) rating: 2.619682151589242


In [None]:

# Define your Cypher query
q4 = """
MATCH (u:Users)-[:WATCHED]->(m1:Movies)-[s:SIMILAR]->(m2:Movies)
WHERE u.userId =~ '4'
RETURN u.userId, m1.title, m2.title, m2.rating_mean
"""

# Establish a session and run the query
with GraphDatabase.driver("bolt://localhost:7687", auth=("neo4j", "password")) as driver:
    with driver.session() as session:
        result = session.run(q4)
        count=0
        # Iterate through the result and print each record
        for record in result:
            if count < 5:
                print(record["u.userId"], record["m1.title"], record["m2.title"], record["m2.rating_mean"])
            else:
                break


4 Dave (1993) Much Ado About Nothing (1993) 3.875623663578047
4 Dave (1993) Speechless (1994) 3.1294117647058823
4 Dave (1993) Englishman Who Went Up a Hill But Came Down a Mountain, The (1995) 3.3337874659400546
4 Dave (1993) Pretty Woman (1990) 3.407504887953076
4 Dave (1993) American President, The (1995) 3.6997118155619595
4 Dave (1993) Englishman Who Went Up a Hill But Came Down a Mountain, The (1995) 3.328114126652749
4 Dave (1993) American President, The (1995) 3.6943488238668962
4 Dave (1993) Pretty Woman (1990) 3.413171884229179
4 Dave (1993) Speechless (1994) 3.0951417004048585
4 Dave (1993) Much Ado About Nothing (1993) 3.884257602862254
4 Terminal Velocity (1994) Blown Away (1994) 2.985655737704918
4 Terminal Velocity (1994) Broken Arrow (1996) 3.0883689232167195
4 Terminal Velocity (1994) Glimmer Man, The (1996) 2.642857142857143
4 Terminal Velocity (1994) Drop Zone (1994) 3.0440528634361232
4 Terminal Velocity (1994) Blown Away (1994) 3.021694214876033
4 Terminal Velocity

In [None]:
# Define your Cypher query
q4 = """
MATCH (u:Users)-[:WATCHED]->(m1:Movies)-[s:SIMILAR]->(m2:Movies)
WHERE u.userId =~ '4'
RETURN u.userId, m1.title, m2.title, m2.rating_mean
"""

# Establish a session and run the query
with GraphDatabase.driver("bolt://localhost:7687", auth=("neo4j", "password")) as driver:
    with driver.session() as session:
        result = session.run(q4)

        # Initialize a counter
        count = 0

        # Iterate through the result and print each record, up to 5 records
        for record in result:
            print("user:",record["u.userId"],"Movie:",record["m1.title"], record["m2.title"], record["m2.rating_mean"])
            count += 1

            # Limit to the first 5 records
            if count >= 5:
                break

user: 4 Movie: Dave (1993) Much Ado About Nothing (1993) 3.875623663578047
user: 4 Movie: Dave (1993) Speechless (1994) 3.1294117647058823
user: 4 Movie: Dave (1993) Englishman Who Went Up a Hill But Came Down a Mountain, The (1995) 3.3337874659400546
user: 4 Movie: Dave (1993) Pretty Woman (1990) 3.407504887953076
user: 4 Movie: Dave (1993) American President, The (1995) 3.6997118155619595


In [None]:
# Define your Cypher query
q5 = """
MATCH (u1:Users)-[:WATCHED]->(m3:Movies)
WHERE u1.userId =~ '5'
WITH [i in m3.movieId | i] as movies
MATCH path = (u:Users)-[:WATCHED]->(m1:Movies)-[s:SIMILAR]->(m2:Movies),
(m2)-[:GENRES]->(g:Genres),
(u)-[:FAVORITE]->(g)
WHERE u.userId =~ '5' and not m2.movieId in movies
RETURN distinct u.userId as userId, g.genres as genres,
m2.title as title, m2.rating_mean as rating
ORDER BY m2.rating_mean DESCENDING
LIMIT 5
"""

# Establish a session and run the query
with GraphDatabase.driver("bolt://localhost:7687", auth=("neo4j", "password")) as driver:
    with driver.session() as session:
        result = session.run(q5)

        # Iterate through the result and print each record
        for record in result:
            print("user:",record["userId"],'genre:', record["genres"], 'movie:',record["title"],'rating:', record["rating"])


user: 5 genre: Drama movie: Godfather, The (1972) rating: 4.36725018234865
user: 5 genre: Drama movie: Godfather, The (1972) rating: 4.351879054963888
user: 5 genre: Drama movie: Schindler's List (1993) rating: 4.306500301871604
user: 5 genre: Drama movie: Schindler's List (1993) rating: 4.301384875958952
user: 5 genre: Drama movie: Casablanca (1942) rating: 4.280866721177433
