In [1]:
import pandas as pd
from neo4j import GraphDatabase
import csv
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.metrics.pairwise import pairwise_distances
import ast


In [4]:
# read sample and scores matrix

sample = pd.read_csv("data/sample.csv")
rec_scores = pd.read_csv("data/scores_matrix.csv", sep=" ")


In [5]:
rec_scores

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,1990,1991,1992,1993,1994,1995,1996,1997,1998,1999
0,1.000000,0.045370,0.046169,0.00000,0.004542,0.000000,0.041667,0.050000,0.006731,0.007784,...,0.055803,0.050000,0.035714,0.045236,0.074890,0.000000,0.006462,0.044794,0.000000,0.000000
1,0.045370,1.000000,0.056569,0.00000,0.000000,0.000000,0.054084,0.062500,0.009307,0.050000,...,0.175092,0.076976,0.041667,0.125000,0.083333,0.007322,0.000000,0.125000,0.077099,0.032494
2,0.046169,0.056569,1.000000,0.00000,0.000000,0.007144,0.003006,0.000000,0.000000,0.129733,...,0.062500,0.005018,0.100000,0.010722,0.000000,0.008301,0.000000,0.125000,0.062500,0.009268
3,0.000000,0.000000,0.000000,1.00000,0.250000,0.000000,0.086890,0.004279,0.000000,0.083333,...,0.029948,0.000000,0.062500,0.000000,0.000000,0.125000,0.250000,0.000000,0.125000,0.088540
4,0.004542,0.000000,0.000000,0.25000,1.000000,0.000000,0.083333,0.000000,0.000000,0.083333,...,0.000000,0.000000,0.062500,0.000000,0.000000,0.132516,0.250000,0.000000,0.130717,0.062500
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1995,0.000000,0.007322,0.008301,0.12500,0.132516,0.000000,0.071638,0.000000,0.000000,0.062500,...,0.010550,0.009993,0.058309,0.000000,0.000000,1.000000,0.147623,0.000000,0.088772,0.132897
1996,0.006462,0.000000,0.000000,0.25000,0.250000,0.000000,0.088472,0.000000,0.000000,0.083333,...,0.014703,0.000000,0.067903,0.000000,0.000000,0.147623,1.000000,0.000000,0.125000,0.062500
1997,0.044794,0.125000,0.125000,0.00000,0.000000,0.000000,0.050000,0.062500,0.005808,0.131098,...,0.166667,0.065475,0.041667,0.085107,0.087883,0.000000,0.000000,1.000000,0.062500,0.000000
1998,0.000000,0.077099,0.062500,0.12500,0.130717,0.000000,0.064963,0.010671,0.000000,0.166667,...,0.089592,0.000000,0.125000,0.000000,0.005415,0.088772,0.125000,0.062500,1.000000,0.057734


In [6]:
# get unique genres
genres = []
sample['genres'].apply(lambda row: genres.extend(ast.literal_eval(row)))

# get unique directors
directors = []
sample['Director'].apply(lambda row: directors.extend(ast.literal_eval(row)))


genres = set(genres)
directors = set(directors)

In [7]:
directors

{'Mike Young',
 'Mabrouk El Mechri',
 'Bryan Singer',
 'Xavier S. Puslowski',
 'Richard Quine',
 'Lucía Puenzo',
 'Sammo Hung',
 'Micael Preysler',
 'Peter Berg',
 'Jihun Kim',
 'Tatsuya Ishihara',
 'Mick Jackson',
 'Craig Atkinson',
 'Daniel Lind Lagerlöf',
 'Jerry Schatzberg',
 'Dewey Nicks',
 'Julie Bertuccelli',
 'Gregory Hoblit',
 'Reggie Rock Bythewood',
 'Douglas Tirola',
 'Francesco Rosi',
 'Ben Verbong',
 'Michael Curtiz',
 'Paris Leonti',
 'Ari Novak',
 'Damon Santostefano',
 'Milan Luthria',
 'Leslie Goodwins',
 'Jonathan Parker',
 'Andrew Jarecki',
 'Ted Bafaloukos',
 'LazRael Lison',
 'Keoni Waxman',
 'Alexander Sokurov',
 'Tokuzô Tanaka',
 'Paul Warner',
 'Marcus Warren',
 'Tobe Hooper',
 'Agnès Jaoui',
 'Jim Hanon',
 'Paul Sen',
 'Kevin Lima',
 'Sam Mendes',
 'Robert Mulligan',
 'John Pasquin',
 'Donald G. Jackson',
 'Cao Hamburger',
 'Peter Glenville',
 'Aleksandr Kott',
 'James Wong',
 'Andrew Rossi',
 'Jason Bourque',
 'Yvan Attal',
 'Aleksey Balabanov',
 'Jonathan En

## Setting up graph database

In [10]:
uri = "neo4j://localhost:7687"
user = "neo4j"
password = "vampire-float-olivia-maestro-sleep-4222" 

#### Make sure you can run this on your browser - initiate a Neo4j server ####

In [11]:
driver = GraphDatabase.driver(uri, auth=(user, password))

In [43]:
# kill switch
def delete_all_data(tx):
    tx.run("MATCH (n) DETACH DELETE n")

with driver.session() as session:
    session.write_transaction(delete_all_data)

driver.close()

  session.write_transaction(delete_all_data)


### Movie node

In [44]:
def create_movie_node(tx, row):
    tx.run("""
    CREATE (:movie {
            title: $title,
            overview: $overview,
            vote_average: $vote_average, 
            vote_count: $vote_count

    })
    """, title=row['title'], overview=row['overview'], vote_average=row['vote_average'],
                vote_count=row['vote_count'])
    

with driver.session() as session:
    for index, row in sample.iterrows():
        session.write_transaction(create_movie_node, row)

    

  session.write_transaction(create_movie_node, row)


### Director Node

In [None]:
def create_director_node(tx, row):
    tx.run("""
    
    
    CREATE (:director {
            name
    })
    
    
    """)