In [None]:
# Import libraries
import requests
from bs4 import BeautifulSoup
import pandas as pd
from neo4j import __version__ as neo4j_version
from neo4j import GraphDatabase
import os
import os.path
import ssl
import stat
import subprocess
import sys

# Swap the ssl module's default HTTPS context factory for the unverified one, so
# stdlib HTTPS clients that rely on the default context skip certificate checks.
# NOTE(review): this turns off TLS certificate verification for the whole kernel
# and relies on private ssl attributes — presumably a workaround for local
# certificate errors; confirm it is still needed before sharing this notebook.
ssl._create_default_https_context = ssl._create_unverified_context

In [None]:
#Define the Neo4j connection class

class Neo4jConnection:
    """Small wrapper around a Neo4j driver that runs each query in its own session.

    The driver is created in ``__init__``; if creation fails the error is printed
    and the instance is left without a driver, in which case ``query`` fails its
    assertion. Instances can also be used as context managers so the driver is
    closed when the ``with`` block exits.
    """

    def __init__(self, uri, user, pwd):
        """Store the connection settings and try to create the driver.

        Args:
            uri: Connection URI handed to ``GraphDatabase.driver``.
            user: Username for authentication.
            pwd: Password for authentication.
        """
        self.__uri = uri
        self.__user = user
        self.__pwd = pwd
        self.__driver = None
        try:
            self.__driver = GraphDatabase.driver(self.__uri, auth=(self.__user, self.__pwd))
        except Exception as e:
            # Best effort: report the problem and leave the driver unset.
            print("Failed to create the driver:", e)

    def __enter__(self):
        """Return the connection itself so it can be used in a ``with`` block."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the driver on leaving a ``with`` block; exceptions propagate."""
        self.close()
        return False

    def close(self):
        """Close the underlying driver if one was created."""
        if self.__driver is not None:
            self.__driver.close()

    def query(self, query, db=None, parameters=None):
        """Run a Cypher query in a fresh session and return its records.

        Args:
            query: Cypher query text.
            db: Optional database name; the driver's default session is used
                when omitted.
            parameters: Optional dict of query parameters (referenced as
                ``$name`` in the query text). Passing values this way avoids
                building queries by string concatenation.

        Returns:
            A list of the records produced by the query, or ``None`` if the
            query failed (the error is printed, not raised).

        Raises:
            AssertionError: If the driver was never created.
        """
        assert self.__driver is not None, "Driver not initialized!"
        session = None
        response = None
        try:
            if db is not None:
                session = self.__driver.session(database=db)
            else:
                session = self.__driver.session()
            # Materialise the result before the session is closed.
            response = list(session.run(query, parameters))
        except Exception as e:
            print("Query failed:", e)
        finally:
            if session is not None:
                session.close()
        return response

In [None]:
# Extract names from cells
def getNames(text):
    """Split a table cell such as ``"A, B and C"`` into a list of names.

    The text is split on commas and on the word `` and `` (with surrounding
    spaces); each piece is stripped of whitespace and empty pieces are dropped.

    Args:
        text: Cell contents. Non-string values (for example the NaN pandas
            uses for an empty cell, or ``None``) are treated as "no names".

    Returns:
        A list of non-empty, whitespace-stripped name strings in original order.
    """
    if not isinstance(text, str):
        return []

    names = []
    for chunk in text.split(','):
        for part in chunk.split(' and '):
            # Strip before testing for emptiness so whitespace-only fragments
            # (e.g. after a trailing comma) are not recorded as names.
            name = part.strip()
            if name:
                names.append(name)
    return names

In [None]:
### CONNECTING TO NEO4J SERVER ###

# Note that this is running locally on my laptop
# Server must be running for any Neo4j interactions to work

# Get uri, login and password from file 
# neo4j.txt is expected to hold three lines: connection URI, username, password.
# The context manager closes the file even if reading fails.
with open("neo4j.txt", "r") as f:
    lines = [line.rstrip() for line in f]

if len(lines) < 3:
    raise ValueError("neo4j.txt must contain three lines: uri, username, password")

# Fall back to the local bolt endpoint if the URI line is blank.
uri = lines[0] or "bolt://localhost:7687"
NEOusername = lines[1]
NEOpassword = lines[2]

# Connect to Neo4j using the URI read from the file (previously the value was
# read but a hard-coded URI was passed instead).
conn = Neo4jConnection(uri=uri, user=NEOusername, pwd=NEOpassword)

In [None]:
# Pull tables from Wikipedia
# pd.read_html downloads the page and returns a list with one DataFrame per
# HTML table it finds, so df_list is indexed by table position on the page.
# NOTE(review): later cells slice this list by position, which presumably
# depends on the page's current layout — confirm after any page change.
url = 'https://en.wikipedia.org/wiki/List_of_Hallmark_Channel_Original_Movies'
df_list = pd.read_html(url)

In [None]:
# Columns every movie table must provide for a row to be loaded.
REQUIRED_COLUMNS = ('Movie', 'Starring', 'Director', 'Original airdate')


def _cypher_quote(value):
    """Return ``value`` as a double-quoted Cypher string literal.

    Backslashes and double quotes are escaped so scraped text (titles or names
    containing quotes) cannot terminate the literal and break or alter the query.
    """
    text = str(value)
    escaped = text.replace("\\", "\\\\").replace('"', '\\"')
    return '"' + escaped + '"'


def _names_from_cell(cell):
    """Split a table cell into non-empty names; missing (non-string) cells give []."""
    if not isinstance(cell, str):
        return []
    return [name for name in getNames(cell) if name]


def _link_people(names, relationship, movie_pattern):
    """Merge a Person node per name and link it to the movie with ``relationship``."""
    for name in names:
        person = _cypher_quote(name)
        conn.query("MERGE (p:Person {Name:" + person + "})", db='neo4j')
        # MERGE (not CREATE) keeps the relationship unique when the cell is re-run.
        conn.query(
            "MATCH (p:Person {Name:" + person + "}), " + movie_pattern
            + " MERGE (p)-[r:" + relationship + "]->(m)",
            db='neo4j',
        )


# NOTE(review): tables 7..25 are selected by position on the page; presumably
# these are the movie listings — confirm if the Wikipedia layout changes.
for df in df_list[7:26]:
    missing = [col for col in REQUIRED_COLUMNS if col not in df.columns]
    if missing:
        print("Skipping table without columns:", missing)
        continue

    rows = zip(df['Movie'], df['Starring'], df['Director'], df['Original airdate'])
    for movie, stars, directors, date in rows:
        # A row without a title cannot be stored as a Movie node.
        if pd.isna(movie):
            continue

        print(movie)

        # Identify the movie by title AND air date so that two movies sharing a
        # title do not receive each other's cast and directors.
        date_text = "" if pd.isna(date) else date
        movie_props = (
            "{Name:" + _cypher_quote(movie) + ", Date:" + _cypher_quote(date_text) + "}"
        )
        movie_pattern = "(m:Movie " + movie_props + ")"

        # MERGE makes the load idempotent: re-running does not duplicate movies.
        conn.query("MERGE " + movie_pattern, db='neo4j')

        _link_people(_names_from_cell(stars), "ACTED_IN", movie_pattern)
        _link_people(_names_from_cell(directors), "DIRECTED", movie_pattern)
        
        
        
        
        