In [66]:
# Install the Neo4j Python driver
!pip install neo4j

import pandas as pd
from neo4j import GraphDatabase

# Create a connection class
class Neo4jConnection:
    """Small convenience wrapper around a Neo4j driver.

    The driver is created eagerly in ``__init__`` (unencrypted connection).
    Errors while creating the driver or running a query are printed rather
    than raised, so callers must check for a ``None`` result from ``query``.

    The object can also be used as a context manager, in which case the
    driver is closed when the ``with`` block exits.
    """

    def __init__(self, uri, user, pwd):
        """Store the connection settings and try to open the driver.

        Args:
            uri: Address of the Neo4j server.
            user: User name used for authentication.
            pwd: Password used for authentication.
        """
        self.__uri = uri
        self.__user = user
        self.__pwd = pwd
        self.__driver = None
        try:
            self.__driver = GraphDatabase.driver(
                self.__uri, auth=(self.__user, self.__pwd), encrypted=False
            )
        except Exception as e:
            # Best-effort: report the problem; query() will refuse to run
            # later because the driver is still None.
            print("Failed to create the driver:", e)

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()
        # Never suppress exceptions raised inside the ``with`` block.
        return False

    def query(self, query, db=None, parameters=None):
        """Run one Cypher statement and return all of its records.

        Args:
            query: Cypher text to execute.
            db: Optional database name; the driver's default is used when
                omitted.
            parameters: Optional mapping of Cypher parameters (referenced as
                ``$name`` in ``query``). Passing values this way avoids
                building query text by string concatenation.

        Returns:
            A list with the records produced by the statement, or ``None``
            when the statement failed (the error is printed).

        Raises:
            AssertionError: If the driver could not be created.
        """
        assert self.__driver is not None, "Driver not initialized!"
        session = None
        response = None
        try:
            session = self.__driver.session(database=db) if db is not None else self.__driver.session()
            # Materialise the result before the session is closed.
            response = list(session.run(query, parameters))
        except Exception as e:
            print("Query failed:\n", e)
        finally:
            if session is not None:
                session.close()
        return response

    def close(self):
        """Close the underlying driver if one was created."""
        if self.__driver is not None:
            self.__driver.close()





In [73]:
# Raw event log of Metrica Sports' "Sample_Game_1".
url = 'https://raw.githubusercontent.com/metrica-sports/sample-data/master/data/Sample_Game_1/Sample_Game_1_RawEventsData.csv'

# In-memory copy of the event log; the Python helpers below scan it row by row.
events = pd.read_csv(url, delimiter = ",")

# Common Cypher prefix: stream the same CSV inside Neo4j, one `csvLine` map per row.
q_load_CSV = "LOAD CSV WITH HEADERS FROM '" + url + "' AS csvLine "

# One Player node per distinct (team, name) pair found in the `From` column.
q_create_Players="""
MERGE(p:Player{team:(csvLine.Team), name:(csvLine.From)})
"""

# For every row that has a receiving player, link sender -> receiver with a
# relationship whose type is the row's `Type` value (created through APOC
# because the type is only known at run time).
# `IS NOT NULL` is used for the null check instead of `exists(...)`, which is
# deprecated since Neo4j 4.3 and no longer available in Neo4j 5.
q_create_PASS="""
MATCH (p:Player{team:(csvLine.Team), name:(csvLine.From)}), (p2:Player{name:(csvLine.To)})
WHERE csvLine.To IS NOT NULL
CALL apoc.create.relationship(p, csvLine.Type, {start_time:TOFLOAT(csvLine.`Start Time [s]`), end_time:TOFLOAT(csvLine.`End Time [s]`)}, p2)
YIELD rel
RETURN rel
"""

# returns Player who had the ball before 
def find_challenged_pl(current_index, challenging_pl, events_df=None):
    """Find the player who most recently had the ball before an event.

    Walks backwards through the event log starting at the row just before
    ``current_index``. For each earlier row the candidate is the receiver
    (``To``) when one is recorded, otherwise the acting player (``From``).
    The first candidate that differs from ``challenging_pl`` is returned.

    Args:
        current_index: Positional index of the event being analysed.
        challenging_pl: Name of the player performing that event; this
            player is never returned.
        events_df: Event log to search, with ``From`` and ``To`` columns.
            Defaults to the module-level ``events`` frame.

    Returns:
        The name of the previous ball holder, or ``None`` when no earlier
        event involves a different player.
    """
    if events_df is None:
        events_df = events

    # Iterate instead of recursing: no recursion-depth limit, and the scan
    # stops at the first row instead of wrapping around to the last one
    # (which is what ``iloc[-1]`` would do).
    for position in range(current_index - 1, -1, -1):
        earlier = events_df.iloc[position]
        receiver = earlier['To']
        candidate = earlier['From'] if pd.isna(receiver) else receiver
        if candidate != challenging_pl:
            return candidate
    return None

# 
def create_relation(relation, conn):
    """Create ``relation`` edges for every event of that type.

    For each row of the module-level ``events`` frame whose ``Type`` equals
    ``relation``, an edge is created from the acting player (``From``) to
    the player who had the ball before (see ``find_challenged_pl``). The
    edge carries ``start_time``, ``end_time`` and ``subtype`` properties.

    Args:
        relation: Event type to process; also used as the relationship type.
        conn: Connection object exposing ``query(cypher, db)``.
    """
    for index, row in events.iterrows():
        if row['Type'] != relation:
            continue

        challenging = row['From']
        challenged = find_challenged_pl(index, challenging)
        if challenged is None:
            # No previous ball holder could be determined; nothing to link.
            continue

        match_clause = (
            "MATCH (p1:Player{name:'" + challenging + "'}), "
            + "(p2:Player{name: '" + challenged + "'}) "
        )
        # Kept in its own variable: `relation` must stay untouched because it
        # is compared against every following row. The type is back-tick
        # quoted so that event types containing spaces are valid Cypher.
        create_clause = (
            "CREATE (p1)-[r:`" + relation + "`{start_time: " + str(row['Start Time [s]'])
            + ", end_time: " + str(row['End Time [s]'])
            + ", subtype: '" + str(row.Subtype) + "'}]->(p2)"
        )
        conn.query(match_clause + create_clause, 'neo4j')
        


In [81]:
def _link_to_previous_holder(conn, index, row, relation_type):
    """Create one `relation_type` edge from the row's player to the previous ball holder."""
    challenging = row['From']
    challenged = find_challenged_pl(index, challenging)
    if challenged is None:
        # No previous ball holder could be determined; nothing to link.
        return
    match = "MATCH (p1:Player{name:'" + challenging + "'}), " + "(p2:Player{name: '" + challenged + "'}) "
    relation = (
        "CREATE (p1)-[r:" + relation_type + "{start_time: " + str(row['Start Time [s]'])
        + ", end_time: " + str(row['End Time [s]'])
        + ", subtype: '" + str(row.Subtype) + "'}]->(p2)"
    )
    conn.query(match + relation, 'neo4j')


def populate():
    '''
    Read the CSV file and create Player nodes with PASS, CHALLENGE and
    RECOVERY relations between them.

    Connection settings are taken from the environment variables NEO4J_URI,
    NEO4J_USER and NEO4J_PASSWORD when they are set; otherwise the local
    development defaults below are used.
    '''
    import os

    # NOTE: the fallback password is kept only so existing local setups keep
    # working; prefer setting NEO4J_PASSWORD instead of relying on it.
    conn = Neo4jConnection(
        uri=os.environ.get("NEO4J_URI", "bolt://localhost:7687"),
        user=os.environ.get("NEO4J_USER", "neo4j"),
        pwd=os.environ.get("NEO4J_PASSWORD", "Qwerty!234"),
    )
    try:
        # Start from an empty database, then let Neo4j load players and the
        # relations that have an explicit receiver directly from the CSV.
        conn.query("CREATE OR REPLACE DATABASE neo4j", 'neo4j')
        conn.query(q_load_CSV + q_create_Players, 'neo4j')
        conn.query(q_load_CSV + q_create_PASS, 'neo4j')

        # CHALLENGE and RECOVERY rows have no receiver in the CSV, so the
        # target player is derived from the preceding events.
        for index, row in events.iterrows():
            if row['Type'] in ('CHALLENGE', 'RECOVERY'):
                _link_to_previous_holder(conn, index, row, row['Type'])
    finally:
        # Release the driver even when something above raises.
        conn.close()
    print("END")

In [82]:
# Run the import: connects to Neo4j, issues the load queries and prints "END" when finished.
populate()

END
