# Load Queries Notebook

In [1]:
# Import necessary packages 
import getpass
import os
import time
from datetime import timedelta as td

from neo4j import GraphDatabase

In [18]:
# Record the wall-clock start time; the last cell subtracts it from time.time()
# to report the notebook's total run time.
start_time = time.time()

In [19]:
# Instantiate the Neo4j driver object.
# Connection settings are read from the environment so the database password is
# not stored in the notebook. NEO4J_URI and NEO4J_USER fall back to the local
# defaults this notebook has always used; if NEO4J_PASSWORD is unset the password
# is prompted for interactively instead.
NEO4J_URI = os.environ.get("NEO4J_URI", "bolt://localhost:7687")
NEO4J_USER = os.environ.get("NEO4J_USER", "neo4j")
NEO4J_PASSWORD = os.environ.get("NEO4J_PASSWORD") or getpass.getpass("Neo4j password: ")
driver = GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USER, NEO4J_PASSWORD))

#### Clearing database

In [20]:
# Reset the database: delete every node and relationship, then (re)create the
# uniqueness constraint for Organization nodes.
with driver.session() as session:
    # WARNING: destructive. Comment this line out to keep the existing graph.
    session.run('MATCH (n) DETACH DELETE n')
    session.run('DROP CONSTRAINT `orgNameConstraint` IF EXISTS')
    # Organization nodes in this notebook are keyed on `orgName`, so the constraint
    # must target that property. It previously targeted `name`, which no
    # Organization node here ever sets, so it enforced nothing.
    # NOTE: the ON ... ASSERT form is Neo4j 4.x syntax (5.x uses FOR ... REQUIRE).
    session.run('CREATE CONSTRAINT orgNameConstraint ON (org:Organization) ASSERT org.orgName IS UNIQUE')


#### Organization

In [21]:
# Build one Organization node per distinct, non-null `organization` value
# found in the affiliations CSV.
organization_nodes_query = '''
    LOAD CSV WITH HEADERS FROM "file:///master_2020_affiliations.csv" AS row
    WITH row WHERE NOT row.organization IS null
    MERGE (o:Organization {orgName:row.organization})
        RETURN count(o);
    '''
with driver.session() as neo4j_session:
    neo4j_session.run(organization_nodes_query)

#### Authors

In [22]:
# Create Person nodes from the Web of Science authors CSV, keyed on fullName.
# Rows without a wosFullName are skipped: MERGE cannot match on a null property
# value, and the Organization and Topic loads in this notebook apply the same guard.
with driver.session() as session:
    session.run('''
    LOAD CSV WITH HEADERS FROM 'file:///master_2020_authors.csv' AS row
    WITH row WHERE row.wosFullName IS NOT NULL
    MERGE (p:Person {fullName:row.wosFullName})
    SET p.firstName = row.wosFirstName,
        p.lastName = row.wosLastName,
        p.Email = row.emailAddress
    ''')

#### Person AFFILIATED_WITH Organization

In [23]:
# Link each Person to an Organization with AFFILIATED_WITH, one affiliations-CSV
# row at a time. Rows whose person or organization has no matching node add nothing.
affiliation_query = '''
    LOAD CSV WITH HEADERS FROM 'file:///master_2020_affiliations.csv' AS row 
    WITH row['organization'] as orgName, row["fullName"] as full
    MATCH (p:Person {fullName:full})
    MATCH (o:Organization {orgName:orgName})
    MERGE (p)-[aw:AFFILIATED_WITH]->(o)
    '''
with driver.session() as neo4j_session:
    neo4j_session.run(affiliation_query)

#### Publications

In [24]:
# Create Publication nodes from the Web of Science articles CSV, keyed on docID.
# Rows without a docID are skipped: MERGE cannot match on a null property value,
# and the Organization and Topic loads in this notebook apply the same guard.
with driver.session() as session:
    session.run('''
    LOAD CSV WITH HEADERS FROM 'file:///master_2020_articles.csv' AS row
    WITH row['title'] as title, row["date"] as date, row["docID"] as docID, row["abstract"] as abstract
    WHERE docID IS NOT NULL
    MERGE (pu:Publication {docID:docID})
    SET pu.title = title, pu.date = date, pu.abstract = abstract
    ''')

#### Person COAUTHOR_OF Publication

In [25]:
# Create the Person COAUTHOR_OF Publication relationship for every authors-CSV
# row whose person (by fullName) and publication (by docID) both already exist.
coauthor_query = '''
    LOAD CSV WITH HEADERS FROM 'file:///master_2020_authors.csv' AS row 
    MATCH (p:Person {fullName:row.wosFullName})
    MATCH (pu:Publication {docID:row.docID})
    MERGE (p)-[ca:COAUTHOR_OF]->(pu)
    '''
with driver.session() as neo4j_session:
    neo4j_session.run(coauthor_query)

#### Research Keyword Topic

In [26]:
# Build one Topic node per distinct, non-null `keyword` value in the keywords CSV.
topic_nodes_query = '''
    LOAD CSV WITH HEADERS FROM 'file:///master_2020_keywords.csv' AS row
    WITH row WHERE NOT row.keyword IS null
    MERGE (t:Topic {name:row.keyword})
    '''
with driver.session() as neo4j_session:
    neo4j_session.run(topic_nodes_query)

#### Publication RELATED_TO topic

In [27]:
# Create the Publication RELATED_TO Topic relationship for every keywords-CSV row
# whose publication (by docID) and topic (by keyword) both already exist.
related_to_query = '''
    LOAD CSV WITH HEADERS FROM 'file:///master_2020_keywords.csv' AS row 
    MATCH (pu:Publication {docID:row.docID})
    MATCH (t:Topic {name:row.keyword})
    MERGE (pu)-[r:RELATED_TO]->(t)
    RETURN count(r)
    '''
with driver.session() as neo4j_session:
    neo4j_session.run(related_to_query)

#### Organization HAS_CAPABILITIES_IN topic

In [28]:
# Derive Organization HAS_CAPABILITIES_IN Topic: an organization is linked to a
# topic whenever a Person AFFILIATED_WITH it is COAUTHOR_OF a Publication that is
# RELATED_TO that topic. The relationship also stores the topic name.
capabilities_query = '''
    MATCH (o1:Organization)-[aff1:AFFILIATED_WITH]-(p1:Person)-[co1:COAUTHOR_OF]-(pub1:Publication)-[re:RELATED_TO]-(t:Topic)
    MERGE(o1)-[i:HAS_CAPABILITIES_IN {topic:t.name}]->(t)
    '''
with driver.session() as neo4j_session:
    neo4j_session.run(capabilities_query)

#### Person "INTERESTED_IN" Topic

In [29]:
# Derive Person INTERESTED_IN Topic: a person is linked to a topic whenever they
# are COAUTHOR_OF a Publication that is RELATED_TO that topic. The relationship
# also stores the topic name.
interest_query = '''
    MATCH (p1:Person)-[co1:COAUTHOR_OF]-(pub1:Publication)-[re:RELATED_TO]-(t:Topic)
    MERGE(p1)-[i:INTERESTED_IN {topic:t.name}]->(t)
    '''
with driver.session() as neo4j_session:
    neo4j_session.run(interest_query)

In [30]:
# Report the wall-clock time elapsed since start_time was recorded,
# rounded to whole seconds and printed as H:MM:SS.
finish_time = time.time()
time_took = finish_time - start_time
print(f"Total runtime: {td(seconds=round(time_took))}")

Total runtime: 0:00:06
