In [1]:
import os

import numpy as np
import pandas as pd
from neo4j import GraphDatabase

In [2]:
class Neo4jConnection:
    """Small convenience wrapper around a Neo4j driver.

    The driver is created in the constructor. If creation fails, the error is
    printed and the instance is left without a driver; ``query`` then fails
    its assertion and ``close`` does nothing.

    The instance can also be used as a context manager (``with ... as conn``),
    which closes the driver when the block exits.
    """

    def __init__(self, uri, user, pwd):
        """Create the underlying driver.

        Args:
            uri: Connection URI handed to ``GraphDatabase.driver``.
            user: User name, first element of the ``auth`` tuple.
            pwd: Password, second element of the ``auth`` tuple.
        """
        self.__uri = uri
        self.__user = user
        self.__pwd = pwd
        self.__driver = None
        try:
            self.__driver = GraphDatabase.driver(self.__uri, auth=(self.__user, self.__pwd))
        except Exception as e:
            # Best effort: report the problem and keep the instance usable
            # enough for close() to be called safely.
            print("Failed to create the driver:", e)

    def __enter__(self):
        """Return the connection itself so it can be bound with ``as``."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the driver; exceptions from the ``with`` body still propagate."""
        self.close()

    def close(self):
        """Close the driver if one was created."""
        if self.__driver is not None:
            self.__driver.close()

    def query(self, query, db=None, parameters=None):
        """Run a query in a fresh session and return all records as a list.

        Args:
            query: Query text passed to ``session.run``.
            db: Optional database name; when omitted the session is opened
                without a ``database`` argument.
            parameters: Optional mapping of query parameters, forwarded to
                ``session.run`` so values do not have to be spliced into the
                query string.

        Returns:
            A list with everything ``session.run`` yielded, or ``None`` when
            opening the session or running the query raised (the error is
            printed instead of propagated).

        Raises:
            AssertionError: If the driver could not be created.
        """
        assert self.__driver is not None, "Driver not initialized!"
        session = None
        response = None
        try:
            session = self.__driver.session(database=db) if db is not None else self.__driver.session()
            # Materialise the result before the session is closed.
            response = list(session.run(query, parameters))
        except Exception as e:
            print("Query failed:", e)
        finally:
            if session is not None:
                session.close()
        return response

In [3]:
# Connection settings. Each value can be overridden with an environment
# variable (NEO4J_URI, NEO4J_USER, NEO4J_PASSWORD) so real credentials never
# have to be written into the notebook; the fallbacks are the local defaults
# this notebook was written with.
# URI examples: "neo4j://localhost", "neo4j+s://xxx.databases.neo4j.io"
URI = os.environ.get("NEO4J_URI", "neo4j://localhost:7687")
AUTH = (
    os.environ.get("NEO4J_USER", "neo4j"),
    os.environ.get("NEO4J_PASSWORD", "12345678"),
)

# Check connectivity up front with a throw-away driver, so a bad URI or bad
# credentials surface here rather than in a later query cell.
with GraphDatabase.driver(URI, auth=AUTH) as driver:
    driver.verify_connectivity()

# Long-lived connection used by the query cells below.
conn = Neo4jConnection(uri=URI, user=AUTH[0], pwd=AUTH[1])

<h3>Adding Reviews</h3>
First, we retrieve all the reviewers and the papers they have reviewed.

In [4]:
# Fetch one row per REVIEWS relationship: the reviewer's name and the paper id.
query = '''match (a:Author)-[:REVIEWS]-> (p:Paper) return a.name, p.id'''
res = conn.query(query=query)

# conn.query() prints the error and returns None when the query fails.
# pd.DataFrame(None, ...) would silently give an empty frame, and the export
# further down would then overwrite the CSV with no rows, so stop here instead.
if res is None:
    raise RuntimeError("Could not retrieve the reviewers: the REVIEWS query failed.")

df = pd.DataFrame(res, columns=['reviewer', 'paper_id'])

We assign a random decision and a corresponding text (review) to each review.

The decision can be either "Accepted" or "Rejected".
Once the decision has been chosen, a corresponding text is randomly chosen.

In [5]:
# Candidate review texts, keyed by the decision they belong to.
decisions = {
    'Accepted': [
        "Well researched and clearly presented.",
        "Innovative and thought-provoking.",
        "Solid contribution to the field.",
        "Thorough and well-written.",
        "Excellent methodology and analysis.",
        "Significant and impactful findings.",
        "Well-structured and coherent argument.",
        "Valuable addition to the literature.",
        "Original and insightful research.",
        "Compelling and well-supported conclusions.",
    ],
    'Rejected': [
        "Insufficient research design and methodology.",
        "Lack of original contribution to the field.",
        "Weak argument and analysis.",
        "Inadequate data and sample size.",
        "Unconvincing or unsupported conclusions.",
        "Poorly written and difficult to understand.",
        "Not a good fit for the journal's scope.",
        "Limited relevance and significance.",
        "Inconsistent or contradictory results.",
        "Fundamental flaws in the study design.",
    ],
}

# Fixed seed so that Restart & Run All regenerates the same sample data.
REVIEW_SEED = 42
rng = np.random.default_rng(REVIEW_SEED)
decision_labels = list(decisions)

# One independent draw per row, i.e. per (reviewer, paper) review. Selecting
# rows by reviewer name instead would give every review written by the same
# person the same decision and text.
df['decision'] = [str(label) for label in rng.choice(decision_labels, size=len(df))]
# The text is drawn from the list that matches the decision chosen for that row.
df['review'] = [str(rng.choice(decisions[label])) for label in df['decision']]


We export the decisions and reviews.

In [6]:
# Write the generated reviews to the shared test-data folder. No `index`
# argument is passed, so pandas' default of also writing the index applies.
reviews_csv_path = '../../../testData/sampleReviews.csv'
df.to_csv(reviews_csv_path)

<h3>Assigning Affiliation To Authors</h3>
We retrieve the names of all the authors in our graph.

In [7]:
# Fetch the name of every Author node.
query = '''match (a:Author) return a.name'''
res = conn.query(query=query)

# conn.query() prints the error and returns None when the query fails.
# pd.DataFrame(None, ...) would silently give an empty frame, and the export
# further down would then overwrite the CSV with no rows, so stop here instead.
if res is None:
    raise RuntimeError("Could not retrieve the authors: the Author query failed.")

df = pd.DataFrame(res, columns=['author'])

We assign a random affiliation type and a corresponding affiliation to each author.

First, the affiliation type is chosen, either "University" or "Company". Next, we choose a corresponding affiliation of that type.

In [8]:
# Candidate affiliations, keyed by affiliation type.
affiliation = {
    'University': [
        "Harvard University",
        "Massachusetts Institute of Technology",
        "Stanford University",
        "California Institute of Technology",
        "Princeton University",
        "Yale University",
        "Columbia University",
        "Duke University",
        "University of California, Berkeley",
        "University of Chicago",
        "Cornell University",
        "Johns Hopkins University",
        "Northwestern University",
        "University of Michigan, Ann Arbor",
        "University of Pennsylvania",
        "University of California, Los Angeles",
        "University of Virginia",
        "Dartmouth College",
        "Brown University",
        "University of North Carolina at Chapel Hill",
    ],
    'Company': [
        "Google",
        "IBM",
        "Microsoft",
        "Amazon",
        "Apple",
        "Intel",
        "Facebook",
        "Oracle",
        "Hewlett Packard Enterprise",
        "Cisco Systems",
        "Dell Technologies",
        "Johnson & Johnson",
        "Bristol-Myers Squibb",
        "Pfizer",
        "Merck & Co.",
        "Amgen",
        "Gilead Sciences",
        "Abbott Laboratories",
        "Thermo Fisher Scientific",
        "Regeneron Pharmaceuticals",
    ],
}

# Fixed seed so that Restart & Run All regenerates the same sample data.
AFFILIATION_SEED = 42
rng = np.random.default_rng(AFFILIATION_SEED)
affiliation_types = list(affiliation)

# Draw once per distinct author name, then map the result back onto the rows.
# Rows that share a name still share an affiliation, but the frame is no
# longer re-scanned with a boolean mask for every single row.
type_by_author = {}
affiliation_by_author = {}
for author_name in df['author'].dropna().unique():
    aff_type = str(rng.choice(affiliation_types))
    type_by_author[author_name] = aff_type
    # The affiliation is drawn from the list that matches the chosen type.
    affiliation_by_author[author_name] = str(rng.choice(affiliation[aff_type]))

# Names missing from the mapping (null names) are left as NaN by Series.map.
df['affiliation_type'] = df['author'].map(type_by_author)
df['affiliation'] = df['author'].map(affiliation_by_author)



We export the affiliations and their types.

In [9]:
# Write the generated affiliations to the shared test-data folder. No `index`
# argument is passed, so pandas' default of also writing the index applies.
affiliations_csv_path = '../../../testData/sampleAffiliations.csv'
df.to_csv(affiliations_csv_path)