In [1]:
from neo4j import GraphDatabase
import pandas as pd

pd.set_option('display.max_colwidth', None)
pd.set_option('display.max_rows', 50)

In [2]:
class Neo4jConnection:
    
    def __init__(self, uri, user, pwd):
        self.__uri = uri
        self.__user = user
        self.__pwd = pwd
        self.__driver = None
        try:
            self.__driver = GraphDatabase.driver(self.__uri, auth=(self.__user, self.__pwd))
        except Exception as e:
            print("Failed to create the driver:", e)
        
    def close(self):
        if self.__driver is not None:
            self.__driver.close()
        
    def query(self, query, db=None):
        assert self.__driver is not None, "Driver not initialized!"
        session = None
        response = None
        try: 
            session = self.__driver.session(database=db) if db is not None else self.__driver.session() 
            response = list(session.run(query))
        except Exception as e:
            print("Query failed:", e)
        finally: 
            if session is not None:
                session.close()
        return response

In [3]:
# URI examples: "neo4j://localhost", "neo4j+s://xxx.databases.neo4j.io"
URI = "neo4j://localhost:7687"
AUTH = ("neo4j", "12345678")

with GraphDatabase.driver(URI, auth=AUTH) as driver:
    driver.verify_connectivity()

conn = Neo4jConnection(uri=URI, user=AUTH[0], pwd=AUTH[1])

<h3>Find the top 3 most cited papers of each conference</h3>

In [4]:
# Optimization of the query above

query = ''' 
   MATCH (p1:Paper)-[:CITES]->(p2:Paper)-[:PRESENTED_IN]->(:Edition)--(c:Conference)
   WITH c, p2, count(*) as cites
   ORDER BY c.name, cites DESC
   WITH c, collect({title: p2.title, citations: cites}) as papers
   UNWIND papers[0..3] as paper
   RETURN c.id AS conference_id, paper.title AS paper_title, paper.citations AS num_citations
    '''

res = conn.query(query=query)
df = pd.DataFrame(res, columns=["Conference", "Paper Title", "Number of Citations"])
df

Unnamed: 0,Conference,Paper Title,Number of Citations
0,NeurIPS,The Ugly Truth About Ourselves and Our Robot Creations: The Problem of Bias and Social Inequity.,25
1,NeurIPS,Misconduct and Misbehavior Related to Authorship Disagreements in Collaborative Science.,21
2,NeurIPS,"Knowledge, Attitudes, and Practices of Plagiarism as Reported by Participants Completing the AuthorAID MOOC on Research Writing.",19
3,AAAI,"Leadership, Engineering and Ethical Clashes at Boeing.",24
4,AAAI,Using Insights from Applied Moral Psychology to Promote Ethical Behavior Among Engineering Students and Professional Engineers.,21
5,AAAI,How to Weigh Values in Value Sensitive Design: A Best Worst Method Approach for the Case of Smart Metering.,21
6,ACL,"Correction to: Mark Coeckelbergh, AI Ethics, Mit Press, 2021.",23
7,ACL,Startup Ethics: Ethically Responsible Conduct of Scientists and Engineers at Theranos.,21
8,ACL,Connecting Past with Present: A Mixed-Methods Science Ethics Course and its Evaluation.,21
9,ICML,Stochastic Modeling of Server Capacity Utilization by Geometric Sums,21


<h3>For each conference find its community: i.e., those authors that have published papers
on that conference in, at least, 4 different editions</h3>

In [13]:
query =''' MATCH (aut:Author)-[:WROTE]->(p:Paper)-[:PRESENTED_IN]->(e:Edition)--(c:Conference)
           WITH aut, c, count(e) as num_editions
           ORDER BY num_editions DESC
           WHERE num_editions >= 4
           RETURN c.id, aut.name AS author_name,  num_editions
'''
res = conn.query(query=query)
df = pd.DataFrame(res, columns=["Conference", "Author Name", "Number of Editions"])
df

Unnamed: 0,Conference,Author Name,Number of Editions
0,NeurIPS,Christoph Meinel,7
1,ACL,Christoph Meinel,6
2,ICML,Christoph Meinel,5
3,CVPR,Dieter Baum,5
4,NeurIPS,Christoph Beierle,5
5,AAAI,Christoph Meinel,4
6,AAAI,Carsten Damm,4


<h3>Find the impact factors of the journals in your graph (see https://en.wikipedia.org/wiki/Impact_factor, for the definition of the impact factor)</h3>

In [14]:
# Updated query
query =''' MATCH (j:Journal)
           UNWIND [1990, 1999, 2000, 2001, 2002, 2003, 2004, 2018] as year
           WITH j, year

           MATCH (p1:Paper)-[cit:CITES]->(p2:Paper)-[:BELONGS_TO]->(v:Volume)-[pub:PUBLISHED_IN]-(j)
           WHERE pub.year = year
           WITH j, year, count(cit) AS num_citations
           ORDER BY j, num_citations
           WITH j, year, num_citations

           MATCH (p3:Paper)-[:BELONGS_TO]->(v:Volume)-[pub1:PUBLISHED_IN]-(j)
           WHERE pub1.year > year - 3 AND pub1.year < year
           WITH j, year, num_citations, count(p3) AS num_publications
           RETURN j.name, j.id, year, num_citations, num_publications, toFloat(num_citations)/num_publications
'''
res = conn.query(query=query)
df = pd.DataFrame(res, columns=["Journal", "ID", "Year", "Number Citations", "Number Publications", "Impact Factor"])
df

Unnamed: 0,Journal,ID,Year,Number Citations,Number Publications,Impact Factor
0,GTE Laboratories Incorporated,59943403,1990,3,2,1.5
1,IWBS Report,25355511,1990,223,34,6.558824
2,"Universität Trier, Mathematik/Informatik, Forschungsbericht",94510548,2003,59,42,1.404762
3,"Universität Trier, Mathematik/Informatik, Forschungsbericht",94510548,2004,64,29,2.206897
4,"Universität Trier, Mathematik/Informatik, Forschungsbericht",94510548,2000,128,59,2.169492
5,"Universität Trier, Mathematik/Informatik, Forschungsbericht",94510548,2001,197,42,4.690476
6,"Universität Trier, Mathematik/Informatik, Forschungsbericht",94510548,2002,239,32,7.46875
7,"Universität Trier, Mathematik/Informatik, Forschungsbericht",94510548,1999,319,57,5.596491
8,Sci. Eng. Ethics,98613495,2018,534,79,6.759494


<h3>Find the h-indexes of the authors in your graph (see https://en.wikipedia.org/wiki/H-index, for a definition of the h-index metric)</h3>

In [15]:
query =''' MATCH (a:Author)-[:WROTE]->(p1:Paper)<-[:CITES]-(p2:Paper)
           WITH a, p1, count(*) as num_citations 
           ORDER BY num_citations DESC
           WITH a, collect(num_citations) as list_num_citations
           WITH a, list_num_citations, [x in range(0, size(list_num_citations)-1) WHERE x < list_num_citations[x] | x+1] AS res
           ORDER BY res[-1] DESC
           RETURN a.name, res[-1]
'''
res = conn.query(query=query)
df = pd.DataFrame(res, columns=["Author Name", "H-index"])
df

Unnamed: 0,Author Name,H-index
0,Christoph Meinel,12
1,Dieter Baum,10
2,Alexander Kaplan,9
3,Rainer Tichatschke,9
4,Anna Slobodová,9
...,...,...
1272,C. J. Date 0001,1
1273,Jan-J. Rückmann,1
1274,Vera Watson,1
1275,Carola Eschenbach,1


In [16]:
conn.close()