# PART B

In [1]:
pip install neo4j

Collecting neo4j
  Using cached neo4j-5.18.0.tar.gz (198 kB)
  Installing build dependencies ... [?25ldone
[?25h  Getting requirements to build wheel ... [?25ldone
[?25h  Installing backend dependencies ... [?25ldone
[?25h  Preparing metadata (pyproject.toml) ... [?25ldone
Building wheels for collected packages: neo4j
  Building wheel for neo4j (pyproject.toml) ... [?25ldone
[?25h  Created wheel for neo4j: filename=neo4j-5.18.0-py3-none-any.whl size=273863 sha256=0081b75f79556db1c9aded6e12f52fbeb536372892fed8a20f30fed4d9bb0a2f
  Stored in directory: /Users/onur/Library/Caches/pip/wheels/e7/e1/a0/dd7c19192f5383ff57d02a6c126cbfe4b7b2ae82f70c6994ce
Successfully built neo4j
Installing collected packages: neo4j
Successfully installed neo4j-5.18.0

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.3.2[0m[39;49m -> [0m[32;49m24.0[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip3 install --upg

In [4]:
import os

from neo4j import GraphDatabase

# Connection settings are read from the environment so that real credentials do
# not have to be stored in the notebook. The fallbacks are the values this
# notebook originally hardcoded for a local instance.
URI = os.environ.get("NEO4J_URI", "neo4j://localhost")
AUTH = (
    os.environ.get("NEO4J_USER", "neo4j"),
    os.environ.get("NEO4J_PASSWORD", "12345678"),
)

# A single driver object is shared by the query functions in this notebook.
# verify_connectivity() is called up front so that connection problems surface
# here rather than inside the first query.
driver = GraphDatabase.driver(URI, auth=AUTH)
driver.verify_connectivity()

### 1. Find the top 3 most cited papers of each conference.

In [5]:
def find_top_cited_papers():
    """Return one formatted line per conference listing its most cited papers.

    The Cypher query counts the incoming CITES relationships of every paper
    that is PRESENTED_IN a conference, orders the rows by conference name and
    then by citation count (highest first), and keeps at most the first three
    papers collected for each conference name.

    Returns:
        list[str]: Strings of the form ``"<conference name>: <list of titles>"``.
    """
    # The query text is kept exactly as sent to the server (including its
    # indentation), hence the deeper-than-usual closing quotes.
    query = """
        MATCH (p:paper)-[:PRESENTED_IN]->(c:conference)
        OPTIONAL MATCH (p)<-[:CITES]-(citingPaper)
        WITH p, c, COUNT(citingPaper) as citations
        ORDER BY c.name, citations DESC
        WITH c.name as conferenceName, COLLECT(p)[0..3] as topCitedPapers
        RETURN conferenceName, [paper IN topCitedPapers | paper.title] AS paperTitles
        """
    lines = []
    with driver.session() as session:
        # Records are consumed while the session is still open.
        for record in session.run(query):
            conference = record["conferenceName"]
            titles = record["paperTitles"]
            lines.append(conference + ": " + str(titles))
    return lines

# Run the query once and print the formatted "<conference>: <titles>" lines.
top_cited_papers = find_top_cited_papers()
for paper in top_cited_papers:
    # Despite the name, each `paper` is the formatted line for one conference.
    print(paper)

ACM-SIAM Symposium on Discrete Algorithms: ['A Simple Algorithm for the Graph Minor Decomposition - Logic meets Structural Graph Theory']
Annual Conference on Genetic and Evolutionary Computation: ['Evaluation of a Tree-based Pipeline Optimization Tool for Automating Data Science']
Asia-Pacific Software Engineering Conference: ['A Map of Threats to Validity of Systematic Literature Reviews in Software Engineering', 'Eye-Tracking Metrics in Software Engineering']
Conference on Software Engineering Education and Training: ['Software Engineering Education: Converging with the Startup Industry']
EUROMICRO Conference on Software Engineering and Advanced Applications: ['Software Engineering Challenges of Deep Learning']
European Workshop on Probabilistic Graphical Models: ['Graph Theory']
Frontiers in Education Conference: ['Smart Blockchain Badges for Data Science Education']
Human Language Technology - The Baltic Perspectiv: ['WordNet: A Lexical Database for English']
IEEE Aerospace Confer

### 2. For each conference, find its community: i.e., those authors that have published papers in that conference in at least 4 different editions.

In [6]:
def find_conference_communities():
    """Return, per conference name, the authors who form its community.

    For every (author name, conference name) pair the Cypher query counts the
    distinct ``edition`` values of the conference nodes in which the author's
    papers were presented, keeps the pairs with at least 4 distinct editions,
    and collects the author names per conference name.

    Returns:
        list[tuple]: ``(conferenceName, communityAuthors)`` pairs ordered by
        conference name, where ``communityAuthors`` is the collected list of
        author names.
    """
    # The query text is kept exactly as sent to the server (including its
    # indentation), hence the deeper-than-usual closing quotes.
    query = """
        MATCH (a:author)-[:WRITES]->(:paper)-[:PRESENTED_IN]->(c:conference)
        WITH a.name AS authorName, c.name AS conferenceName, COUNT(DISTINCT c.edition) AS editionCount
        WHERE editionCount >= 4
        RETURN conferenceName, COLLECT(authorName) AS communityAuthors
        ORDER BY conferenceName
        """
    communities = []
    with driver.session() as session:
        # Records are consumed while the session is still open.
        for record in session.run(query):
            communities.append((record["conferenceName"], record["communityAuthors"]))
    return communities

# Run the query once and print each conference name with its list of community authors.
conference_communities = find_conference_communities()
for conferenceName, communityAuthors in conference_communities:
    print(f"{conferenceName}: {communityAuthors}")

IEEE International Conference on Computer Vision: ['J. Gauglitz', 'A. Jarmusch', 'Madeleine Ernst', 'T. Kosciólek', 'Massoud Maher', 'A. Melnik']
IEEE Working Conference on Mining Software Repositories: ['T. Kosciólek', 'A. Jarmusch', 'Massoud Maher', 'J. Gauglitz']
International Conference on Human Factors in Computing Systems: ['A. Jarmusch', 'T. Kosciólek', 'A. Melnik', 'Madeleine Ernst', 'Massoud Maher', 'J. Gauglitz']
International Symposium on Intelligent Data Analysis: ['Madeleine Ernst', 'J. Gauglitz', 'A. Melnik', 'T. Kosciólek', 'A. Jarmusch', 'Massoud Maher']


### 3. Find the impact factors of the journals in your graph (see https://en.wikipedia.org/wiki/Impact_factor for the definition of the impact factor).

 The impact factor of a journal is calculated by dividing the number of citations in the current year to articles published in the two previous years by the total number of articles published in the two previous years.

I have calculated it for the year 2022, since that year gives better results.

In [7]:
def calculate_journal_impact_factors(current_year=2022):
    """Compute the impact factor of every journal for ``current_year``.

    The impact factor is the number of citations made in ``current_year`` to
    papers the journal published in the two preceding years, divided by the
    number of papers the journal published in those two years.

    Args:
        current_year (int): Year for which the impact factor is computed.
            Defaults to 2022, the year this notebook originally hardcoded.

    Returns:
        list[tuple]: ``(journalName, impactFactor)`` pairs ordered by impact
        factor, highest first. Journals with no papers in the two-year window
        are not returned.
    """
    query = """
        MATCH (j:journal)<-[:PUBLISHED_IN]-(p:paper)
        WHERE p.year IN [$currentYear - 1, $currentYear - 2]
        OPTIONAL MATCH (p)<-[:CITES]-(citingPaper:paper)
        WHERE citingPaper.year = $currentYear
        // The OPTIONAL MATCH yields one row per (paper, citing paper) pair, so
        // papers must be counted with DISTINCT; otherwise a paper cited k
        // times is counted k times and inflates the denominator.
        WITH j,
             COUNT(citingPaper) AS citations,
             COUNT(DISTINCT p) AS articlesPublished
        RETURN j.name AS journalName,
               citations,
               articlesPublished,
               citations * 1.0 / articlesPublished AS impactFactor
        ORDER BY impactFactor DESC
    """
    with driver.session() as session:
        # The year is passed as a query parameter rather than interpolated
        # into the query text.
        result = session.run(query, currentYear=current_year)
        # Consume the records while the session is still open.
        return [(record["journalName"], record["impactFactor"]) for record in result]

# Run the query once and print one line per journal with its impact factor.
impact_factors = calculate_journal_impact_factors()
for journal, impact_factor in impact_factors:
    print(f"Journal: {journal}, Impact Factor: {impact_factor}")

Journal: Journal of Open Source Software, Impact Factor: 1.0
Journal: IEEE Transactions on Knowledge and Data Engineering, Impact Factor: 0.07692307692307693
Journal: Journal of Big Data, Impact Factor: 0.07692307692307693
Journal: Journal of Epidemiology and Community Health, Impact Factor: 0.07692307692307693
Journal: Journal of Statistics and Data Science Education, Impact Factor: 0.07692307692307693
Journal: Nature Biotechnology, Impact Factor: 0.0
Journal: SN Computer Science, Impact Factor: 0.0
Journal: Genome Biology, Impact Factor: 0.0
Journal: Communications in Computer and Information Science, Impact Factor: 0.0
Journal: IEEE Transactions on Artificial Intelligence, Impact Factor: 0.0
Journal: Chemical Reviews, Impact Factor: 0.0
Journal: Journal of Geographical Systems, Impact Factor: 0.0
Journal: International Journal of Digital Earth, Impact Factor: 0.0
Journal: IEEE Transactions on Signal Processing, Impact Factor: 0.0
Journal: Scientific Data, Impact Factor: 0.0
Journal:

### 4. Find the h-indexes of the authors in your graph (see https://en.wikipedia.org/wiki/H-index for a definition of the h-index metric).

1. For each author, find all the papers they've written.
2. For each paper, count the number of times it has been cited.
3. Order the papers by the number of citations in descending order.
4. Find the highest rank where the paper's citation count is at least as high as the rank.

In [8]:
def calculate_authors_h_indices():
    """Compute the h-index of every author that has written at least one paper.

    For each author the citation counts of their papers are collected in
    descending order; the h-index is the number of positions ``i`` (1-based)
    whose citation count is at least ``i``. Authors whose papers have no
    citations are reported with an h-index of 0 instead of being dropped.

    Returns:
        list[tuple]: ``(authorName, hIndex)`` pairs ordered by h-index,
        highest first.
    """
    query = """
        MATCH (a:author)-[:WRITES]->(p:paper)
        OPTIONAL MATCH (p)<-[:CITES]-(citing:paper)
        WITH a, p, COUNT(citing) AS citations
        ORDER BY citations DESC
        WITH a, COLLECT(citations) AS citationCounts
        // The counts are in descending order, so once a position fails the
        // test every later position fails too; the number of passing
        // positions is therefore the h-index. Counting them (instead of
        // filtering rows and taking MAX) keeps authors whose h-index is 0.
        WITH a,
             SIZE([idx IN RANGE(0, SIZE(citationCounts) - 1)
                   WHERE citationCounts[idx] >= idx + 1]) AS hIndex
        // Rows are grouped by author name, as before.
        // NOTE(review): authors sharing a name collapse into one row; group
        // by the node instead if names are not unique in the graph.
        RETURN a.name AS authorName, MAX(hIndex) AS hIndex
        ORDER BY hIndex DESC
    """
    with driver.session() as session:
        result = session.run(query)
        # Consume the records while the session is still open.
        return [(record["authorName"], record["hIndex"]) for record in result]

# Run the query once and print one line per author with their h-index.
h_indices = calculate_authors_h_indices()
for author, h_index in h_indices:
    print(f"Author: {author}, H-index: {h_index}")

Author: Madeleine Ernst, H-index: 19
Author: T. Kosciólek, H-index: 18
Author: A. Melnik, H-index: 16
Author: A. Jarmusch, H-index: 16
Author: Massoud Maher, H-index: 16
Author: J. Gauglitz, H-index: 12
Author: C. Wohlin, H-index: 7
Author: A. Bateman, H-index: 5
Author: R. Finn, H-index: 5
Author: A. Marchler-Bauer, H-index: 5
Author: P. Runeson, H-index: 5
Author: Martin Höst, H-index: 5
Author: P. Brereton, H-index: 5
Author: Jaina Mistry, H-index: 4
Author: E. Sonnhammer, H-index: 4
Author: A. Torralba, H-index: 4
Author: T. Tatusova, H-index: 4
Author: N. Thanki, H-index: 4
Author: D. Budgen, H-index: 4
Author: B. Kitchenham, H-index: 4
Author: R. Lempicki, H-index: 4
Author: B. Sherman, H-index: 4
Author: N. Mulder, H-index: 4
Author: Samir Passi, H-index: 3
Author: Dachuan Zhang, H-index: 3
Author: R. Yamashita, H-index: 3
Author: M. Gwadz, H-index: 3
Author: David I. Hurwitz, H-index: 3
Author: Shennan Lu, H-index: 3
Author: Chanjuan Zheng, H-index: 3
Author: C. Lanczycki, H-in